From 1632dc0d4ec9c917a44ce02965d83a4d6f948f37 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 15 Nov 2023 18:38:00 +0100 Subject: [PATCH 01/24] First version of ACD --- Makefile | 5 +- src/acd/ac_decomposition.hpp | 1500 ++++++++++++++++++++++++++++++++ src/acd/ac_wrapper.cpp | 69 ++ src/acd/ac_wrapper.h | 23 + src/acd/kitty_algorithm.hpp | 119 +++ src/acd/kitty_constants.hpp | 91 ++ src/acd/kitty_constructors.hpp | 92 ++ src/acd/kitty_dynamic_tt.hpp | 147 ++++ src/acd/kitty_operations.hpp | 333 +++++++ src/acd/kitty_operators.hpp | 86 ++ src/acd/kitty_static_tt.hpp | 131 +++ src/acd/module.make | 1 + src/base/abci/abc.c | 14 +- src/base/abci/abcIf.c | 162 +++- src/map/if/if.h | 11 +- src/map/if/ifCore.c | 18 + src/map/if/ifCut.c | 6 +- src/map/if/ifDelay.c | 126 +++ src/map/if/ifMap.c | 47 +- src/map/if/ifTime.c | 6 + 20 files changed, 2918 insertions(+), 69 deletions(-) create mode 100644 src/acd/ac_decomposition.hpp create mode 100644 src/acd/ac_wrapper.cpp create mode 100644 src/acd/ac_wrapper.h create mode 100644 src/acd/kitty_algorithm.hpp create mode 100644 src/acd/kitty_constants.hpp create mode 100644 src/acd/kitty_constructors.hpp create mode 100644 src/acd/kitty_dynamic_tt.hpp create mode 100644 src/acd/kitty_operations.hpp create mode 100644 src/acd/kitty_operators.hpp create mode 100644 src/acd/kitty_static_tt.hpp create mode 100644 src/acd/module.make diff --git a/Makefile b/Makefile index 3976cf7b1..d770c3faf 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ OS := $(shell uname -s) MODULES := \ $(wildcard src/ext*) \ + src/acd \ src/base/abc src/base/abci src/base/cmd src/base/io src/base/main src/base/exor \ src/base/ver src/base/wlc src/base/wln src/base/acb src/base/bac src/base/cba src/base/pla src/base/test \ src/map/mapper src/map/mio src/map/super src/map/if \ @@ -56,7 +57,7 @@ ARCHFLAGS := $(ARCHFLAGS) OPTFLAGS ?= -g -O -CFLAGS += -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) +CFLAGS += -std=c17 -Wall 
-Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) ifneq ($(findstring arm,$(shell uname -m)),) CFLAGS += -DABC_MEMALIGN=4 endif @@ -151,7 +152,7 @@ ifdef ABC_USE_LIBSTDCXX endif $(info $(MSG_PREFIX)Using CFLAGS=$(CFLAGS)) -CXXFLAGS += $(CFLAGS) +CXXFLAGS += $(CFLAGS) -std=c++17 SRC := GARBAGE := core core.* *.stackdump ./tags $(PROG) arch_flags diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp new file mode 100644 index 000000000..4f94bcba4 --- /dev/null +++ b/src/acd/ac_decomposition.hpp @@ -0,0 +1,1500 @@ +/* mockturtle: C++ logic network library + * Copyright (C) 2018-2023 EPFL + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*! 
+ \file ac_decomposition.hpp + \brief Ashenhurst-Curtis decomposition + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD_H_ +#define _ACD_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_static_tt.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" + +namespace mockturtle +{ + +/*! \brief Parameters for ac_decomposition */ +struct ac_decomposition_params +{ + /*! \brief LUT size for decomposition. */ + uint32_t lut_size{ 6 }; + + /*! \brief Maximum number of iterations for covering. */ + uint32_t max_iter{ 5000 }; +}; + +/*! \brief Statistics for ac_decomposition */ +struct ac_decomposition_stats +{ + uint32_t num_luts{ 0 }; + uint32_t num_edges{ 0 }; + uint32_t num_levels{ 0 }; +}; + +struct ac_decomposition_result +{ + kitty::dynamic_truth_table tt; + std::vector support; +}; + +template +class ac_decomposition_impl +{ +private: + struct encoding_matrix + { + uint64_t column{ 0 }; + uint32_t cost{ 0 }; + uint32_t index{ 0 }; + uint32_t sort_cost{ 0 }; + }; + +private: + static constexpr uint32_t max_num_vars = 8; + using STT = kitty::static_truth_table; + +public: + explicit ac_decomposition_impl( TT const& tt, uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr ) + : num_vars( num_vars ), ps( ps ), pst( pst ), permutations( num_vars ) + { + tt_start = tt; + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! 
\brief Runs ACD using late arriving variables */ + int run( unsigned delay_profile ) + { + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars ) + { + return -1; + } + + uint32_t late_arriving = __builtin_popcount( delay_profile ); + + /* return a high cost if too many late arriving variables */ + if ( late_arriving > ps.lut_size / 2 || late_arriving > 3 ) + { + return -1; + } + + /* convert to static TT */ + best_tt = kitty::extend_to( tt_start ); + best_multiplicity = UINT32_MAX; + uint32_t best_cost = UINT32_MAX; + + /* permute late arriving variables to be the least significant */ + reposition_late_arriving_variables( delay_profile, late_arriving ); + + /* run ACD trying different bound sets and free sets */ + uint32_t free_set_size = late_arriving; + uint32_t offset = std::max( static_cast( late_arriving ), 1u ); + for ( uint32_t i = offset; i <= ps.lut_size / 2 && i <= 3; ++i ) + { + auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn, false ); + + /* add cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; + /* check for feasible solution that improves the cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = cost; + best_cost = cost + additional_cost; + free_set_size = i; + } + } + + if ( best_multiplicity == UINT32_MAX ) + return -1; + + /* compute isets */ + // std::vector isets = compute_isets( free_set_size ); + + // generate_support_minimization_encodings(); + // solve_min_support_exact( isets, free_set_size ); + + /* unfeasible decomposition */ + // if ( best_bound_sets.empty() ) + // { + // return -1; + // } + + pst->num_luts = ps.lut_size - free_set_size; + best_free_set = free_set_size; + + /* TODO generate decomposition only when returning the result */ + // dec_result = generate_decomposition( free_set_size ); + + /* TODO: change return value */ + return 0; + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + /* compute isets */ + std::vector isets = compute_isets( best_free_set ); + + generate_support_minimization_encodings(); + solve_min_support_exact( isets, best_free_set ); + + /* unfeasible decomposition */ + if ( best_bound_sets.empty() ) + { + return -1; + } + + return 0; + } + + unsigned get_profile() + { + unsigned profile = 0; + + if ( best_free_set > num_vars ) + return -1; + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + + return profile; + } + + std::vector get_result() + { + return dec_result; + } + + void get_decomposition( unsigned char *decompArray ) + { + if ( best_free_set > num_vars ) + return; + + dec_result = generate_decomposition( best_free_set ); + return get_decomposition_abc( decompArray ); + } + +private: + uint32_t column_multiplicity( STT tt, uint32_t free_set_size ) + { + uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; + uint32_t multiplicity = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + + /* supports up to 64 values of free set (256 for |FS| == 3)*/ + assert( free_set_size <= 3 ); + + /* extract iset functions */ + if ( free_set_size == 1 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 32; ++j ) + { + multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0x3 ); + *it >>= 2; + } + ++it; + } + } + else if ( free_set_size == 2 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 16; ++j ) + { + multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0xF ); + *it >>= 4; + } + ++it; + } + } + else /* free set size 3 */ + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 8; ++j ) + { + multiplicity_set[( *it >> 6 ) & 0x3] |= UINT64_C( 1 ) << ( *it & 0x3F ); + *it >>= 8; + } + ++it; + } + } + + multiplicity = __builtin_popcountl( multiplicity_set[0] ); + + if ( free_set_size == 3 ) + { + multiplicity += __builtin_popcountl( multiplicity_set[1] ); + multiplicity += __builtin_popcountl( multiplicity_set[2] ); + multiplicity += __builtin_popcountl( multiplicity_set[3] ); + } + + return multiplicity; + } + + template + std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn, bool verbose = false ) + { + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* special case */ + STT tt = best_tt; + if ( num_vars <= free_set_size || free_set_size == 0 ) + { + return { tt, permutations, UINT32_MAX }; + } + + /* select k */ + // free_set_size = std::min( free_set_size, num_vars - free_set_size ); + + /* init permutation array */ + std::array perm, best_perm; + std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); + best_perm = perm; + + /* TT with best cost */ + STT best = tt; + uint32_t best_cost = UINT32_MAX; + + /* enumerate combinations */ + if ( free_set_size == 1 ) + { + uint32_t cost = fn( tt ); + if ( 
cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t i = 1; i < num_vars; ++i ) + { + std::swap( perm[0], perm[i] ); + kitty::swap_inplace( tt, 0, i ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + } + else if ( free_set_size == 2 ) + { + for ( uint32_t i = 0; i < num_vars - 1; ++i ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t j = 2; j < num_vars - i; ++j ) + { + std::swap( perm[1], perm[j] ); + kitty::swap_inplace( tt, 1, j ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + + std::swap( perm[0], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, 0, num_vars - i - 1 ); + } + } + else if ( free_set_size == 3 ) + { + for ( uint32_t i = 0; i < num_vars - 2; ++i ) + { + for ( uint32_t j = i; j < num_vars - 2; ++j ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t k = 3; k < num_vars - j; ++k ) + { + std::swap( perm[2], perm[k] ); + kitty::swap_inplace( tt, 2, k ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + 
best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + + std::swap( perm[1], perm[num_vars - j - 1] ); + kitty::swap_inplace( tt, 1, num_vars - j - 1 ); + } + + std::swap( perm[0], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, 0, num_vars - i - 1 ); + } + } + + std::vector res_perm( num_vars ); + std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); + + return std::make_tuple( best, res_perm, best_cost ); + } + + template + std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn, bool verbose = false ) + { + STT tt = best_tt; + + /* TT with best cost */ + STT best_tt = tt; + uint32_t best_cost = UINT32_MAX; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* select k */ + free_set_size = std::min( free_set_size, num_vars - free_set_size ); + + /* special case */ + if ( num_vars <= free_set_size || free_set_size <= offset ) + { + if ( offset == free_set_size ) + { + best_cost = fn( tt ); + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << best_cost << " "; + print_perm( permutations.begin(), free_set_size ); + } + + return { tt, permutations, best_cost }; + } + else + { + return { tt, permutations, UINT32_MAX }; + } + } + + /* decrease combinations */ + free_set_size -= offset; + + /* init permutation array */ + std::array perm, best_perm; + std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); + best_perm = perm; + + /* enumerate combinations */ + if ( free_set_size == 1 ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( uint32_t i = offset + 1; i < num_vars; ++i ) + 
{ + std::swap( perm[offset], perm[i] ); + kitty::swap_inplace( tt, offset, i ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + } + else if ( free_set_size == 2 ) + { + for ( uint32_t i = 0; i < num_vars - 1 - offset; ++i ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( uint32_t j = offset + 2; j < num_vars - i; ++j ) + { + std::swap( perm[offset + 1], perm[j] ); + kitty::swap_inplace( tt, offset + 1, j ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + + std::swap( perm[offset], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, offset, num_vars - i - 1 ); + } + } + else if ( free_set_size == 3 ) + { + for ( uint32_t i = 0; i < num_vars - 2 - offset; ++i ) + { + for ( uint32_t j = i; j < num_vars - 2 - offset; ++j ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( uint32_t k = offset + 3; k < num_vars - j; ++k ) + { + std::swap( perm[offset + 2], perm[k] ); + kitty::swap_inplace( tt, offset + 2, k ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << 
cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + + std::swap( perm[offset + 1], perm[num_vars - j - 1] ); + kitty::swap_inplace( tt, offset + 1, num_vars - j - 1 ); + } + + std::swap( perm[offset], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, offset, num_vars - i - 1 ); + } + } + + std::vector res_perm( num_vars ); + std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); + + return std::make_tuple( best_tt, res_perm, best_cost ); + } + + std::vector compute_isets( uint32_t free_set_size, bool verbose = false ) + { + /* construct isets involved in multiplicity */ + uint32_t isets_support = num_vars - free_set_size; + std::vector isets( best_multiplicity ); + + /* construct isets */ + std::unordered_map column_to_iset; + STT tt = best_tt; + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + + if ( free_set_size == 1 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 32; ++j ) + { + uint64_t val = *it & 0x3; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 2; + } + + offset ^= 32; + ++it; + } + } + else if ( free_set_size == 2 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 16; ++j ) + { + uint64_t val = *it & 0xF; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 4; + } + + offset = ( offset + 16 ) % 64; + ++it; + } + } + else /* free set size 
3 */ + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 8; ++j ) + { + uint64_t val = *it & 0xFF; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 8] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 8] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 8; + } + + offset = ( offset + 8 ) % 64; + ++it; + } + } + + /* extend isets to cover the whole truth table */ + for ( STT& iset : isets ) + { + local_extend_to( iset, isets_support ); + } + + /* save free_set functions */ + std::vector free_set_tts( best_multiplicity ); + + /* TODO: possible conflict */ + for ( auto const& pair : column_to_iset ) + { + free_set_tts[pair.second]._bits[0] = pair.first; + local_extend_to( free_set_tts[pair.second], free_set_size ); + } + + /* print isets and free set*/ + if ( verbose ) + { + std::cout << "iSets\n"; + uint32_t i = 0; + for ( auto iset : isets ) + { + kitty::print_hex( iset ); + std::cout << " of func "; + kitty::print_hex( free_set_tts[i++] ); + std::cout << "\n"; + } + } + + best_free_set_tts = std::move( free_set_tts ); + + return isets; + } + + std::vector generate_decomposition( uint32_t free_set_size ) + { + std::vector res; + + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + ac_decomposition_result dec; + auto tt = best_bound_sets[i]; + auto care = best_care_sets[i]; + + /* compute and minimize support for bound set variables */ + uint32_t k = 0; + for ( uint32_t j = 0; j < num_vars - free_set_size; ++j ) + { + if ( !kitty::has_var( tt, j ) ) + continue; + + if ( !kitty::has_var( tt, care, j ) ) + { + /* fix truth table */ + adjust_truth_table_on_dc( tt, care, j ); + continue; + } + + if ( k < j ) + { + kitty::swap_inplace( tt, k, j ); + kitty::swap_inplace( care, k, j ); + } + dec.support.push_back( permutations[free_set_size + j] ); + ++k; + } + 
+ dec.tt = kitty::shrink_to( tt, dec.support.size() ); + res.push_back( dec ); + } + + /* compute the decomposition for the top-level LUT */ + compute_top_lut_decomposition( res, free_set_size ); + + return res; + } + + void compute_top_lut_decomposition( std::vector& res, uint32_t free_set_size ) + { + uint32_t top_vars = best_bound_sets.size() + free_set_size; + assert( top_vars <= ps.lut_size ); + + /* extend bound set functions with free_set_size LSB vars */ + kitty::dynamic_truth_table tt( top_vars ); + + /* compute support */ + res.emplace_back(); + for ( uint32_t i = 0; i < free_set_size; ++i ) + { + res.back().support.push_back( permutations[i] ); + } + + /* create functions for bound set */ + std::vector bound_set_vars; + auto res_it = res.begin(); + uint32_t offset = 0; + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + bound_set_vars.emplace_back( top_vars ); + kitty::create_nth_var( bound_set_vars[i], free_set_size + i ); + + /* add bound-set variables to the support, remove buffers */ + if ( res_it->support.size() == 1 ) + { + res.back().support.push_back( res_it->support.front() ); + /* it is a NOT */ + if ( ( res_it->tt._bits[0] & 1 ) == 1 ) + { + bound_set_vars[i] = ~bound_set_vars[i]; + } + res.erase( res_it ); + ++offset; + } + else + { + res.back().support.push_back( num_vars + i - offset ); + ++res_it; + } + } + + /* create composition function */ + for ( uint32_t i = 0; i < best_free_set_tts.size(); ++i ) + { + kitty::dynamic_truth_table free_set_tt = kitty::shrink_to( best_free_set_tts[i], top_vars ); + + /* find MUX assignments */ + for ( uint32_t j = 0; j < bound_set_vars.size(); ++j ) + { + /* AND with ONSET or OFFSET */ + if ( ( ( best_iset_onset[j] >> i ) & 1 ) ) + { + free_set_tt &= bound_set_vars[j]; + } + else if ( ( ( best_iset_offset[j] >> i ) & 1 ) ) + { + free_set_tt &= ~bound_set_vars[j]; + } + } + + tt |= free_set_tt; + } + + /* add top-level LUT to result */ + res.back().tt = tt; + } + + inline void 
reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + kitty::swap_inplace( best_tt, i, k ); + ++k; + } + } + + template + void print_perm( Iterator begin, uint32_t free_set ) + { + std::cout << "["; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + if ( i == free_set ) + { + std::cout << ", "; + } + std::cout << *begin << " "; + ++begin; + } + std::cout << "]\n"; + } + + void generate_support_minimization_encodings() + { + uint32_t count = 0; + uint32_t num_combs_exact[4] = { 2, 6, 70, 12870 }; + + /* enable don't cares only if not a power of 2 */ + uint32_t num_combs = 3; + if ( __builtin_popcount( best_multiplicity ) == 1 ) + { + for ( uint32_t i = 0; i < 4; ++i ) + { + if ( ( best_multiplicity >> i ) == 2u ) + { + num_combs = num_combs_exact[i]; + } + } + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + else + { + for ( uint32_t i = 1; i < best_multiplicity; ++i ) + { + num_combs = ( num_combs << 1 ) + num_combs; + } + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + + assert( count == num_combs ); + + /* print combinations */ + // std::cout << "{ "; + // for ( auto const& entry : support_minimization_encodings ) + // { + // std::cout << "{ " << entry[0] << ", " << entry[1] << " }, "; + // } + // std::cout << "}\n"; + } + + template + void generate_support_minimization_encodings_rec( uint64_t onset, uint64_t offset, uint32_t var, uint32_t& count ) + { + if ( var == best_multiplicity ) + { + if constexpr ( !enable_dcset ) + { + /* sets must be equally populated */ + if ( __builtin_popcountl( onset ) != __builtin_popcountl( offset ) ) 
+ { + return; + } + } + + support_minimization_encodings[count][0] = onset; + support_minimization_encodings[count][1] = offset; + ++count; + return; + } + + /* move var in DCSET */ + if constexpr ( enable_dcset ) + { + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + } + + /* move var in ONSET */ + onset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + onset &= ~( 1 << var ); + + /* move var in OFFSET */ + offset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + offset &= ~( 1 << var ); + } + + void solve_min_support_exact( std::vector const& isets, uint32_t free_set_size ) + { + std::vector matrix; + matrix.reserve( support_minimization_encodings.size() ); + best_bound_sets.clear(); + + /* create covering matrix */ + if ( !create_covering_matrix( isets, matrix, free_set_size, best_multiplicity > 4 ) ) + { + return; + } + + /* solve the covering problem */ + std::array solution = covering_solve_exact( matrix, 100, ps.max_iter ); + + /* check for failed decomposition */ + if ( solution[0] == UINT32_MAX ) + { + return; + } + + /* compute best bound sets */ + uint32_t num_luts = 1 + solution[4]; + uint32_t num_levels = 2; + uint32_t num_edges = free_set_size + solution[4]; + uint32_t isets_support = num_vars - free_set_size; + best_care_sets.clear(); + best_iset_onset.clear(); + best_iset_offset.clear(); + for ( uint32_t i = 0; i < solution[4]; ++i ) + { + STT tt; + STT care; + + const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0]; + const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1]; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + if ( ( ( onset >> j ) & 1 ) ) + { + tt |= isets[j]; + } + if ( ( ( offset >> j ) & 1 ) ) + { + care |= isets[j]; + } + } + + care |= tt; + num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 ); + + best_bound_sets.push_back( tt 
); + best_care_sets.push_back( care ); + best_iset_onset.push_back( onset ); + best_iset_offset.push_back( offset ); + } + + if ( pst != nullptr ) + { + pst->num_luts = num_luts; + pst->num_levels = num_levels; + pst->num_edges = num_edges; + } + } + + bool create_covering_matrix( std::vector const& isets, std::vector& matrix, uint32_t free_set_size, bool sort ) + { + assert( best_multiplicity < 12 ); + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + uint64_t sol_existance = 0; + uint32_t iset_support = num_vars - free_set_size; + + /* insert dichotomies */ + for ( uint32_t i = 0; i < support_minimization_encodings.size(); ++i ) + { + uint32_t const onset = support_minimization_encodings[i][0]; + uint32_t const offset = support_minimization_encodings[i][1]; + + uint32_t ones_onset = __builtin_popcount( onset ); + uint32_t ones_offset = __builtin_popcount( offset ); + + /* filter columns that do not distinguish pairs */ + if ( ones_onset == 0 || ones_offset == 0 || ones_onset == best_multiplicity || ones_offset == best_multiplicity ) + { + continue; + } + + /* compute function and distinguishable seed dichotomies */ + uint64_t column = 0; + STT tt; + STT care; + uint32_t pair_pointer = 0; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + auto onset_shift = ( onset >> j ); + auto offset_shift = ( offset >> j ); + if ( ( onset_shift & 1 ) ) + { + tt |= isets[j]; + } + + if ( ( offset_shift & 1 ) ) + { + care |= isets[j]; + } + + /* compute included seed dichotomies */ + for ( uint32_t k = j + 1; k < best_multiplicity; ++k ) + { + /* if is are in diffent sets */ + if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) ) + { + column |= UINT64_C( 1 ) << ( pair_pointer ); + } + + ++pair_pointer; + } + } + + care |= tt; + + /* compute cost */ + uint32_t cost = 0; + for ( uint32_t j = 0; j < iset_support; ++j ) + { + cost += has_var_support( tt, care, iset_support, j ) ? 
1 : 0; + } + + /* discard solutions with support over LUT size */ + if ( cost > ps.lut_size ) + continue; + + if ( cost > 1 ) + { + cost |= 1 << iset_support; + } + + uint32_t sort_cost = cost + ( ( combinations - __builtin_popcountl( column ) ) << num_vars ); + + /* insert */ + matrix.emplace_back( encoding_matrix{ column, cost, i, sort_cost } ); + + sol_existance |= column; + } + + /* necessary condition for the existance of a solution */ + if ( __builtin_popcountl( sol_existance ) != combinations ) + { + return false; + } + + if ( !sort ) + { + return true; + } + + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.sort_cost < b.sort_cost; + } ); + + /* print */ + // if ( best_multiplicity < 6 ) + // { + // for ( uint32_t i = 0; i < columns.size(); ++i ) + // { + // std::cout << indexes[i] << " " << costs[i] << " \t" << columns[i] << "\n"; + // } + // } + + return true; + } + + template + std::array covering_solve_exact( std::vector& matrix, uint32_t max_iter = 100, int32_t limit = 2000 ) + { + /* last value of res contains the size of the bound set */ + std::array res = { UINT32_MAX }; + uint32_t best_cost = UINT32_MAX; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + bool looping = true; + + assert( best_multiplicity <= 16 ); + + /* determine the number of needed loops*/ + if ( best_multiplicity <= 4 ) + { + res[4] = 2; + for ( uint32_t i = 0; i < matrix.size() - 1; ++i ) + { + for ( uint32_t j = 1; j < matrix.size(); ++j ) + { + /* filter by cost */ + if ( matrix[i].cost + matrix[j].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( matrix[i].column | matrix[j].column ) == combinations ) + { + res[0] = i; + res[1] = j; + best_cost = matrix[i].cost + matrix[j].cost; + } + } + } + } + else if ( best_multiplicity <= 8 ) + { + res[4] = 3; + for ( uint32_t i = 0; i < matrix.size() - 2 && looping; ++i ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if 
( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + for ( uint32_t j = 1; j < matrix.size() - 1 && looping; ++j ) + { + uint64_t current_columns = matrix[i].column | matrix[j].column; + uint32_t current_cost = matrix[i].cost + matrix[j].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + /* bound */ + if ( current_cost >= best_cost ) + { + continue; + } + + for ( uint32_t k = 2; k < matrix.size() && looping; ++k ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + { + looping = false; + } + } + + /* filter by cost */ + if ( current_cost + matrix[k].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( current_columns | matrix[k].column ) == combinations ) + { + res[0] = i; + res[1] = j; + res[2] = k; + best_cost = current_cost + matrix[k].cost; + } + } + } + } + } + else + { + res[4] = 4; + for ( uint32_t i = 0; i < matrix.size() - 3 && looping; ++i ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + for ( uint32_t j = 1; j < matrix.size() - 2 && looping; ++j ) + { + uint64_t current_columns0 = matrix[i].column | matrix[j].column; + uint32_t current_cost0 = matrix[i].cost + matrix[j].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + /* bound */ + if ( current_cost0 >= best_cost ) + { + continue; + } + + for ( uint32_t k = 2; k < matrix.size() - 1 && looping; ++k ) + { + uint64_t current_columns1 = current_columns0 | matrix[k].column; + uint32_t current_cost1 = current_cost0 + matrix[k].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) 
+ { + looping = false; + } + } + + /* bound */ + if ( current_cost1 >= best_cost ) + { + continue; + } + + for ( uint32_t t = 3; t < matrix.size() && looping; ++t ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + { + looping = false; + } + } + + /* filter by cost */ + if ( current_cost1 + matrix[t].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( current_columns1 | matrix[t].column ) == combinations ) + { + res[0] = i; + res[1] = j; + res[2] = k; + res[3] = t; + best_cost = current_cost1 + matrix[t].cost; + } + } + } + } + } + } + + return res; + } + /* Overwrites both cofactors of tt w.r.t. var_index with the OR of the care-masked cofactors, and both cofactors of care with the OR of the care cofactors. */ + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) + { + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + uint64_t new_bits = *it_tt & *it_care; + *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + *it_care = ( ( *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | ( ( *it_care | ( *it_care << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); /* masked like the tt update above so that no care bit of an unrelated minterm leaks into the upper cofactor; matches the multi-block path below */ + + ++it_tt; + ++it_care; + } + return; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); + tt._bits[i + j + step] = tt._bits[i + j]; + care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; + care._bits[i + j + step] = care._bits[i + j]; + } + } + } + + void local_extend_to( STT& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i =
real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] + & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + void get_decomposition_abc( unsigned char *decompArray ) + { + unsigned char *pArray = decompArray; + unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray = dec_result.size(); + pArray++; + + /* write LUTs */ + for ( ac_decomposition_result const& lut : dec_result ) + { + /* write fanin size*/ + *pArray = lut.support.size(); + pArray++; ++bytes; + + /* write support */ + for ( uint32_t i : lut.support ) + { + *pArray = (unsigned char) i; + pArray++; ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( lut.tt.num_vars() <= 3 ) ? 
1 : ( 1 << ( lut.tt.num_vars() - 3 ) ); + tt_num_bytes = std::min( tt_num_bytes, 8u ); + for ( uint32_t i = 0; i < lut.tt.num_blocks(); ++i ) + { + for ( uint32_t j = 0; j < tt_num_bytes; ++j ) + { + *pArray = (unsigned char) ( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF ); + pArray++; ++bytes; + } + } + } + + /* write numBytes */ + *decompArray = bytes; + } + +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + STT best_tt; + std::vector best_bound_sets; + std::vector best_care_sets; + std::vector best_free_set_tts; + std::vector best_iset_onset; + std::vector best_iset_offset; + std::vector dec_result; + + std::vector> support_minimization_encodings; + + TT tt_start; + uint32_t num_vars; + ac_decomposition_params const& ps; + ac_decomposition_stats* pst; + std::vector permutations; +}; + +} // namespace mockturtle + +#endif // _ACD_H_ \ No newline at end of file diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp new file mode 100644 index 000000000..b7cee0dd7 --- /dev/null +++ b/src/acd/ac_wrapper.cpp @@ -0,0 +1,69 @@ +// #include "base/main/main.h" +#include "ac_wrapper.h" +#include "ac_decomposition.hpp" + +// ABC_NAMESPACE_IMPL_START + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ) +{ + using namespace mockturtle; + + int num_blocks = ( nVars <= 6 ) ? 
1 : ( 1 << ( nVars - 6 ) ); + + /* copy the input words into a kitty dynamic truth table */ + kitty::dynamic_truth_table tt( nVars ); + for ( int i = 0; i < num_blocks; ++i ) + tt._bits[i] = pTruth[i]; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ac_decomposition_stats st; + + ac_decomposition_impl acd( tt, nVars, ps, &st ); + acd.run( *pdelay ); + int val = acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + *cost = st.num_luts; + + return 0; +} +/* Decomposes the nVars-input function in pTruth for LUT size lutSize. Returns -1 and sets *pdelay to 0 if compute_decomposition() reports failure; otherwise stores get_profile() in *pdelay, fills decomposition via get_decomposition(), and returns 0. */ +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) +{ + using namespace mockturtle; + + int num_blocks = ( nVars <= 6 ) ? 1 : ( 1 << ( nVars - 6 ) ); + + /* copy the input words into a kitty dynamic truth table */ + kitty::dynamic_truth_table tt( nVars ); + for ( int i = 0; i < num_blocks; ++i ) + tt._bits[i] = pTruth[i]; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ac_decomposition_stats st; + + ac_decomposition_impl acd( tt, nVars, ps, &st ); + acd.run( *pdelay ); + int val = acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; +} + +// ABC_NAMESPACE_IMPL_END \ No newline at end of file diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h new file mode 100644 index 000000000..522a60b86 --- /dev/null +++ b/src/acd/ac_wrapper.h @@ -0,0 +1,23 @@ +// #pragma once +#ifndef __ACD_WRAPPER_H_ +#define __ACD_WRAPPER_H_ + +// #include "base/main/main.h" +#include "misc/util/abc_global.h" + +// ABC_NAMESPACE_HEADER_START + +#ifdef __cplusplus +extern "C" { +#endif + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); + +#ifdef __cplusplus +} +#endif + +// ABC_NAMESPACE_HEADER_END + +#endif \ No newline at end of file diff
--git a/src/acd/kitty_algorithm.hpp b/src/acd/kitty_algorithm.hpp new file mode 100644 index 000000000..6460a802c --- /dev/null +++ b/src/acd/kitty_algorithm.hpp @@ -0,0 +1,119 @@ +#ifndef _KITTY_ALGORITHM_H_ +#define _KITTY_ALGORITHM_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Perform bitwise unary operation on truth table + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto unary_operation( const TT& tt, Fn&& op ) +{ + auto result = tt.construct(); + std::transform( tt.cbegin(), tt.cend(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Perform bitwise binary operation on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. + + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto binary_operation( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + auto result = first.construct(); + std::transform( first.cbegin(), first.cend(), second.cbegin(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Computes a predicate based on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. 
+ + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a Boolean + + \return true or false based on the predicate + */ +template +bool binary_predicate( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + return std::equal( first.begin(), first.end(), second.begin(), op ); +} + +/*! \brief Assign computed values to bits + + The functor `op` computes bits which are assigned to the bits of the + truth table. + + \param tt Truth table + \param op Nullary operation (takes no input) that returns a word (`uint64_t`) +*/ +template +void assign_operation( TT& tt, Fn&& op ) +{ + std::generate( tt.begin(), tt.end(), op ); + tt.mask_bits(); +} + +/*! \brief Iterates through each block of a truth table + + The functor `op` is called for every block of the truth table. + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block( const TT& tt, Fn&& op ) +{ + std::for_each( tt.cbegin(), tt.cend(), op ); +} + +/*! \brief Iterates through each block of a truth table in reverse + order + + The functor `op` is called for every block of the truth table in + reverse order.
+ + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block_reversed( const TT& tt, Fn&& op ) +{ + std::for_each( tt.crbegin(), tt.crend(), op ); +} + +} // namespace kitty + +#endif // _KITTY_ALGORITHM_H_ \ No newline at end of file diff --git a/src/acd/kitty_constants.hpp b/src/acd/kitty_constants.hpp new file mode 100644 index 000000000..55cfcd650 --- /dev/null +++ b/src/acd/kitty_constants.hpp @@ -0,0 +1,91 @@ +#ifndef _KITTY_CONSTANTS_H_ +#define _KITTY_CONSTANTS_H_ +#pragma once + +#include +#include + +namespace kitty +{ + +namespace detail +{ + +static constexpr uint64_t projections[] = { + UINT64_C( 0xaaaaaaaaaaaaaaaa ), + UINT64_C( 0xcccccccccccccccc ), + UINT64_C( 0xf0f0f0f0f0f0f0f0 ), + UINT64_C( 0xff00ff00ff00ff00 ), + UINT64_C( 0xffff0000ffff0000 ), + UINT64_C( 0xffffffff00000000 ) }; + +static constexpr uint64_t projections_neg[] = { + UINT64_C( 0x5555555555555555 ), + UINT64_C( 0x3333333333333333 ), + UINT64_C( 0x0f0f0f0f0f0f0f0f ), + UINT64_C( 0x00ff00ff00ff00ff ), + UINT64_C( 0x0000ffff0000ffff ), + UINT64_C( 0x00000000ffffffff ) }; + +static constexpr uint64_t masks[] = { + UINT64_C( 0x0000000000000001 ), + UINT64_C( 0x0000000000000003 ), + UINT64_C( 0x000000000000000f ), + UINT64_C( 0x00000000000000ff ), + UINT64_C( 0x000000000000ffff ), + UINT64_C( 0x00000000ffffffff ), + UINT64_C( 0xffffffffffffffff ) }; + +static constexpr uint64_t permutation_masks[][3] = { + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } 
}; + +static constexpr uint64_t ppermutation_masks[][6][3] = { + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xa5a5a5a5a5a5a5a5 ), UINT64_C( 0x0a0a0a0a0a0a0a0a ), UINT64_C( 0x5050505050505050 ) }, + { UINT64_C( 0xaa55aa55aa55aa55 ), UINT64_C( 0x00aa00aa00aa00aa ), UINT64_C( 0x5500550055005500 ) }, + { UINT64_C( 0xaaaa5555aaaa5555 ), UINT64_C( 0x0000aaaa0000aaaa ), UINT64_C( 0x5555000055550000 ) }, + { UINT64_C( 0xaaaaaaaa55555555 ), UINT64_C( 0x00000000aaaaaaaa ), UINT64_C( 0x5555555500000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xcc33cc33cc33cc33 ), UINT64_C( 0x00cc00cc00cc00cc ), UINT64_C( 0x3300330033003300 ) }, + { UINT64_C( 0xcccc3333cccc3333 ), UINT64_C( 0x0000cccc0000cccc ), UINT64_C( 0x3333000033330000 ) }, + { UINT64_C( 0xcccccccc33333333 ), UINT64_C( 0x00000000cccccccc ), UINT64_C( 0x3333333300000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 0xf0f00f0ff0f00f0f ), UINT64_C( 0x0000f0f00000f0f0 ), UINT64_C( 0x0f0f00000f0f0000 ) }, + { UINT64_C( 0xf0f0f0f00f0f0f0f ), UINT64_C( 0x00000000f0f0f0f0 ), UINT64_C( 0x0f0f0f0f00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 
0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xff00ff0000ff00ff ), UINT64_C( 0x00000000ff00ff00 ), UINT64_C( 0x00ff00ff00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } } }; + +static constexpr int32_t hex_to_int[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; + +} // namespace detail + +} // namespace kitty + +#endif //_KITTY_CONSTANTS_H_ \ No newline at end of file diff --git a/src/acd/kitty_constructors.hpp b/src/acd/kitty_constructors.hpp new file mode 100644 index 000000000..43408b8cc --- /dev/null +++ 
b/src/acd/kitty_constructors.hpp @@ -0,0 +1,92 @@ +#ifndef _KITTY_CONSTRUCT_TT_H_ +#define _KITTY_CONSTRUCT_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Creates truth table with number of variables + + If some truth table instance is given, one can create a truth table with the + same type by calling the `construct()` method on it. This function helps if + only the number of variables and the base type are known, and it unifies the + creation of static and dynamic truth tables. Note, however, that for static + truth tables `num_vars` must be consistent with the number of variables in the + truth table type. + + \param num_vars Number of variables +*/ +template +inline TT create( unsigned num_vars ) +{ + (void)num_vars; + TT tt; + assert( tt.num_vars() == num_vars ); + return tt; +} + +/*! \cond PRIVATE */ +template<> +inline dynamic_truth_table create( unsigned num_vars ) +{ + return dynamic_truth_table( num_vars ); +} +/*! \endcond */ + +/*! \brief Constructs projections (single-variable functions) + + \param tt Truth table + \param var_index Index of the variable, must be smaller than the truth table's number of variables + \param complement If true, realize inverse projection +*/ +template +void create_nth_var( TT& tt, uint8_t var_index, bool complement = false ) +{ + if ( tt.num_vars() <= 6 ) + { + /* assign from precomputed table */ + tt._bits[0] = complement ? ~detail::projections[var_index] : detail::projections[var_index]; + + /* mask if truth table does not require all bits */ + tt.mask_bits(); + return; + } + + if ( var_index < 6 ) + { + std::fill( std::begin( tt._bits ), std::end( tt._bits ), complement ?
~detail::projections[var_index] : detail::projections[var_index] ); + } + else + { + const auto c = 1 << ( var_index - 6 ); + const auto zero = uint64_t( 0 ); + const auto one = ~zero; + auto block = uint64_t( 0u ); + + while ( block < tt.num_blocks() ) + { + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? one : zero; + } + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? zero : one; + } + } + } +} + +} // namespace kitty + +#endif // _KITTY_CONSTRUCT_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_dynamic_tt.hpp b/src/acd/kitty_dynamic_tt.hpp new file mode 100644 index 000000000..f3ef0c7d9 --- /dev/null +++ b/src/acd/kitty_dynamic_tt.hpp @@ -0,0 +1,147 @@ +#ifndef _KITTY_DYNAMIC_TT_H_ +#define _KITTY_DYNAMIC_TT_H_ +#pragma once + +#include +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +/*! Truth table in which number of variables is known at runtime. + */ +struct dynamic_truth_table +{ + /*! Standard constructor. + + The number of variables provided to the truth table can be + computed at runtime. However, once the truth table is constructed + its number of variables cannot change anymore. + + The constructor computes the number of blocks and resizes the + vector accordingly. + + \param num_vars Number of variables + */ + explicit dynamic_truth_table( uint32_t num_vars ) + : _bits( ( num_vars <= 6 ) ? 1u : ( 1u << ( num_vars - 6 ) ) ), + _num_vars( num_vars ) + { + } + + /*! Empty constructor. + + Creates an empty truth table. It has 0 variables, but no bits, i.e., it is + different from a truth table for the constant function. This constructor is + only used for convenience, if algorithms require the existence of default + constructable classes. + */ + dynamic_truth_table() : _num_vars( 0 ) {} + + /*! Constructs a new dynamic truth table instance with the same number of variables. 
*/ + inline dynamic_truth_table construct() const + { + return dynamic_truth_table( _num_vars ); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return _num_vars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return _bits.size(); } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant reverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table. + + This replaces the current truth table with another truth table. The truth + table type has to be complete. The vector of bits is resized accordingly.
+ + \param other Other truth table + */ + template + dynamic_truth_table& operator=( const TT& other ) + { + _bits.resize( other.num_blocks() ); + std::copy( other.begin(), other.end(), begin() ); + _num_vars = other.num_vars(); + + if ( _num_vars < 6 ) + { + mask_bits(); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + If the truth table has less than 6 variables, it may not use all + the bits. This operation makes sure to zero out all non-valid + bits. + */ + inline void mask_bits() noexcept + { + if ( _num_vars < 6 ) + { + _bits[0u] &= detail::masks[_num_vars]; + } + } + + /*! \cond PRIVATE */ +public: /* fields */ + std::vector _bits; + uint32_t _num_vars; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_DYNAMIC_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_operations.hpp b/src/acd/kitty_operations.hpp new file mode 100644 index 000000000..fb504489a --- /dev/null +++ b/src/acd/kitty_operations.hpp @@ -0,0 +1,333 @@ +#ifndef _KITTY_OPERATIONS_TT_H_ +#define _KITTY_OPERATIONS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_algorithm.hpp" +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! Inverts all bits in a truth table, based on a condition */ +template +inline TT unary_not_if( const TT& tt, bool cond ) +{ +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4146 ) +#endif + const auto mask = -static_cast( cond ); +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + return unary_operation( tt, [mask]( auto a ) + { return a ^ mask; } ); +} + +/*! \brief Inverts all bits in a truth table */ +template +inline TT unary_not( const TT& tt ) +{ + return unary_operation( tt, []( auto a ) + { return ~a; } ); +} + +/*! 
\brief Bitwise AND of two truth tables */ +template + +inline TT binary_and( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_and<>() ); +} + +/*! \brief Bitwise OR of two truth tables */ +template +inline TT binary_or( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_or<>() ); +} + +/*! \brief Swaps two variables in a truth table + + The function swaps variable `var_index1` with `var_index2`. The + function will change `tt` in-place. If `tt` should not be changed, + one can use `swap` instead. + + \param tt Truth table + \param var_index1 First variable + \param var_index2 Second variable +*/ +template +void swap_inplace( TT& tt, uint8_t var_index1, uint8_t var_index2 ) +{ + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + if ( tt.num_vars() <= 6 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift ); + } + else if ( var_index2 <= 5 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::end( tt._bits ), std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) + { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & detail::projections[var_index1] ) >> shift; + const 
auto high_to_low = ( *( it + i + step ) << shift ) & detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } +} + +/*! \brief Extends smaller truth table to larger one + + The most significant variables will not be in the functional support of the + resulting truth table, but the method is helpful to align a truth table when + being used with another one. + + \param tt Larger truth table to create + \param from Smaller truth table to copy from +*/ +template +void extend_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() >= from.num_vars() ); + + if ( from.num_vars() < 6 ) + { + auto mask = *from.begin(); + + for ( auto i = from.num_vars(); i < std::min( 6, tt.num_vars() ); ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( from.cbegin(), from.cend(), it ); + } + } +} + +/*! \brief Extends smaller truth table to larger static one + + This is an out-of-place version of `extend_to_inplace` that has the truth + table as a return value. It only works for creating static truth tables. The + template parameter `NumVars` must be equal or larger to the number of + variables in `from`. 
+ + \param from Smaller truth table to copy from +*/ +template +inline static_truth_table extend_to( const TTFrom& from ) +{ + static_truth_table tt; + extend_to_inplace( tt, from ); + return tt; +} + +/*! \brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + return std::any_of( std::begin( tt._bits ), std::end( tt._bits ), + [var_index]( uint64_t word ) + { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & detail::projections_neg[var_index] ) != + ( word & detail::projections_neg[var_index] ); } ); + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( tt._bits[i + j] != tt._bits[i + j + step] ) + { + return true; + } + } + } + return false; +} + +/*! 
\brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param care Care set + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, const TT& care, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & detail::projections_neg[var_index] + & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + return false; +} + +/*! \brief Shrinks larger truth table to smaller one + + The function expects that the most significant bits, which are cut off, are + not in the functional support of the original function. Only then it is + ensured that the resulting function is equivalent. + + \param tt Smaller truth table to create + \param from Larger truth table to copy from +*/ +template +void shrink_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() <= from.num_vars() ); + + std::copy( from.begin(), from.begin() + tt.num_blocks(), tt.begin() ); + + if ( tt.num_vars() < 6 ) + { + tt.mask_bits(); + } +} + +/*! \brief Shrinks larger truth table to smaller dynamic one + + This is an out-of-place version of `shrink_to_inplace` that has the truth table as a + return value. It only works for creating dynamic tables. The parameter + `num_vars` must be less than or equal to the number of variables in `from`.
+ + \param from Larger truth table to copy from +*/ +template +inline dynamic_truth_table shrink_to( const TTFrom& from, unsigned num_vars ) +{ + auto tt = create( num_vars ); + shrink_to_inplace( tt, from ); + return tt; +} + +/*! \brief Prints truth table in hexadecimal representation + + The most-significant bit will be the first character of the string. + + \param tt Truth table + \param os Output stream +*/ +template +void print_hex( const TT& tt, std::ostream& os = std::cout ) +{ + auto const chunk_size = + std::min( tt.num_vars() <= 1 ? 1 : ( tt.num_bits() >> 2 ), 16 ); + + for_each_block_reversed( tt, [&os, chunk_size]( auto word ) + { + std::string chunk( chunk_size, '0' ); + + auto it = chunk.rbegin(); + while (word && it != chunk.rend()) { + auto hex = word & 0xf; + if (hex < 10) { + *it = '0' + static_cast(hex); + } else { + *it = 'a' + static_cast(hex - 10); + } + ++it; + word >>= 4; + } + os << chunk; } ); +} + +} //namespace kitty + +#endif // _KITTY_OPERATIONS_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_operators.hpp b/src/acd/kitty_operators.hpp new file mode 100644 index 000000000..cf973ebe0 --- /dev/null +++ b/src/acd/kitty_operators.hpp @@ -0,0 +1,86 @@ +#ifndef _KITTY_OPERATORS_TT_H_ +#define _KITTY_OPERATORS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" +#include "kitty_operations.hpp" + +namespace kitty +{ + +/*! \brief Operator for unary_not */ +inline dynamic_truth_table operator~( const dynamic_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! \brief Operator for unary_not */ +template +inline static_truth_table operator~( const static_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! \brief Operator for binary_and */ +inline dynamic_truth_table operator&( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*!
\brief Operator for binary_and */ +template +inline static_truth_table operator&( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +inline void operator&=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +template +inline void operator&=( static_truth_table& first, const static_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_or */ +inline dynamic_truth_table operator|( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or */ +template +inline static_truth_table operator|( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or and assign */ +inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_or( first, second ); +} + +/*! \brief Operator for binary_or and assign */ +template +inline void operator|=( static_truth_table& first, const static_truth_table& second ) +{ + first = binary_or( first, second ); +} + +} // namespace kitty + +#endif // _KITTY_OPERATORS_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_static_tt.hpp b/src/acd/kitty_static_tt.hpp new file mode 100644 index 000000000..61593f3ff --- /dev/null +++ b/src/acd/kitty_static_tt.hpp @@ -0,0 +1,131 @@ +#ifndef _KITTY_STATIC_TT_H_ +#define _KITTY_STATIC_TT_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +template +struct static_truth_table +{ + /*! \cond PRIVATE */ + enum + { + NumBlocks = ( NumVars <= 6 ) ? 1u : ( 1u << ( NumVars - 6 ) ) + }; + + enum + { + NumBits = uint64_t( 1 ) << NumVars + }; + /*! \endcond */ + + /*! 
Standard constructor. + + The number of variables provided to the truth table must be known + at runtime. The number of blocks will be computed as a compile + time constant. + */ + static_truth_table() + { + _bits.fill( 0 ); + } + + /*! Constructs a new static truth table instance with the same number of variables. */ + inline static_truth_table construct() const + { + return static_truth_table(); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return NumVars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return NumBlocks; } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return NumBits; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant teverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table if number of variables match. + + This replaces the current truth table with another truth table, if `other` + has the same number of variables. 
Otherwise, the truth table is not + changed. + + \param other Other truth table + */ + template + static_truth_table& operator=( const TT& other ) + { + if ( other.num_bits() == num_bits() ) + { + std::copy( other.begin(), other.end(), begin() ); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + We know that we will have at least 7 variables in this data + structure. + */ + inline void mask_bits() noexcept {} + + /*! \cond PRIVATE */ +public: /* fields */ + std::array _bits; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_STATIC_TT_H_ \ No newline at end of file diff --git a/src/acd/module.make b/src/acd/module.make new file mode 100644 index 000000000..b245d2c42 --- /dev/null +++ b/src/acd/module.make @@ -0,0 +1 @@ +SRC += src/acd/ac_wrapper.cpp diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index c8e2b1ef8..33b85e0bf 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyuojiktnczvh" ) ) != EOF ) { switch ( c ) { @@ -19652,9 +19652,6 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'y': pPars->fUserRecLib ^= 1; break; - case 'z': - pPars->fUserLutDec ^= 1; - break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -19679,6 +19676,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'c': pPars->fUseTtPerm ^= 1; break; + case 'z': + pPars->fAcd ^= 1; + break; case 'v': pPars->fVerbose ^= 1; break; @@ -19810,7 +19810,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->pLutLib = NULL; } // modify for delay optimization - if ( pPars->fDelayOpt || pPars->fDsdBalance || 
pPars->fDelayOptLut || pPars->fUserLutDec ) + if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fAcd ) { pPars->fTruth = 1; pPars->fCutMin = 1; @@ -19956,7 +19956,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -19985,7 +19985,6 @@ usage: Abc_Print( -2, "\t-g : toggles delay optimization by SOP balancing [default = %s]\n", pPars->fDelayOpt? "yes": "no" ); Abc_Print( -2, "\t-x : toggles delay optimization by DSD balancing [default = %s]\n", pPars->fDsdBalance? "yes": "no" ); Abc_Print( -2, "\t-y : toggles delay optimization with recorded library [default = %s]\n", pPars->fUserRecLib? "yes": "no" ); - Abc_Print( -2, "\t-z : toggles delay optimization with LUT decomposition [default = %s]\n", pPars->fUserLutDec? "yes": "no" ); Abc_Print( -2, "\t-u : toggles delay optimization with SAT-based library [default = %s]\n", pPars->fUserSesLib? "yes": "no" ); Abc_Print( -2, "\t-o : toggles using buffers to decouple combinational outputs [default = %s]\n", pPars->fUseBuffs? "yes": "no" ); Abc_Print( -2, "\t-j : toggles enabling additional check [default = %s]\n", pPars->fEnableCheck07? "yes": "no" ); @@ -19994,6 +19993,7 @@ usage: Abc_Print( -2, "\t-t : toggles optimizing average rather than maximum level [default = %s]\n", pPars->fDoAverage? "yes": "no" ); Abc_Print( -2, "\t-n : toggles computing DSDs of the cut functions [default = %s]\n", pPars->fUseDsd? 
"yes": "no" ); Abc_Print( -2, "\t-c : toggles computing truth tables in a new way [default = %s]\n", pPars->fUseTtPerm? "yes": "no" ); + Abc_Print( -2, "\t-z : toggles using ACD decomposition [default = %s]\n", pPars->fAcd? "yes": "no" ); Abc_Print( -2, "\t-v : toggles verbose output [default = %s]\n", pPars->fVerbose? "yes": "no" ); Abc_Print( -2, "\t-h : prints the command usage\n"); return 1; diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index e92a2282e..079cd0066 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -116,7 +116,7 @@ Abc_Ntk_t * Abc_NtkIf( Abc_Ntk_t * pNtk, If_Par_t * pPars ) pPars->pTimesReq = Abc_NtkGetCoRequiredFloats(pNtk); // update timing info to reflect logic level - if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec) && pNtk->pManTime ) + if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fAcd) && pNtk->pManTime ) { int c; if ( pNtk->AndGateDelay == 0.0 ) @@ -427,28 +427,143 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC } /**Function************************************************************* + Synopsis [Implements decomposed LUT-structure of the cut.] + Description [] + + SideEffects [] + SeeAlso [] + ***********************************************************************/ + void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop ) + { + extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory ); + assert( !pIfMan->pPars->fUseTtPerm ); - Synopsis [Implements decomposed LUT-structure of the cut.] 
- - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover ) -{ // get the truth table + word * pTruth = If_CutTruthW(pIfMan, pCutBest); + int v; + If_Obj_t * pIfLeaf; + + if ( pCutBest->nLeaves <= 6 ) + { + /* add fanins */ + If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, v ) + Abc_ObjAddFanin( pNodeTop, (Abc_Obj_t *)If_ObjCopy( pIfLeaf ) ); + + pNodeTop->Level = Abc_ObjLevelNew( pNodeTop ); + + pNodeTop->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover ); + return; + } + + // get the delay profile + unsigned delayProfile = pCutBest->acdDelay; + + // If_Obj_t * pLeaf; + // int i, leafDelay; + // int DelayMax = -1, nLeafMax = 0; + // unsigned uLeafMask = 0; + + // If_CutForEachLeaf( pIfMan, pCutBest, pLeaf, i ) + // { + // leafDelay = If_ObjCutBest(pLeaf)->Delay; + + // if ( DelayMax < leafDelay ) + // { + // DelayMax = leafDelay; + // nLeafMax = 1; + // uLeafMask = (1 << i); + // } + // else if ( DelayMax == leafDelay ) + // { + // nLeafMax++; + // uLeafMask |= (1 << i); + // } + // } + // perform LUT-decomposition and return the LUT-structure + unsigned char decompArray[92]; + int val = acd_decompose( pTruth, pCutBest->nLeaves, 6, &(delayProfile), decompArray ); + + assert( val == 0 ); + // assert( DelayMax + 2 >= pCutBest->Delay ); + // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t + unsigned char bytes_check = decompArray[0]; + assert( bytes_check <= 92 ); + + int byte_p = 2; + unsigned char i, j, k, num_fanins, num_words, num_bytes; + int level, fanin; + word *tt; + Abc_Obj_t *pNewNodes[5]; + + /* create intermediate LUTs*/ + assert( decompArray[1] - 1 <= 5 ); + Abc_Obj_t * pFanin; + for ( i = 0; i < decompArray[1]; ++i ) + { + if ( i < decompArray[1] - 1 ) + { + pNewNodes[i] = 
Abc_NtkCreateNode( pNtkNew ); + } + else + { + pNewNodes[i] = pNodeTop; + } + num_fanins = decompArray[byte_p++]; + level = 0; + for ( j = 0; j < num_fanins; ++j ) + { + fanin = (int)decompArray[byte_p++]; + if ( fanin < If_CutLeaveNum(pCutBest) ) + { + pFanin = (Abc_Obj_t *)If_ObjCopy( If_CutLeaf(pIfMan, pCutBest, fanin) ); + } + else + { + assert( fanin - If_CutLeaveNum(pCutBest) < i ); + pFanin = pNewNodes[fanin - If_CutLeaveNum(pCutBest)]; + } + Abc_ObjAddFanin( pNewNodes[i], pFanin ); + level = Abc_MaxInt( level, Abc_ObjLevel(pFanin) ); + } + + pNewNodes[i]->Level = level + (int)(Abc_ObjFaninNum(pNewNodes[i]) > 0); + + /* extract the truth table */ + tt = pIfMan->puTempW; + num_words = ( num_fanins <= 6 ) ? 1 : ( 1 << ( num_fanins - 6 ) ); + num_bytes = ( num_fanins <= 3 ) ? 1 : ( 1 << ( Abc_MinInt( (int)num_fanins, 6 ) - 3 ) ); + for ( j = 0; j < num_words; ++j ) + { + tt[j] = 0; + for ( k = 0; k < num_bytes; ++k ) + { + tt[j] |= ( (word)(decompArray[byte_p++]) ) << ( k << 3 ); + } + } + + /* extend truth table if size < 5 */ + assert( num_fanins != 1 ); + if ( num_fanins == 2 ) + { + tt[0] |= tt[0] << 4; + } + while ( num_bytes < 4 ) + { + tt[0] |= tt[0] << ( num_bytes << 3 ); + num_bytes <<= 1; + } + + /* add node data */ + pNewNodes[i]->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)tt, (int) num_fanins, vCover ); + } + + /* check correct read */ + assert( byte_p == decompArray[0] ); // this is a placeholder, which takes the truth table and converts it into an AIG without LUT-decomposition - extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory ); - word * pTruth = If_CutTruthW(pIfMan, pCutBest); - assert( !pIfMan->pPars->fUseTtPerm ); - return Kit_TruthToHop( (Hop_Man_t *)pMan, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover ); -} + } /**Function************************************************************* @@ -488,13 +603,18 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, 
If_Man_t * pIfMan, If_Obj_t pNodeNew = Abc_NtkCreateNode( pNtkNew ); // if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays ) if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm && - !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->nGateSize ) + !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fAcd && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) If_CutRotatePins( pIfMan, pCutBest ); if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv ) { If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i ) Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) ); } + else if ( pIfMan->pPars->fAcd ) + { + If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) + Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover); + } else { If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) @@ -548,10 +668,10 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t extern Hop_Obj_t * Abc_RecToHop3( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj ); pNodeNew->pData = Abc_RecToHop3( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj ); } - else if ( pIfMan->pPars->fUserLutDec ) + else if ( pIfMan->pPars->fAcd ) { - extern Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory ); - pNodeNew->pData = Abc_DecRecordToHop( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj, vCover ); + extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop ); + Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew ); } else { diff --git a/src/map/if/if.h b/src/map/if/if.h index 93cb0f6ca..156e8679f 100644 --- a/src/map/if/if.h +++ 
b/src/map/if/if.h @@ -40,6 +40,7 @@ #include "opt/dau/dau.h" #include "misc/vec/vecHash.h" #include "misc/vec/vecWec.h" +#include "ACD/ac_wrapper.h" ABC_NAMESPACE_HEADER_START @@ -126,7 +127,6 @@ struct If_Par_t_ int fDsdBalance; // special delay optimization int fUserRecLib; // use recorded library int fUserSesLib; // use SAT-based synthesis - int fUserLutDec; // use LUT-based decomposition int fBidec; // use bi-decomposition int fUse34Spec; // use specialized matching int fUseBat; // use one specialized feature @@ -146,6 +146,7 @@ struct If_Par_t_ int fDeriveLuts; // enables deriving LUT structures int fDoAverage; // optimize average rather than maximum level int fHashMapping; // perform AIG hashing after mapping + int fAcd; // perform AIG hashing after mapping int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure @@ -280,6 +281,7 @@ struct If_Man_t_ int pDumpIns[16]; Vec_Str_t * vMarks; Vec_Int_t * vVisited2; + int useLimitAdc; // timing manager Tim_Man_t * pManTim; @@ -303,6 +305,7 @@ struct If_Cut_t_ int iCutFunc; // TT ID of the cut int uMaskFunc; // polarity bitmask unsigned uSign; // cut signature + unsigned acdDelay; // Computed pin delay during ACD unsigned Cost : 12; // the user's cost of the cut (related to IF_COST_MAX) unsigned fCompl : 1; // the complemented attribute unsigned fUser : 1; // using the user's area and delay @@ -552,6 +555,7 @@ extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern float If_CutDelayLutStruct( If_Man_t * p, If_Cut_t * pCut, char * pStr, float WireDelay ); +// extern int If_CutPerformAcd( If_Man_t * p, unsigned nVars, int lutSize, unsigned * pdelay, int use_late_arrival, unsigned * cost ); extern int If_CluCheckExt( void * p, word * 
pTruth, int nVars, int nLutLeaf, int nLutRoot, char * pLut0, char * pLut1, word * pFunc0, word * pFunc1 ); extern int If_CluCheckExt3( void * p, word * pTruth, int nVars, int nLutLeaf, int nLutLeaf2, int nLutRoot, @@ -566,6 +570,9 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); +extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ); +extern int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ); +extern float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ extern If_DsdMan_t * If_DsdManAlloc( int nVars, int nLutSize ); extern void If_DsdManAllocIsops( If_DsdMan_t * p, int nLutSize ); @@ -693,6 +700,8 @@ extern int If_ManCountSpecialPos( If_Man_t * p ); extern void If_CutTraverse( If_Man_t * p, If_Obj_t * pRoot, If_Cut_t * pCut, Vec_Ptr_t * vNodes ); extern void If_ObjPrint( If_Obj_t * pObj ); +extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +extern int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index c03061af2..a8e482912 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -62,6 +62,7 @@ void If_ManSetDefaultPars( If_Par_t * pPars ) pPars->fPower = 0; pPars->fCutMin = 0; pPars->fBidec = 0; + pPars->fAcd = 0; pPars->fVerbose = 0; } @@ -106,9 +107,16 @@ int If_ManPerformMappingComb( If_Man_t * p ) If_Obj_t * pObj; abctime clkTotal = Abc_Clock(); int i; + p->useLimitAdc = 1; + //p->vVisited2 = Vec_IntAlloc( 100 ); //p->vMarks = Vec_StrStart( If_ManObjNum(p) ); + // if ( p->pPars->fAcd ) + // { 
+ // p->pPars->nLutSize = 6; + // } + // set arrival times and fanout estimates If_ManForEachCi( p, pObj, i ) { @@ -121,6 +129,16 @@ int If_ManPerformMappingComb( If_Man_t * p ) { // map for delay If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); + + if ( p->pPars->fAcd ) + { + // p->pPars->nLutSize = oldLutSize; + p->useLimitAdc = 0; + If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 0, "Delay" ); + p->useLimitAdc = 1; + // p->pPars->nLutSize = 6; + } + // map for delay second option p->pPars->fFancy = 1; If_ManResetOriginalRefs( p ); diff --git a/src/map/if/ifCut.c b/src/map/if/ifCut.c index f4f72d1c8..8d1cccba0 100644 --- a/src/map/if/ifCut.c +++ b/src/map/if/ifCut.c @@ -604,10 +604,6 @@ static inline int If_ManSortCompare( If_Man_t * p, If_Cut_t * pC0, If_Cut_t * pC return -1; if ( pC0->nLeaves > pC1->nLeaves ) return 1; - if ( pC0->Delay < pC1->Delay - p->fEpsilon ) - return -1; - if ( pC0->Delay > pC1->Delay + p->fEpsilon ) - return 1; if ( pC0->fUseless < pC1->fUseless ) return -1; if ( pC0->fUseless > pC1->fUseless ) @@ -765,7 +761,7 @@ void If_CutSort( If_Man_t * p, If_Set_t * pCutSet, If_Cut_t * pCut ) if ( !pCut->fUseless && (p->pPars->fUseDsd || p->pPars->pFuncCell2 || p->pPars->fUseBat || - p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || p->pPars->fEnableCheck07 || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->fUseDsdTune || p->pPars->fEnableCheck75 || p->pPars->fEnableCheck75u || p->pPars->fUseCheck1 || p->pPars->fUseCheck2) ) { diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index cb25e767e..fcd53e348 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -411,6 +411,132 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) return DelayMax + 2; } } + +int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ) +{ + 
pCut->fUser = 1; + pCut->Cost = pCut->nLeaves > 1 ? 1 : 0; + pCut->acdDelay = 0; + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + // int LutSize = p->pPars->pLutStruct[0] - '0'; + int LutSize = 6; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + + if ( DelayMax < leaf_delay ) + { + DelayMax = leaf_delay; + nLeafMax = 1; + uLeafMask = (1 << i); + } + else if ( DelayMax == leaf_delay ) + { + nLeafMax++; + uLeafMask |= (1 << i); + } + } + if ( If_CutLeaveNum(pCut) <= LutSize ) + { + pCut->acdDelay = ( 1 << LutSize ) - 1; + return DelayMax + 1; + } + // else if ( DelayMax + 1 >= best_delay ) + // { + // return DelayMax + 2; + // } + + /* compute the decomposition */ + int use_late_arrival = DelayMax + 2 >= best_delay; + unsigned cost = 1; + + /* TODO: have checks based on delay */ + if ( use_late_arrival && nLeafMax > LutSize / 2 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + /* remove from critical set */ + if ( !use_late_arrival ) + uLeafMask = 0; + + + word *pTruth = If_CutTruthW( p, pCut ); + int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost ); + + /* not feasible decomposition */ + pCut->acdDelay = uLeafMask; + if ( val < 0 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + pCut->Cost = cost; + + return DelayMax + ( use_late_arrival ? 
1 : 2 ); +} + +int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) +{ + // pCut->fUser = 1; + + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + // int LutSize = p->pPars->pLutStruct[0] - '0'; + int LutSize = 6; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + leaf_delay += ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; + DelayMax = Abc_MaxInt( leaf_delay, DelayMax ); + } + + return DelayMax; +} + +float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ) +{ + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return required; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return 0; + } + + return ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; +} + /* int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) { diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index 4a5210e92..da83b5525 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -148,32 +148,6 @@ int * If_CutArrTimeProfile( If_Man_t * p, If_Cut_t * pCut ) return p->pArrTimeProfile; } - -/**Function************************************************************* - - Synopsis [Returns the node's delay if its cut it LUT-decomposed.] 
- - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -int If_CutDelayLutDec( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj ) -{ - // get the truth table - // get the cut leaves' arrival times - // run LUT-decomposition in the evaluation mode - // return expected arrival time at the output - - // this is a placeholder code, which is assume the cut has unit delay - int i, ArrTimes = 0; - for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) - ArrTimes = Abc_MaxInt( ArrTimes, (int)If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay ); - return ArrTimes + 1; -} - /**Function************************************************************* Synopsis [Finds the best cut for the given node.] @@ -192,7 +166,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep If_Cut_t * pCut0R, * pCut1R; int fFunc0R, fFunc1R; int i, k, v, iCutDsd, fChange; - int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || p->pPars->fUseDsdTune || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->pLutStruct || p->pPars->pFuncCell2 || p->pPars->fUseCheck1 || p->pPars->fUseCheck2; int fUseAndCut = (p->pPars->nAndDelay > 0) || (p->pPars->nAndArea > 0); assert( !If_ObjIsAnd(pObj->pFanin0) || pObj->pFanin0->pCutSet->nCuts > 0 ); @@ -234,8 +208,10 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); + else if ( p->pPars->fAcd ) + { + pCut->Delay = If_AcdReEval( p, pCut ); + } else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 
0 ) @@ -292,6 +268,8 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep if ( !If_CutMergeOrdered( p, pCut0, pCut1, pCut ) ) continue; } + if ( p->pPars->fAcd && p->useLimitAdc && pCut->nLeaves > 6 ) + continue; if ( pObj->fSpec && pCut->nLeaves == (unsigned)p->pPars->nLutSize ) continue; p->nCutsMerged++; @@ -450,7 +428,12 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep else if ( p->pPars->fDsdBalance ) pCut->Delay = If_CutDsdBalanceEval( p, pCut, NULL ); else if ( p->pPars->fUserRecLib ) - pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + else if ( p->pPars->fAcd ) + { + pCut->Delay = If_AcdEval( p, pCut, fFirst ? ABC_INFINITY : (int) If_ObjCutBest(pObj)->Delay ); + pCut->fUseless = pCut->Delay == ABC_INFINITY; + } else if ( p->pPars->fUserSesLib ) { int Cost = 0; @@ -464,8 +447,6 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 0 ) @@ -537,7 +518,7 @@ void If_ObjPerformMappingChoice( If_Man_t * p, If_Obj_t * pObj, int Mode, int fP If_Set_t * pCutSet; If_Obj_t * pTemp; If_Cut_t * pCutTemp, * pCut; - int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUse34Spec; + int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fAcd; assert( pObj->pEquiv != NULL ); // prepare diff --git a/src/map/if/ifTime.c b/src/map/if/ifTime.c index 9ceef1475..9bce5bc43 100644 --- a/src/map/if/ifTime.c +++ b/src/map/if/ifTime.c @@ -211,6 +211,12 @@ void 
If_CutPropagateRequired( If_Man_t * p, If_Obj_t * pObj, If_Cut_t * pCut, fl pLeaf->Required = IF_MIN( pLeaf->Required, Required - pLutDelays[0] ); } } + else if ( p->pPars->fAcd ) + { + Required = ObjRequired; + If_CutForEachLeaf( p, pCut, pLeaf, i ) + pLeaf->Required = IF_MIN( pLeaf->Required, Required - If_AcdLeafProp( p, pCut, i, ObjRequired ) ); + } else { if ( pCut->fUser ) From 66cdd36d20afea221ac9af47866569a9bf038f30 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 15 Nov 2023 19:03:29 +0100 Subject: [PATCH 02/24] Runtime improvements in decomposition --- src/acd/ac_decomposition.hpp | 4 ++-- src/acd/ac_wrapper.cpp | 4 ++-- src/acd/ac_wrapper.h | 1 + src/acd/kitty_operators.hpp | 38 +++++++++++++++++++++++++++++++++++- src/map/if/if.h | 2 +- 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 4f94bcba4..59fd00ada 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -134,8 +134,8 @@ public: /* add cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; - /* check for feasible solution that improves the cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost ) + /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 12 ) { best_tt = tt_p; permutations = perm; diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index b7cee0dd7..aabe8e86f 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -20,8 +20,8 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, ac_decomposition_stats st; ac_decomposition_impl acd( tt, nVars, ps, &st ); - acd.run( *pdelay ); - int val = acd.compute_decomposition(); + int val = acd.run( *pdelay ); + // int val = acd.compute_decomposition(); if ( val < 0 ) { diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h index 522a60b86..cb22acf80 100644 --- a/src/acd/ac_wrapper.h +++ b/src/acd/ac_wrapper.h @@ -4,6 +4,7 @@ // #include "base/main/main.h" #include "misc/util/abc_global.h" +#include "map/if/if.h" // ABC_NAMESPACE_HEADER_START diff --git a/src/acd/kitty_operators.hpp b/src/acd/kitty_operators.hpp index cf973ebe0..68a24cf2e 100644 --- a/src/acd/kitty_operators.hpp +++ b/src/acd/kitty_operators.hpp @@ -78,7 +78,43 @@ inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& s template inline void operator|=( static_truth_table& first, const static_truth_table& second ) { - first = binary_or( first, second ); + // first = binary_or( first, second ); + /* runtime improved version */ + if constexpr ( NumVars <= 6 ) + { + first._bits |= second._bits; + first.mask_bits(); + } + else if constexpr ( NumVars == 7 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + } + else if constexpr ( NumVars == 8 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + first._bits[2] |= second._bits[2]; + 
first._bits[3] |= second._bits[3]; + } + else if constexpr ( NumVars == 9 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + first._bits[2] |= second._bits[2]; + first._bits[3] |= second._bits[3]; + first._bits[4] |= second._bits[4]; + first._bits[5] |= second._bits[5]; + first._bits[6] |= second._bits[6]; + first._bits[7] |= second._bits[7]; + } + else + { + for ( uint32_t i = 0; i < first.num_blocks(); ++i ) + { + first._bits[i] |= second._bits[i]; + } + } } } // namespace kitty diff --git a/src/map/if/if.h b/src/map/if/if.h index 156e8679f..56f0bb7ed 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -40,7 +40,7 @@ #include "opt/dau/dau.h" #include "misc/vec/vecHash.h" #include "misc/vec/vecWec.h" -#include "ACD/ac_wrapper.h" +#include "acd/ac_wrapper.h" ABC_NAMESPACE_HEADER_START From c07080f818a8982d68aa428ebd81bb490c89ed02 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 15 Nov 2023 21:32:34 +0100 Subject: [PATCH 03/24] Adding heuristic set covering solver --- src/acd/ac_decomposition.hpp | 225 ++++++++++++++++++++++++++++++++--- 1 file changed, 208 insertions(+), 17 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 59fd00ada..5a93c5f35 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -84,7 +84,7 @@ private: uint64_t column{ 0 }; uint32_t cost{ 0 }; uint32_t index{ 0 }; - uint32_t sort_cost{ 0 }; + float sort_cost{ 0 }; }; private: @@ -129,6 +129,7 @@ public: uint32_t offset = std::max( static_cast( late_arriving ), 1u ); for ( uint32_t i = offset; i <= ps.lut_size / 2 && i <= 3; ++i ) { + /* TODO: add shared set */ auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn, false ); @@ -179,12 +180,22 @@ public: std::vector isets = compute_isets( best_free_set ); generate_support_minimization_encodings(); - solve_min_support_exact( isets, 
best_free_set ); + + /* always solves exactly for power of 2 */ + if ( __builtin_popcount( best_multiplicity ) == 1 ) + solve_min_support_exact( isets, best_free_set ); + else + solve_min_support_heuristic( isets, best_free_set ); /* unfeasible decomposition */ if ( best_bound_sets.empty() ) { - return -1; + solve_min_support_exact( isets, best_free_set ); + + if ( best_bound_sets.empty() ) + { + return -1; + } } return 0; @@ -995,7 +1006,7 @@ private: } /* solve the covering problem */ - std::array solution = covering_solve_exact( matrix, 100, ps.max_iter ); + std::array solution = covering_solve_exact( matrix, 100, ps.max_iter ); /* check for failed decomposition */ if ( solution[0] == UINT32_MAX ) @@ -1047,6 +1058,72 @@ private: } } + void solve_min_support_heuristic( std::vector const& isets, uint32_t free_set_size ) + { + std::vector matrix; + matrix.reserve( support_minimization_encodings.size() ); + best_bound_sets.clear(); + + /* create covering matrix */ + if ( !create_covering_matrix( isets, matrix, free_set_size, false ) ) + { + return; + } + + /* solve the covering problem: heuristic pass + local search */ + std::array solution = covering_solve_heuristic( matrix ); + + /* check for failed decomposition */ + if ( solution[0] == UINT32_MAX ) + { + return; + } + + /* compute best bound sets */ + uint32_t num_luts = 1 + solution[4]; + uint32_t num_levels = 2; + uint32_t num_edges = free_set_size + solution[4]; + uint32_t isets_support = num_vars - free_set_size; + best_care_sets.clear(); + best_iset_onset.clear(); + best_iset_offset.clear(); + for ( uint32_t i = 0; i < solution[4]; ++i ) + { + STT tt; + STT care; + + const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0]; + const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1]; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + if ( ( ( onset >> j ) & 1 ) ) + { + tt |= isets[j]; + } + if ( ( ( offset >> j ) & 1 ) ) + { + care |= isets[j]; + 
} + } + + care |= tt; + num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 ); + + best_bound_sets.push_back( tt ); + best_care_sets.push_back( care ); + best_iset_onset.push_back( onset ); + best_iset_offset.push_back( offset ); + } + + if ( pst != nullptr ) + { + pst->num_luts = num_luts; + pst->num_levels = num_levels; + pst->num_edges = num_edges; + } + } + + template bool create_covering_matrix( std::vector const& isets, std::vector& matrix, uint32_t free_set_size, bool sort ) { assert( best_multiplicity < 12 ); @@ -1119,7 +1196,15 @@ private: cost |= 1 << iset_support; } - uint32_t sort_cost = cost + ( ( combinations - __builtin_popcountl( column ) ) << num_vars ); + float sort_cost = 0; + if constexpr ( UseHeuristic ) + { + sort_cost = ( (float) cost ) / ( __builtin_popcountl( column ) ); + } + else + { + sort_cost = cost + ( ( combinations - __builtin_popcountl( column ) ) << num_vars ); + } /* insert */ matrix.emplace_back( encoding_matrix{ column, cost, i, sort_cost } ); @@ -1128,10 +1213,10 @@ private: } /* necessary condition for the existance of a solution */ - if ( __builtin_popcountl( sol_existance ) != combinations ) - { - return false; - } + // if ( __builtin_popcountl( sol_existance ) != combinations ) + // { + // return false; + // } if ( !sort ) { @@ -1154,7 +1239,7 @@ private: return true; } - template + template std::array covering_solve_exact( std::vector& matrix, uint32_t max_iter = 100, int32_t limit = 2000 ) { /* last value of res contains the size of the bound set */ @@ -1195,7 +1280,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + if ( limit <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1209,7 +1301,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + if ( limit <= 0 ) + { + 
looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1226,7 +1325,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + if ( limit-- <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter-- == 0 ) { looping = false; } @@ -1256,7 +1362,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + if ( limit <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1270,7 +1383,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + if ( limit <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1290,7 +1410,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + if ( limit <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1307,7 +1434,14 @@ private: /* limit */ if constexpr ( limit_iter ) { - if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + if ( limit-- <= 0 ) + { + looping = false; + } + } + if constexpr ( limit_sol ) + { + if ( best_cost-- < UINT32_MAX && max_iter == 0 ) { looping = false; } @@ -1335,6 +1469,63 @@ private: return res; } + std::array covering_solve_heuristic( std::vector& matrix ) + { + /* last value of res contains the size of the bound set */ + std::array res = { UINT32_MAX }; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + uint64_t column = 0; + + uint32_t best = 0; + float best_cost = 
std::numeric_limits::max(); + for ( uint32_t i = 0; i < matrix.size(); ++i ) + { + if ( matrix[i].sort_cost < best_cost ) + { + best = i; + best_cost = matrix[i].sort_cost; + } + } + + /* select */ + column = matrix[best].column; + std::swap( matrix[0], matrix[best] ); + + /* get max number of BS's */ + uint32_t iter = 1; + + while ( iter < ps.lut_size - best_free_set && __builtin_popcountl( column ) != combinations ) + { + /* select column that minimizes the cost */ + best = 0; + best_cost = std::numeric_limits::max(); + for ( uint32_t i = iter; i < matrix.size(); ++i ) + { + float local_cost = ( (float) matrix[i].cost ) / __builtin_popcountl( matrix[i].column & ~column ); + if ( local_cost < best_cost ) + { + best = i; + best_cost = local_cost; + } + } + + column |= matrix[best].column; + std::swap( matrix[iter], matrix[best] ); + ++iter; + } + + if ( __builtin_popcountl( column ) != combinations ) + { + for ( uint32_t i = 0; i < iter; ++i ) + { + res[i] = i; + } + res[4] = iter; + } + + return res; + } + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) { assert( var_index < tt.num_vars() ); From dcc960bebad3253226dffb7b724d5ea5c076adc5 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 15 Nov 2023 21:57:29 +0100 Subject: [PATCH 04/24] Adding local search for covering --- src/acd/ac_decomposition.hpp | 48 ++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 5a93c5f35..de409b649 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -1079,6 +1079,9 @@ private: return; } + /* improve solution with local search */ + covering_improve( matrix, solution ); + /* compute best bound sets */ uint32_t num_luts = 1 + solution[4]; uint32_t num_levels = 2; @@ -1526,6 +1529,51 @@ private: return res; } + bool covering_improve( std::vector& matrix, std::array& solution ) + { + /* performs one iteration of local search */ + uint32_t 
best_cost = 0, local_cost = 0; + uint32_t num_elements = solution[4]; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + bool improved = false; + + /* compute current cost */ + for ( uint32_t i = 0; i < num_elements; ++i ) + { + best_cost += matrix[solution[i]].cost; + } + + uint64_t column; + for ( uint32_t i = 0; i < num_elements; ++i ) + { + /* remove element i */ + local_cost = 0; + column = 0; + for ( uint32_t j = 0; j < num_elements; ++j ) + { + if ( j == i ) + continue; + local_cost += matrix[solution[j]].cost; + column |= matrix[solution[j]].column; + } + + /* search for a better replecemnts */ + for ( uint32_t j = 0; j < matrix.size(); ++j ) + { + if ( __builtin_popcount( column | matrix[j].column ) != combinations ) + continue; + if ( local_cost + matrix[j].cost < best_cost ) + { + solution[i] = j; + best_cost = local_cost + matrix[j].cost; + improved = true; + } + } + } + + return improved; + } + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) { assert( var_index < tt.num_vars() ); From b32bbdfef31bdfef9790823da480bc55d693a45f Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 16 Nov 2023 15:33:19 +0100 Subject: [PATCH 05/24] Improving set covering using unitary cost --- src/acd/ac_decomposition.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index de409b649..193ef9698 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -1080,7 +1080,8 @@ private: } /* improve solution with local search */ - covering_improve( matrix, solution ); + while ( covering_improve( matrix, solution ) ) + ; /* compute best bound sets */ uint32_t num_luts = 1 + solution[4]; @@ -1202,7 +1203,7 @@ private: float sort_cost = 0; if constexpr ( UseHeuristic ) { - sort_cost = ( (float) cost ) / ( __builtin_popcountl( column ) ); + sort_cost = 1.0f / ( __builtin_popcountl( column ) ); } else { @@ -1504,7 +1505,7 @@ 
private: best_cost = std::numeric_limits::max(); for ( uint32_t i = iter; i < matrix.size(); ++i ) { - float local_cost = ( (float) matrix[i].cost ) / __builtin_popcountl( matrix[i].column & ~column ); + float local_cost = 1.0f / __builtin_popcountl( matrix[i].column & ~column ); if ( local_cost < best_cost ) { best = i; @@ -1517,7 +1518,7 @@ private: ++iter; } - if ( __builtin_popcountl( column ) != combinations ) + if ( __builtin_popcountl( column ) == combinations ) { for ( uint32_t i = 0; i < iter; ++i ) { From 548fd6afb2d227df4eadf1c0d8d63ac350d0e9aa Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 16 Nov 2023 18:20:05 +0100 Subject: [PATCH 06/24] New version of enumeration of combinations --- src/acd/ac_decomposition.hpp | 228 +++++++++++------------------------ 1 file changed, 68 insertions(+), 160 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 193ef9698..d59bf3d87 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -59,6 +59,9 @@ struct ac_decomposition_params /*! \brief Maximum number of iterations for covering. */ uint32_t max_iter{ 5000 }; + + /*! \brief Perform decomposition if support reducing. */ + bool support_reducing_only{ true }; }; /*! 
\brief Statistics for ac_decomposition */ @@ -88,7 +91,7 @@ private: }; private: - static constexpr uint32_t max_num_vars = 8; + static constexpr uint32_t max_num_vars = 9; using STT = kitty::static_truth_table; public: @@ -126,17 +129,25 @@ public: /* run ACD trying different bound sets and free sets */ uint32_t free_set_size = late_arriving; - uint32_t offset = std::max( static_cast( late_arriving ), 1u ); - for ( uint32_t i = offset; i <= ps.lut_size / 2 && i <= 3; ++i ) + uint32_t offset = static_cast( late_arriving ); + uint32_t start = std::max( offset, 1u ); + + /* perform only support reducing decomposition */ + if ( ps.support_reducing_only ) + { + start = std::max( start, num_vars - ps.lut_size ); + } + + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) { /* TODO: add shared set */ auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; - auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn, false ); + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn ); - /* add cost if not support reducing */ + /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 12 ) + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 10 ) { best_tt = tt_p; permutations = perm; @@ -458,8 +469,37 @@ private: return std::make_tuple( best, res_perm, best_cost ); } + inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t *pComb, uint32_t *pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) + { + if ( i == offset ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + kitty::swap_inplace( tt, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + kitty::swap_inplace( tt, j, pos_new ); + } + + return true; + } + template - std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn, bool verbose = false ) + std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) { STT tt = best_tt; @@ -467,177 +507,45 @@ private: STT best_tt = tt; uint32_t best_cost = UINT32_MAX; + assert( free_set_size >= offset ); + + /* special case */ + if ( free_set_size == offset ) + { + best_cost = fn( tt ); + return { tt, permutations, best_cost }; + } + /* works up to 16 input truth tables */ assert( num_vars <= 16 ); - /* select k */ - free_set_size = std::min( free_set_size, num_vars - free_set_size ); - - /* special case */ - if ( num_vars <= free_set_size || free_set_size <= offset ) + /* init combinations */ + uint32_t pComb[16], pInvPerm[16], 
bestPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) { - if ( offset == free_set_size ) - { - best_cost = fn( tt ); - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << best_cost << " "; - print_perm( permutations.begin(), free_set_size ); - } - - return { tt, permutations, best_cost }; - } - else - { - return { tt, permutations, UINT32_MAX }; - } + pComb[i] = pInvPerm[i] = i; } - /* decrease combinations */ - free_set_size -= offset; - - /* init permutation array */ - std::array perm, best_perm; - std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); - best_perm = perm; - /* enumerate combinations */ - if ( free_set_size == 1 ) + do { uint32_t cost = fn( tt ); if ( cost < best_cost ) { best_tt = tt; best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); - } - - for ( uint32_t i = offset + 1; i < num_vars; ++i ) - { - std::swap( perm[offset], perm[i] ); - kitty::swap_inplace( tt, offset, i ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) + for ( uint32_t i = 0; i < num_vars; ++i ) { - best_tt = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); + bestPerm[i] = pComb[i]; } } - } - else if ( free_set_size == 2 ) - { - for ( uint32_t i = 0; i < num_vars - 1 - offset; ++i ) - { - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best_tt = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); - } - - for ( uint32_t j = offset + 2; j < num_vars - i; ++j ) - { - std::swap( perm[offset + 1], perm[j] ); - kitty::swap_inplace( tt, offset + 1, j ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best_tt = tt; - 
best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); - } - } - - std::swap( perm[offset], perm[num_vars - i - 1] ); - kitty::swap_inplace( tt, offset, num_vars - i - 1 ); - } - } - else if ( free_set_size == 3 ) - { - for ( uint32_t i = 0; i < num_vars - 2 - offset; ++i ) - { - for ( uint32_t j = i; j < num_vars - 2 - offset; ++j ) - { - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best_tt = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); - } - - for ( uint32_t k = offset + 3; k < num_vars - j; ++k ) - { - std::swap( perm[offset + 2], perm[k] ); - kitty::swap_inplace( tt, offset + 2, k ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best_tt = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size + offset ); - } - } - - std::swap( perm[offset + 1], perm[num_vars - j - 1] ); - kitty::swap_inplace( tt, offset + 1, num_vars - j - 1 ); - } - - std::swap( perm[offset], perm[num_vars - i - 1] ); - kitty::swap_inplace( tt, offset, num_vars - i - 1 ); - } - } + } while( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); std::vector res_perm( num_vars ); - std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); + for ( uint32_t i = 0; i < num_vars; ++i ) + { + res_perm[i] = permutations[bestPerm[i]]; + } return std::make_tuple( best_tt, res_perm, best_cost ); } From 8aa57c5d54bf7307c823d39fb9cc03a5e140dafb Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 16 Nov 2023 18:53:02 +0100 Subject: [PATCH 07/24] Decisions on late arrival --- src/acd/ac_decomposition.hpp | 46 +++++++++++++++++++++++++----------- 
src/acd/ac_wrapper.cpp | 3 ++- src/acd/ac_wrapper.h | 2 +- src/map/if/if.h | 2 +- src/map/if/ifDelay.c | 11 ++++----- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index d59bf3d87..71caccff2 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -62,6 +62,9 @@ struct ac_decomposition_params /*! \brief Perform decomposition if support reducing. */ bool support_reducing_only{ true }; + + /*! \brief If decomposition with delay profile fails, ignore it. */ + bool try_no_late_arrival{ false }; }; /*! \brief Statistics for ac_decomposition */ @@ -157,28 +160,43 @@ public: } } - if ( best_multiplicity == UINT32_MAX ) + if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) return -1; - /* compute isets */ - // std::vector isets = compute_isets( free_set_size ); + /* try without the delay profile */ + if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) + { + if ( ps.support_reducing_only ) + { + start = std::max( 1u, num_vars - ps.lut_size ); + } - // generate_support_minimization_encodings(); - // solve_min_support_exact( isets, free_set_size ); + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) + { + /* TODO: add shared set */ + auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, 0, evaluate_fn ); - /* unfeasible decomposition */ - // if ( best_bound_sets.empty() ) - // { - // return -1; - // } + /* additional cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; + /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 10 ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = cost; + best_cost = cost + additional_cost; + free_set_size = i; + } + } + } + + if ( best_multiplicity == UINT32_MAX ) + return -1; pst->num_luts = ps.lut_size - free_set_size; best_free_set = free_set_size; - /* TODO generate decomposition only when returning the result */ - // dec_result = generate_decomposition( free_set_size ); - - /* TODO: change return value */ return 0; } diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index aabe8e86f..821ab3f98 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -4,7 +4,7 @@ // ABC_NAMESPACE_IMPL_START -int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ) +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace mockturtle; @@ -17,6 +17,7 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, ac_decomposition_params ps; ps.lut_size = lutSize; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); ac_decomposition_stats st; ac_decomposition_impl acd( tt, nVars, ps, &st ); diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h index cb22acf80..5e0af3787 100644 --- a/src/acd/ac_wrapper.h +++ b/src/acd/ac_wrapper.h @@ -12,7 +12,7 @@ extern "C" { #endif -int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); #ifdef __cplusplus diff --git a/src/map/if/if.h b/src/map/if/if.h index 56f0bb7ed..05dc33949 
100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -700,7 +700,7 @@ extern int If_ManCountSpecialPos( If_Man_t * p ); extern void If_CutTraverse( If_Man_t * p, If_Obj_t * pRoot, If_Cut_t * pCut, Vec_Ptr_t * vNodes ); extern void If_ObjPrint( If_Obj_t * pObj ); -extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); extern int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index fcd53e348..c1ecd7c08 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -471,12 +471,13 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ) } /* remove from critical set */ - if ( !use_late_arrival ) + if ( !use_late_arrival && nLeafMax > LutSize / 2 ) + { uLeafMask = 0; + } - word *pTruth = If_CutTruthW( p, pCut ); - int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost ); + int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost, !use_late_arrival ); /* not feasible decomposition */ pCut->acdDelay = uLeafMask; @@ -507,10 +508,8 @@ int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) } // int LutSize = p->pPars->pLutStruct[0] - '0'; - int LutSize = 6; int i, leaf_delay; - int DelayMax = -1, nLeafMax = 0; - unsigned uLeafMask = 0; + int DelayMax = -1; for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) { leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; From b77bdeeb173ef7799dc7a2b406ef7f1155ae3b5a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 16 Nov 2023 19:21:29 +0100 Subject: [PATCH 08/24] Enabling ACD for area --- src/map/if/if.h | 2 +- src/map/if/ifDelay.c | 36 +++++++++++++++++++++++++----------- src/map/if/ifMap.c | 2 +- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git 
a/src/map/if/if.h b/src/map/if/if.h index 05dc33949..b4d06edca 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -570,7 +570,7 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); -extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ); +extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ); extern int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ); extern float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index c1ecd7c08..75a0a0a66 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -412,7 +412,7 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) } } -int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ) +int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) { pCut->fUser = 1; pCut->Cost = pCut->nLeaves > 1 ? 
1 : 0; @@ -460,20 +460,34 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ) // } /* compute the decomposition */ - int use_late_arrival = DelayMax + 2 >= best_delay; + int use_late_arrival; unsigned cost = 1; - - /* TODO: have checks based on delay */ - if ( use_late_arrival && nLeafMax > LutSize / 2 ) + + if ( optDelay ) { - pCut->Cost = IF_COST_MAX; - return ABC_INFINITY; + /* checks based on delay: must be better than the previous best cut */ + use_late_arrival = DelayMax + 2 >= If_ObjCutBest(pObj)->Delay; } - - /* remove from critical set */ - if ( !use_late_arrival && nLeafMax > LutSize / 2 ) + else { - uLeafMask = 0; + /* checks based on delay: look at the required time */ + use_late_arrival = DelayMax + 2 > pObj->Required + p->fEpsilon; + } + + /* Too many late-arriving signals */ + if ( nLeafMax > LutSize / 2 ) + { + if ( use_late_arrival ) + { + /* unfeasible decomposition */ + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + else + { + /* remove critical signals as not needed */ + uLeafMask = 0; + } } word *pTruth = If_CutTruthW( p, pCut ); diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index da83b5525..1455846f6 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -431,7 +431,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); else if ( p->pPars->fAcd ) { - pCut->Delay = If_AcdEval( p, pCut, fFirst ? 
ABC_INFINITY : (int) If_ObjCutBest(pObj)->Delay ); + pCut->Delay = If_AcdEval( p, pCut, pObj, Mode == 0 ); pCut->fUseless = pCut->Delay == ABC_INFINITY; } else if ( p->pPars->fUserSesLib ) From 1ca7a3a353dfce495a71d2f9d7abf3b3ffdddd71 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 17 Nov 2023 15:49:29 +0100 Subject: [PATCH 09/24] Remove symmetries in covering table --- src/acd/ac_decomposition.hpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 71caccff2..0faf46b91 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -150,7 +150,7 @@ public: /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 10 ) + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 12 ) { best_tt = tt_p; permutations = perm; @@ -194,7 +194,7 @@ public: if ( best_multiplicity == UINT32_MAX ) return -1; - pst->num_luts = ps.lut_size - free_set_size; + pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 : best_multiplicity <= 8 ? 
4 : 5; best_free_set = free_set_size; return 0; @@ -845,10 +845,10 @@ private: void generate_support_minimization_encodings() { uint32_t count = 0; - uint32_t num_combs_exact[4] = { 2, 6, 70, 12870 }; + uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 }; /* enable don't cares only if not a power of 2 */ - uint32_t num_combs = 3; + uint32_t num_combs = 2; if ( __builtin_popcount( best_multiplicity ) == 1 ) { for ( uint32_t i = 0; i < 4; ++i ) @@ -913,6 +913,12 @@ private: generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); onset &= ~( 1 << var ); + /* remove symmetries */ + if ( var == 0 ) + { + return; + } + /* move var in OFFSET */ offset |= 1 << var; generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); @@ -1181,7 +1187,12 @@ private: assert( best_multiplicity <= 16 ); /* determine the number of needed loops*/ - if ( best_multiplicity <= 4 ) + if ( best_multiplicity <= 2 ) + { + res[4] = 1; + res[0] = 0; + } + else if ( best_multiplicity <= 4 ) { res[4] = 2; for ( uint32_t i = 0; i < matrix.size() - 1; ++i ) From 3d602e2f00fb2fa1ea06f61c6ff1e0391232d439 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 17 Nov 2023 15:55:10 +0100 Subject: [PATCH 10/24] Adding sorting of columns in heuristic covering --- src/acd/ac_decomposition.hpp | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 0faf46b91..9fb17c764 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -997,7 +997,7 @@ private: best_bound_sets.clear(); /* create covering matrix */ - if ( !create_covering_matrix( isets, matrix, free_set_size, false ) ) + if ( !create_covering_matrix( isets, matrix, free_set_size, true ) ) { return; } @@ -1148,29 +1148,24 @@ private: sol_existance |= column; } - /* necessary condition for the existance of a solution */ - // if ( __builtin_popcountl( sol_existance ) != combinations ) - // { 
- // return false; - // } - if ( !sort ) { return true; } - std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { - return a.sort_cost < b.sort_cost; - } ); - - /* print */ - // if ( best_multiplicity < 6 ) - // { - // for ( uint32_t i = 0; i < columns.size(); ++i ) - // { - // std::cout << indexes[i] << " " << costs[i] << " \t" << columns[i] << "\n"; - // } - // } + if constexpr ( UseHeuristic ) + { + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.cost < b.cost; + } ); + return true; + } + else + { + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.sort_cost < b.sort_cost; + } ); + } return true; } From 1d7dfd25c6d346e5e0849b8084250a1596160e38 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 17 Nov 2023 16:58:17 +0100 Subject: [PATCH 11/24] Improving ACD mapping --- src/acd/ac_decomposition.hpp | 4 +++- src/acd/ac_wrapper.cpp | 3 +-- src/base/abci/abcIf.c | 26 -------------------------- src/map/if/if.h | 2 +- src/map/if/ifCore.c | 25 ++++++++++++++++++------- src/map/if/ifDelay.c | 31 +++++++++++++++---------------- src/map/if/ifMap.c | 2 +- 7 files changed, 39 insertions(+), 54 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 9fb17c764..bfcbf06e1 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -166,6 +166,7 @@ public: /* try without the delay profile */ if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) { + delay_profile = 0; if ( ps.support_reducing_only ) { start = std::max( 1u, num_vars - ps.lut_size ); @@ -197,7 +198,8 @@ public: pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 : best_multiplicity <= 8 ? 4 : 5; best_free_set = free_set_size; - return 0; + /* return number of levels */ + return delay_profile == 0 ? 
2 : 1; } int compute_decomposition() diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index 821ab3f98..6bb41ca37 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -22,7 +22,6 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, ac_decomposition_impl acd( tt, nVars, ps, &st ); int val = acd.run( *pdelay ); - // int val = acd.compute_decomposition(); if ( val < 0 ) { @@ -33,7 +32,7 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, *pdelay = acd.get_profile(); *cost = st.num_luts; - return 0; + return val; } int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index 079cd0066..b7d796273 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -458,34 +458,10 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC // get the delay profile unsigned delayProfile = pCutBest->acdDelay; - // If_Obj_t * pLeaf; - // int i, leafDelay; - // int DelayMax = -1, nLeafMax = 0; - // unsigned uLeafMask = 0; - - // If_CutForEachLeaf( pIfMan, pCutBest, pLeaf, i ) - // { - // leafDelay = If_ObjCutBest(pLeaf)->Delay; - - // if ( DelayMax < leafDelay ) - // { - // DelayMax = leafDelay; - // nLeafMax = 1; - // uLeafMask = (1 << i); - // } - // else if ( DelayMax == leafDelay ) - // { - // nLeafMax++; - // uLeafMask |= (1 << i); - // } - // } - // perform LUT-decomposition and return the LUT-structure unsigned char decompArray[92]; int val = acd_decompose( pTruth, pCutBest->nLeaves, 6, &(delayProfile), decompArray ); - assert( val == 0 ); - // assert( DelayMax + 2 >= pCutBest->Delay ); // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t unsigned char bytes_check = decompArray[0]; @@ -561,8 +537,6 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC /* check correct read */ assert( byte_p == 
decompArray[0] ); - - // this is a placeholder, which takes the truth table and converts it into an AIG without LUT-decomposition } /**Function************************************************************* diff --git a/src/map/if/if.h b/src/map/if/if.h index b4d06edca..c3ba59be4 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -570,7 +570,7 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); -extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ); +extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); extern int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ); extern float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index a8e482912..ad3c85179 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -128,16 +128,11 @@ int If_ManPerformMappingComb( If_Man_t * p ) if ( p->pPars->fPreprocess && !p->pPars->fArea ) { // map for delay - If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); - if ( p->pPars->fAcd ) - { - // p->pPars->nLutSize = oldLutSize; p->useLimitAdc = 0; - If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 0, "Delay" ); + If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); + if ( p->pPars->fAcd ) p->useLimitAdc = 1; - // p->pPars->nLutSize = 6; - } // map for delay second option p->pPars->fFancy = 1; @@ -160,17 +155,33 @@ int If_ManPerformMappingComb( If_Man_t * p ) // area flow oriented mapping for ( i = 0; i < p->pPars->nFlowIters; i++ ) { + // if ( p->pPars->fAcd && i == 0 ) + // { + // p->useLimitAdc = 0; + // } 
If_ManPerformMappingRound( p, p->pPars->nCutsMax, 1, 0, 0, "Flow" ); if ( p->pPars->fExpRed ) If_ManImproveMapping( p ); + // if ( p->pPars->fAcd && i == 0 ) + // { + // p->useLimitAdc = 1; + // } } // area oriented mapping for ( i = 0; i < p->pPars->nAreaIters; i++ ) { + // if ( p->pPars->fAcd && i == 0 ) + // { + // p->useLimitAdc = 0; + // } If_ManPerformMappingRound( p, p->pPars->nCutsMax, 2, 0, 0, "Area" ); if ( p->pPars->fExpRed ) If_ManImproveMapping( p ); + // if ( p->pPars->fAcd && i == 0 ) + // { + // p->useLimitAdc = 1; + // } } if ( p->pPars->fVerbose ) diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index 75a0a0a66..90fab6b00 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -412,7 +412,7 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) } } -int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) +int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ) { pCut->fUser = 1; pCut->Cost = pCut->nLeaves > 1 ? 
1 : 0; @@ -428,7 +428,6 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; } - // int LutSize = p->pPars->pLutStruct[0] - '0'; int LutSize = 6; int i, leaf_delay; int DelayMax = -1, nLeafMax = 0; @@ -454,24 +453,23 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) pCut->acdDelay = ( 1 << LutSize ) - 1; return DelayMax + 1; } - // else if ( DelayMax + 1 >= best_delay ) - // { - // return DelayMax + 2; - // } /* compute the decomposition */ - int use_late_arrival; + int use_late_arrival = 0; unsigned cost = 1; - if ( optDelay ) + if ( !fFirst ) { - /* checks based on delay: must be better than the previous best cut */ - use_late_arrival = DelayMax + 2 >= If_ObjCutBest(pObj)->Delay; - } - else - { - /* checks based on delay: look at the required time */ - use_late_arrival = DelayMax + 2 > pObj->Required + p->fEpsilon; + if ( optDelay ) + { + /* checks based on delay: must be better than the previous best cut */ + use_late_arrival = DelayMax + 2 >= If_ObjCutBest(pObj)->Delay; + } + else + { + /* checks based on delay: look at the required time */ + use_late_arrival = DelayMax + 2 > pObj->Required + p->fEpsilon; + } } /* Too many late-arriving signals */ @@ -490,6 +488,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) } } + /* returns the delay of the decomposition */ word *pTruth = If_CutTruthW( p, pCut ); int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost, !use_late_arrival ); @@ -503,7 +502,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay ) pCut->Cost = cost; - return DelayMax + ( use_late_arrival ? 
1 : 2 ); + return DelayMax + val; } int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index 1455846f6..69f2ead81 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -431,7 +431,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); else if ( p->pPars->fAcd ) { - pCut->Delay = If_AcdEval( p, pCut, pObj, Mode == 0 ); + pCut->Delay = If_AcdEval( p, pCut, pObj, Mode == 0, fFirst ); pCut->fUseless = pCut->Delay == ABC_INFINITY; } else if ( p->pPars->fUserSesLib ) From f7a520b9571aab110781b9fdf4093da60fe16b6d Mon Sep 17 00:00:00 2001 From: aletempiac Date: Sun, 19 Nov 2023 18:51:50 +0100 Subject: [PATCH 12/24] restructuring code --- src/acd/ac_decomposition.hpp | 125 +++++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 41 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index bfcbf06e1..c9b1b2453 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "kitty_constants.hpp" @@ -141,11 +142,16 @@ public: start = std::max( start, num_vars - ps.lut_size ); } + std::function column_multiplicity_fn[3] = { + [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); } + }; + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) { /* TODO: add shared set */ - auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; - auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn ); + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; @@ -175,8 +181,7 @@ public: for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) { /* TODO: add shared set */ - auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; - auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, 0, evaluate_fn ); + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; @@ -262,59 +267,33 @@ public: } private: - uint32_t column_multiplicity( STT tt, uint32_t free_set_size ) + template + uint32_t column_multiplicity( STT tt ) { uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; uint32_t multiplicity = 0; uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; + uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; /* supports up to 64 values of free set (256 for |FS| == 3)*/ assert( free_set_size <= 3 ); /* extract iset functions */ - if ( free_set_size == 1 ) + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - for ( auto j = 0; j < 32; ++j ) - { - multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0x3 ); - *it >>= 2; - } - ++it; - } - } - else if ( free_set_size == 2 ) - { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < 16; ++j ) - { - multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0xF ); - *it >>= 4; - } - ++it; - } - } - else /* free set size 3 */ - { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < 8; ++j ) - { - multiplicity_set[( *it >> 6 ) & 0x3] |= UINT64_C( 1 ) << ( *it & 0x3F ); - *it >>= 8; - } - ++it; + multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << 
( *it & masks_bits[free_set_size] ); + *it >>= ( 1u << free_set_size ); } + ++it; } multiplicity = __builtin_popcountl( multiplicity_set[0] ); - if ( free_set_size == 3 ) + if constexpr ( free_set_size == 3 ) { multiplicity += __builtin_popcountl( multiplicity_set[1] ); multiplicity += __builtin_popcountl( multiplicity_set[2] ); @@ -324,6 +303,70 @@ private: return multiplicity; } + // uint32_t column_multiplicity2( STT tt, uint32_t free_set_size ) + // { + // uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; + // uint32_t multiplicity = 0; + // uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + + // /* supports up to 64 values of free set (256 for |FS| == 3)*/ + // assert( free_set_size <= 5 ); + + // std::unordered_set column_to_iset; + + // /* extract iset functions */ + // if ( free_set_size == 1 ) + // { + // auto it = std::begin( tt ); + // for ( auto i = 0u; i < num_blocks; ++i ) + // { + // for ( auto j = 0; j < 32; ++j ) + // { + // multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0x3 ); + // *it >>= 2; + // } + // ++it; + // } + // } + // else if ( free_set_size == 2 ) + // { + // auto it = std::begin( tt ); + // for ( auto i = 0u; i < num_blocks; ++i ) + // { + // for ( auto j = 0; j < 16; ++j ) + // { + // multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0xF ); + // *it >>= 4; + // } + // ++it; + // } + // } + // else /* free set size 3 */ + // { + // auto it = std::begin( tt ); + // for ( auto i = 0u; i < num_blocks; ++i ) + // { + // for ( auto j = 0; j < 8; ++j ) + // { + // multiplicity_set[( *it >> 6 ) & 0x3] |= UINT64_C( 1 ) << ( *it & 0x3F ); + // *it >>= 8; + // } + // ++it; + // } + // } + + // multiplicity = __builtin_popcountl( multiplicity_set[0] ); + + // if ( free_set_size == 3 ) + // { + // multiplicity += __builtin_popcountl( multiplicity_set[1] ); + // multiplicity += __builtin_popcountl( multiplicity_set[2] ); + // multiplicity += __builtin_popcountl( multiplicity_set[3] ); + // } + + // return multiplicity; + // 
} + template std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn, bool verbose = false ) { From 672fd1b629ede5d67484f73ca9f0f1830fd92322 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Sun, 19 Nov 2023 18:53:54 +0100 Subject: [PATCH 13/24] removing not used methods --- src/acd/ac_decomposition.hpp | 165 ----------------------------------- 1 file changed, 165 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index c9b1b2453..2714f7a8e 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -367,171 +367,6 @@ private: // return multiplicity; // } - template - std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn, bool verbose = false ) - { - /* works up to 16 input truth tables */ - assert( num_vars <= 16 ); - - /* special case */ - STT tt = best_tt; - if ( num_vars <= free_set_size || free_set_size == 0 ) - { - return { tt, permutations, UINT32_MAX }; - } - - /* select k */ - // free_set_size = std::min( free_set_size, num_vars - free_set_size ); - - /* init permutation array */ - std::array perm, best_perm; - std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); - best_perm = perm; - - /* TT with best cost */ - STT best = tt; - uint32_t best_cost = UINT32_MAX; - - /* enumerate combinations */ - if ( free_set_size == 1 ) - { - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size ); - } - - for ( uint32_t i = 1; i < num_vars; ++i ) - { - std::swap( perm[0], perm[i] ); - kitty::swap_inplace( tt, 0, i ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), 
free_set_size ); - } - } - } - else if ( free_set_size == 2 ) - { - for ( uint32_t i = 0; i < num_vars - 1; ++i ) - { - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size ); - } - - for ( uint32_t j = 2; j < num_vars - i; ++j ) - { - std::swap( perm[1], perm[j] ); - kitty::swap_inplace( tt, 1, j ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size ); - } - } - - std::swap( perm[0], perm[num_vars - i - 1] ); - kitty::swap_inplace( tt, 0, num_vars - i - 1 ); - } - } - else if ( free_set_size == 3 ) - { - for ( uint32_t i = 0; i < num_vars - 2; ++i ) - { - for ( uint32_t j = i; j < num_vars - 2; ++j ) - { - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size ); - } - - for ( uint32_t k = 3; k < num_vars - j; ++k ) - { - std::swap( perm[2], perm[k] ); - kitty::swap_inplace( tt, 2, k ); - - uint32_t cost = fn( tt ); - if ( cost < best_cost ) - { - best = tt; - best_cost = cost; - best_perm = perm; - } - - if ( verbose ) - { - kitty::print_hex( tt ); - std::cout << " " << cost << " "; - print_perm( perm.begin(), free_set_size ); - } - } - - std::swap( perm[1], perm[num_vars - j - 1] ); - kitty::swap_inplace( tt, 1, num_vars - j - 1 ); - } - - std::swap( perm[0], perm[num_vars - i - 1] ); - kitty::swap_inplace( tt, 0, num_vars - i - 1 ); - } - } - - std::vector res_perm( num_vars ); - std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); - - return std::make_tuple( best, res_perm, 
best_cost ); - } - inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t *pComb, uint32_t *pInvPerm, STT& tt ) { uint32_t i; From 219d6d86d6dd2e6115048d5266a8da008ac57fd9 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Sun, 19 Nov 2023 19:33:19 +0100 Subject: [PATCH 14/24] Simplifying code --- src/acd/ac_decomposition.hpp | 196 ++++++++++++----------------------- 1 file changed, 65 insertions(+), 131 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 2714f7a8e..6a7a90a8d 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -88,7 +88,7 @@ class ac_decomposition_impl private: struct encoding_matrix { - uint64_t column{ 0 }; + uint64_t column; uint32_t cost{ 0 }; uint32_t index{ 0 }; float sort_cost{ 0 }; @@ -142,10 +142,12 @@ public: start = std::max( start, num_vars - ps.lut_size ); } - std::function column_multiplicity_fn[3] = { + std::function column_multiplicity_fn[5] = { [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); } + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity4<4u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) @@ -277,7 +279,7 @@ private: uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; /* supports up to 64 values of free set (256 for |FS| == 3)*/ - assert( free_set_size <= 3 ); + static_assert( free_set_size <= 3 ); /* extract iset functions */ auto it = std::begin( tt ); @@ -303,69 +305,55 @@ private: return multiplicity; } - // uint32_t column_multiplicity2( STT tt, uint32_t free_set_size ) - // { - // uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; - // uint32_t multiplicity = 0; - // uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + template + uint32_t column_multiplicity4( STT tt ) + { + unsigned char multiplicity_set[1 << 16] = { 0 }; + uint32_t multiplicity = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF }; - // /* supports up to 64 values of free set (256 for |FS| == 3)*/ - // assert( free_set_size <= 5 ); + static_assert( free_set_size <= 4 ); - // std::unordered_set column_to_iset; + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + multiplicity += multiplicity_set[*it & masks[free_set_size]]++ == 0 ? 1 : 0; + *it >>= ( 1u << free_set_size ); + } + ++it; + } - // /* extract iset functions */ - // if ( free_set_size == 1 ) - // { - // auto it = std::begin( tt ); - // for ( auto i = 0u; i < num_blocks; ++i ) - // { - // for ( auto j = 0; j < 32; ++j ) - // { - // multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0x3 ); - // *it >>= 2; - // } - // ++it; - // } - // } - // else if ( free_set_size == 2 ) - // { - // auto it = std::begin( tt ); - // for ( auto i = 0u; i < num_blocks; ++i ) - // { - // for ( auto j = 0; j < 16; ++j ) - // { - // multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0xF ); - // *it >>= 4; - // } - // ++it; - // } - // } - // else /* free set size 3 */ - // { - // auto it = std::begin( tt ); - // for ( auto i = 0u; i < num_blocks; ++i ) - // { - // for ( auto j = 0; j < 8; ++j ) - // { - // multiplicity_set[( *it >> 6 ) & 0x3] |= UINT64_C( 1 ) << ( *it & 0x3F ); - // *it >>= 8; - // } - // ++it; - // } - // } + return multiplicity; + } - // multiplicity = __builtin_popcountl( multiplicity_set[0] ); + template + uint32_t column_multiplicity5( STT tt ) + { + uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - // if ( free_set_size == 3 ) - // { - // multiplicity += __builtin_popcountl( multiplicity_set[1] ); - // multiplicity += __builtin_popcountl( multiplicity_set[2] ); - // multiplicity += __builtin_popcountl( multiplicity_set[3] ); - // } + std::unordered_set multiplicity_set; - // return multiplicity; - // } + static_assert( free_set_size <= 5 ); + + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + multiplicity_set.insert( *it & masks[free_set_size] ); + *it >>= ( 1u << free_set_size ); + } + ++it; + } + + return static_cast( multiplicity_set.size() ); + } inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t *pComb, uint32_t *pInvPerm, STT& tt ) { @@ -459,84 +447,30 @@ private: STT tt = best_tt; uint32_t offset = 0; uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - if ( free_set_size == 1 ) + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - for ( auto j = 0; j < 32; ++j ) + uint64_t val = *it & masks[free_set_size]; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) { - uint64_t val = *it & 0x3; - - if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) - { - isets[el->second]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets[column_to_iset.size()]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); - column_to_iset[val] = column_to_iset.size(); - } - - *it >>= 2; + isets[el->second]._bits[i / ( 1u << free_set_size )] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / ( 1u << free_set_size )] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); } - offset ^= 32; - ++it; + *it >>= ( 1u << free_set_size ); } - } - else if ( free_set_size == 2 ) - { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < 16; ++j ) - { - uint64_t val = *it & 0xF; - if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) - { - isets[el->second]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets[column_to_iset.size()]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); - column_to_iset[val] = column_to_iset.size(); - } - - *it >>= 4; - } - - offset = ( offset + 16 ) % 64; - ++it; - } - } - else /* free set size 3 */ - { - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < 8; ++j ) - { - uint64_t val = *it & 0xFF; - - if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) - { - isets[el->second]._bits[i / 8] |= 
UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets[column_to_iset.size()]._bits[i / 8] |= UINT64_C( 1 ) << ( j + offset ); - column_to_iset[val] = column_to_iset.size(); - } - - *it >>= 8; - } - - offset = ( offset + 8 ) % 64; - ++it; - } + offset = ( offset + ( 64 >> free_set_size ) ) % 64; + ++it; } /* extend isets to cover the whole truth table */ From d10d450f38fa1a4e01b4343fd5afba006aea5963 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Sun, 19 Nov 2023 21:59:40 +0100 Subject: [PATCH 15/24] Final implementation --- src/acd/ac_decomposition.hpp | 77 ++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 6a7a90a8d..0a05552e2 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -58,6 +58,9 @@ struct ac_decomposition_params /*! \brief LUT size for decomposition. */ uint32_t lut_size{ 6 }; + /*! \brief Perform decomposition if support reducing. */ + uint32_t max_free_set_vars{ 4 }; + /*! \brief Maximum number of iterations for covering. 
*/ uint32_t max_iter{ 5000 }; @@ -88,14 +91,14 @@ class ac_decomposition_impl private: struct encoding_matrix { - uint64_t column; + uint64_t column[2]; uint32_t cost{ 0 }; uint32_t index{ 0 }; float sort_cost{ 0 }; }; private: - static constexpr uint32_t max_num_vars = 9; + static constexpr uint32_t max_num_vars = 10; using STT = kitty::static_truth_table; public: @@ -118,7 +121,7 @@ public: uint32_t late_arriving = __builtin_popcount( delay_profile ); /* return a high cost if too many late arriving variables */ - if ( late_arriving > ps.lut_size / 2 || late_arriving > 3 ) + if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars ) { return -1; } @@ -150,15 +153,15 @@ public: [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; - for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { /* TODO: add shared set */ auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; - /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 12 ) + /* check for feasible solution that improves the cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost <= 16 ) { best_tt = tt_p; permutations = perm; @@ -180,15 +183,15 @@ public: start = std::max( 1u, num_vars - ps.lut_size ); } - for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= 3; ++i ) + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { /* TODO: add shared set */ auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; - /* check for feasible solution that improves the cost */ /* TODO: remove limit on cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost < 10 ) + /* check for feasible solution that improves the cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost <= 16 ) { best_tt = tt_p; permutations = perm; @@ -220,7 +223,7 @@ public: generate_support_minimization_encodings(); /* always solves exactly for power of 2 */ - if ( __builtin_popcount( best_multiplicity ) == 1 ) + if ( __builtin_popcount( best_multiplicity ) == 1 && best_multiplicity <= 8 ) solve_min_support_exact( isets, best_free_set ); else solve_min_support_heuristic( isets, best_free_set ); @@ -876,9 +879,8 @@ private: template bool create_covering_matrix( std::vector const& isets, std::vector& matrix, uint32_t free_set_size, bool sort ) { - assert( best_multiplicity < 12 ); + assert( best_multiplicity <= 16 ); uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; - uint64_t sol_existance = 0; uint32_t iset_support = num_vars - free_set_size; /* 
insert dichotomies */ @@ -897,7 +899,7 @@ private: } /* compute function and distinguishable seed dichotomies */ - uint64_t column = 0; + uint64_t column[2] = { 0, 0 }; STT tt; STT care; uint32_t pair_pointer = 0; @@ -921,7 +923,7 @@ private: /* if is are in diffent sets */ if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) ) { - column |= UINT64_C( 1 ) << ( pair_pointer ); + column[pair_pointer >> 6u] |= UINT64_C( 1 ) << ( pair_pointer & 0x3F ); } ++pair_pointer; @@ -949,17 +951,15 @@ private: float sort_cost = 0; if constexpr ( UseHeuristic ) { - sort_cost = 1.0f / ( __builtin_popcountl( column ) ); + sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) ); } else { - sort_cost = cost + ( ( combinations - __builtin_popcountl( column ) ) << num_vars ); + sort_cost = cost + ( ( combinations - __builtin_popcountl( column[0] + __builtin_popcountl( column[1] ) ) ) << num_vars ); } /* insert */ - matrix.emplace_back( encoding_matrix{ column, cost, i, sort_cost } ); - - sol_existance |= column; + matrix.emplace_back( encoding_matrix{ { column[0], column[1] }, cost, i, sort_cost } ); } if ( !sort ) @@ -1013,7 +1013,7 @@ private: continue; /* check validity */ - if ( __builtin_popcountl( matrix[i].column | matrix[j].column ) == combinations ) + if ( __builtin_popcountl( matrix[i].column[0] | matrix[j].column[0] ) + __builtin_popcountl( matrix[i].column[1] | matrix[j].column[1] ) == combinations ) { res[0] = i; res[1] = j; @@ -1045,7 +1045,8 @@ private: for ( uint32_t j = 1; j < matrix.size() - 1 && looping; ++j ) { - uint64_t current_columns = matrix[i].column | matrix[j].column; + uint64_t current_columns0 = matrix[i].column[0] | matrix[j].column[0]; + uint64_t current_columns1 = matrix[i].column[1] | matrix[j].column[1]; uint32_t current_cost = matrix[i].cost + matrix[j].cost; /* limit */ @@ -1093,7 +1094,7 @@ private: continue; /* check validity */ - if ( __builtin_popcountl( current_columns | 
matrix[k].column ) == combinations ) + if ( __builtin_popcountl( current_columns0 | matrix[k].column[0] ) + __builtin_popcountl( current_columns1 | matrix[k].column[1] ) == combinations ) { res[0] = i; res[1] = j; @@ -1127,7 +1128,8 @@ private: for ( uint32_t j = 1; j < matrix.size() - 2 && looping; ++j ) { - uint64_t current_columns0 = matrix[i].column | matrix[j].column; + uint64_t current_columns0 = matrix[i].column[0] | matrix[j].column[0]; + uint64_t current_columns1 = matrix[i].column[1] | matrix[j].column[1]; uint32_t current_cost0 = matrix[i].cost + matrix[j].cost; /* limit */ @@ -1154,7 +1156,8 @@ private: for ( uint32_t k = 2; k < matrix.size() - 1 && looping; ++k ) { - uint64_t current_columns1 = current_columns0 | matrix[k].column; + uint64_t current_columns00 = current_columns0 | matrix[k].column[0]; + uint64_t current_columns11 = current_columns1 | matrix[k].column[1]; uint32_t current_cost1 = current_cost0 + matrix[k].cost; /* limit */ @@ -1202,7 +1205,7 @@ private: continue; /* check validity */ - if ( __builtin_popcountl( current_columns1 | matrix[t].column ) == combinations ) + if ( __builtin_popcountl( current_columns00 | matrix[t].column[0] ) + __builtin_popcountl( current_columns11 | matrix[t].column[1] ) == combinations ) { res[0] = i; res[1] = j; @@ -1224,7 +1227,7 @@ private: /* last value of res contains the size of the bound set */ std::array res = { UINT32_MAX }; uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; - uint64_t column = 0; + uint64_t column0 = 0, column1 = 0; uint32_t best = 0; float best_cost = std::numeric_limits::max(); @@ -1238,20 +1241,21 @@ private: } /* select */ - column = matrix[best].column; + column0 = matrix[best].column[0]; + column1 = matrix[best].column[1]; std::swap( matrix[0], matrix[best] ); /* get max number of BS's */ uint32_t iter = 1; - while ( iter < ps.lut_size - best_free_set && __builtin_popcountl( column ) != combinations ) + while ( iter < ps.lut_size - best_free_set && 
__builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) != combinations ) { /* select column that minimizes the cost */ best = 0; best_cost = std::numeric_limits::max(); for ( uint32_t i = iter; i < matrix.size(); ++i ) { - float local_cost = 1.0f / __builtin_popcountl( matrix[i].column & ~column ); + float local_cost = 1.0f / ( __builtin_popcountl( matrix[i].column[0] & ~column0 ) + __builtin_popcountl( matrix[i].column[1] & ~column1 ) ); if ( local_cost < best_cost ) { best = i; @@ -1259,12 +1263,13 @@ private: } } - column |= matrix[best].column; + column0 |= matrix[best].column[0]; + column1 |= matrix[best].column[1]; std::swap( matrix[iter], matrix[best] ); ++iter; } - if ( __builtin_popcountl( column ) == combinations ) + if ( __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) == combinations ) { for ( uint32_t i = 0; i < iter; ++i ) { @@ -1290,24 +1295,26 @@ private: best_cost += matrix[solution[i]].cost; } - uint64_t column; + uint64_t column0, column1; for ( uint32_t i = 0; i < num_elements; ++i ) { /* remove element i */ local_cost = 0; - column = 0; + column0 = 0; + column1 = 0; for ( uint32_t j = 0; j < num_elements; ++j ) { if ( j == i ) continue; local_cost += matrix[solution[j]].cost; - column |= matrix[solution[j]].column; + column0 |= matrix[solution[j]].column[0]; + column1 |= matrix[solution[j]].column[1]; } /* search for a better replecemnts */ for ( uint32_t j = 0; j < matrix.size(); ++j ) { - if ( __builtin_popcount( column | matrix[j].column ) != combinations ) + if ( __builtin_popcount( column0 | matrix[j].column[0] ) + __builtin_popcount( column1 | matrix[j].column[1] ) != combinations ) continue; if ( local_cost + matrix[j].cost < best_cost ) { From acdd08fd9bc6792831fa67a2df9e8e1cfaf15d98 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 21 Nov 2023 11:47:56 +0100 Subject: [PATCH 16/24] Performance improvements --- src/acd/ac_decomposition.hpp | 39 ++++++++++++++++++++++++++++-------- 1 file changed, 31 
insertions(+), 8 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 0a05552e2..3112e4c20 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -59,7 +59,7 @@ struct ac_decomposition_params uint32_t lut_size{ 6 }; /*! \brief Perform decomposition if support reducing. */ - uint32_t max_free_set_vars{ 4 }; + uint32_t max_free_set_vars{ 5 }; /*! \brief Maximum number of iterations for covering. */ uint32_t max_iter{ 5000 }; @@ -67,6 +67,9 @@ struct ac_decomposition_params /*! \brief Perform decomposition if support reducing. */ bool support_reducing_only{ true }; + /*! \brief Commits the first feasible decomposition. */ + bool exit_of_feasible_decomposition{ true }; + /*! \brief If decomposition with delay profile fails, ignore it. */ bool try_no_late_arrival{ false }; }; @@ -98,7 +101,7 @@ private: }; private: - static constexpr uint32_t max_num_vars = 10; + static constexpr uint32_t max_num_vars = 8; using STT = kitty::static_truth_table; public: @@ -149,8 +152,8 @@ public: [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity4<4u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } + [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } // slow, do not use }; for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) @@ -168,6 +171,11 @@ public: best_multiplicity = cost; best_cost = cost + additional_cost; free_set_size = i; + + if ( ps.exit_of_feasible_decomposition ) + { + break; + } } } @@ -198,6 +206,11 @@ public: best_multiplicity = cost; best_cost = cost + additional_cost; free_set_size = i; + + if ( ps.exit_of_feasible_decomposition ) + { + break; + } } } } @@ 
-339,9 +352,10 @@ private: uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - std::unordered_set multiplicity_set; + static_assert( free_set_size == 5 || free_set_size == 4 ); - static_assert( free_set_size <= 5 ); + uint32_t size = 0; + std::array multiplicity_set; /* extract iset functions */ auto it = std::begin( tt ); @@ -349,13 +363,22 @@ private: { for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - multiplicity_set.insert( *it & masks[free_set_size] ); + multiplicity_set[size++] = static_cast( *it & masks[free_set_size] ); *it >>= ( 1u << free_set_size ); } ++it; } - return static_cast( multiplicity_set.size() ); + std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); + + /* count unique */ + uint32_t multiplicity = 1; + for ( auto i = 1u; i < size; ++i ) + { + multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 1 : 0; + } + + return multiplicity; } inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t *pComb, uint32_t *pInvPerm, STT& tt ) From 43f4dccb4f1c6a8ad7d591d5bf511ab7e94bec92 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 23 Nov 2023 16:29:33 +0100 Subject: [PATCH 17/24] run time improvements in computing the column multiplicity --- src/acd/ac_decomposition.hpp | 41 ++++++++++-------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 3112e4c20..0b6101e53 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -101,7 +101,7 @@ private: }; private: - static constexpr uint32_t max_num_vars = 8; + static constexpr uint32_t max_num_vars = 10; using STT = kitty::static_truth_table; public: @@ -153,7 +153,7 @@ public: [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, [this]( STT const& tt ) { 
return column_multiplicity5<4u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } // slow, do not use + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) @@ -290,7 +290,7 @@ private: { uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; uint32_t multiplicity = 0; - uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; @@ -321,40 +321,16 @@ private: return multiplicity; } - template - uint32_t column_multiplicity4( STT tt ) - { - unsigned char multiplicity_set[1 << 16] = { 0 }; - uint32_t multiplicity = 0; - uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF }; - - static_assert( free_set_size <= 4 ); - - /* extract iset functions */ - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) - { - multiplicity += multiplicity_set[*it & masks[free_set_size]]++ == 0 ? 1 : 0; - *it >>= ( 1u << free_set_size ); - } - ++it; - } - - return multiplicity; - } - template uint32_t column_multiplicity5( STT tt ) { - uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; static_assert( free_set_size == 5 || free_set_size == 4 ); uint32_t size = 0; + uint64_t prev = -1; std::array multiplicity_set; /* extract iset functions */ @@ -363,7 +339,12 @@ private: { for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - multiplicity_set[size++] = static_cast( *it & masks[free_set_size] ); + uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); + if ( fs_fn != prev ) + { + multiplicity_set[size++] = fs_fn; + prev = fs_fn; + } *it >>= ( 1u << free_set_size ); } ++it; From 23cfcc1e1f5e7d4b5624a4c29d41e7479cc54ecc Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 24 Nov 2023 12:18:49 +0100 Subject: [PATCH 18/24] Improving efficiency and removing useless code --- src/acd/ac_decomposition.hpp | 521 ++++++++++++----------------------- src/acd/ac_wrapper.cpp | 27 +- src/acd/ac_wrapper.h | 7 +- 3 files changed, 175 insertions(+), 380 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 0b6101e53..d63d6685d 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -61,14 +61,11 @@ struct ac_decomposition_params /*! \brief Perform decomposition if support reducing. */ uint32_t max_free_set_vars{ 5 }; - /*! \brief Maximum number of iterations for covering. */ - uint32_t max_iter{ 5000 }; - /*! \brief Perform decomposition if support reducing. */ bool support_reducing_only{ true }; /*! \brief Commits the first feasible decomposition. */ - bool exit_of_feasible_decomposition{ true }; + bool exit_on_feasible_decomposition{ true }; /*! \brief If decomposition with delay profile fails, ignore it. 
*/ bool try_no_late_arrival{ false }; @@ -88,16 +85,15 @@ struct ac_decomposition_result std::vector support; }; -template class ac_decomposition_impl { private: - struct encoding_matrix + struct encoding_column { uint64_t column[2]; - uint32_t cost{ 0 }; - uint32_t index{ 0 }; - float sort_cost{ 0 }; + uint32_t cost; + uint32_t index; + float sort_cost; }; private: @@ -105,15 +101,14 @@ private: using STT = kitty::static_truth_table; public: - explicit ac_decomposition_impl( TT const& tt, uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr ) - : num_vars( num_vars ), ps( ps ), pst( pst ), permutations( num_vars ) + explicit ac_decomposition_impl( uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr ) + : num_vars( num_vars ), ps( ps ), pst( pst ) { - tt_start = tt; std::iota( permutations.begin(), permutations.end(), 0 ); } /*! \brief Runs ACD using late arriving variables */ - int run( unsigned delay_profile ) + int run( word *tt, unsigned delay_profile ) { /* truth table is too large for the settings */ if ( num_vars > max_num_vars ) @@ -130,97 +125,17 @@ public: } /* convert to static TT */ - best_tt = kitty::extend_to( tt_start ); - best_multiplicity = UINT32_MAX; - uint32_t best_cost = UINT32_MAX; + init_truth_table( tt ); /* permute late arriving variables to be the least significant */ reposition_late_arriving_variables( delay_profile, late_arriving ); /* run ACD trying different bound sets and free sets */ - uint32_t free_set_size = late_arriving; - uint32_t offset = static_cast( late_arriving ); - uint32_t start = std::max( offset, 1u ); - - /* perform only support reducing decomposition */ - if ( ps.support_reducing_only ) + if ( !find_decomposition( delay_profile, late_arriving ) ) { - start = std::max( start, num_vars - ps.lut_size ); - } - - std::function column_multiplicity_fn[5] = { - [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, - [this]( STT const& 
tt ) { return column_multiplicity<2u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } - }; - - for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) - { - /* TODO: add shared set */ - auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); - - /* additional cost if not support reducing */ - uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; - /* check for feasible solution that improves the cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost <= 16 ) - { - best_tt = tt_p; - permutations = perm; - best_multiplicity = cost; - best_cost = cost + additional_cost; - free_set_size = i; - - if ( ps.exit_of_feasible_decomposition ) - { - break; - } - } - } - - if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) return -1; - - /* try without the delay profile */ - if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) - { - delay_profile = 0; - if ( ps.support_reducing_only ) - { - start = std::max( 1u, num_vars - ps.lut_size ); - } - - for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) - { - /* TODO: add shared set */ - auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); - - /* additional cost if not support reducing */ - uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; - /* check for feasible solution that improves the cost */ - if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost && cost <= 16 ) - { - best_tt = tt_p; - permutations = perm; - best_multiplicity = cost; - best_cost = cost + additional_cost; - free_set_size = i; - - if ( ps.exit_of_feasible_decomposition ) - { - break; - } - } - } } - if ( best_multiplicity == UINT32_MAX ) - return -1; - - pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 : best_multiplicity <= 8 ? 4 : 5; - best_free_set = free_set_size; - /* return number of levels */ return delay_profile == 0 ? 2 : 1; } @@ -231,26 +146,18 @@ public: return -1; /* compute isets */ - std::vector isets = compute_isets( best_free_set ); + std::vector isets = compute_isets(); generate_support_minimization_encodings(); - /* always solves exactly for power of 2 */ - if ( __builtin_popcount( best_multiplicity ) == 1 && best_multiplicity <= 8 ) - solve_min_support_exact( isets, best_free_set ); + /* solves exactly only for small multiplicities */ + if ( best_multiplicity <= 4u ) + solve_min_support_exact( isets ); else - solve_min_support_heuristic( isets, best_free_set ); + solve_min_support_heuristic( isets ); /* unfeasible decomposition */ - if ( best_bound_sets.empty() ) - { - solve_min_support_exact( isets, best_free_set ); - - if ( best_bound_sets.empty() ) - { - return -1; - } - } + assert( !best_bound_sets.empty() ); return 0; } @@ -285,6 +192,110 @@ public: } private: + bool find_decomposition( unsigned& delay_profile, uint32_t late_arriving ) + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + uint32_t best_cost = UINT32_MAX; + uint32_t offset = static_cast( late_arriving ); + uint32_t start = std::max( offset, 1u ); + + /* perform only support reducing decomposition */ + if ( ps.support_reducing_only ) + { + start = std::max( start, num_vars - ps.lut_size ); + } + + /* array of functions to compute the column multiplicity */ + 
std::function column_multiplicity_fn[5] = { + [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } + }; + + /* find a feasible AC decomposition */ + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) + { + auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + + /* additional cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; + + /* check for feasible solution that improves the cost */ + if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = multiplicity; + best_cost = multiplicity + additional_cost; + best_free_set = i; + + if ( ps.exit_on_feasible_decomposition ) + { + break; + } + } + } + + if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) + return false; + + /* try without the delay profile */ + if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) + { + delay_profile = 0; + if ( ps.support_reducing_only ) + { + start = std::max( 1u, num_vars - ps.lut_size ); + } + + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) + { + auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + + /* additional cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; + + /* check for feasible solution that improves the cost */ + if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = multiplicity; + best_cost = multiplicity + additional_cost; + best_free_set = i; + + if ( ps.exit_on_feasible_decomposition ) + { + break; + } + } + } + } + + if ( best_multiplicity == UINT32_MAX ) + return false; + + /* estimation on number of LUTs */ + pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 : best_multiplicity <= 8 ? 4 : 5; + + return true; + } + + void init_truth_table( word *tt_start ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + best_tt._bits[i] = tt_start[i]; + } + + local_extend_to( best_tt, num_vars ); + } + template uint32_t column_multiplicity( STT tt ) { @@ -392,7 +403,7 @@ private: } template - std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) + std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) { STT tt = best_tt; @@ -434,7 +445,7 @@ private: } } while( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); - std::vector res_perm( num_vars ); + std::array res_perm; for ( uint32_t i = 0; i < num_vars; ++i ) { res_perm[i] = permutations[bestPerm[i]]; @@ -443,10 +454,10 @@ private: return std::make_tuple( best_tt, res_perm, best_cost ); } - std::vector compute_isets( uint32_t free_set_size, bool verbose = false ) + std::vector compute_isets( bool verbose = false ) { /* construct isets involved in multiplicity */ - uint32_t isets_support = num_vars - free_set_size; + uint32_t isets_support = num_vars - best_free_set; std::vector isets( best_multiplicity ); /* construct isets */ @@ -459,24 +470,24 @@ private: auto it = std::begin( tt ); for ( 
auto i = 0u; i < num_blocks; ++i ) { - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) { - uint64_t val = *it & masks[free_set_size]; + uint64_t val = *it & masks[best_free_set]; if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) { - isets[el->second]._bits[i / ( 1u << free_set_size )] |= UINT64_C( 1 ) << ( j + offset ); + isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); } else { - isets[column_to_iset.size()]._bits[i / ( 1u << free_set_size )] |= UINT64_C( 1 ) << ( j + offset ); + isets[column_to_iset.size()]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); column_to_iset[val] = column_to_iset.size(); } - *it >>= ( 1u << free_set_size ); + *it >>= ( 1u << best_free_set ); } - offset = ( offset + ( 64 >> free_set_size ) ) % 64; + offset = ( offset + ( 64 >> best_free_set ) ) % 64; ++it; } @@ -489,11 +500,10 @@ private: /* save free_set functions */ std::vector free_set_tts( best_multiplicity ); - /* TODO: possible conflict */ for ( auto const& pair : column_to_iset ) { free_set_tts[pair.second]._bits[0] = pair.first; - local_extend_to( free_set_tts[pair.second], free_set_size ); + local_extend_to( free_set_tts[pair.second], best_free_set ); } /* print isets and free set*/ @@ -666,12 +676,12 @@ private: void generate_support_minimization_encodings() { uint32_t count = 0; - uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 }; /* enable don't cares only if not a power of 2 */ uint32_t num_combs = 2; if ( __builtin_popcount( best_multiplicity ) == 1 ) { + uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 }; for ( uint32_t i = 0; i < 4; ++i ) { if ( ( best_multiplicity >> i ) == 2u ) @@ -684,6 +694,7 @@ private: } else { + /* combinations are 2*3^(mu - 1) */ for ( uint32_t i = 1; i < best_multiplicity; ++i ) { num_combs = ( num_combs << 1 ) + num_combs; @@ -704,14 +715,14 @@ private: } template - void 
generate_support_minimization_encodings_rec( uint64_t onset, uint64_t offset, uint32_t var, uint32_t& count ) + void generate_support_minimization_encodings_rec( uint32_t onset, uint32_t offset, uint32_t var, uint32_t& count ) { if ( var == best_multiplicity ) { if constexpr ( !enable_dcset ) { /* sets must be equally populated */ - if ( __builtin_popcountl( onset ) != __builtin_popcountl( offset ) ) + if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) ) { return; } @@ -723,7 +734,7 @@ private: return; } - /* move var in DCSET */ + /* var in DCSET */ if constexpr ( enable_dcset ) { generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); @@ -746,20 +757,20 @@ private: offset &= ~( 1 << var ); } - void solve_min_support_exact( std::vector const& isets, uint32_t free_set_size ) + void solve_min_support_exact( std::vector const& isets ) { - std::vector matrix; + std::vector matrix; matrix.reserve( support_minimization_encodings.size() ); best_bound_sets.clear(); /* create covering matrix */ - if ( !create_covering_matrix( isets, matrix, free_set_size, best_multiplicity > 4 ) ) + if ( !create_covering_matrix( isets, matrix, false ) ) { return; } /* solve the covering problem */ - std::array solution = covering_solve_exact( matrix, 100, ps.max_iter ); + std::array solution = covering_solve_exact( matrix ); /* check for failed decomposition */ if ( solution[0] == UINT32_MAX ) @@ -770,8 +781,8 @@ private: /* compute best bound sets */ uint32_t num_luts = 1 + solution[4]; uint32_t num_levels = 2; - uint32_t num_edges = free_set_size + solution[4]; - uint32_t isets_support = num_vars - free_set_size; + uint32_t num_edges = best_free_set + solution[4]; + uint32_t isets_support = num_vars - best_free_set; best_care_sets.clear(); best_iset_onset.clear(); best_iset_offset.clear(); @@ -811,14 +822,14 @@ private: } } - void solve_min_support_heuristic( std::vector const& isets, uint32_t free_set_size ) + void solve_min_support_heuristic( 
std::vector const& isets ) { - std::vector matrix; + std::vector matrix; matrix.reserve( support_minimization_encodings.size() ); best_bound_sets.clear(); /* create covering matrix */ - if ( !create_covering_matrix( isets, matrix, free_set_size, true ) ) + if ( !create_covering_matrix( isets, matrix, true ) ) { return; } @@ -839,8 +850,8 @@ private: /* compute best bound sets */ uint32_t num_luts = 1 + solution[4]; uint32_t num_levels = 2; - uint32_t num_edges = free_set_size + solution[4]; - uint32_t isets_support = num_vars - free_set_size; + uint32_t num_edges = best_free_set + solution[4]; + uint32_t isets_support = num_vars - best_free_set; best_care_sets.clear(); best_iset_onset.clear(); best_iset_offset.clear(); @@ -880,12 +891,12 @@ private: } } - template - bool create_covering_matrix( std::vector const& isets, std::vector& matrix, uint32_t free_set_size, bool sort ) + template + bool create_covering_matrix( std::vector const& isets, std::vector& matrix, bool sort ) { assert( best_multiplicity <= 16 ); uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; - uint32_t iset_support = num_vars - free_set_size; + uint32_t iset_support = num_vars - best_free_set; /* insert dichotomies */ for ( uint32_t i = 0; i < support_minimization_encodings.size(); ++i ) @@ -924,7 +935,7 @@ private: /* compute included seed dichotomies */ for ( uint32_t k = j + 1; k < best_multiplicity; ++k ) { - /* if is are in diffent sets */ + /* if are in diffent sets */ if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) ) { column[pair_pointer >> 6u] |= UINT64_C( 1 ) << ( pair_pointer & 0x3F ); @@ -947,11 +958,6 @@ private: if ( cost > ps.lut_size ) continue; - if ( cost > 1 ) - { - cost |= 1 << iset_support; - } - float sort_cost = 0; if constexpr ( UseHeuristic ) { @@ -963,7 +969,7 @@ private: } /* insert */ - matrix.emplace_back( encoding_matrix{ { column[0], column[1] }, cost, i, sort_cost } ); + matrix.emplace_back( 
encoding_column{ { column[0], column[1] }, cost, i, sort_cost } ); } if ( !sort ) @@ -976,7 +982,6 @@ private: std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { return a.cost < b.cost; } ); - return true; } else { @@ -988,16 +993,14 @@ private: return true; } - template - std::array covering_solve_exact( std::vector& matrix, uint32_t max_iter = 100, int32_t limit = 2000 ) + std::array covering_solve_exact( std::vector& matrix ) { /* last value of res contains the size of the bound set */ std::array res = { UINT32_MAX }; uint32_t best_cost = UINT32_MAX; uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; - bool looping = true; - assert( best_multiplicity <= 16 ); + assert( best_multiplicity <= 4 ); /* determine the number of needed loops*/ if ( best_multiplicity <= 2 ) @@ -1026,207 +1029,11 @@ private: } } } - else if ( best_multiplicity <= 8 ) - { - res[4] = 3; - for ( uint32_t i = 0; i < matrix.size() - 2 && looping; ++i ) - { - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - for ( uint32_t j = 1; j < matrix.size() - 1 && looping; ++j ) - { - uint64_t current_columns0 = matrix[i].column[0] | matrix[j].column[0]; - uint64_t current_columns1 = matrix[i].column[1] | matrix[j].column[1]; - uint32_t current_cost = matrix[i].cost + matrix[j].cost; - - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - /* bound */ - if ( current_cost >= best_cost ) - { - continue; - } - - for ( uint32_t k = 2; k < matrix.size() && looping; ++k ) - { - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit-- <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX 
&& max_iter-- == 0 ) - { - looping = false; - } - } - - /* filter by cost */ - if ( current_cost + matrix[k].cost >= best_cost ) - continue; - - /* check validity */ - if ( __builtin_popcountl( current_columns0 | matrix[k].column[0] ) + __builtin_popcountl( current_columns1 | matrix[k].column[1] ) == combinations ) - { - res[0] = i; - res[1] = j; - res[2] = k; - best_cost = current_cost + matrix[k].cost; - } - } - } - } - } - else - { - res[4] = 4; - for ( uint32_t i = 0; i < matrix.size() - 3 && looping; ++i ) - { - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - for ( uint32_t j = 1; j < matrix.size() - 2 && looping; ++j ) - { - uint64_t current_columns0 = matrix[i].column[0] | matrix[j].column[0]; - uint64_t current_columns1 = matrix[i].column[1] | matrix[j].column[1]; - uint32_t current_cost0 = matrix[i].cost + matrix[j].cost; - - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - /* bound */ - if ( current_cost0 >= best_cost ) - { - continue; - } - - for ( uint32_t k = 2; k < matrix.size() - 1 && looping; ++k ) - { - uint64_t current_columns00 = current_columns0 | matrix[k].column[0]; - uint64_t current_columns11 = current_columns1 | matrix[k].column[1]; - uint32_t current_cost1 = current_cost0 + matrix[k].cost; - - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit <= 0 ) - { - looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - /* bound */ - if ( current_cost1 >= best_cost ) - { - continue; - } - - for ( uint32_t t = 3; t < matrix.size() && looping; ++t ) - { - /* limit */ - if constexpr ( limit_iter ) - { - if ( limit-- <= 0 ) - { - 
looping = false; - } - } - if constexpr ( limit_sol ) - { - if ( best_cost-- < UINT32_MAX && max_iter == 0 ) - { - looping = false; - } - } - - /* filter by cost */ - if ( current_cost1 + matrix[t].cost >= best_cost ) - continue; - - /* check validity */ - if ( __builtin_popcountl( current_columns00 | matrix[t].column[0] ) + __builtin_popcountl( current_columns11 | matrix[t].column[1] ) == combinations ) - { - res[0] = i; - res[1] = j; - res[2] = k; - res[3] = t; - best_cost = current_cost1 + matrix[t].cost; - } - } - } - } - } - } return res; } - std::array covering_solve_heuristic( std::vector& matrix ) + std::array covering_solve_heuristic( std::vector& matrix ) { /* last value of res contains the size of the bound set */ std::array res = { UINT32_MAX }; @@ -1285,7 +1092,7 @@ private: return res; } - bool covering_improve( std::vector& matrix, std::array& solution ) + bool covering_improve( std::vector& matrix, std::array& solution ) { /* performs one iteration of local search */ uint32_t best_cost = 0, local_cost = 0; @@ -1431,6 +1238,19 @@ private: return false; } + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. 
+ * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ void get_decomposition_abc( unsigned char *decompArray ) { unsigned char *pArray = decompArray; @@ -1485,11 +1305,10 @@ private: std::vector> support_minimization_encodings; - TT tt_start; uint32_t num_vars; ac_decomposition_params const& ps; ac_decomposition_stats* pst; - std::vector permutations; + std::array permutations; }; } // namespace mockturtle diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index 6bb41ca37..99e5747f2 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -1,27 +1,17 @@ -// #include "base/main/main.h" #include "ac_wrapper.h" #include "ac_decomposition.hpp" -// ABC_NAMESPACE_IMPL_START - int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace mockturtle; - int num_blocks = ( nVars <= 6 ) ? 1 : ( 1 << ( nVars - 6 ) ); - - /* translate truth table into static table */ - kitty::dynamic_truth_table tt( nVars ); - for ( int i = 0; i < num_blocks; ++i ) - tt._bits[i] = pTruth[i]; - ac_decomposition_params ps; ps.lut_size = lutSize; ps.try_no_late_arrival = static_cast( try_no_late_arrival ); ac_decomposition_stats st; - ac_decomposition_impl acd( tt, nVars, ps, &st ); - int val = acd.run( *pdelay ); + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); if ( val < 0 ) { @@ -39,19 +29,12 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, { using namespace mockturtle; - int num_blocks = ( nVars <= 6 ) ? 
1 : ( 1 << ( nVars - 6 ) ); - - /* translate truth table into static table */ - kitty::dynamic_truth_table tt( nVars ); - for ( int i = 0; i < num_blocks; ++i ) - tt._bits[i] = pTruth[i]; - ac_decomposition_params ps; ps.lut_size = lutSize; ac_decomposition_stats st; - ac_decomposition_impl acd( tt, nVars, ps, &st ); - acd.run( *pdelay ); + ac_decomposition_impl acd( nVars, ps, &st ); + acd.run( pTruth, *pdelay ); int val = acd.compute_decomposition(); if ( val < 0 ) @@ -65,5 +48,3 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, acd.get_decomposition( decomposition ); return 0; } - -// ABC_NAMESPACE_IMPL_END \ No newline at end of file diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h index 5e0af3787..c41b4aec7 100644 --- a/src/acd/ac_wrapper.h +++ b/src/acd/ac_wrapper.h @@ -1,13 +1,10 @@ -// #pragma once +#pragma once #ifndef __ACD_WRAPPER_H_ #define __ACD_WRAPPER_H_ -// #include "base/main/main.h" #include "misc/util/abc_global.h" #include "map/if/if.h" -// ABC_NAMESPACE_HEADER_START - #ifdef __cplusplus extern "C" { #endif @@ -19,6 +16,4 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, } #endif -// ABC_NAMESPACE_HEADER_END - #endif \ No newline at end of file From 6097fd43495b62bec793fa8ca4fbc429bc9130ae Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 24 Nov 2023 14:24:20 +0100 Subject: [PATCH 19/24] Code formatting --- src/acd/ac_decomposition.hpp | 170 +++++++++++++++++------------------ src/acd/ac_wrapper.cpp | 2 +- 2 files changed, 82 insertions(+), 90 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index d63d6685d..829ea5855 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -44,10 +44,10 @@ #include "kitty_constants.hpp" #include "kitty_constructors.hpp" -#include "kitty_static_tt.hpp" #include "kitty_dynamic_tt.hpp" #include "kitty_operations.hpp" #include "kitty_operators.hpp" +#include 
"kitty_static_tt.hpp" namespace mockturtle { @@ -55,19 +55,19 @@ namespace mockturtle /*! \brief Parameters for ac_decomposition */ struct ac_decomposition_params { - /*! \brief LUT size for decomposition. */ + /*! \brief LUT size for decomposition (3 < num < 7). */ uint32_t lut_size{ 6 }; - /*! \brief Perform decomposition if support reducing. */ - uint32_t max_free_set_vars{ 5 }; + /*! \brief Maximum size of the free set (1 < num < 6). */ + uint32_t max_free_set_vars{ 4 }; - /*! \brief Perform decomposition if support reducing. */ + /*! \brief Perform only support reducing (2-level) decompositions. */ bool support_reducing_only{ true }; - /*! \brief Commits the first feasible decomposition. */ - bool exit_on_feasible_decomposition{ true }; + /*! \brief Use the first feasible decomposition found. */ + bool use_first{ true }; - /*! \brief If decomposition with delay profile fails, ignore it. */ + /*! \brief If decomposition with delay profile fails, try without. */ bool try_no_late_arrival{ false }; }; @@ -108,7 +108,7 @@ public: } /*! 
\brief Runs ACD using late arriving variables */ - int run( word *tt, unsigned delay_profile ) + int run( word* ptt, unsigned delay_profile ) { /* truth table is too large for the settings */ if ( num_vars > max_num_vars ) @@ -125,7 +125,7 @@ public: } /* convert to static TT */ - init_truth_table( tt ); + init_truth_table( ptt ); /* permute late arriving variables to be the least significant */ reposition_late_arriving_variables( delay_profile, late_arriving ); @@ -142,7 +142,7 @@ public: int compute_decomposition() { - if ( best_multiplicity == UINT32_MAX ) + if ( best_multiplicity == UINT32_MAX ) return -1; /* compute isets */ @@ -168,7 +168,7 @@ public: if ( best_free_set > num_vars ) return -1; - + for ( uint32_t i = 0; i < best_free_set; ++i ) { profile |= 1 << permutations[i]; @@ -177,17 +177,12 @@ public: return profile; } - std::vector get_result() - { - return dec_result; - } - - void get_decomposition( unsigned char *decompArray ) + void get_decomposition( unsigned char* decompArray ) { if ( best_free_set > num_vars ) return; - dec_result = generate_decomposition( best_free_set ); + generate_decomposition(); return get_decomposition_abc( decompArray ); } @@ -208,12 +203,11 @@ private: /* array of functions to compute the column multiplicity */ std::function column_multiplicity_fn[5] = { - [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } - }; + [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, + [this]( STT const& tt ) { return 
column_multiplicity5<5u>( tt ); } }; /* find a feasible AC decomposition */ for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) @@ -232,7 +226,7 @@ private: best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.exit_on_feasible_decomposition ) + if ( ps.use_first ) { break; } @@ -267,7 +261,7 @@ private: best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.exit_on_feasible_decomposition ) + if ( ps.use_first ) { break; } @@ -279,18 +273,20 @@ private: return false; /* estimation on number of LUTs */ - pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 : best_multiplicity <= 8 ? 4 : 5; + pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 + : best_multiplicity <= 8 ? 4 + : 5; return true; } - void init_truth_table( word *tt_start ) + void init_truth_table( word* ptt ) { uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); for ( uint32_t i = 0; i < num_blocks; ++i ) { - best_tt._bits[i] = tt_start[i]; + best_tt._bits[i] = ptt[i]; } local_extend_to( best_tt, num_vars ); @@ -362,7 +358,7 @@ private: } std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); - + /* count unique */ uint32_t multiplicity = 1; for ( auto i = 1u; i < size; ++i ) @@ -373,7 +369,7 @@ private: return multiplicity; } - inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t *pComb, uint32_t *pInvPerm, STT& tt ) + inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) { uint32_t i; @@ -443,7 +439,7 @@ private: bestPerm[i] = pComb[i]; } } - } while( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); std::array res_perm; for ( uint32_t i = 0; i < num_vars; ++i ) @@ -525,10 +521,11 @@ private: return isets; } - std::vector generate_decomposition( uint32_t 
free_set_size ) + void generate_decomposition() { - std::vector res; + dec_result.clear(); + uint32_t num_edges = 0; for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) { ac_decomposition_result dec; @@ -537,7 +534,7 @@ private: /* compute and minimize support for bound set variables */ uint32_t k = 0; - for ( uint32_t j = 0; j < num_vars - free_set_size; ++j ) + for ( uint32_t j = 0; j < num_vars - best_free_set; ++j ) { if ( !kitty::has_var( tt, j ) ) continue; @@ -554,59 +551,64 @@ private: kitty::swap_inplace( tt, k, j ); kitty::swap_inplace( care, k, j ); } - dec.support.push_back( permutations[free_set_size + j] ); + dec.support.push_back( permutations[best_free_set + j] ); ++k; } dec.tt = kitty::shrink_to( tt, dec.support.size() ); - res.push_back( dec ); + dec_result.push_back( dec ); + num_edges += dec.support.size() > 1 ? dec.support.size() : 0; } /* compute the decomposition for the top-level LUT */ - compute_top_lut_decomposition( res, free_set_size ); + compute_top_lut_decomposition(); - return res; + if ( pst ) + { + pst->num_luts = dec_result.size(); + pst->num_edges = num_edges + dec_result.back().support.size(); + } } - void compute_top_lut_decomposition( std::vector& res, uint32_t free_set_size ) + void compute_top_lut_decomposition() { - uint32_t top_vars = best_bound_sets.size() + free_set_size; + uint32_t top_vars = best_bound_sets.size() + best_free_set; assert( top_vars <= ps.lut_size ); /* extend bound set functions with free_set_size LSB vars */ kitty::dynamic_truth_table tt( top_vars ); /* compute support */ - res.emplace_back(); - for ( uint32_t i = 0; i < free_set_size; ++i ) + dec_result.emplace_back(); + for ( uint32_t i = 0; i < best_free_set; ++i ) { - res.back().support.push_back( permutations[i] ); + dec_result.back().support.push_back( permutations[i] ); } /* create functions for bound set */ std::vector bound_set_vars; - auto res_it = res.begin(); + auto res_it = dec_result.begin(); uint32_t offset = 0; for ( uint32_t i = 0; 
i < best_bound_sets.size(); ++i ) { bound_set_vars.emplace_back( top_vars ); - kitty::create_nth_var( bound_set_vars[i], free_set_size + i ); + kitty::create_nth_var( bound_set_vars[i], best_free_set + i ); - /* add bound-set variables to the support, remove buffers */ + /* add bound-set variables to the support, remove buffers (shared set) */ if ( res_it->support.size() == 1 ) { - res.back().support.push_back( res_it->support.front() ); + dec_result.back().support.push_back( res_it->support.front() ); /* it is a NOT */ if ( ( res_it->tt._bits[0] & 1 ) == 1 ) { bound_set_vars[i] = ~bound_set_vars[i]; } - res.erase( res_it ); + dec_result.erase( res_it ); ++offset; } else { - res.back().support.push_back( num_vars + i - offset ); + dec_result.back().support.push_back( num_vars + i - offset ); ++res_it; } } @@ -634,7 +636,7 @@ private: } /* add top-level LUT to result */ - res.back().tt = tt; + dec_result.back().tt = tt; } inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) @@ -704,14 +706,6 @@ private: } assert( count == num_combs ); - - /* print combinations */ - // std::cout << "{ "; - // for ( auto const& entry : support_minimization_encodings ) - // { - // std::cout << "{ " << entry[0] << ", " << entry[1] << " }, "; - // } - // std::cout << "}\n"; } template @@ -770,7 +764,7 @@ private: } /* solve the covering problem */ - std::array solution = covering_solve_exact( matrix ); + std::array solution = covering_solve_exact( matrix ); /* check for failed decomposition */ if ( solution[0] == UINT32_MAX ) @@ -779,14 +773,14 @@ private: } /* compute best bound sets */ - uint32_t num_luts = 1 + solution[4]; + uint32_t num_luts = 1 + solution[5]; uint32_t num_levels = 2; - uint32_t num_edges = best_free_set + solution[4]; + uint32_t num_edges = best_free_set + solution[5]; uint32_t isets_support = num_vars - best_free_set; best_care_sets.clear(); best_iset_onset.clear(); best_iset_offset.clear(); - for ( uint32_t i = 0; i < 
solution[4]; ++i ) + for ( uint32_t i = 0; i < solution[5]; ++i ) { STT tt; STT care; @@ -835,7 +829,7 @@ private: } /* solve the covering problem: heuristic pass + local search */ - std::array solution = covering_solve_heuristic( matrix ); + std::array solution = covering_solve_heuristic( matrix ); /* check for failed decomposition */ if ( solution[0] == UINT32_MAX ) @@ -848,14 +842,14 @@ private: ; /* compute best bound sets */ - uint32_t num_luts = 1 + solution[4]; + uint32_t num_luts = 1 + solution[5]; uint32_t num_levels = 2; - uint32_t num_edges = best_free_set + solution[4]; + uint32_t num_edges = best_free_set + solution[5]; uint32_t isets_support = num_vars - best_free_set; best_care_sets.clear(); best_iset_onset.clear(); best_iset_offset.clear(); - for ( uint32_t i = 0; i < solution[4]; ++i ) + for ( uint32_t i = 0; i < solution[5]; ++i ) { STT tt; STT care; @@ -993,10 +987,10 @@ private: return true; } - std::array covering_solve_exact( std::vector& matrix ) + std::array covering_solve_exact( std::vector& matrix ) { /* last value of res contains the size of the bound set */ - std::array res = { UINT32_MAX }; + std::array res = { UINT32_MAX }; uint32_t best_cost = UINT32_MAX; uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; @@ -1005,12 +999,12 @@ private: /* determine the number of needed loops*/ if ( best_multiplicity <= 2 ) { - res[4] = 1; + res[5] = 1; res[0] = 0; } else if ( best_multiplicity <= 4 ) { - res[4] = 2; + res[5] = 2; for ( uint32_t i = 0; i < matrix.size() - 1; ++i ) { for ( uint32_t j = 1; j < matrix.size(); ++j ) @@ -1033,10 +1027,10 @@ private: return res; } - std::array covering_solve_heuristic( std::vector& matrix ) + std::array covering_solve_heuristic( std::vector& matrix ) { /* last value of res contains the size of the bound set */ - std::array res = { UINT32_MAX }; + std::array res = { UINT32_MAX }; uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; uint64_t column0 = 0, 
column1 = 0; @@ -1086,17 +1080,17 @@ private: { res[i] = i; } - res[4] = iter; + res[5] = iter; } return res; } - bool covering_improve( std::vector& matrix, std::array& solution ) + bool covering_improve( std::vector& matrix, std::array& solution ) { /* performs one iteration of local search */ uint32_t best_cost = 0, local_cost = 0; - uint32_t num_elements = solution[4]; + uint32_t num_elements = solution[5]; uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; bool improved = false; @@ -1211,8 +1205,7 @@ private: auto it_care = std::begin( care._bits ); while ( it_tt != std::begin( tt._bits ) + num_blocks ) { - if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] - & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) { return true; } @@ -1251,28 +1244,27 @@ private: * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented */ - void get_decomposition_abc( unsigned char *decompArray ) + void get_decomposition_abc( unsigned char* decompArray ) { - unsigned char *pArray = decompArray; + unsigned char* pArray = decompArray; unsigned char bytes = 2; /* write number of LUTs */ pArray++; - *pArray = dec_result.size(); - pArray++; + *pArray++ = dec_result.size(); /* write LUTs */ for ( ac_decomposition_result const& lut : dec_result ) { /* write fanin size*/ - *pArray = lut.support.size(); - pArray++; ++bytes; + *pArray++ = lut.support.size(); + ++bytes; /* write support */ for ( uint32_t i : lut.support ) { - *pArray = (unsigned char) i; - pArray++; ++bytes; + *pArray++ = (unsigned char)i; + ++bytes; } /* write truth table */ @@ -1282,8 +1274,8 @@ 
private: { for ( uint32_t j = 0; j < tt_num_bytes; ++j ) { - *pArray = (unsigned char) ( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF ); - pArray++; ++bytes; + *pArray++ = (unsigned char)( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF ); + ++bytes; } } } diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index 99e5747f2..27259b67a 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -7,7 +7,7 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, ac_decomposition_params ps; ps.lut_size = lutSize; - ps.try_no_late_arrival = static_cast( try_no_late_arrival ); + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ ac_decomposition_stats st; ac_decomposition_impl acd( nVars, ps, &st ); From b3d2419d9a0175bfbfb7478a7131bd706a16126a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 27 Nov 2023 13:38:36 +0100 Subject: [PATCH 20/24] Formatting, renaming, and cleaning code --- src/acd/ac_decomposition.hpp | 49 +++++++++++++++--------------------- src/acd/ac_wrapper.cpp | 22 ++++++++++++++-- src/acd/ac_wrapper.h | 18 +++++++++++++ src/base/abci/abc.c | 45 +++++++++++++++++++++++++++------ src/base/abci/abcIf.c | 16 ++++++------ src/map/if/if.h | 12 ++++----- src/map/if/ifCore.c | 28 +-------------------- src/map/if/ifCut.c | 2 +- src/map/if/ifDelay.c | 20 +++++++-------- src/map/if/ifMap.c | 14 +++++------ src/map/if/ifTime.c | 4 +-- 11 files changed, 131 insertions(+), 99 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 829ea5855..2b3161476 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -1,28 +1,20 @@ -/* mockturtle: C++ logic network library - * Copyright (C) 2018-2023 EPFL - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the 
rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/**C++File************************************************************** + FileName [ac_decomposition.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] + +***********************************************************************/ /*! \file ac_decomposition.hpp \brief Ashenhurst-Curtis decomposition @@ -39,7 +31,6 @@ #include #include #include -#include #include #include "kitty_constants.hpp" @@ -49,7 +40,7 @@ #include "kitty_operators.hpp" #include "kitty_static_tt.hpp" -namespace mockturtle +namespace acd { /*! 
\brief Parameters for ac_decomposition */ @@ -346,10 +337,10 @@ private: { for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); + uint64_t fs_fn = *it & masks[free_set_size]; if ( fs_fn != prev ) { - multiplicity_set[size++] = fs_fn; + multiplicity_set[size++] = static_cast( fs_fn ); prev = fs_fn; } *it >>= ( 1u << free_set_size ); @@ -1303,6 +1294,6 @@ private: std::array permutations; }; -} // namespace mockturtle +} // namespace acd #endif // _ACD_H_ \ No newline at end of file diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp index 27259b67a..baeee2fd6 100644 --- a/src/acd/ac_wrapper.cpp +++ b/src/acd/ac_wrapper.cpp @@ -1,9 +1,27 @@ +/**C++File************************************************************** + + FileName [ac_wrapper.cpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] 
+ +***********************************************************************/ + #include "ac_wrapper.h" #include "ac_decomposition.hpp" int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { - using namespace mockturtle; + using namespace acd; ac_decomposition_params ps; ps.lut_size = lutSize; @@ -27,7 +45,7 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) { - using namespace mockturtle; + using namespace acd; ac_decomposition_params ps; ps.lut_size = lutSize; diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h index c41b4aec7..ce39949fb 100644 --- a/src/acd/ac_wrapper.h +++ b/src/acd/ac_wrapper.h @@ -1,3 +1,21 @@ +/**C++File************************************************************** + + FileName [ac_wrapper.h] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] 
+ +***********************************************************************/ + #pragma once #ifndef __ACD_WRAPPER_H_ #define __ACD_WRAPPER_H_ diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index 33b85e0bf..89785887d 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyuojiktnczvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) { switch ( c ) { @@ -19563,6 +19563,17 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) if ( pPars->nAndDelay < 0 ) goto usage; break; + case 'Z': + if ( globalUtilOptind >= argc ) + { + Abc_Print( -1, "Command line switch \"-Z\" should be followed by a positive integer 3, 4, 5, or 6.\n" ); + goto usage; + } + pPars->nLutDecSize = atoi(argv[globalUtilOptind]); + globalUtilOptind++; + if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 ) + goto usage; + break; case 'D': if ( globalUtilOptind >= argc ) { @@ -19652,6 +19663,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'y': pPars->fUserRecLib ^= 1; break; + case 'z': + pPars->fUserLutDec ^= 1; + break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -19676,9 +19690,6 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'c': pPars->fUseTtPerm ^= 1; break; - case 'z': - pPars->fAcd ^= 1; - break; case 'v': pPars->fVerbose ^= 1; break; @@ -19794,6 +19805,25 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->fCutMin = 1; } + if ( pPars->fUserLutDec ) + { + if ( pPars->nLutDecSize == 0 ) + { + Abc_Print( -1, "LUT decomposition size (%d) must be set.\n", pPars->nLutDecSize ); + return 1; + } + if ( pPars->nLutDecSize >= pPars->nLutSize ) + { + Abc_Print( -1, "LUT size 
(%d) must be greater than the LUT decomposition size (%d).\n", pPars->nLutSize, pPars->nLutDecSize ); + return 1; + } + if ( pPars->nLutSize < 4 || pPars->nLutSize > 10 ) + { + Abc_Print( -1, "This feature only works for [4;10]-LUTs.\n" ); + return 1; + } + } + // enable truth table computation if cut minimization is selected if ( pPars->fCutMin ) { @@ -19810,7 +19840,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->pLutLib = NULL; } // modify for delay optimization - if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fAcd ) + if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fUserLutDec ) { pPars->fTruth = 1; pPars->fCutMin = 1; @@ -19956,7 +19986,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -19968,6 +19998,7 @@ usage: Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" ); Abc_Print( -2, "\t-X num : delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay ); Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea ); + Abc_Print( -2, "\t-Z num : the number of LUT inputs for LUT decomposition [default = %d]\n", pPars->nLutDecSize ); Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", Buffer ); Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); Abc_Print( -2, "\t-W float : sets 
wire delay between adjects LUTs [default = %f]\n", pPars->WireDelay ); @@ -19985,6 +20016,7 @@ usage: Abc_Print( -2, "\t-g : toggles delay optimization by SOP balancing [default = %s]\n", pPars->fDelayOpt? "yes": "no" ); Abc_Print( -2, "\t-x : toggles delay optimization by DSD balancing [default = %s]\n", pPars->fDsdBalance? "yes": "no" ); Abc_Print( -2, "\t-y : toggles delay optimization with recorded library [default = %s]\n", pPars->fUserRecLib? "yes": "no" ); + Abc_Print( -2, "\t-z : toggles delay optimization with LUT decomposition [default = %s]\n", pPars->fUserLutDec? "yes": "no" ); Abc_Print( -2, "\t-u : toggles delay optimization with SAT-based library [default = %s]\n", pPars->fUserSesLib? "yes": "no" ); Abc_Print( -2, "\t-o : toggles using buffers to decouple combinational outputs [default = %s]\n", pPars->fUseBuffs? "yes": "no" ); Abc_Print( -2, "\t-j : toggles enabling additional check [default = %s]\n", pPars->fEnableCheck07? "yes": "no" ); @@ -19993,7 +20025,6 @@ usage: Abc_Print( -2, "\t-t : toggles optimizing average rather than maximum level [default = %s]\n", pPars->fDoAverage? "yes": "no" ); Abc_Print( -2, "\t-n : toggles computing DSDs of the cut functions [default = %s]\n", pPars->fUseDsd? "yes": "no" ); Abc_Print( -2, "\t-c : toggles computing truth tables in a new way [default = %s]\n", pPars->fUseTtPerm? "yes": "no" ); - Abc_Print( -2, "\t-z : toggles using ACD decomposition [default = %s]\n", pPars->fAcd? "yes": "no" ); Abc_Print( -2, "\t-v : toggles verbose output [default = %s]\n", pPars->fVerbose? 
"yes": "no" ); Abc_Print( -2, "\t-h : prints the command usage\n"); return 1; diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index b7d796273..357d7d83f 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -116,7 +116,7 @@ Abc_Ntk_t * Abc_NtkIf( Abc_Ntk_t * pNtk, If_Par_t * pPars ) pPars->pTimesReq = Abc_NtkGetCoRequiredFloats(pNtk); // update timing info to reflect logic level - if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fAcd) && pNtk->pManTime ) + if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec) && pNtk->pManTime ) { int c; if ( pNtk->AndGateDelay == 0.0 ) @@ -443,7 +443,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC int v; If_Obj_t * pIfLeaf; - if ( pCutBest->nLeaves <= 6 ) + if ( pCutBest->nLeaves <= pIfMan->pPars->nLutDecSize ) { /* add fanins */ If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, v ) @@ -456,11 +456,11 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC } // get the delay profile - unsigned delayProfile = pCutBest->acdDelay; + unsigned delayProfile = pCutBest->decDelay; // perform LUT-decomposition and return the LUT-structure unsigned char decompArray[92]; - int val = acd_decompose( pTruth, pCutBest->nLeaves, 6, &(delayProfile), decompArray ); + int val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray ); assert( val == 0 ); // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t @@ -474,7 +474,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC Abc_Obj_t *pNewNodes[5]; /* create intermediate LUTs*/ - assert( decompArray[1] - 1 <= 5 ); + assert( decompArray[1] <= 6 ); Abc_Obj_t * pFanin; for ( i = 0; i < decompArray[1]; ++i ) { @@ -577,14 +577,14 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t pNodeNew = 
Abc_NtkCreateNode( pNtkNew ); // if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays ) if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm && - !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fAcd && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) + !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) If_CutRotatePins( pIfMan, pCutBest ); if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv ) { If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i ) Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) ); } - else if ( pIfMan->pPars->fAcd ) + else if ( pIfMan->pPars->fUserLutDec ) { If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover); @@ -642,7 +642,7 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t extern Hop_Obj_t * Abc_RecToHop3( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj ); pNodeNew->pData = Abc_RecToHop3( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj ); } - else if ( pIfMan->pPars->fAcd ) + else if ( pIfMan->pPars->fUserLutDec ) { extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop ); Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew ); diff --git a/src/map/if/if.h b/src/map/if/if.h index c3ba59be4..cc4d2926b 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -113,6 +113,7 @@ struct If_Par_t_ int nStructType; // type of the structure int nAndDelay; // delay of AND-gate in LUT library units int nAndArea; // area of AND-gate in LUT library units + int nLutDecSize; // the LUT size for decomposition int fPreprocess; // preprossing int fArea; // area-oriented mapping int 
fFancy; // a fancy feature @@ -146,7 +147,7 @@ struct If_Par_t_ int fDeriveLuts; // enables deriving LUT structures int fDoAverage; // optimize average rather than maximum level int fHashMapping; // perform AIG hashing after mapping - int fAcd; // perform AIG hashing after mapping + int fUserLutDec; // perform delay optimization with LUT decomposition int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure @@ -281,7 +282,6 @@ struct If_Man_t_ int pDumpIns[16]; Vec_Str_t * vMarks; Vec_Int_t * vVisited2; - int useLimitAdc; // timing manager Tim_Man_t * pManTim; @@ -305,7 +305,6 @@ struct If_Cut_t_ int iCutFunc; // TT ID of the cut int uMaskFunc; // polarity bitmask unsigned uSign; // cut signature - unsigned acdDelay; // Computed pin delay during ACD unsigned Cost : 12; // the user's cost of the cut (related to IF_COST_MAX) unsigned fCompl : 1; // the complemented attribute unsigned fUser : 1; // using the user's area and delay @@ -313,6 +312,7 @@ struct If_Cut_t_ unsigned fAndCut : 1; // matched with AND gate unsigned nLimit : 8; // the maximum number of leaves unsigned nLeaves : 8; // the number of leaves + unsigned decDelay: 16; // pin-to-pin decomposition delay int pLeaves[0]; }; @@ -570,9 +570,9 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); -extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); -extern int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ); -extern float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ); +extern int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); +extern int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ); +extern float
If_LutDecPinRequired( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ extern If_DsdMan_t * If_DsdManAlloc( int nVars, int nLutSize ); extern void If_DsdManAllocIsops( If_DsdMan_t * p, int nLutSize ); diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index ad3c85179..f7fcbca66 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -62,7 +62,7 @@ void If_ManSetDefaultPars( If_Par_t * pPars ) pPars->fPower = 0; pPars->fCutMin = 0; pPars->fBidec = 0; - pPars->fAcd = 0; + pPars->fUserLutDec = 0; pPars->fVerbose = 0; } @@ -107,16 +107,10 @@ int If_ManPerformMappingComb( If_Man_t * p ) If_Obj_t * pObj; abctime clkTotal = Abc_Clock(); int i; - p->useLimitAdc = 1; //p->vVisited2 = Vec_IntAlloc( 100 ); //p->vMarks = Vec_StrStart( If_ManObjNum(p) ); - // if ( p->pPars->fAcd ) - // { - // p->pPars->nLutSize = 6; - // } - // set arrival times and fanout estimates If_ManForEachCi( p, pObj, i ) { @@ -128,11 +122,7 @@ int If_ManPerformMappingComb( If_Man_t * p ) if ( p->pPars->fPreprocess && !p->pPars->fArea ) { // map for delay - if ( p->pPars->fAcd ) - p->useLimitAdc = 0; If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); - if ( p->pPars->fAcd ) - p->useLimitAdc = 1; // map for delay second option p->pPars->fFancy = 1; @@ -155,33 +145,17 @@ int If_ManPerformMappingComb( If_Man_t * p ) // area flow oriented mapping for ( i = 0; i < p->pPars->nFlowIters; i++ ) { - // if ( p->pPars->fAcd && i == 0 ) - // { - // p->useLimitAdc = 0; - // } If_ManPerformMappingRound( p, p->pPars->nCutsMax, 1, 0, 0, "Flow" ); if ( p->pPars->fExpRed ) If_ManImproveMapping( p ); - // if ( p->pPars->fAcd && i == 0 ) - // { - // p->useLimitAdc = 1; - // } } // area oriented mapping for ( i = 0; i < p->pPars->nAreaIters; i++ ) { - // if ( p->pPars->fAcd && i == 0 ) - // { - // p->useLimitAdc = 0; - // } If_ManPerformMappingRound( p, p->pPars->nCutsMax, 2, 0, 0, "Area" ); if ( 
p->pPars->fExpRed ) If_ManImproveMapping( p ); - // if ( p->pPars->fAcd && i == 0 ) - // { - // p->useLimitAdc = 1; - // } } if ( p->pPars->fVerbose ) diff --git a/src/map/if/ifCut.c b/src/map/if/ifCut.c index 8d1cccba0..49850d313 100644 --- a/src/map/if/ifCut.c +++ b/src/map/if/ifCut.c @@ -761,7 +761,7 @@ void If_CutSort( If_Man_t * p, If_Set_t * pCutSet, If_Cut_t * pCut ) if ( !pCut->fUseless && (p->pPars->fUseDsd || p->pPars->pFuncCell2 || p->pPars->fUseBat || - p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || + p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fEnableCheck07 || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->fUseDsdTune || p->pPars->fEnableCheck75 || p->pPars->fEnableCheck75u || p->pPars->fUseCheck1 || p->pPars->fUseCheck2) ) { diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index 90fab6b00..3514327c1 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -412,11 +412,11 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) } } -int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ) +int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ) { pCut->fUser = 1; pCut->Cost = pCut->nLeaves > 1 ? 
1 : 0; - pCut->acdDelay = 0; + pCut->decDelay = 0; if ( pCut->nLeaves == 0 ) // const { assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); @@ -428,7 +428,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, in return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; } - int LutSize = 6; + int LutSize = p->pPars->nLutDecSize; int i, leaf_delay; int DelayMax = -1, nLeafMax = 0; unsigned uLeafMask = 0; @@ -450,7 +450,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, in } if ( If_CutLeaveNum(pCut) <= LutSize ) { - pCut->acdDelay = ( 1 << LutSize ) - 1; + pCut->decDelay = ( 1 << LutSize ) - 1; return DelayMax + 1; } @@ -473,7 +473,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, in } /* Too many late-arriving signals */ - if ( nLeafMax > LutSize / 2 ) + if ( nLeafMax == LutSize ) { if ( use_late_arrival ) { @@ -493,7 +493,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, in int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost, !use_late_arrival ); /* not feasible decomposition */ - pCut->acdDelay = uLeafMask; + pCut->decDelay = uLeafMask; if ( val < 0 ) { pCut->Cost = IF_COST_MAX; @@ -505,7 +505,7 @@ int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, in return DelayMax + val; } -int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) +int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ) { // pCut->fUser = 1; @@ -526,14 +526,14 @@ int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) { leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; - leaf_delay += ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; + leaf_delay += ( ( pCut->decDelay >> i ) & 1 ) == 0 ? 
2 : 1; DelayMax = Abc_MaxInt( leaf_delay, DelayMax ); } return DelayMax; } -float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ) +float If_LutDecPinRequired( If_Man_t * p, If_Cut_t * pCut, int i, float required ) { if ( pCut->nLeaves == 0 ) // const { @@ -546,7 +546,7 @@ float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ) return 0; } - return ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; + return ( ( pCut->decDelay >> i ) & 1 ) == 0 ? 2 : 1; } /* diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index 69f2ead81..bdd3ae439 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -166,7 +166,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep If_Cut_t * pCut0R, * pCut1R; int fFunc0R, fFunc1R; int i, k, v, iCutDsd, fChange; - int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || + int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUseDsdTune || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->pLutStruct || p->pPars->pFuncCell2 || p->pPars->fUseCheck1 || p->pPars->fUseCheck2; int fUseAndCut = (p->pPars->nAndDelay > 0) || (p->pPars->nAndArea > 0); assert( !If_ObjIsAnd(pObj->pFanin0) || pObj->pFanin0->pCutSet->nCuts > 0 ); @@ -208,9 +208,9 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fAcd ) + else if ( p->pPars->fUserLutDec ) { - pCut->Delay = If_AcdReEval( p, pCut ); + pCut->Delay = If_LutDecReEval( p, pCut ); } else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); @@ -268,7 +268,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep if ( !If_CutMergeOrdered( p, pCut0, pCut1, pCut ) ) 
continue; } - if ( p->pPars->fAcd && p->useLimitAdc && pCut->nLeaves > 6 ) + if ( p->pPars->fUserLutDec && !fFirst && pCut->nLeaves > p->pPars->nLutDecSize ) continue; if ( pObj->fSpec && pCut->nLeaves == (unsigned)p->pPars->nLutSize ) continue; @@ -429,9 +429,9 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->Delay = If_CutDsdBalanceEval( p, pCut, NULL ); else if ( p->pPars->fUserRecLib ) pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); - else if ( p->pPars->fAcd ) + else if ( p->pPars->fUserLutDec ) { - pCut->Delay = If_AcdEval( p, pCut, pObj, Mode == 0, fFirst ); + pCut->Delay = If_LutDecEval( p, pCut, pObj, Mode == 0, fFirst ); pCut->fUseless = pCut->Delay == ABC_INFINITY; } else if ( p->pPars->fUserSesLib ) @@ -518,7 +518,7 @@ void If_ObjPerformMappingChoice( If_Man_t * p, If_Obj_t * pObj, int Mode, int fP If_Set_t * pCutSet; If_Obj_t * pTemp; If_Cut_t * pCutTemp, * pCut; - int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fAcd; + int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fUserLutDec; assert( pObj->pEquiv != NULL ); // prepare diff --git a/src/map/if/ifTime.c b/src/map/if/ifTime.c index 9bce5bc43..f20842384 100644 --- a/src/map/if/ifTime.c +++ b/src/map/if/ifTime.c @@ -211,11 +211,11 @@ void If_CutPropagateRequired( If_Man_t * p, If_Obj_t * pObj, If_Cut_t * pCut, fl pLeaf->Required = IF_MIN( pLeaf->Required, Required - pLutDelays[0] ); } } - else if ( p->pPars->fAcd ) + else if ( p->pPars->fUserLutDec ) { Required = ObjRequired; If_CutForEachLeaf( p, pCut, pLeaf, i ) - pLeaf->Required = IF_MIN( pLeaf->Required, Required - If_AcdLeafProp( p, pCut, i, ObjRequired ) ); + pLeaf->Required = IF_MIN( pLeaf->Required, Required - If_LutDecPinRequired( p, pCut, i, ObjRequired 
) ); } else { From 7dcc10a254e780ca7c8e6047f0cc03b7717a148a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 10 Jan 2024 15:18:39 +0100 Subject: [PATCH 21/24] Minor fixes --- src/acd/ac_decomposition.hpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 2b3161476..64155b713 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -174,7 +174,7 @@ public: return; generate_decomposition(); - return get_decomposition_abc( decompArray ); + get_decomposition_abc( decompArray ); } private: @@ -264,9 +264,12 @@ private: return false; /* estimation on number of LUTs */ - pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 - : best_multiplicity <= 8 ? 4 - : 5; + if ( pst ) + { + pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 + : best_multiplicity <= 8 ? 4 + : 5; + } return true; } @@ -799,7 +802,7 @@ private: best_iset_offset.push_back( offset ); } - if ( pst != nullptr ) + if ( pst ) { pst->num_luts = num_luts; pst->num_levels = num_levels; @@ -868,7 +871,7 @@ private: best_iset_offset.push_back( offset ); } - if ( pst != nullptr ) + if ( pst ) { pst->num_luts = num_luts; pst->num_levels = num_levels; From 38e632a954a61e02e3397e02d42142ef86fdd9cb Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 12 Jan 2024 14:50:34 +0100 Subject: [PATCH 22/24] Consider buffers in matrix covering as free --- src/acd/ac_decomposition.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp index 64155b713..8ba2fb20e 100644 --- a/src/acd/ac_decomposition.hpp +++ b/src/acd/ac_decomposition.hpp @@ -945,6 +945,10 @@ private: /* discard solutions with support over LUT size */ if ( cost > ps.lut_size ) continue; + + /* buffers have zero cost */ + if ( cost == 1 ) + cost = 0; float sort_cost = 0; if constexpr ( UseHeuristic ) From 
67aab70cff96346e825ad351e26c69ff88948f50 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 16 Jan 2024 17:42:43 +0100 Subject: [PATCH 23/24] Moving ACD package to if folder --- Makefile | 5 ++--- src/acd/module.make | 1 - src/{ => map/if}/acd/ac_decomposition.hpp | 0 src/{ => map/if}/acd/ac_wrapper.cpp | 0 src/{ => map/if}/acd/ac_wrapper.h | 0 src/{ => map/if}/acd/kitty_algorithm.hpp | 0 src/{ => map/if}/acd/kitty_constants.hpp | 0 src/{ => map/if}/acd/kitty_constructors.hpp | 0 src/{ => map/if}/acd/kitty_dynamic_tt.hpp | 0 src/{ => map/if}/acd/kitty_operations.hpp | 0 src/{ => map/if}/acd/kitty_operators.hpp | 0 src/{ => map/if}/acd/kitty_static_tt.hpp | 0 src/map/if/acd/module.make | 1 + src/map/if/if.h | 2 +- 14 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 src/acd/module.make rename src/{ => map/if}/acd/ac_decomposition.hpp (100%) rename src/{ => map/if}/acd/ac_wrapper.cpp (100%) rename src/{ => map/if}/acd/ac_wrapper.h (100%) rename src/{ => map/if}/acd/kitty_algorithm.hpp (100%) rename src/{ => map/if}/acd/kitty_constants.hpp (100%) rename src/{ => map/if}/acd/kitty_constructors.hpp (100%) rename src/{ => map/if}/acd/kitty_dynamic_tt.hpp (100%) rename src/{ => map/if}/acd/kitty_operations.hpp (100%) rename src/{ => map/if}/acd/kitty_operators.hpp (100%) rename src/{ => map/if}/acd/kitty_static_tt.hpp (100%) create mode 100644 src/map/if/acd/module.make diff --git a/Makefile b/Makefile index d770c3faf..f7f4fda6f 100644 --- a/Makefile +++ b/Makefile @@ -17,10 +17,9 @@ OS := $(shell uname -s) MODULES := \ $(wildcard src/ext*) \ - src/acd \ src/base/abc src/base/abci src/base/cmd src/base/io src/base/main src/base/exor \ src/base/ver src/base/wlc src/base/wln src/base/acb src/base/bac src/base/cba src/base/pla src/base/test \ - src/map/mapper src/map/mio src/map/super src/map/if \ + src/map/mapper src/map/mio src/map/super src/map/if src/map/if/acd \ src/map/amap src/map/cov src/map/scl src/map/mpm \ src/misc/extra src/misc/mvc src/misc/st 
src/misc/util src/misc/nm \ src/misc/vec src/misc/hash src/misc/tim src/misc/bzlib src/misc/zlib \ @@ -55,7 +54,7 @@ endif ARCHFLAGS := $(ARCHFLAGS) -OPTFLAGS ?= -g -O +OPTFLAGS ?= -g -O3 CFLAGS += -std=c17 -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) ifneq ($(findstring arm,$(shell uname -m)),) diff --git a/src/acd/module.make b/src/acd/module.make deleted file mode 100644 index b245d2c42..000000000 --- a/src/acd/module.make +++ /dev/null @@ -1 +0,0 @@ -SRC += src/acd/ac_wrapper.cpp diff --git a/src/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp similarity index 100% rename from src/acd/ac_decomposition.hpp rename to src/map/if/acd/ac_decomposition.hpp diff --git a/src/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp similarity index 100% rename from src/acd/ac_wrapper.cpp rename to src/map/if/acd/ac_wrapper.cpp diff --git a/src/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h similarity index 100% rename from src/acd/ac_wrapper.h rename to src/map/if/acd/ac_wrapper.h diff --git a/src/acd/kitty_algorithm.hpp b/src/map/if/acd/kitty_algorithm.hpp similarity index 100% rename from src/acd/kitty_algorithm.hpp rename to src/map/if/acd/kitty_algorithm.hpp diff --git a/src/acd/kitty_constants.hpp b/src/map/if/acd/kitty_constants.hpp similarity index 100% rename from src/acd/kitty_constants.hpp rename to src/map/if/acd/kitty_constants.hpp diff --git a/src/acd/kitty_constructors.hpp b/src/map/if/acd/kitty_constructors.hpp similarity index 100% rename from src/acd/kitty_constructors.hpp rename to src/map/if/acd/kitty_constructors.hpp diff --git a/src/acd/kitty_dynamic_tt.hpp b/src/map/if/acd/kitty_dynamic_tt.hpp similarity index 100% rename from src/acd/kitty_dynamic_tt.hpp rename to src/map/if/acd/kitty_dynamic_tt.hpp diff --git a/src/acd/kitty_operations.hpp b/src/map/if/acd/kitty_operations.hpp similarity index 100% rename from src/acd/kitty_operations.hpp rename to src/map/if/acd/kitty_operations.hpp diff --git 
a/src/acd/kitty_operators.hpp b/src/map/if/acd/kitty_operators.hpp similarity index 100% rename from src/acd/kitty_operators.hpp rename to src/map/if/acd/kitty_operators.hpp diff --git a/src/acd/kitty_static_tt.hpp b/src/map/if/acd/kitty_static_tt.hpp similarity index 100% rename from src/acd/kitty_static_tt.hpp rename to src/map/if/acd/kitty_static_tt.hpp diff --git a/src/map/if/acd/module.make b/src/map/if/acd/module.make new file mode 100644 index 000000000..33c59e830 --- /dev/null +++ b/src/map/if/acd/module.make @@ -0,0 +1 @@ +SRC += src/map/if/acd/ac_wrapper.cpp diff --git a/src/map/if/if.h b/src/map/if/if.h index cc4d2926b..f8c99fdf1 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -40,7 +40,7 @@ #include "opt/dau/dau.h" #include "misc/vec/vecHash.h" #include "misc/vec/vecWec.h" -#include "acd/ac_wrapper.h" +#include "map/if/acd/ac_wrapper.h" ABC_NAMESPACE_HEADER_START From 5a00bbaa8f930f9653fd24fabd948e78bb5784fb Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 16 Jan 2024 18:13:30 +0100 Subject: [PATCH 24/24] Cleaning Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f7f4fda6f..0cc979b75 100644 --- a/Makefile +++ b/Makefile @@ -54,9 +54,9 @@ endif ARCHFLAGS := $(ARCHFLAGS) -OPTFLAGS ?= -g -O3 +OPTFLAGS ?= -g -O -CFLAGS += -std=c17 -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) +CFLAGS += -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) ifneq ($(findstring arm,$(shell uname -m)),) CFLAGS += -DABC_MEMALIGN=4 endif