diff --git a/Makefile b/Makefile index 0cc979b75..3c81251cf 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ ifdef ABC_USE_LIBSTDCXX endif $(info $(MSG_PREFIX)Using CFLAGS=$(CFLAGS)) -CXXFLAGS += $(CFLAGS) -std=c++17 +CXXFLAGS += $(CFLAGS) -std=c++11 SRC := GARBAGE := core core.* *.stackdump ./tags $(PROG) arch_flags diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index 7edc78e7e..b9535f423 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19514,7 +19514,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF ) { switch ( c ) { @@ -19637,6 +19637,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) goto usage; } pPars->nLutDecSize = atoi(argv[globalUtilOptind]); + pPars->fUserLutDec = 1; globalUtilOptind++; if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 ) goto usage; @@ -19688,6 +19689,21 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) goto usage; } break; + case 'J': + if ( globalUtilOptind >= argc ) + { + Abc_Print( -1, "Command line switch \"-J\" should be followed by string.\n" ); + goto usage; + } + pPars->pLutStruct = argv[globalUtilOptind]; + pPars->fEnableStructN = 1; + globalUtilOptind++; + if ( strlen(pPars->pLutStruct) != 2 && strlen(pPars->pLutStruct) != 3 ) + { + Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2- or 3-char string (e.g. 
\"66\" or \"666\").\n" ); + goto usage; + } + break; case 'q': pPars->fPreprocess ^= 1; break; @@ -19730,9 +19746,6 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'y': pPars->fUserRecLib ^= 1; break; - case 'z': - pPars->fUserLutDec ^= 1; - break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -19868,7 +19881,14 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) Abc_Print( -1, "This feature only works for [6;16]-LUTs.\n" ); return 1; } - pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck16; + if ( pPars->fEnableStructN ) + { + pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck66; + } + else + { + pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck16; + } pPars->fCutMin = 1; } @@ -19884,9 +19904,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) Abc_Print( -1, "LUT size (%d) must be greater than the LUT decomposition size (%d).\n", pPars->nLutSize, pPars->nLutDecSize ); return 1; } - if ( pPars->nLutSize < 4 || pPars->nLutSize > 10 ) + if ( pPars->nLutSize < 4 || pPars->nLutSize > 11 ) { - Abc_Print( -1, "This feature only works for [4;10]-LUTs.\n" ); + Abc_Print( -1, "This feature only works for [4;11]-LUTs.\n" ); return 1; } } @@ -20053,7 +20073,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -20065,11 +20085,12 @@ usage: Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" ); Abc_Print( -2, "\t-X num 
: delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay ); Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea ); - Abc_Print( -2, "\t-Z num : the number of LUT inputs for LUT decomposition [default = %d]\n", pPars->nLutDecSize ); + Abc_Print( -2, "\t-Z num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" ); Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", Buffer ); Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); Abc_Print( -2, "\t-W float : sets wire delay between adjects LUTs [default = %f]\n", pPars->WireDelay ); Abc_Print( -2, "\t-S str : string representing the LUT structure [default = %s]\n", pPars->pLutStruct ? pPars->pLutStruct : "not used" ); + Abc_Print( -2, "\t-J str : string representing the LUT structure (new method) [default = %s]\n", pPars->pLutStruct ? pPars->pLutStruct : "not used" ); Abc_Print( -2, "\t-q : toggles preprocessing using several starting points [default = %s]\n", pPars->fPreprocess? "yes": "no" ); Abc_Print( -2, "\t-a : toggles area-oriented mapping [default = %s]\n", pPars->fArea? "yes": "no" ); Abc_Print( -2, "\t-r : enables expansion/reduction of the best cuts [default = %s]\n", pPars->fExpRed? "yes": "no" ); @@ -20083,7 +20104,6 @@ usage: Abc_Print( -2, "\t-g : toggles delay optimization by SOP balancing [default = %s]\n", pPars->fDelayOpt? "yes": "no" ); Abc_Print( -2, "\t-x : toggles delay optimization by DSD balancing [default = %s]\n", pPars->fDsdBalance? "yes": "no" ); Abc_Print( -2, "\t-y : toggles delay optimization with recorded library [default = %s]\n", pPars->fUserRecLib? "yes": "no" ); - Abc_Print( -2, "\t-z : toggles delay optimization with LUT decomposition [default = %s]\n", pPars->fUserLutDec? "yes": "no" ); Abc_Print( -2, "\t-u : toggles delay optimization with SAT-based library [default = %s]\n", pPars->fUserSesLib? 
"yes": "no" ); Abc_Print( -2, "\t-o : toggles using buffers to decouple combinational outputs [default = %s]\n", pPars->fUseBuffs? "yes": "no" ); Abc_Print( -2, "\t-j : toggles enabling additional check [default = %s]\n", pPars->fEnableCheck07? "yes": "no" ); diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 8aee0266d..d573da2cd 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -58,7 +58,7 @@ struct ac_decomposition_params bool support_reducing_only{ true }; /*! \brief Use the first feasible decomposition found. */ - bool use_first{ true }; + bool use_first{ false }; /*! \brief If decomposition with delay profile fails, try without. */ bool try_no_late_arrival{ false }; @@ -90,7 +90,7 @@ private: }; private: - static constexpr uint32_t max_num_vars = 10; + static constexpr uint32_t max_num_vars = 11; using STT = kitty::static_truth_table; public: @@ -111,6 +111,16 @@ public: uint32_t late_arriving = __builtin_popcount( delay_profile ); + /* relax maximum number of free set variables if a function has more variables */ + if ( num_vars > ps.max_free_set_vars + ps.lut_size ) + { + ps.max_free_set_vars = num_vars - ps.lut_size; + } + if ( late_arriving > ps.max_free_set_vars ) + { + ps.max_free_set_vars = late_arriving; + } + /* return a high cost if too many late arriving variables */ if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars ) { @@ -203,9 +213,11 @@ private: [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; /* find a feasible AC decomposition */ + // for ( uint32_t i = std::min( ps.lut_size - 1, ps.max_free_set_vars); i >= start; --i ) for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations( i, offset, column_multiplicity_fn[i - 1] ); + 
uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; @@ -213,24 +225,26 @@ private: /* check for feasible solution that improves the cost */ if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) { - best_tt = tt_p; - permutations = perm; + best_tt = std::get<0>( ret_tuple ); + permutations = std::get<1>( ret_tuple ); best_multiplicity = multiplicity; best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.use_first ) + if ( !ps.use_first ) { - break; + continue; } } + + break; } if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) return false; /* try without the delay profile */ - if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) + if ( best_multiplicity == UINT32_MAX ) { delay_profile = 0; if ( ps.support_reducing_only ) @@ -240,7 +254,8 @@ private: for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations( i, 0, column_multiplicity_fn[i - 1] ); + uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; @@ -248,17 +263,19 @@ private: /* check for feasible solution that improves the cost */ if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) { - best_tt = tt_p; - permutations = perm; + best_tt = std::get<0>( ret_tuple ); + permutations = std::get<1>( ret_tuple ); best_multiplicity = multiplicity; best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.use_first ) + if ( !ps.use_first ) { - break; + continue; } } + + break; } } @@ -285,11 +302,11 @@ private: best_tt._bits[i] = ptt[i]; } - local_extend_to( best_tt, num_vars ); + // local_extend_to( best_tt, num_vars ); } template - uint32_t column_multiplicity( STT tt ) + uint32_t column_multiplicity( STT const& tt ) { uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; uint32_t multiplicity = 0; @@ -298,23 +315,22 @@ private: uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; /* supports up to 64 values of free set (256 for |FS| == 3)*/ - static_assert( free_set_size <= 3 ); + static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" ); /* extract iset functions */ - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t cof = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); - *it >>= ( 1u << free_set_size ); + multiplicity_set[( cof >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( cof & masks_bits[free_set_size] ); + cof >>= ( 1u << free_set_size ); } - ++it; } multiplicity = __builtin_popcountl( multiplicity_set[0] ); - if constexpr ( free_set_size == 3 ) + if ( free_set_size == 3 ) { multiplicity += __builtin_popcountl( multiplicity_set[1] ); multiplicity += __builtin_popcountl( multiplicity_set[2] ); @@ -325,32 +341,31 @@ private: } template - uint32_t column_multiplicity5( STT tt ) + uint32_t column_multiplicity5( 
STT const& tt ) { uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - static_assert( free_set_size == 5 || free_set_size == 4 ); + static_assert( free_set_size == 5 || free_set_size == 4, "Wrong free set size for method used, expected of 4 or 5" ); uint32_t size = 0; uint64_t prev = -1; std::array multiplicity_set; /* extract iset functions */ - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t cof = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - uint64_t fs_fn = *it & masks[free_set_size]; + uint64_t fs_fn = cof & masks[free_set_size]; if ( fs_fn != prev ) { multiplicity_set[size++] = static_cast( fs_fn ); prev = fs_fn; } - *it >>= ( 1u << free_set_size ); + cof >>= ( 1u << free_set_size ); } - ++it; } std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); @@ -365,6 +380,40 @@ private: return multiplicity; } + uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == 2 ) + return 3; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) { uint32_t i; @@ -380,7 +429,7 @@ private: uint32_t pos_new = pInvPerm[var_old + 1]; std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); std::swap( pComb[i], pComb[pos_new] ); - kitty::swap_inplace( tt, i, pos_new ); + swap_inplace_local( tt, i, pos_new ); for ( uint32_t j = i + 1; j < k; j++ ) { @@ -388,20 +437,20 @@ private: pos_new = pInvPerm[pComb[j - 1] + 1]; std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); std::swap( pComb[j], pComb[pos_new] ); - kitty::swap_inplace( tt, j, pos_new ); + swap_inplace_local( tt, j, pos_new ); } return true; } template - std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) + std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, uint32_t offset, Fn&& fn ) { STT tt = best_tt; /* TT with best cost */ - STT best_tt = tt; - uint32_t best_cost = UINT32_MAX; + STT local_best_tt = tt; + uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1; assert( free_set_size >= offset ); @@ -415,6 +464,12 @@ private: /* works up to 16 input truth tables */ assert( num_vars <= 16 ); + /* Search for column multiplicity of 2 */ + if ( free_set_size == ps.lut_size - 1 ) + { + return enumerate_iset_combinations2( free_set_size, 
offset ); + } + /* init combinations */ uint32_t pComb[16], pInvPerm[16], bestPerm[16]; for ( uint32_t i = 0; i < num_vars; ++i ) @@ -428,7 +483,7 @@ private: uint32_t cost = fn( tt ); if ( cost < best_cost ) { - best_tt = tt; + local_best_tt = tt; best_cost = cost; for ( uint32_t i = 0; i < num_vars; ++i ) { @@ -438,12 +493,56 @@ private: } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); std::array res_perm; + + if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) ) + { + return std::make_tuple( local_best_tt, res_perm, UINT32_MAX ); + } + for ( uint32_t i = 0; i < num_vars; ++i ) { res_perm[i] = permutations[bestPerm[i]]; } - return std::make_tuple( best_tt, res_perm, best_cost ); + return std::make_tuple( local_best_tt, res_perm, best_cost ); + } + + inline std::tuple, uint32_t> enumerate_iset_combinations2( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* TT with best cost */ + STT local_best_tt = tt; + uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1; + + assert( free_set_size >= offset ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + std::array res_perm; + + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size ); + if ( cost <= 2 ) + { + local_best_tt = tt; + best_cost = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + res_perm[i] = permutations[pComb[i]]; + } + return std::make_tuple( local_best_tt, res_perm, best_cost ); + } + } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return std::make_tuple( local_best_tt, res_perm, UINT32_MAX ); } std::vector compute_isets( bool verbose = false ) @@ -466,7 +565,8 @@ private: { uint64_t val = *it & masks[best_free_set]; - if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + auto el = column_to_iset.find( val ); + if ( el != 
column_to_iset.end() ) { isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); } @@ -479,7 +579,7 @@ private: *it >>= ( 1u << best_free_set ); } - offset = ( offset + ( 64 >> best_free_set ) ) % 64; + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; ++it; } @@ -650,7 +750,7 @@ private: } std::swap( permutations[i], permutations[k] ); - kitty::swap_inplace( best_tt, i, k ); + swap_inplace_local( best_tt, i, k ); ++k; } } @@ -709,7 +809,7 @@ private: { if ( var == best_multiplicity ) { - if constexpr ( !enable_dcset ) + if ( !enable_dcset ) { /* sets must be equally populated */ if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) ) @@ -725,7 +825,7 @@ private: } /* var in DCSET */ - if constexpr ( enable_dcset ) + if ( enable_dcset ) { generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); } @@ -953,7 +1053,7 @@ private: cost = 0; float sort_cost = 0; - if constexpr ( UseHeuristic ) + if ( UseHeuristic ) { sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) ); } @@ -971,15 +1071,15 @@ private: return true; } - if constexpr ( UseHeuristic ) + if ( UseHeuristic ) { - std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) { return a.cost < b.cost; } ); } else { - std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) { return a.sort_cost < b.sort_cost; } ); } @@ -1231,6 +1331,66 @@ private: return false; } + void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( num_vars > 6 ); + const uint32_t num_blocks = 1 << ( num_vars - 6 ); + + if ( 
var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + /* Decomposition format for ABC * * The record is an array of unsigned chars where: @@ -1298,7 +1458,7 @@ private: std::vector> support_minimization_encodings; uint32_t num_vars; - ac_decomposition_params const& ps; + ac_decomposition_params ps; ac_decomposition_stats* pst; std::array permutations; }; diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index fd8015f95..6ee265318 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ 
b/src/map/if/acd/ac_wrapper.cpp @@ -18,55 +18,158 @@ #include "ac_wrapper.h" #include "ac_decomposition.hpp" +#include "acd66.hpp" +#include "acd666.hpp" ABC_NAMESPACE_IMPL_START +static constexpr bool use_generic_acd = true; + int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace acd; - ac_decomposition_params ps; - ps.lut_size = lutSize; - ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ - ac_decomposition_stats st; - - ac_decomposition_impl acd( nVars, ps, &st ); - int val = acd.run( pTruth, *pdelay ); - - if ( val < 0 ) + if ( use_generic_acd ) { - *pdelay = 0; - return -1; + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.use_first = false; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + *cost = st.num_luts; + + return val; } + else + { + acd66_impl acd( nVars ); + int val = acd.run( pTruth, *pdelay ); - *pdelay = acd.get_profile(); - *cost = st.num_luts; + if ( val == 0 ) + { + *pdelay = 0; + return -1; + } - return val; + *pdelay = acd.get_profile(); + *cost = 2; + + return val; + } } int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) { using namespace acd; - ac_decomposition_params ps; - ps.lut_size = lutSize; - ac_decomposition_stats st; - - ac_decomposition_impl acd( nVars, ps, &st ); - acd.run( pTruth, *pdelay ); - int val = acd.compute_decomposition(); - - if ( val < 0 ) + if ( use_generic_acd ) { - *pdelay = 0; - return -1; + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.use_first = true; + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + acd.run( pTruth, *pdelay ); + int val = 
acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + acd.get_decomposition( decomposition ); + return 0; + } + else + { + acd66_impl acd( nVars ); + acd.run( pTruth, *pdelay ); + int val = acd.compute_decomposition(); + + if ( val != 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; + } +} + +int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) +{ + using namespace acd; + + acd66_impl acd( nVars, true, false ); + + if ( acd.run( pTruth ) == 0 ) + return 0; + + if ( !compute_decomposition ) + return 1; + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return 0; } - *pdelay = acd.get_profile(); + return 1; +} + +int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ) +{ + using namespace acd; + + acd66_impl acd( nVars, true, false ); + acd.run( pTruth ); + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return -1; + } acd.get_decomposition( decomposition ); return 0; } +int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) +{ + using namespace acd; + + acd666_impl acd( nVars, false ); + + if ( acd.run( pTruth ) == 0 ) + return 0; + + if ( !compute_decomposition ) + return 1; + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return 0; + } + + return 1; +} + ABC_NAMESPACE_IMPL_END diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index a384b4404..03f17ed40 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -28,6 +28,11 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); +int acd66_evaluate( word * pTruth, unsigned nVars, int 
compute_decomposition ); +int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); + +int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); + ABC_NAMESPACE_HEADER_END #endif \ No newline at end of file diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp new file mode 100644 index 000000000..5891f7b90 --- /dev/null +++ b/src/map/if/acd/acd66.hpp @@ -0,0 +1,1430 @@ +/**C++File************************************************************** + + FileName [acd66.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - Feb 8, 2024.] + +***********************************************************************/ +/*! + \file acd66.hpp + \brief Ashenhurst-Curtis decomposition for "66" cascade + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD66_H_ +#define _ACD66_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +ABC_NAMESPACE_CXX_HEADER_START + +namespace acd +{ + +class acd66_impl +{ +private: + static constexpr uint32_t max_num_vars = 11; + using STT = kitty::static_truth_table; + using LTT = kitty::static_truth_table<6>; + +public: + explicit acd66_impl( uint32_t const num_vars, bool multiple_shared_set = false, bool const verify = false ) + : num_vars( num_vars ), multiple_ss( multiple_shared_set ), verify( verify ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! 
\brief Runs ACD 66 */ + bool run( word* ptt ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 11 ) + { + return false; + } + + /* convert to static TT */ + init_truth_table( ptt ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition(); + } + + /*! \brief Runs ACD 66 */ + int run( word* ptt, unsigned delay_profile ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 11 ) + { + return false; + } + + uint32_t late_arriving = __builtin_popcount( delay_profile ); + + /* too many late arriving variables */ + if ( late_arriving > 5 ) + return 0; + + /* convert to static TT */ + init_truth_table( ptt ); + best_tt = start_tt; + + /* permute late arriving variables to be the least significant */ + reposition_late_arriving_variables( delay_profile, late_arriving ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition_offset( late_arriving ) ? ( delay_profile == 0 ? 
2 : 1 ) : 0; + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + compute_decomposition_impl(); + + if ( verify && !verify_impl() ) + { + return 1; + } + + return 0; + } + + uint32_t get_num_edges() + { + if ( bs_support_size == UINT32_MAX ) + { + return num_vars + 1 + num_shared_vars; + } + + /* real value after support minimization */ + return bs_support_size + best_free_set + 1 + num_shared_vars; + } + + /* contains a 1 for FS variables */ + unsigned get_profile() + { + unsigned profile = 0; + + if ( best_multiplicity == UINT32_MAX ) + return -1; + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + + return profile; + } + + void get_decomposition( unsigned char* decompArray ) + { + if ( bs_support_size == UINT32_MAX ) + return; + + get_decomposition_abc( decompArray ); + } + +private: + bool find_decomposition() + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* use multiple shared set variables */ + if ( multiple_ss ) + { + return find_decomposition_bs_multi_ss( num_vars - 6 ); + } + + uint32_t max_free_set = num_vars == 11 ? 5 : 4; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = num_vars - 6; i <= max_free_set; ++i ) + { + if ( find_decomposition_bs( i ) ) + return true; + } + + best_multiplicity = UINT32_MAX; + return false; + } + + bool find_decomposition_offset( uint32_t offset ) + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* use multiple shared set variables */ + if ( multiple_ss ) + { + return find_decomposition_bs_offset_multi_ss( std::max( num_vars - 6, offset ), offset ); + } + + uint32_t max_free_set = ( num_vars == 11 || offset == 5 ) ? 
5 : 4; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = std::max( num_vars - 6, offset ); i <= max_free_set; ++i ) + { + if ( find_decomposition_bs_offset( i, offset ) ) + return true; + } + + best_multiplicity = UINT32_MAX; + return false; + } + + void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + start_tt._bits[i] = ptt[i]; + } + + local_extend_to( start_tt, num_vars ); + } + + uint32_t column_multiplicity( STT const& tt, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const limit = free_set_size < 5 ? 4 : 2; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return 5; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size, uint32_t const limit ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[32]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return limit + 1; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + inline bool combinations_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) + { + if ( i == offset ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + swap_inplace_local( tt, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + swap_inplace_local( tt, j, pos_new ); + } + + return true; + } + + inline bool combinations_next_simple( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, uint32_t size ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == size - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + 
} + + return true; + } + + /* Tries every free set of free_set_size variables on a copy of start_tt. A column multiplicity of 2 is accepted outright; a multiplicity of at most 4 (free set below 5 variables) is accepted when check_shared_set finds a shared variable. On success records best_tt, best_multiplicity and permutations and returns true. */ bool find_decomposition_bs( uint32_t free_set_size ) + { + STT tt = start_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move the shared variable to the most significant position */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + /* Variant of find_decomposition_bs that starts from best_tt and forwards offset to combinations_next. When free_set_size == offset only the current variable order is evaluated. */ bool find_decomposition_bs_offset( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + best_free_set = free_set_size; + + /* special case */ + if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move the shared variable to the most significant position */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + 
num_shared_vars = 1; + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + /* pInvPerm is reused as scratch to compose pComb with the existing permutations */ for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move the shared variable to the most significant position */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return false; + } + + /* Like find_decomposition_bs, but measures multiplicity with column_multiplicity2 and may use several shared variables through check_shared_set_multi. */ bool find_decomposition_bs_multi_ss( uint32_t free_set_size ) + { + STT tt = start_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16], shared_set[4]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost <= 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + + /* one more shared variable is needed each time cost doubles past 4 */ uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 
3 + : cost <= 32 ? 4 + : 5; + /* the shared variables must fit next to the free set */ if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move the shared variables to the most significant positions */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + /* Multi-shared-variable counterpart of find_decomposition_bs_offset: starts from best_tt, forwards offset to combinations_next and composes the order it finds with the existing permutations. */ bool find_decomposition_bs_offset_multi_ss( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + best_free_set = free_set_size; + uint32_t shared_set[4]; + + /* special case */ + if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 
4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move the shared variables to the most significant positions */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + /* pInvPerm is reused as scratch to compose pComb with the existing permutations */ for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 
4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move the shared variables to the most significant positions */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return false; + } + + /* Returns true when, for each value of variable shared_var, the free-set cofactors of tt take at most two distinct values. */ bool check_shared_var( STT const& tt, uint32_t free_set_size, uint32_t shared_var ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[2][4]; + uint32_t size[2] = { 0, 0 }; + /* position of the shared variable inside the column index */ uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + /* free_set_size <= 5 keeps the cofactor within 32 bits, so the narrowing is lossless */ uint32_t fs_fn = static_cast<uint32_t>( sub & mask ); + /* value of the shared variable for this column */ uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + /* a third distinct cofactor on this side rules the variable out */ if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + /* Returns the first variable index in [best_free_set, num_vars) accepted by check_shared_var, or -1 when none qualifies. */ inline int check_shared_set( STT const& tt ) + { + /* find one shared set variable */ + for ( uint32_t i = best_free_set; i < num_vars; ++i ) + { + /* check the multiplicity of cofactors */ + if ( check_shared_var( tt, best_free_set, i ) ) + { + return i; + } + } + + return -1; + } + + /* Generalises check_shared_var to num_shared_vars variables: columns are grouped by the column-index bits selected through shared_vars[], and every group may hold at most two distinct cofactors. */ bool check_shared_var_combined( STT const& tt, uint32_t free_set_size, uint32_t shared_vars[6], uint32_t num_shared_vars ) + { + assert( free_set_size <= 5 ); + assert( num_shared_vars <= 4 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[16][2]; + uint32_t size[16] = { 0 }; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + /* free_set_size <= 5 keeps the cofactor within 32 bits, so the narrowing is lossless */ uint32_t fs_fn = static_cast<uint32_t>( sub & mask ); + /* group index: one bit per shared variable, read from the column index */ uint32_t p = 0; + for ( uint32_t k = 0; k < num_shared_vars; ++k ) + { + p += ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k; + } + + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + /* Searches shared sets of growing size, from target_num_ss up to 5 - best_free_set variables. On success writes the chosen indices, sorted, to res_shared and returns their count; otherwise returns -1. */ inline int check_shared_set_multi( STT const& tt, uint32_t target_num_ss, uint32_t* res_shared ) + { + /* init combinations */ + uint32_t pComb[6], pInvPerm[6]; + + /* search for a feasible shared set */ + for ( uint32_t i = target_num_ss; i < 6 - best_free_set; ++i ) + { + /* restart from the identity order; a separate index avoids shadowing the set size i */ for ( uint32_t v = 0; v < 6; ++v ) + { + pComb[v] = pInvPerm[v] = v; + } + + do + { + /* check for combined shared set */ + if ( check_shared_var_combined( tt, best_free_set, pComb, i ) ) + { + for ( uint32_t j = 0; j < i; ++j ) + { + res_shared[j] = pComb[j]; + } + /* sort vars */ + std::sort( res_shared, res_shared + i ); + return i; + } + } while ( combinations_next_simple( i, pComb, pInvPerm, num_vars - best_free_set ) ); + } + + return -1; + } + + /* Builds the isets of best_tt for the single- or no-shared-variable case; with more than one shared variable the work is delegated to compute_decomposition_impl_multi_ss. */ void compute_decomposition_impl( bool verbose = false ) + { + if ( num_shared_vars > 1 ) + return compute_decomposition_impl_multi_ss( verbose ); + + bool has_shared_set = num_shared_vars > 0; + + /* construct isets involved in multiplicity */ + LTT isets0[2]; + LTT isets1[2]; + + /* construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + /* fs_fun[0]/[1]: the two free-set functions seen in the 0 cofactor; fs_fun[2]/[3]: those of the 1 cofactor */ uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[0] ) + { + isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + cof >>= shift; + } + /* the iset bit offset wraps at 64 */ offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = best_tt._bits[num_blocks] & mask; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[2] ) + { + isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + cof >>= shift; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions( isets0, isets1, fs_fun ); + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + /* Computes dec_funcs[] when several shared variables are in use: bs collects one encoding bit per bound-set column and composition the concatenated free-set functions. */ void compute_decomposition_impl_multi_ss( bool verbose = false ) + { + /* due to the high multiplicity value this method does not perform support minimization */ + + /* construct isets involved in multiplicity */ + LTT composition; + LTT bs; + + /* construct isets */ + uint32_t 
offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const num_groups = 1 << num_shared_vars; + /* number of columns that share one assignment of the shared variables */ uint32_t const next_group = 1 << ( num_vars - best_free_set - num_shared_vars ); + + /* two free-set functions per group, stored at fs_fun[2g] and fs_fun[2g + 1] */ uint64_t fs_fun[32] = { 0 }; + + uint32_t group_index = 0; + uint32_t set_index = 0; + fs_fun[0] = best_tt._bits[0] & mask; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + /* move to next block */ + if ( set_index == next_group ) + { + group_index += 2; + set_index = 0; + fs_fun[group_index] = val; + } + /* gather encoding */ + if ( val != fs_fun[group_index] ) + { + bs._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[group_index + 1] = val; + } + cof >>= shift; + ++set_index; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + + /* create composition function */ + for ( uint32_t i = 0; i < 2 * num_groups; ++i ) + { + composition._bits |= fs_fun[i] << ( i * shift ); + } + + /* minimize support BS */ + LTT care; + bs_support_size = 0; + /* for v >= 1, masks[v] keeps the low 2^v bits */ uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + care._bits = masks[num_vars - best_free_set]; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( bs, care, i ) ) + { + continue; + } + + /* compact the variables bs depends on into the lowest positions */ if ( bs_support_size < i ) + { + kitty::swap_inplace( bs, bs_support_size, i ); + } + + bs_support[bs_support_size] = i; + ++bs_support_size; + } + + /* assign functions */ + dec_funcs[0] = bs._bits; + dec_funcs[1] = composition._bits; + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } 
+ + /* Dispatches on best_multiplicity to produce the bound-set function (dec_funcs[0]) and, through compute_composition, the composition function (dec_funcs[1]). */ inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity < 3 ) + { + dec_funcs[0] = isets0[0]._bits; + bs_support_size = num_vars - best_free_set; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + bs_support[i] = i; + } + compute_composition( fs_fun ); + return; + } + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4( isets0, isets1, fs_fun ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4( isets0, isets1, fs_fun ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3( isets0, isets1, fs_fun ); + } + + /* Chooses between the encodings isets0[0] | isets1[1] and isets0[0] | isets1[0], keeping the one whose bound-set function depends on fewer variables (the second on a tie), then compacts that support into the low positions. */ inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[1]; + LTT care; + care._bits = masks[num_vars - best_free_set]; + + /* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + support_vars1 += has_var6( f, care, i ) ? 1 : 0; + bs_support[i] = i; + } + + /* use a different set */ + f = isets0[0] | isets1[0]; + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + support_vars2 += has_var6( f, care, i ) ? 
1 : 0; + } + + bs_support_size = support_vars2; + if ( support_vars2 > support_vars1 ) + { + f = isets0[0] | isets1[1]; + /* this encoding raises the bound-set output on isets1[1], so the two free-set functions of the 1 cofactor trade places; fs_fun has only four entries */ std::swap( fs_fun[2], fs_fun[3] ); + bs_support_size = support_vars1; + } + + /* move variables */ + if ( bs_support_size < num_vars - best_free_set ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + continue; + } + + if ( support_vars1 < i ) + { + kitty::swap_inplace( f, support_vars1, i ); + } + + bs_support[support_vars1] = i; + ++support_vars1; + } + } + + dec_funcs[0] = f._bits; + compute_composition( fs_fun ); + } + + /* Multiplicity-3 case: the cofactor with a single free-set function is removed from the care set, and the resulting don't cares are used to drop variables from the bound-set function. */ inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[0]; + LTT care; + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + care._bits = masks[num_vars - best_free_set] & ( ~isets0[0]._bits ); + fs_fun[1] = fs_fun[0]; + } + else + { + care._bits = masks[num_vars - best_free_set] & ( ~isets1[0]._bits ); + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + adjust_truth_table_on_dc( f, care, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + bs_support[support_vars] = i; + ++support_vars; + } + + bs_support_size = support_vars; + dec_funcs[0] = f._bits; + compute_composition( fs_fun ); + } + + /* Packs the free-set functions into dec_funcs[1], each taking 2^best_free_set bits: fs_fun[1], fs_fun[0], then (multiplicity above 2) fs_fun[3], fs_fun[2] from the low bits upward. */ void compute_composition( uint64_t fs_fun[4] ) + { + dec_funcs[1] = fs_fun[0] << ( 1 << best_free_set ); + dec_funcs[1] |= fs_fun[1]; + + if ( best_multiplicity > 2 ) + { + dec_funcs[1] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); + dec_funcs[1] |= fs_fun[3] << ( 2 << best_free_set ); + } + } + + template<class TT_type> + void local_extend_to( TT_type& tt, uint32_t real_num_vars ) + { + 
/* below 6 variables the first word is replicated within itself and then copied to every word; otherwise the first 2^(real_num_vars - 6) words are tiled over the whole table */ if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + /* For each of the late_arriving lowest set bits k of delay_profile, swaps position k with position i (i = 0, 1, ...) in both permutations and best_tt, unless permutations[i] already equals k. */ inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + /* advance to the next set bit of the profile */ while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + swap_inplace_local( best_tt, i, k ); + ++k; + } + } + + /* Swaps two variables of tt in place over its first 2^(num_vars - 6) words. The three branches cover both indices below 6, one index on each side of 6, and both indices at 6 or above. */ void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + /* normalise so that var_index1 < var_index2 */ if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( num_vars > 6 ); + const uint32_t num_blocks = 1 << ( num_vars - 6 ); + + if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & 
kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + /* both variables select whole words, so entire words are exchanged */ const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + + /* Returns true when tt differs between the two cofactors of var_index on at least one minterm pair whose members are both in care. */ inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) + { + if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) + { + return true; + } + + return false; + } + + /* Rewrites tt from its cared-for bits so that each minterm pair of var_index holds the OR of the two, then ORs care with its copy shifted down by 2^var_index. */ void adjust_truth_table_on_dc( LTT& tt, LTT& care, uint32_t var_index ) + { + uint64_t new_bits = tt._bits & care._bits; + tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. 
The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. + * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + void get_decomposition_abc( unsigned char* decompArray ) + { + unsigned char* pArray = decompArray; + /* the byte count and the LUT count are already accounted for */ unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray = 2; + pArray++; + + /* write BS LUT */ + /* write fanin size */ + *pArray = bs_support_size; + pArray++; + ++bytes; + + /* write support */ + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; + pArray++; + ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write top LUT */ + /* write fanin size */ + uint32_t support_size = best_free_set + 1 + num_shared_vars; + *pArray = support_size; + pArray++; + ++bytes; + + /* write support */ + /* the free-set variables sit at positions 0 .. best_free_set - 1 and are the lowest fanins of the top LUT */ for ( uint32_t i = 0; i < best_free_set; ++i ) + { + *pArray = (unsigned char)permutations[i]; + pArray++; + ++bytes; + } + + /* the bound-set LUT output is the first internal signal, hence index num_vars */ *pArray = (unsigned char)num_vars; + pArray++; + ++bytes; + + for ( uint32_t i = 0; i < num_shared_vars; ++i ) + { + *pArray = (unsigned char)permutations[num_vars - num_shared_vars + i]; + pArray++; + ++bytes; + } + + /* write truth table */ + tt_num_bytes = ( support_size <= 3 ) ? 
1 : ( 1 << ( support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write numBytes */ + *decompArray = bytes; + } + + /* Re-simulates the bound-set LUT and the top LUT over all 2^num_vars input patterns and compares the result word by word with start_tt. Returns false, after printing both tables, on the first mismatch. */ bool verify_impl() + { + /* create PIs */ + STT pis[max_num_vars]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + kitty::create_nth_var( pis[i], permutations[i] ); + } + + /* BS function patterns */ + STT bsi[6]; + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + bsi[i] = pis[best_free_set + bs_support[i]]; + } + + /* compute first function */ + STT bsf_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < bs_support_size; ++j ) + { + pattern |= get_bit( bsi[j], i ) << j; + } + if ( ( dec_funcs[0] >> pattern ) & 1 ) + { + set_bit( bsf_sim, i ); + } + } + + /* compute top function */ + STT top_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < best_free_set; ++j ) + { + pattern |= get_bit( pis[j], i ) << j; + } + pattern |= get_bit( bsf_sim, i ) << best_free_set; + + /* shared variables */ + for ( auto j = 0u; j < num_shared_vars; ++j ) + { + pattern |= get_bit( pis[num_vars - num_shared_vars + j], i ) << ( best_free_set + 1 + j ); + } + + if ( ( dec_funcs[1] >> pattern ) & 1 ) + { + set_bit( top_sim, i ); + } + } + + for ( uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i ) + { + if ( top_sim._bits[i] != start_tt._bits[i] ) + { + /* convert to dynamic_truth_table */ + /* report_tt( bsf_sim ); */ + std::cout << "Found incorrect decomposition\n"; + report_tt( top_sim ); + std::cout << " instead_of\n"; + report_tt( start_tt ); + return false; + } + } + + return true; + } + + /* Reads bit `index` of tt (64 bits per word). */ uint32_t get_bit( const STT& tt, uint64_t index ) + { + return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; + } + + /* Sets bit `index` of tt (64 bits per word). */ void set_bit( STT& tt, uint64_t index ) + { + tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); + } + + 
/* Prints the first 2^(num_vars - 6) words of stt in hex by copying them into a kitty::dynamic_truth_table. */ void report_tt( STT const& stt ) + { + kitty::dynamic_truth_table tt( num_vars ); + + std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) ); + kitty::print_hex( tt ); + std::cout << "\n"; + } + +private: + /* UINT32_MAX marks a field that has not been computed yet */ uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + uint32_t best_multiplicity0{ UINT32_MAX }; + uint32_t best_multiplicity1{ UINT32_MAX }; + uint32_t bs_support_size{ UINT32_MAX }; + uint32_t num_shared_vars{ 0 }; + STT best_tt; + STT start_tt; + /* [0] bound-set LUT function, [1] composition (top LUT) function */ uint64_t dec_funcs[2]; + uint32_t bs_support[6]; + + uint32_t const num_vars; + bool const multiple_ss; + bool const verify; + /* NOTE(review): the template arguments of std::array appear to have been stripped from this patch text - confirm against upstream */ std::array permutations; +}; + +} // namespace acd + +ABC_NAMESPACE_CXX_HEADER_END + +#endif // _ACD66_H_ \ No newline at end of file diff --git a/src/map/if/acd/acd666.hpp b/src/map/if/acd/acd666.hpp new file mode 100644 index 000000000..72ae68ec4 --- /dev/null +++ b/src/map/if/acd/acd666.hpp @@ -0,0 +1,1257 @@ +/**C++File************************************************************** + + FileName [acd666.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - Feb 8, 2024.] + +***********************************************************************/ +/*! 
\file acd666.hpp + \brief Ashenhurst-Curtis decomposition for "666" cascade + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD666_H_ +#define _ACD666_H_ +#pragma once + /* NOTE(review): the header names of the five standard-library includes below appear to have been stripped from this patch text - restore them from upstream */ +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +ABC_NAMESPACE_CXX_HEADER_START + +namespace acd +{ + +class acd666_impl +{ +private: + static constexpr uint32_t max_num_vars = 16; + /* STT holds the full function (up to max_num_vars inputs), LTT a single 6-input LUT */ using STT = kitty::static_truth_table<max_num_vars>; + using LTT = kitty::static_truth_table<6>; + +public: + /* permutations starts as the identity order */ explicit acd666_impl( uint32_t const num_vars, bool const verify = false ) + : num_vars( num_vars ), verify( verify ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! \brief Runs ACD 666 */ + bool run( word* ptt ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 16 ) + { + return false; + } + + /* convert to static TT */ + init_truth_table( ptt ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition(); + } + + /* Returns -1 when no decomposition is stored, 1 when verification is enabled and fails, and 0 otherwise. */ int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + /* with three LUTs the last stage works on the remainder function */ uint32_t n = num_luts == 3 ? rm_support_size : num_vars; + compute_decomposition_impl( n ); + + if ( verify && !verify_impl() ) + { + return 1; + } + + return 0; + } + + /* Edge count of the stored decomposition, or UINT32_MAX when support_sizes[0] is unset. */ uint32_t get_num_edges() + { + if ( support_sizes[0] == UINT32_MAX ) + { + return UINT32_MAX; + } + + uint32_t num_edges = support_sizes[0] + support_sizes[1] + 1 + ( shared_vars[0] < UINT32_MAX ? 1 : 0 ); + + if ( num_luts == 2 ) + return num_edges; + + /* real value after support minimization */ + return num_edges + support_sizes[2] + 1 + ( shared_vars[1] < UINT32_MAX ? 
1 : 0 ); + } + + /* contains a 1 for BS variables */ + // unsigned get_profile() + // { + // unsigned profile = 0; + + // if ( support_sizes[0] == UINT32_MAX ) + // return -1; + + // for ( uint32_t i = 0; i < bs_support_size; ++i ) + // { + // profile |= 1 << permutations[best_free_set + bs_support[i]]; + // } + + // return profile; + // } + + // void get_decomposition( unsigned char* decompArray ) + // { + // if ( support_sizes[0] == UINT32_MAX ) + // return; + + // get_decomposition_abc( decompArray ); + // } + +private: + /* Tries a two-LUT decomposition first; failing that, splits off a top LUT and decomposes the remainder function into two more LUTs. Sets num_luts to 2 or 3 on success and resets best_multiplicity to UINT32_MAX on failure. */ bool find_decomposition() + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = num_vars - 6; i <= 5; ++i ) + { + if ( find_decomposition_bs( start_tt, num_vars, i ) ) + { + num_luts = 2; + return true; + } + } + + /* find ACD "666" for different number of variables in the free set */ + bool dec_found = false; + uint32_t min_vars_free_set = num_vars <= 11 ? 1 : num_vars - 11; + uint32_t max_vars_free_set = num_vars <= 11 ? num_vars - 7 : 5; + /* largest free set first; min_vars_free_set >= 1 keeps the unsigned countdown from wrapping */ for ( uint32_t i = max_vars_free_set; i >= min_vars_free_set; --i ) + // for ( uint32_t i = min_vars_free_set; i <= max_vars_free_set; ++i ) + { + dec_found = find_decomposition_bs( start_tt, num_vars, i ); + if ( dec_found ) + break; + } + + if ( !dec_found ) + { + best_multiplicity = UINT32_MAX; + return false; + } + + /* compute functions for the top and remainder LUT */ + compute_decomposition_impl_top( num_vars ); + + /* find ACD "66" for the remainder function */ + for ( uint32_t i = rm_support_size - 6; i <= 5; ++i ) + { + if ( find_decomposition_bs( remainder, rm_support_size, i ) ) + { + num_luts = 3; + fix_permutations_remainder( rm_support_size ); + return true; + } + } + + best_multiplicity = UINT32_MAX; + return false; + } + + /* Copies the caller's words into start_tt and pads the table through local_extend_to. */ void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 
1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + start_tt._bits[i] = ptt[i]; + } + + local_extend_to( start_tt, num_vars ); + } + + /* Counts the distinct free-set cofactors of the n-variable table tt. Gives up and returns 5 as soon as a value outside the first `limit` distinct ones appears (limit is 4 below five free variables, 2 otherwise). */ uint32_t column_multiplicity( STT const& tt, uint32_t n, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const limit = free_set_size < 5 ? 4 : 2; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + /* free_set_size <= 5 keeps the cofactor within 32 bits, so the narrowing is lossless */ uint32_t fs_fn = static_cast<uint32_t>( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return 5; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + /* Steps pComb to the next k-subset of n variables, updating pInvPerm alongside it and mirroring every move on tt through swap_inplace_local. Returns false once the scan reaches index 0 with every entry at its maximum. */ inline bool combinations_next( uint32_t n, uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == n - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + swap_inplace_local( tt, n, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + swap_inplace_local( tt, n, j, pos_new ); + } + + return true; + } + + /* Tries every free set of free_set_size variables on the n-variable table tt (taken by value); on success records best_tt, best_multiplicity and permutations and returns true. */ bool find_decomposition_bs( STT tt, uint32_t n, uint32_t free_set_size ) + { + /* works up to 16 input truth tables */ + assert( n <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < n; ++i ) + { + pComb[i] = 
pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity( tt, n, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt, n ); + + if ( res > 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, n, res, n - 1 ); + std::swap( permutations[res], permutations[n - 1] ); + return true; + } + } + } while ( combinations_next( n, free_set_size, pComb, pInvPerm, tt ) ); + + return false; + } + + inline bool check_shared_var( STT const& tt, uint32_t n, uint32_t free_set_size, uint32_t shared_var ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( n > 6 ) ? 
( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[2][4]; + uint32_t size[2] = { 0, 0 }; + uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + inline int check_shared_set( STT const& tt, uint32_t n ) + { + /* find one shared set variable */ + for ( uint32_t i = best_free_set; i < n; ++i ) + { + /* check the multiplicity of cofactors */ + if ( check_shared_var( tt, n, best_free_set, i ) ) + { + return i; + } + } + + return -1; + } + + void compute_decomposition_impl_top( uint32_t n, bool verbose = false ) + { + bool has_shared_set = best_multiplicity > 2; + + /* construct isets involved in multiplicity */ + STT isets0[2]; + STT isets1[2]; + + /* construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( n > 6 ) ? 
( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[0] ) + { + isets0[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + cof >>= shift; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = best_tt._bits[num_blocks] & mask; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[2] ) + { + isets1[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + cof >>= shift; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions_top( isets0, isets1, fs_fun, n ); + + /* print functions */ + if ( verbose ) + { + std::cout << "RM function : "; + kitty::print_hex( remainder ); + std::cout << "\n"; + LTT f; + f._bits = dec_funcs[2]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + void compute_decomposition_impl( uint32_t n, bool verbose = false ) + { + bool has_shared_set = best_multiplicity > 2; + + /* construct isets involved in multiplicity */ + LTT isets0[2]; + LTT isets1[2]; + + /* construct isets */ + uint32_t offset = 0; + 
uint32_t num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + + if ( val == fs_fun[0] ) + { + isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + + cof >>= shift; + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = best_tt._bits[num_blocks] & mask; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + + if ( val == fs_fun[2] ) + { + isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + + cof >>= shift; + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions( isets0, isets1, fs_fun, n ); + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + inline void compute_functions_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity < 3 ) + { + shared_vars[1] = UINT32_MAX; + remainder = isets0[0]; + 
rm_support_size = n - best_free_set; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + rm_support[i] = permutations[i + best_free_set]; + } + compute_composition( fs_fun, 2 ); + return; + } + + shared_vars[1] = permutations[n - 1]; + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4_top( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4_top( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3_top( isets0, isets1, fs_fun, n ); + } + + inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity < 3 ) + { + shared_vars[0] = UINT32_MAX; + dec_funcs[0] = isets0[0]._bits; + support_sizes[0] = n - best_free_set; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + supports[0][i] = permutations[i + best_free_set]; + } + compute_composition( fs_fun, 1 ); + return; + } + + shared_vars[0] = permutations[n - 1]; + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3( isets0, isets1, fs_fun, n ); + } + + inline void compute_functions4_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + STT f; + uint32_t const num_iset_vars = n - best_free_set; + uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); + + assert( num_iset_vars > 6 ); + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; + } + + 
/* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + support_vars1 += has_var( f, num_iset_vars, i ) ? 1 : 0; + rm_support[i] = permutations[i + best_free_set]; + } + + /* use a different set */ + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; + } + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars2 += has_var( f, num_iset_vars, i ) ? 1 : 0; + } + + rm_support_size = support_vars2; + if ( support_vars2 > support_vars1 ) + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; + } + std::swap( fs_fun[2], fs_fun[3] ); /* f now follows isets1[1]: exchange the two shared-variable = 1 cofactors so compute_composition pairs them correctly (fs_fun has only 4 entries) */ + rm_support_size = support_vars1; + } + + /* move variables */ + if ( rm_support_size < num_iset_vars ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + if ( !has_var( f, num_iset_vars, i ) ) + { + continue; + } + + if ( support_vars1 < i ) + { + swap_inplace_local( f, num_iset_vars, support_vars1, i ); + } + + rm_support[support_vars1] = permutations[i + best_free_set]; + ++support_vars1; + } + } + + remainder = f; + compute_composition( fs_fun, 2 ); + } + + inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) /* single-word (LTT) case: tries isets0[0] | isets1[1] and isets0[0] | isets1[0] and keeps the one depending on fewer variables */ + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[1]; + LTT care; + + assert( n - best_free_set <= 6 ); + care._bits = masks[n - best_free_set]; + + /* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars1 += has_var6( f, care, i ) ? 
1 : 0; + supports[0][i] = permutations[i + best_free_set]; + } + + /* use a different set */ + f = isets0[0] | isets1[0]; + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars2 += has_var6( f, care, i ) ? 1 : 0; + } + + support_sizes[0] = support_vars2; + if ( support_vars2 > support_vars1 ) + { + f = isets0[0] | isets1[1]; + std::swap( fs_fun[2], fs_fun[3] ); /* f now follows isets1[1]: exchange the two shared-variable = 1 cofactors so compute_composition pairs them correctly (fs_fun has only 4 entries) */ + support_sizes[0] = support_vars1; + } + + /* move variables */ + if ( support_sizes[0] < n - best_free_set ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + continue; + } + + if ( support_vars1 < i ) + { + kitty::swap_inplace( f, support_vars1, i ); + } + + supports[0][support_vars1] = permutations[i + best_free_set]; + ++support_vars1; + } + } + + dec_funcs[0] = f._bits; + compute_composition( fs_fun, 1 ); + } + + inline void compute_functions3_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) /* multi-word (STT) case for multiplicity 3 where one cofactor of the shared variable has multiplicity 1: its don't cares are used to shrink the support of the remainder */ + { + STT f, care; + uint32_t const num_iset_vars = n - best_free_set; + uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); + + assert( num_iset_vars > 6 ); + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; + } + + /* no "<= 6" bound on the iset variables here: unlike compute_functions3, this variant requires more than 6 of them */ + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + care._bits[i] = ~( isets0[0]._bits[i] ); + } + fs_fun[1] = fs_fun[0]; + } + else + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + care._bits[i] = ~( isets1[0]._bits[i] ); + } + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + if ( !has_var_support( f, care, num_iset_vars, i ) ) + { + adjust_truth_table_on_dc( f, care, n, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + 
rm_support[support_vars] = permutations[i + best_free_set]; + ++support_vars; + } + + rm_support_size = support_vars; + remainder = f; + compute_composition( fs_fun, 2 ); + } + + inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) /* single-word (LTT) case for multiplicity 3 where one cofactor of the shared variable has multiplicity 1: its don't cares are used to shrink the support of LUT 0 */ + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[0]; + LTT care; + + assert( n - best_free_set <= 6 ); + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + care._bits = masks[n - best_free_set] & ( ~isets0[0]._bits ); + fs_fun[1] = fs_fun[0]; + } + else + { + care._bits = masks[n - best_free_set] & ( ~isets1[0]._bits ); + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + adjust_truth_table_on_dc6( f, care, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + supports[0][support_vars] = permutations[i + best_free_set]; /* record the permuted variable id, as compute_functions4 and compute_functions3_top do, not the local index */ + ++support_vars; + } + + support_sizes[0] = support_vars; + dec_funcs[0] = f._bits; + compute_composition( fs_fun, 1 ); + } + + void compute_composition( uint64_t fs_fun[4], uint32_t index ) /* packs the free-set functions fs_fun[] into the truth table of LUT `index` and records the free-set variables as its support */ + { + dec_funcs[index] = fs_fun[0] << ( 1 << best_free_set ); + dec_funcs[index] |= fs_fun[1]; + + if ( best_multiplicity > 2 ) + { + dec_funcs[index] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); + dec_funcs[index] |= fs_fun[3] << ( 2 << best_free_set ); + } + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + supports[index][i] = permutations[i]; + } + support_sizes[index] = best_free_set; + } + + void fix_permutations_remainder( uint32_t n ) /* maps the first n entries of permutations through rm_support */ + { + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = rm_support[permutations[i]]; + } + } + + template + void local_extend_to( TT_type& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i 
< num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + void swap_inplace_local( STT& tt, uint32_t n, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( n > 6 ); + const uint32_t num_blocks = 1 << ( n - 6 ); + + if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + 
{ + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + + inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) + { + if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) + { + return true; + } + + return false; + } + + inline bool has_var( const STT& tt, uint32_t n, uint8_t var_index ) + { + uint32_t const num_blocks = 1u << ( n - 6 ); + + if ( var_index < 6 ) + { + return std::any_of( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, + [var_index]( uint64_t word ) { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & kitty::detail::projections_neg[var_index] ) != + ( word & kitty::detail::projections_neg[var_index] ); } ); + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( tt._bits[i + j] != tt._bits[i + j + step] ) + { + return true; + } + } + } + return false; + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 
1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + void adjust_truth_table_on_dc6( LTT& tt, LTT& care, uint32_t var_index ) + { + uint64_t new_bits = tt._bits & care._bits; + tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); + } + + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t n, uint32_t var_index ) + { + assert( var_index < n ); + const uint32_t num_blocks = n <= 6 ? 
1 : ( 1 << ( n - 6 ) ); + + if ( n <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + uint64_t new_bits = *it_tt & *it_care; + *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); + + ++it_tt; + ++it_care; + } + return; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); + tt._bits[i + j + step] = tt._bits[i + j]; + care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; + care._bits[i + j + step] = care._bits[i + j]; + } + } + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. 
+ * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + // void get_decomposition_abc( unsigned char* decompArray ) + // { + // unsigned char* pArray = decompArray; + // unsigned char bytes = 2; + + // /* write number of LUTs */ + // pArray++; + // *pArray = 2; + // pArray++; + + // /* write BS LUT */ + // /* write fanin size */ + // *pArray = bs_support_size; + // pArray++; + // ++bytes; + + // /* write support */ + // for ( uint32_t i = 0; i < bs_support_size; ++i ) + // { + // *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; + // pArray++; + // ++bytes; + // } + + // /* write truth table */ + // uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); + // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + // { + // *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); + // pArray++; + // ++bytes; + // } + + // /* write top LUT */ + // /* write fanin size */ + // uint32_t support_size = best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + // *pArray = support_size; + // pArray++; + // ++bytes; + + // /* write support */ + // for ( uint32_t i = 0; i < best_free_set; ++i ) + // { + // *pArray = (unsigned char)permutations[i]; + // pArray++; + // ++bytes; + // } + + // *pArray = (unsigned char)num_vars; + // pArray++; + // ++bytes; + + // if ( best_multiplicity > 2 ) + // { + // *pArray = (unsigned char)permutations[num_vars - 1]; + // pArray++; + // ++bytes; + // } + + // /* write truth table */ + // tt_num_bytes = ( support_size <= 3 ) ? 
1 : ( 1 << ( support_size - 3 ) ); + // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + // { + // *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); + // pArray++; + // ++bytes; + // } + + // /* write numBytes */ + // *decompArray = bytes; + // } + + bool verify_impl() + { + /* create PIs */ + STT pis[max_num_vars]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + kitty::create_nth_var( pis[i], i ); + } + + STT bsi[6]; + STT bsf_sim; + for ( uint32_t lut_i = 0; lut_i < num_luts; ++lut_i ) + { + for ( uint32_t i = 0; i < support_sizes[lut_i]; ++i ) + { + bsi[i] = pis[supports[lut_i][i]]; + } + + STT top_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < support_sizes[lut_i]; ++j ) + { + pattern |= get_bit( bsi[j], i ) << j; + } + if ( lut_i != 0 ) + { + pattern |= get_bit( bsf_sim, i ) << support_sizes[lut_i]; + if ( shared_vars[lut_i - 1] < UINT32_MAX ) + { + pattern |= get_bit( pis[shared_vars[lut_i - 1]], i ) << ( support_sizes[lut_i] + 1 ); + } + } + if ( ( dec_funcs[lut_i] >> pattern ) & 1 ) + { + set_bit( top_sim, i ); + } + } + + bsf_sim = top_sim; + } + + /* extend function */ + local_extend_to( bsf_sim, num_vars ); + + for ( uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i ) + { + if ( bsf_sim._bits[i] != start_tt._bits[i] ) + { + std::cout << "Found incorrect decomposition\n"; + report_tt( bsf_sim ); + std::cout << " instead_of\n"; + report_tt( start_tt ); + return false; + } + } + + return true; + } + + uint32_t get_bit( const STT& tt, uint64_t index ) + { + return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; + } + + void set_bit( STT& tt, uint64_t index ) + { + tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); + } + + void report_tt( STT const& stt ) + { + kitty::dynamic_truth_table tt( num_vars ); + + std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) ); + kitty::print_hex( tt ); + std::cout << "\n"; + } 
+ +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + uint32_t best_multiplicity0{ UINT32_MAX }; + uint32_t best_multiplicity1{ UINT32_MAX }; + uint32_t rm_support_size{ UINT32_MAX }; + uint32_t num_luts{ 0 }; + + STT start_tt; + STT best_tt; + STT remainder; + + uint64_t dec_funcs[3]; + uint32_t supports[3][6]; + uint32_t support_sizes[3] = { UINT32_MAX, UINT32_MAX, UINT32_MAX }; + uint32_t rm_support[15]; + uint32_t shared_vars[2]; + + uint32_t const num_vars; + bool const verify; + std::array permutations; +}; + +} // namespace acd + +ABC_NAMESPACE_CXX_HEADER_END + +#endif // _ACD666_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_algorithm.hpp b/src/map/if/acd/kitty_algorithm.hpp index 78eead08a..a8c71dc07 100644 --- a/src/map/if/acd/kitty_algorithm.hpp +++ b/src/map/if/acd/kitty_algorithm.hpp @@ -22,9 +22,9 @@ namespace kitty \return new constructed truth table of same type and dimensions */ template -auto unary_operation( const TT& tt, Fn&& op ) +TT unary_operation( const TT& tt, Fn&& op ) { - auto result = tt.construct(); + TT result = tt.construct(); std::transform( tt.cbegin(), tt.cend(), result.begin(), op ); result.mask_bits(); return result; @@ -43,11 +43,11 @@ auto unary_operation( const TT& tt, Fn&& op ) \return new constructed truth table of same type and dimensions */ template -auto binary_operation( const TT& first, const TT& second, Fn&& op ) +TT binary_operation( const TT& first, const TT& second, Fn&& op ) { assert( first.num_vars() == second.num_vars() ); - auto result = first.construct(); + TT result = first.construct(); std::transform( first.cbegin(), first.cend(), second.cbegin(), result.begin(), op ); result.mask_bits(); return result; diff --git a/src/map/if/acd/kitty_dynamic_tt.hpp b/src/map/if/acd/kitty_dynamic_tt.hpp index 880943dfa..2913b6674 100644 --- a/src/map/if/acd/kitty_dynamic_tt.hpp +++ b/src/map/if/acd/kitty_dynamic_tt.hpp @@ -51,55 +51,55 @@ struct 
dynamic_truth_table /*! Returns number of variables. */ - inline auto num_vars() const noexcept { return _num_vars; } + inline uint32_t num_vars() const noexcept { return _num_vars; } /*! Returns number of blocks. */ - inline auto num_blocks() const noexcept { return _bits.size(); } + inline uint32_t num_blocks() const noexcept { return _bits.size(); } /*! Returns number of bits. */ - inline auto num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } + inline uint32_t num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } /*! \brief Begin iterator to bits. */ - inline auto begin() noexcept { return _bits.begin(); } + inline std::vector::iterator begin() noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() noexcept { return _bits.end(); } + inline std::vector::iterator end() noexcept { return _bits.end(); } /*! \brief Begin iterator to bits. */ - inline auto begin() const noexcept { return _bits.begin(); } + inline std::vector::const_iterator begin() const noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() const noexcept { return _bits.end(); } + inline std::vector::const_iterator end() const noexcept { return _bits.end(); } /*! \brief Reverse begin iterator to bits. */ - inline auto rbegin() noexcept { return _bits.rbegin(); } + inline std::vector::reverse_iterator rbegin() noexcept { return _bits.rbegin(); } /*! \brief Reverse end iterator to bits. */ - inline auto rend() noexcept { return _bits.rend(); } + inline std::vector::reverse_iterator rend() noexcept { return _bits.rend(); } /*! \brief Constant begin iterator to bits. */ - inline auto cbegin() const noexcept { return _bits.cbegin(); } + inline std::vector::const_iterator cbegin() const noexcept { return _bits.cbegin(); } /*! \brief Constant end iterator to bits. 
*/ - inline auto cend() const noexcept { return _bits.cend(); } + inline std::vector::const_iterator cend() const noexcept { return _bits.cend(); } /*! \brief Constant reverse begin iterator to bits. */ - inline auto crbegin() const noexcept { return _bits.crbegin(); } + inline std::vector::const_reverse_iterator crbegin() const noexcept { return _bits.crbegin(); } /*! \brief Constant teverse end iterator to bits. */ - inline auto crend() const noexcept { return _bits.crend(); } + inline std::vector::const_reverse_iterator crend() const noexcept { return _bits.crend(); } /*! \brief Assign other truth table. diff --git a/src/map/if/acd/kitty_operations.hpp b/src/map/if/acd/kitty_operations.hpp index bf8e38007..e0292bc05 100644 --- a/src/map/if/acd/kitty_operations.hpp +++ b/src/map/if/acd/kitty_operations.hpp @@ -31,7 +31,7 @@ inline TT unary_not_if( const TT& tt, bool cond ) #ifdef _MSC_VER #pragma warning( pop ) #endif - return unary_operation( tt, [mask]( auto a ) + return unary_operation( tt, [mask]( uint64_t a ) { return a ^ mask; } ); } @@ -39,7 +39,7 @@ inline TT unary_not_if( const TT& tt, bool cond ) template inline TT unary_not( const TT& tt ) { - return unary_operation( tt, []( auto a ) + return unary_operation( tt, []( uint64_t a ) { return ~a; } ); } @@ -48,14 +48,14 @@ template inline TT binary_and( const TT& first, const TT& second ) { - return binary_operation( first, second, std::bit_and<>() ); + return binary_operation( first, second, std::bit_and() ); } /*! \brief Bitwise OR of two truth tables */ template inline TT binary_or( const TT& first, const TT& second ) { - return binary_operation( first, second, std::bit_or<>() ); + return binary_operation( first, second, std::bit_or() ); } /*! 
\brief Swaps two variables in a truth table @@ -133,6 +133,24 @@ void swap_inplace( TT& tt, uint8_t var_index1, uint8_t var_index2 ) } } +template +inline void swap_inplace( static_truth_table& tt, uint8_t var_index1, uint8_t var_index2 ) +{ + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits = ( tt._bits & pmask[0] ) | ( ( tt._bits & pmask[1] ) << shift ) | ( ( tt._bits & pmask[2] ) >> shift ); +} + /*! \brief Extends smaller truth table to larger one The most significant variables will not be in the functional support of the @@ -312,7 +330,7 @@ void print_hex( const TT& tt, std::ostream& os = std::cout ) auto const chunk_size = std::min( tt.num_vars() <= 1 ? 1 : ( tt.num_bits() >> 2 ), 16 ); - for_each_block_reversed( tt, [&os, chunk_size]( auto word ) + for_each_block_reversed( tt, [&os, chunk_size]( uint64_t word ) { std::string chunk( chunk_size, '0' ); diff --git a/src/map/if/acd/kitty_operators.hpp b/src/map/if/acd/kitty_operators.hpp index b5f4688c2..7ccab7ca1 100644 --- a/src/map/if/acd/kitty_operators.hpp +++ b/src/map/if/acd/kitty_operators.hpp @@ -78,28 +78,33 @@ inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& s /*! \brief Operator for binary_or and assign */ template -inline void operator|=( static_truth_table& first, const static_truth_table& second ) +inline void operator|=( static_truth_table& first, const static_truth_table& second ) { // first = binary_or( first, second ); /* runtime improved version */ - if constexpr ( NumVars <= 6 ) - { - first._bits |= second._bits; - first.mask_bits(); - } - else if constexpr ( NumVars == 7 ) + first._bits |= second._bits; + first.mask_bits(); +} + +/*! 
\brief Operator for binary_or and assign */ +template +inline void operator|=( static_truth_table& first, const static_truth_table& second ) +{ + // first = binary_or( first, second ); + /* runtime improved version */ + if ( NumVars == 7 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; } - else if constexpr ( NumVars == 8 ) + else if ( NumVars == 8 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; first._bits[2] |= second._bits[2]; first._bits[3] |= second._bits[3]; } - else if constexpr ( NumVars == 9 ) + else if ( NumVars == 9 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; diff --git a/src/map/if/acd/kitty_static_tt.hpp b/src/map/if/acd/kitty_static_tt.hpp index ab5a5d1c9..5bb4bdac1 100644 --- a/src/map/if/acd/kitty_static_tt.hpp +++ b/src/map/if/acd/kitty_static_tt.hpp @@ -12,8 +12,116 @@ ABC_NAMESPACE_CXX_HEADER_START namespace kitty { +template +struct static_truth_table; + +/*! Truth table (for up to 6 variables) in which number of variables is known at compile time. + */ template -struct static_truth_table +struct static_truth_table +{ + /*! \cond PRIVATE */ + enum + { + NumBits = uint64_t( 1 ) << NumVars + }; + /*! \endcond */ + + /*! Constructs a new static truth table instance with the same number of variables. */ + inline static_truth_table construct() const + { + return static_truth_table(); + } + + /*! Returns number of variables. + */ + inline uint32_t num_vars() const noexcept { return NumVars; } + + /*! Returns number of blocks. + */ + inline uint32_t num_blocks() const noexcept { return 1u; } + + /*! Returns number of bits. + */ + inline uint32_t num_bits() const noexcept { return NumBits; } + + /*! \brief Begin iterator to bits. + */ + inline uint64_t * begin() noexcept { return &_bits; } + + /*! \brief End iterator to bits. + */ + inline uint64_t * end() noexcept { return ( &_bits ) + 1; } + + /*! \brief Begin iterator to bits. 
+ */ + inline const uint64_t * begin() const noexcept { return &_bits; } + + /*! \brief End iterator to bits. + */ + inline const uint64_t * end() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Reverse begin iterator to bits. + */ + inline uint64_t * rbegin() noexcept { return &_bits; } + + /*! \brief Reverse end iterator to bits. + */ + inline uint64_t * rend() noexcept { return ( &_bits ) + 1; } + + /*! \brief Constant begin iterator to bits. + */ + inline const uint64_t * cbegin() const noexcept { return &_bits; } + + /*! \brief Constant end iterator to bits. + */ + inline const uint64_t * cend() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline const uint64_t * crbegin() const noexcept { return &_bits; } + + /*! \brief Constant reverse end iterator to bits. + */ + inline const uint64_t * crend() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Assign other truth table if number of variables match. + + This replaces the current truth table with another truth table, if `other` + has the same number of variables. Otherwise, the truth table is not + changed. + + \param other Other truth table + */ + template + static_truth_table& operator=( const TT& other ) + { + if ( other.num_vars() == num_vars() ) + { + std::copy( other.begin(), other.end(), begin() ); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + If the truth table has less than 6 variables, it may not use all + the bits. This operation makes sure to zero out all non-valid + bits. + */ + inline void mask_bits() noexcept { _bits &= detail::masks[NumVars]; } + + /*! \cond PRIVATE */ +public: /* fields */ + uint64_t _bits = 0; + /*! \endcond */ +}; + +/*! Truth table (more than 6 variables) in which number of variables is known at compile time. + */ +template +struct static_truth_table { /*! \cond PRIVATE */ enum @@ -46,55 +154,55 @@ struct static_truth_table /*! Returns number of variables. 
*/ - inline auto num_vars() const noexcept { return NumVars; } + inline uint32_t num_vars() const noexcept { return NumVars; } /*! Returns number of blocks. */ - inline auto num_blocks() const noexcept { return NumBlocks; } + inline uint32_t num_blocks() const noexcept { return NumBlocks; } /*! Returns number of bits. */ - inline auto num_bits() const noexcept { return NumBits; } + inline uint32_t num_bits() const noexcept { return NumBits; } /*! \brief Begin iterator to bits. */ - inline auto begin() noexcept { return _bits.begin(); } + inline typename std::array::iterator begin() noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() noexcept { return _bits.end(); } + inline typename std::array::iterator end() noexcept { return _bits.end(); } /*! \brief Begin iterator to bits. */ - inline auto begin() const noexcept { return _bits.begin(); } + inline typename std::array::const_iterator begin() const noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() const noexcept { return _bits.end(); } + inline typename std::array::const_iterator end() const noexcept { return _bits.end(); } /*! \brief Reverse begin iterator to bits. */ - inline auto rbegin() noexcept { return _bits.rbegin(); } + inline typename std::array::reverse_iterator rbegin() noexcept { return _bits.rbegin(); } /*! \brief Reverse end iterator to bits. */ - inline auto rend() noexcept { return _bits.rend(); } + inline typename std::array::reverse_iterator rend() noexcept { return _bits.rend(); } /*! \brief Constant begin iterator to bits. */ - inline auto cbegin() const noexcept { return _bits.cbegin(); } + inline typename std::array::const_iterator cbegin() const noexcept { return _bits.cbegin(); } /*! \brief Constant end iterator to bits. */ - inline auto cend() const noexcept { return _bits.cend(); } + inline typename std::array::const_iterator cend() const noexcept { return _bits.cend(); } /*! 
\brief Constant reverse begin iterator to bits. */ - inline auto crbegin() const noexcept { return _bits.crbegin(); } + inline typename std::array::const_reverse_iterator crbegin() const noexcept { return _bits.crbegin(); } /*! \brief Constant teverse end iterator to bits. */ - inline auto crend() const noexcept { return _bits.crend(); } + inline typename std::array::const_reverse_iterator crend() const noexcept { return _bits.crend(); } /*! \brief Assign other truth table if number of variables match. diff --git a/src/map/if/if.h b/src/map/if/if.h index f8c99fdf1..33621ac92 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -151,6 +151,7 @@ struct If_Par_t_ int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure + int fEnableStructN;// LUT structure using a new method float WireDelay; // wire delay // internal parameters int fSkipCutFilter;// skip cut filter @@ -551,6 +552,7 @@ extern int If_CutPerformCheck07( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck08( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck10( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck16( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); +extern int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); diff --git a/src/map/if/ifDec66.c b/src/map/if/ifDec66.c new file mode 100644 index 000000000..5c2dce53b --- /dev/null +++ b/src/map/if/ifDec66.c @@ -0,0 +1,366 @@ +/**CFile**************************************************************** + + 
FileName    [ifDec66.c]
+
+  SystemName  [ABC: Logic synthesis and verification system.]
+
+  PackageName [FPGA mapping based on priority cuts.]
+
+  Synopsis    [Fast checking procedures.]
+
+  Author      [Alessandro Tempia Calvino]
+
+  Affiliation [EPFL]
+
+  Date        [Ver. 1.0. Started - Feb 8, 2024.]
+
+  Revision    [$Id: ifDec66.c,v 1.00 2008/02/08 00:00:00 tempia Exp $]
+
+***********************************************************************/
+
+#include "if.h"
+#include "bool/kit/kit.h"
+#include "misc/vec/vecMem.h"
+
+ABC_NAMESPACE_IMPL_START
+
+#define CLU_VAR_MAX  16
+#define CLU_MEM_MAX  1000  // 1 GB
+#define CLU_UNUSED   0xff
+
+////////////////////////////////////////////////////////////////////////
+///                        DECLARATIONS                              ///
+////////////////////////////////////////////////////////////////////////
+
+// decomposition record; only its first 8 bytes (nVars, nMyu, pVars[0..5]),
+// 4 bits each, are packed into the 32-bit cache word by If_CluGrp2Uns2()
+typedef struct If_Grp_t_ If_Grp_t;
+struct If_Grp_t_
+{
+    char nVars;
+    char nMyu;
+    char pVars[CLU_VAR_MAX];
+};
+
+// hash table entry; allocated with room for nWords truth-table words,
+// so pTruth[] extends past the end of the declared one-element array
+typedef struct If_Hte_t_ If_Hte_t;
+struct If_Hte_t_
+{
+    If_Hte_t * pNext;      // next entry in the same bucket
+    unsigned   Group;      // packed result, or CLU_UNUSED when not computed yet
+    unsigned   Counter;    // number of lookups that hit this entry
+    word       pTruth[1];  // truth table used as the key
+};
+
+////////////////////////////////////////////////////////////////////////
+///                     FUNCTION DEFINITIONS                         ///
+////////////////////////////////////////////////////////////////////////
+
+// packs the low 4 bits of the first 8 bytes of the group into one unsigned
+static inline unsigned If_CluGrp2Uns2( If_Grp_t * pG )
+{
+    char * pChar = (char *)pG;
+    unsigned Res = 0;
+    int i;
+    // shift as unsigned: for i == 7 a nibble >= 8 would otherwise be
+    // shifted into the sign bit of an int, which is undefined behavior
+    for ( i = 0; i < 8; i++ )
+        Res |= ((unsigned)(pChar[i] & 15) << (i << 2));
+    return Res;
+}
+
+// inverse of If_CluGrp2Uns2(): unpacks eight 4-bit fields into the first 8 bytes
+static inline void If_CluUns2Grp2( unsigned Group, If_Grp_t * pG )
+{
+    char * pChar = (char *)pG;
+    int i;
+    for ( i = 0; i < 8; i++ )
+        pChar[i] = ((Group >> (i << 2)) & 15);
+}
+
+// returns the smallest odd number >= p that has no odd divisor
+// (trial division); note that 1 is returned for p <= 1
+unsigned int If_CluPrimeCudd2( unsigned int p )
+{
+    unsigned int i;
+    int pn;
+
+    p--;
+    do {
+        p++;
+        if (p&1) {
+            pn = 1;
+            i = 3;
+            // "i <= p / i" is the same test as "i * i <= p"
+            // but cannot overflow for large p
+            while (i <= p / i) {
+                if (p % i == 0) {
+                    pn = 0;
+                    break;
+                }
+                i += 2;
+            }
+        } else {
+            pn = 0;
+        }
+    } while (!pn);
+    return(p);
+
+} /* end of Cudd_Prime */
+
+// number of 64-bit words in the truth table of an nVars-input function
+static inline int If_CluWordNum2( int nVars )
+{
+    return nVars <= 6 ? 1 : 1 << (nVars-6);
+}
+
+// Scans table t and returns a hit-counter threshold: starting from the
+// largest counter and going down, the first value at which the entries
+// seen so far exceed half of all entries. The result is at least 1.
+int If_CluHashFindMedian2( If_Man_t * p, int t )
+{
+    If_Hte_t * pEntry;
+    Vec_Int_t * vCounters;
+    int i, Max = 0, Total = 0, Half = 0;
+    vCounters = Vec_IntStart( 1000 );
+    for ( i = 0; i < p->nTableSize[t]; i++ )
+    {
+        for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[i]; pEntry; pEntry = pEntry->pNext )
+        {
+            if ( Max < (int)pEntry->Counter )
+            {
+                Max = pEntry->Counter;
+                // NOTE(review): relies on Vec_IntSetEntry() making index
+                // Counter valid when it is >= 1000 - confirm in vecInt.h
+                Vec_IntSetEntry( vCounters, pEntry->Counter, 0 );
+            }
+            Vec_IntAddToEntry( vCounters, pEntry->Counter, 1 );
+            Total++;
+        }
+    }
+    for ( i = Max; i > 0; i-- )
+    {
+        Half += Vec_IntEntry( vCounters, i );
+        if ( Half > Total/2 )
+            break;
+    }
+    Vec_IntFree( vCounters );
+    return Abc_MaxInt( i, 1 );
+}
+
+// Hashes nWords truth-table words into [0, Size): byte-wise for tables of
+// fewer than 4 words, 32-bit-wise otherwise.
+// The multiplier index is i % 7, so only the first seven primes are used;
+// this is kept as is to leave the hash values unchanged.
+int If_CluHashKey2( word * pTruth, int nWords, int Size )
+{
+    static const unsigned BigPrimes[8] = {12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741};
+    unsigned Value = 0;
+    int i;
+    if ( nWords < 4 )
+    {
+        unsigned char * s = (unsigned char *)pTruth;
+        for ( i = 0; i < 8 * nWords; i++ )
+            Value ^= BigPrimes[i % 7] * s[i];
+    }
+    else
+    {
+        unsigned * s = (unsigned *)pTruth;
+        for ( i = 0; i < 2 * nWords; i++ )
+            Value ^= BigPrimes[i % 7] * s[i];
+    }
+    return Value % Size;
+}
+
+// Looks up pTruth in hash table t of the manager, creating the memory
+// manager, the table and the entry on demand.
+// Returns a pointer to the entry's Group field (CLU_UNUSED for an entry
+// that was just created), or NULL when p is NULL.
+// Every hit increments the entry's Counter.
+unsigned * If_CluHashLookup2( If_Man_t * p, word * pTruth, int t )
+{
+    If_Hte_t * pEntry, * pPrev;
+    int nWords, HashKey;
+    if ( p == NULL )
+        return NULL;
+    nWords = If_CluWordNum2(p->pPars->nLutSize);
+    if ( p->pMemEntries == NULL )
+        p->pMemEntries = Mem_FixedStart( sizeof(If_Hte_t) + sizeof(word) * (nWords - 1) );
+    if ( p->pHashTable[t] == NULL )
+    {
+        // decide how large should be the table
+        int nEntriesMax1 = 4 * If_CluPrimeCudd2( Vec_PtrSize(p->vObjs) * p->pPars->nCutsMax );
+        int nEntriesMax2 = (int)(((double)CLU_MEM_MAX * (1 << 20)) / nWords / 8);
+        // create table
+        p->nTableSize[t] = If_CluPrimeCudd2( Abc_MinInt(nEntriesMax1, nEntriesMax2)/2 );
+        p->pHashTable[t] = ABC_CALLOC( void *, p->nTableSize[t] );
+    }
+    // check if this entry exists
+    HashKey = If_CluHashKey2( pTruth, nWords, p->nTableSize[t] );
+    for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[HashKey]; pEntry; pEntry = pEntry->pNext )
+        if ( memcmp(pEntry->pTruth, pTruth, sizeof(word) * nWords) == 0 )
+        {
+            pEntry->Counter++;
+            return &pEntry->Group;
+        }
+    // purge the hash table when it is overfull (the table size is not changed)
+    if ( p->nTableEntries[t] >= 2 * p->nTableSize[t] )
+    {
+        // collect useful entries, recycle those hit no more often than the median
+        If_Hte_t * pNext;
+        Vec_Ptr_t * vUseful = Vec_PtrAlloc( p->nTableEntries[t] );
+        int i, Median = If_CluHashFindMedian2( p, t );
+        for ( i = 0; i < p->nTableSize[t]; i++ )
+        {
+            for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[i]; pEntry; pEntry = pNext )
+            {
+                pNext = pEntry->pNext;
+                if ( (int)pEntry->Counter > Median )
+                    Vec_PtrPush( vUseful, pEntry );
+                else
+                    Mem_FixedEntryRecycle( p->pMemEntries, (char *)pEntry );
+            }
+        }
+        // add useful entries, keeping each bucket sorted by decreasing counter
+        memset( p->pHashTable[t], 0, sizeof(void *) * p->nTableSize[t] );
+        Vec_PtrForEachEntry( If_Hte_t *, vUseful, pEntry, i )
+        {
+            HashKey = If_CluHashKey2( pEntry->pTruth, nWords, p->nTableSize[t] );
+            pPrev = ((If_Hte_t **)p->pHashTable[t])[HashKey];
+            if ( pPrev == NULL || pEntry->Counter >= pPrev->Counter )
+            {
+                pEntry->pNext = pPrev;
+                ((If_Hte_t **)p->pHashTable[t])[HashKey] = pEntry;
+            }
+            else
+            {
+                while ( pPrev->pNext && pEntry->Counter < pPrev->pNext->Counter )
+                    pPrev = pPrev->pNext;
+                pEntry->pNext = pPrev->pNext;
+                pPrev->pNext = pEntry;
+            }
+        }
+        p->nTableEntries[t] = Vec_PtrSize( vUseful );
+        Vec_PtrFree( vUseful );
+        // HashKey was overwritten while re-inserting the useful entries;
+        // restore the bucket of pTruth so the new entry is linked where
+        // later lookups will search for it
+        HashKey = If_CluHashKey2( pTruth, nWords, p->nTableSize[t] );
+    }
+    // create entry
+    p->nTableEntries[t]++;
+    pEntry = (If_Hte_t *)Mem_FixedEntryFetch( p->pMemEntries );
+    memcpy( pEntry->pTruth, pTruth, sizeof(word) * nWords );
+    pEntry->Group = CLU_UNUSED;
+    pEntry->Counter = 1;
+    // insert at the end of the bucket
+    pEntry->pNext = NULL;
+    for ( pPrev = ((If_Hte_t **)p->pHashTable[t])[HashKey]; pPrev && pPrev->pNext; pPrev = pPrev->pNext );
+    if ( pPrev == NULL )
+        ((If_Hte_t **)p->pHashTable[t])[HashKey] = pEntry;
+    else
+        pPrev->pNext = pEntry;
+    return &pEntry->Group;
+}
+
+// Shared body of If_CluCheck66() and If_CluCheck666().
+// Returns the value of acd666_evaluate() (fThreeLuts != 0) or
+// acd66_evaluate() (fThreeLuts == 0) for the given truth table, using
+// hash table 0 of the manager as a cache when p != NULL and fHashing != 0.
+// A cached value is returned as is, so failed checks (value 0) are no
+// longer re-evaluated on every lookup.
+// NOTE(review): the acd*_evaluate() functions are declared outside this
+// file; they are presumed to return 0 on failure and a small positive
+// value (stored in 4 bits of the cache word) on success - confirm.
+static int If_CluCheckAcd2( If_Man_t * p, word * pTruth0, int nVars, int fHashing, int fThreeLuts )
+{
+    If_Grp_t G1 = {0};
+    unsigned * pHashed = NULL;
+
+    if ( p && fHashing )
+    {
+        pHashed = If_CluHashLookup2( p, pTruth0, 0 );
+        if ( pHashed && *pHashed != CLU_UNUSED )
+        {
+            If_CluUns2Grp2( *pHashed, &G1 );
+            return G1.nVars;
+        }
+    }
+
+    /* new entry */
+    if ( fThreeLuts )
+        G1.nVars = acd666_evaluate( pTruth0, nVars, 0 );
+    else
+        G1.nVars = acd66_evaluate( pTruth0, nVars, 0 );
+
+    if ( pHashed )
+        *pHashed = If_CluGrp2Uns2( &G1 );
+
+    return G1.nVars;
+}
+
+// returns if successful (check against the "66" structure)
+int If_CluCheck66( If_Man_t * p, word * pTruth0, int nVars, int fHashing )
+{
+    return If_CluCheckAcd2( p, pTruth0, nVars, fHashing, 0 );
+}
+
+// returns if successful (check against the "666" structure)
+int If_CluCheck666( If_Man_t * p, word * pTruth0, int nVars, int fHashing )
+{
+    return If_CluCheckAcd2( p, pTruth0, nVars, fHashing, 1 );
+}
+
+/**Function*************************************************************
+
+  Synopsis    [Performs ACD into 66 cascade.]
+
+  Description [Checks the cut function against the structure given by
+  pStr, which must be "66" or "666". Returns 0 if pStr is malformed or
+  the cut has more than 11 ("66") or 16 ("666") leaves, 1 if the cut has
+  at most 6 leaves, and otherwise the result of If_CluCheck66() or
+  If_CluCheck666(). The input truth table is not modified.]
+
+  SideEffects [May add an entry to hash table 0 of the manager.]
+
+  SeeAlso     []
+
+***********************************************************************/
+int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeaves, char * pStr )
+{
+    // local copy of the function; declared as word[] (same byte size as the
+    // former unsigned[] buffer) so that it is correctly aligned for the
+    // word-based truth-table routines below
+    word pTruth[IF_MAX_FUNC_LUTSIZE > 6 ? 1 << (IF_MAX_FUNC_LUTSIZE - 6) : 1];
+    int i, Length;
+
+    // quit if parameters are wrong
+    Length = (int)strlen(pStr);
+    if ( Length != 2 && Length != 3 )
+    {
+        printf( "Wrong LUT struct (%s)\n", pStr );
+        return 0;
+    }
+    for ( i = 0; i < Length; i++ )
+    {
+        if ( pStr[i] != '6' )
+        {
+            printf( "The LUT size (%d) should belong to {6}.\n", pStr[i] - '0' );
+            return 0;
+        }
+    }
+
+    // stretch the truth table
+    assert( nVars >= 6 && nVars <= IF_MAX_FUNC_LUTSIZE ); // the copy must fit into pTruth
+    memcpy( pTruth, pTruth0, sizeof(word) * Abc_TtWordNum(nVars) );
+    Abc_TtStretch6( pTruth, nLeaves, p->pPars->nLutSize );
+
+    // if cutmin is disabled, minimize the function
+    if ( !p->pPars->fCutMin )
+        nLeaves = Abc_TtMinBase( pTruth, NULL, nLeaves, nVars );
+
+    if ( ( Length == 2 && nLeaves > 11 ) || ( Length == 3 && nLeaves > 16 ) )
+    {
+        printf( "The cut size (%d) is too large for the LUT structure %s.\n", nLeaves, pStr );
+        return 0;
+    }
+
+    // consider easy case
+    if ( nLeaves <= 6 )
+        return 1;
+
+    // derive the decomposition
+    if ( Length == 2 )
+        return If_CluCheck66( p, pTruth, nVars, 1 );
+    else
+        return If_CluCheck666( p, pTruth, nVars, 1 );
+}
+
+////////////////////////////////////////////////////////////////////////
+///                       END OF FILE                                ///
+////////////////////////////////////////////////////////////////////////
+
+
+ABC_NAMESPACE_IMPL_END
diff --git a/src/map/if/module.make b/src/map/if/module.make
index 6651d465b..bd652f35b 100644
--- a/src/map/if/module.make
+++ b/src/map/if/module.make
@@ -7,6 +7,7 @@ SRC += src/map/if/ifCom.c \
 	src/map/if/ifDec08.c \
 	src/map/if/ifDec10.c \
 	src/map/if/ifDec16.c \
+	src/map/if/ifDec66.c \
 	src/map/if/ifDec75.c \
 	src/map/if/ifDelay.c \
 	src/map/if/ifDsd.c \