From 2d9af6c9a41850622f06691a0152295463efb115 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 8 Feb 2024 09:36:58 +0100 Subject: [PATCH 01/19] Adding ACD for 66 LUT structure using a new method --- src/base/abci/abc.c | 27 +- src/map/if/acd/ac_wrapper.cpp | 42 + src/map/if/acd/ac_wrapper.h | 3 + src/map/if/acd/acd66.hpp | 1093 +++++++++++++++++++++++++++ src/map/if/acd/kitty_operations.hpp | 18 + src/map/if/acd/kitty_static_tt.hpp | 110 ++- src/map/if/if.h | 2 + src/map/if/ifDec66.c | 90 +++ src/map/if/module.make | 1 + 9 files changed, 1383 insertions(+), 3 deletions(-) create mode 100644 src/map/if/acd/acd66.hpp create mode 100644 src/map/if/ifDec66.c diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index e0ca6e948..3c5d3cbb8 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) { switch ( c ) { @@ -19621,6 +19621,21 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) goto usage; } break; + case 'J': + if ( globalUtilOptind >= argc ) + { + Abc_Print( -1, "Command line switch \"-J\" should be followed by string.\n" ); + goto usage; + } + pPars->pLutStruct = argv[globalUtilOptind]; + pPars->fEnableStructN = 1; + globalUtilOptind++; + if ( strlen(pPars->pLutStruct) != 2 ) + { + Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2-char string (e.g. 
\"66\").\n" ); + goto usage; + } + break; case 'q': pPars->fPreprocess ^= 1; break; @@ -19801,7 +19816,14 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) Abc_Print( -1, "This feature only works for [6;16]-LUTs.\n" ); return 1; } - pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck16; + if ( pPars->fEnableStructN ) + { + pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck66; + } + else + { + pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck16; + } pPars->fCutMin = 1; } @@ -20003,6 +20025,7 @@ usage: Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); Abc_Print( -2, "\t-W float : sets wire delay between adjects LUTs [default = %f]\n", pPars->WireDelay ); Abc_Print( -2, "\t-S str : string representing the LUT structure [default = %s]\n", pPars->pLutStruct ? pPars->pLutStruct : "not used" ); + Abc_Print( -2, "\t-J str : string representing the LUT structure (new method) [default = %s]\n", pPars->pLutStruct ? pPars->pLutStruct : "not used" ); Abc_Print( -2, "\t-q : toggles preprocessing using several starting points [default = %s]\n", pPars->fPreprocess? "yes": "no" ); Abc_Print( -2, "\t-a : toggles area-oriented mapping [default = %s]\n", pPars->fArea? "yes": "no" ); Abc_Print( -2, "\t-r : enables expansion/reduction of the best cuts [default = %s]\n", pPars->fExpRed? 
"yes": "no" ); diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index fd8015f95..7268fcad4 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -18,6 +18,7 @@ #include "ac_wrapper.h" #include "ac_decomposition.hpp" +#include "acd66.hpp" ABC_NAMESPACE_IMPL_START @@ -69,4 +70,45 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, return 0; } +int acd66_evaluate( word * pTruth, unsigned nVars, int verify ) +{ + using namespace acd; + + acd66_params ps; + ps.verify = static_cast( verify ); + acd66_impl acd( nVars, ps ); + + if ( acd.run( pTruth ) == 0 ) + return 0; + + if ( !verify ) + return 1; + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return 0; + } + + return 1; +} + +int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ) +{ + using namespace acd; + + acd66_params ps; + acd66_impl acd( nVars, ps ); + acd.run( pTruth ); + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return -1; + } + + acd.get_decomposition( decomposition ); + return 0; +} + ABC_NAMESPACE_IMPL_END diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index a384b4404..2b832c287 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -28,6 +28,9 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); +int acd66_evaluate( word * pTruth, unsigned nVars, int verify ); +int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); + ABC_NAMESPACE_HEADER_END #endif \ No newline at end of file diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp new file mode 100644 index 000000000..a1369db83 --- /dev/null +++ b/src/map/if/acd/acd66.hpp @@ -0,0 +1,1093 @@ 
+/**C++File************************************************************** + + FileName [acd66.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - Feb 8, 2024.] + +***********************************************************************/ +/*! + \file acd66.hpp + \brief Ashenhurst-Curtis decomposition for "66" cascade + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD66_H_ +#define _ACD66_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +ABC_NAMESPACE_CXX_HEADER_START + +namespace acd +{ + +/*! \brief Parameters for acd66 */ +struct acd66_params +{ + /*! \brief Maximum size of the free set (1 < num < 6). */ + uint32_t max_free_set_vars{ 5 }; + + /*! \brief Number of configurations to test for decomposition. */ + uint32_t max_evaluations{ 3 }; + + /*! \brief Run verification before returning. */ + bool verify{ true }; +}; + +/*! \brief Statistics for acd66 */ +struct acd66_stats +{ + uint32_t num_edges{ 0 }; +}; + +class acd66_impl +{ +private: + static constexpr uint32_t max_num_vars = 11; + using STT = kitty::static_truth_table; + using LTT = kitty::static_truth_table<6>; + +public: + explicit acd66_impl( uint32_t num_vars, acd66_params const& ps, acd66_stats* pst = nullptr ) + : num_vars( num_vars ), ps( ps ), pst( pst ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! 
\brief Runs ACD 66 */ + int run( word* ptt ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 11 ) + { + return -1; + } + + /* convert to static TT */ + init_truth_table( ptt ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition() ? 1 : 0; + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + compute_decomposition_impl(); + + if ( ps.verify && !verify_impl() ) + { + return 1; + } + + if ( pst ) + { + pst->num_edges = bs_support_size + best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + } + + return 0; + } + + /* contains a 1 for BS variables */ + unsigned get_profile() + { + unsigned profile = 0; + + if ( bs_support_size == UINT32_MAX ) + return -1; + + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + profile |= 1 << permutations[best_free_set + bs_support[i]]; + } + + return profile; + } + + void get_decomposition( unsigned char* decompArray ) + { + if ( bs_support_size == UINT32_MAX ) + return; + + get_decomposition_abc( decompArray ); + } + +private: + bool find_decomposition() + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* array of functions to compute the column multiplicity */ + std::function column_multiplicity_fn[5] = { + [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; + + /* find AC decompositions with minimal multiplicity */ + for ( uint32_t i = num_vars - 6; i <= 5 && i <= ps.max_free_set_vars; ++i ) + { + auto [tt_p, perm, multiplicity] = enumerate_iset_combinations( i, column_multiplicity_fn[i - 1] ); + + /* check for feasible solution into "66" with one possible 
shared variable */ + if ( multiplicity <= 2 || ( multiplicity <= 4 && i < 5 ) ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = multiplicity; + best_free_set = i; + + if ( multiplicity <= 2 || check_shared_set() ) + return true; + } + } + + best_multiplicity = UINT32_MAX; + return false; + } + + void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + start_tt._bits[i] = ptt[i]; + } + + local_extend_to( start_tt, num_vars ); + } + + template + uint32_t column_multiplicity( STT tt ) + { + uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; + uint32_t multiplicity = 0; + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; + uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; + + /* supports up to 64 values of free set (256 for |FS| == 3)*/ + static_assert( free_set_size <= 3 ); + + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); + *it >>= ( 1u << free_set_size ); + } + ++it; + } + + multiplicity = __builtin_popcountl( multiplicity_set[0] ); + + if constexpr ( free_set_size == 3 ) + { + multiplicity += __builtin_popcountl( multiplicity_set[1] ); + multiplicity += __builtin_popcountl( multiplicity_set[2] ); + multiplicity += __builtin_popcountl( multiplicity_set[3] ); + } + + return multiplicity; + } + + template + uint32_t column_multiplicity5( STT tt ) + { + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + + static_assert( free_set_size == 5 || free_set_size == 4 ); + + uint32_t size = 0; + uint64_t prev = -1; + std::array multiplicity_set; + + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); + if ( fs_fn != prev ) + { + multiplicity_set[size++] = fs_fn; + prev = fs_fn; + } + *it >>= ( 1u << free_set_size ); + } + ++it; + } + + std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); + + /* count unique */ + uint32_t multiplicity = 1; + for ( auto i = 1u; i < size; ++i ) + { + multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 1 : 0; + } + + return multiplicity; + } + + inline bool combinations_next( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + swap_inplace_local( tt, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + swap_inplace_local( tt, j, pos_new ); + } + + return true; + } + + template + std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn ) + { + STT tt = start_tt; + + /* TT with best cost */ + STT best_tt = tt; + uint32_t best_cost = UINT32_MAX; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16], bestPerm[16]; + for ( uint32_t i = 0; i < 
num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + bestPerm[i] = pComb[i]; + } + } + } while ( combinations_next( free_set_size, pComb, pInvPerm, tt ) ); + + std::array res_perm; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + res_perm[i] = bestPerm[i]; + } + + return std::make_tuple( best_tt, res_perm, best_cost ); + } + + bool check_shared_var( STT tt, uint32_t free_set_size, uint32_t shared_var, uint32_t multiplicity_limit ) + { + uint64_t multiplicity_set[2][4] = { { 0u, 0u, 0u, 0u }, { 0u, 0u, 0u, 0u } }; + uint32_t multiplicity0 = 0, multiplicity1 = 0; + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; + uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; + + /* supports up to 64 values of free set (256 for |FS| == 3)*/ + assert( free_set_size <= 3 ); + + uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint64_t iteration_counter = 0; + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + multiplicity_set[( iteration_counter >> shared_var_shift ) & 1][( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); + *it >>= ( 1u << free_set_size ); + ++iteration_counter; + } + ++it; + } + + multiplicity0 = __builtin_popcountl( multiplicity_set[0][0] ); + multiplicity1 = __builtin_popcountl( multiplicity_set[1][0] ); + + if ( free_set_size == 3 ) + { + multiplicity0 += __builtin_popcountl( multiplicity_set[0][1] ); + multiplicity0 += __builtin_popcountl( multiplicity_set[0][2] ); + multiplicity0 += __builtin_popcountl( multiplicity_set[0][3] ); + + multiplicity1 += __builtin_popcountl( multiplicity_set[1][1] ); + 
multiplicity1 += __builtin_popcountl( multiplicity_set[1][2] ); + multiplicity1 += __builtin_popcountl( multiplicity_set[1][3] ); + } + + if ( multiplicity0 > multiplicity_limit || multiplicity1 > multiplicity_limit ) + return false; + + best_multiplicity0 = multiplicity0; + best_multiplicity1 = multiplicity1; + + return true; + } + + bool check_shared_var5( STT tt, uint32_t free_set_size, uint32_t shared_var, uint32_t multiplicity_limit ) + { + uint32_t const num_blocks = 1u << ( num_vars - 6 ); + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + + assert( free_set_size == 5 || free_set_size == 4 ); + + uint32_t size[2] = { 0, 0 }; + uint64_t prev[2] = { UINT64_MAX, UINT64_MAX }; + std::array multiplicity_set[2]; + + uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint64_t iteration_counter = 0; + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); + uint32_t cofactor = ( iteration_counter >> shared_var_shift ) & 1; + if ( fs_fn != prev[cofactor] ) + { + multiplicity_set[cofactor][size[cofactor]++] = fs_fn; + prev[cofactor] = fs_fn; + } + *it >>= ( 1u << free_set_size ); + ++iteration_counter; + } + ++it; + } + + std::sort( multiplicity_set[0].begin(), multiplicity_set[0].begin() + size[0] ); + + /* count unique in 0 cofactor */ + uint32_t multiplicity = 1; + for ( auto i = 1u; i < size[0]; ++i ) + { + multiplicity += multiplicity_set[0][i] != multiplicity_set[0][i - 1] ? 1 : 0; + } + + if ( multiplicity > multiplicity_limit ) + return false; + + best_multiplicity0 = multiplicity; + + std::sort( multiplicity_set[1].begin(), multiplicity_set[1].begin() + size[1] ); + + /* count unique in 1 cofactor */ + multiplicity = 1; + for ( auto i = 1u; i < size[1]; ++i ) + { + multiplicity += multiplicity_set[1][i] != multiplicity_set[1][i - 1] ? 
1 : 0; + } + + best_multiplicity1 = multiplicity; + + return multiplicity <= multiplicity_limit; + } + + bool check_shared_set() + { + /* find one shared set variable */ + for ( uint32_t i = best_free_set; i < num_vars; ++i ) + { + /* check the multiplicity of cofactors */ + if ( best_free_set < 4 ) + { + if ( check_shared_var( best_tt, best_free_set, i, 2 ) ) + { + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, i, num_vars - 1 ); + std::swap( permutations[i], permutations[num_vars - 1] ); + return true; + } + } + else + { + if ( check_shared_var5( best_tt, best_free_set, i, 2 ) ) + { + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, i, num_vars - 1 ); + std::swap( permutations[i], permutations[num_vars - 1] ); + return true; + } + } + } + + return false; + } + + void compute_decomposition_impl( bool verbose = false ) + { + bool has_shared_set = best_multiplicity > 2; + + /* construct isets involved in multiplicity */ + LTT isets0[2]; + LTT isets1[2]; + + /* construct isets */ + STT tt = best_tt; + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + auto it = std::begin( tt ); + uint64_t fs_fun[4] = { *it & masks[best_free_set], 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = *it & masks[best_free_set]; + + if ( val == fs_fun[0] ) + { + isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + + *it >>= ( 1u << best_free_set ); + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + ++it; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = *it & masks[best_free_set]; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = *it & masks[best_free_set]; + + if ( val == fs_fun[2] ) + { + isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + + *it >>= ( 1u << best_free_set ); + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + ++it; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions( isets0, isets1, fs_fun ); + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity < 3 ) + { + dec_funcs[0] = isets0[0]._bits; + bs_support_size = num_vars - best_free_set; + for ( uint32_t i = 0; i < 
num_vars - best_free_set; ++i ) + { + bs_support[i] = i; + } + compute_composition( fs_fun ); + return; + } + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4( isets0, isets1, fs_fun ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4( isets0, isets1, fs_fun ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3( isets0, isets1, fs_fun ); + compute_composition( fs_fun ); + } + + inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[1]; + LTT care; + care._bits = masks[num_vars - best_free_set]; + + /* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + support_vars1 += has_var6( f, care, i ) ? 1 : 0; + bs_support[i] = i; + } + + /* use a different set */ + f = isets0[0] | isets1[0]; + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + support_vars2 += has_var6( f, care, i ) ? 
1 : 0; + } + + bs_support_size = support_vars2; + if ( support_vars2 > support_vars1 ) + { + f = isets0[0] | isets1[1]; + std::swap( fs_fun[2], fs_fun[3] ); /* BS function is now 1 on isets1[1]: exchange the two free-set functions of the 1 cofactor (fs_fun has only 4 entries) */ + bs_support_size = support_vars1; + } + + /* move variables */ + if ( bs_support_size < num_vars - best_free_set ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + adjust_truth_table_on_dc( f, care, i ); + continue; + } + + if ( support_vars1 < i ) + { + kitty::swap_inplace( f, support_vars1, i ); + } + + bs_support[support_vars1] = i; + ++support_vars1; + } + } + + dec_funcs[0] = f._bits; + compute_composition( fs_fun ); + } + + inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[0]; + LTT care; + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + care._bits = masks[num_vars - best_free_set] & ( ~isets0[0]._bits ); + fs_fun[1] = fs_fun[0]; + } + else + { + care._bits = masks[num_vars - best_free_set] & ( ~isets1[0]._bits ); + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + adjust_truth_table_on_dc( f, care, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + bs_support[support_vars] = i; + ++support_vars; + } + + bs_support_size = support_vars; + dec_funcs[0] = f._bits; + compute_composition( fs_fun ); + } + + void compute_composition( uint64_t fs_fun[4] ) + { + dec_funcs[1] = fs_fun[0] << ( 1 << best_free_set ); + dec_funcs[1] |= fs_fun[1]; + + if ( best_multiplicity > 2 ) + { + dec_funcs[1] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); + dec_funcs[1] |= fs_fun[3] << ( 2 << best_free_set ); + } + } + + template + void local_extend_to( 
TT_type& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( num_vars > 6 ); + const uint32_t num_blocks = 1 << ( num_vars - 6 ); + + if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto 
it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + + inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) + { + if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) + { + return true; + } + + return false; + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + template + bool has_var_support( const TT_type& tt, const TT_type& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 
1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + void adjust_truth_table_on_dc( LTT& tt, LTT& care, uint32_t var_index ) + { + uint64_t new_bits = tt._bits & care._bits; + tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. 
+ * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + void get_decomposition_abc( unsigned char* decompArray ) + { + unsigned char* pArray = decompArray; + unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray = 2; + pArray++; + + /* write BS LUT */ + /* write fanin size */ + *pArray = bs_support_size; + pArray++; + ++bytes; + + /* write support */ + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; + pArray++; + ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write top LUT */ + /* write fanin size */ + uint32_t support_size = best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + *pArray = support_size; + pArray++; + ++bytes; + + /* write support: free-set variables sit in permutation slots [0, best_free_set) */ + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + *pArray = (unsigned char)permutations[i]; + pArray++; + ++bytes; + } + + *pArray = (unsigned char)num_vars; + pArray++; + ++bytes; + + if ( best_multiplicity > 2 ) + { + *pArray = (unsigned char)permutations[num_vars - 1]; + pArray++; + ++bytes; + } + + /* write truth table */ + tt_num_bytes = ( support_size <= 3 ) ? 
1 : ( 1 << ( support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write numBytes */ + *decompArray = bytes; + } + + bool verify_impl() + { + /* create PIs */ + STT pis[max_num_vars]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + kitty::create_nth_var( pis[i], permutations[i] ); + } + + /* BS function patterns */ + STT bsi[6]; + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + bsi[i] = pis[best_free_set + bs_support[i]]; + } + + /* compute first function */ + STT bsf_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < bs_support_size; ++j ) + { + pattern |= get_bit( bsi[j], i ) << j; + } + if ( ( dec_funcs[0] >> pattern ) & 1 ) + { + set_bit( bsf_sim, i ); + } + } + + /* compute first function */ + STT top_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < best_free_set; ++j ) + { + pattern |= get_bit( pis[j], i ) << j; + } + pattern |= get_bit( bsf_sim, i ) << best_free_set; + if ( best_multiplicity > 2 ) + { + pattern |= get_bit( pis[num_vars - 1], i ) << ( best_free_set + 1 ); + } + + if ( ( dec_funcs[1] >> pattern ) & 1 ) + { + set_bit( top_sim, i ); + } + } + + for ( uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i ) + { + if ( top_sim._bits[i] != start_tt._bits[i] ) + { + /* convert to dynamic_truth_table */ + // report_tt( bsf_sim ); + std::cout << "Found incorrect decomposition\n"; + report_tt( top_sim ); + std::cout << " instead_of\n"; + report_tt( start_tt ); + return false; + } + } + + return true; + } + + uint32_t get_bit( const STT& tt, uint64_t index ) + { + return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; + } + + void set_bit( STT& tt, uint64_t index ) + { + tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); + } + + void report_tt( STT const& stt ) + { + kitty::dynamic_truth_table 
tt( num_vars ); + + std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) ); + kitty::print_hex( tt ); + std::cout << "\n"; + } + +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + uint32_t best_multiplicity0{ UINT32_MAX }; + uint32_t best_multiplicity1{ UINT32_MAX }; + uint32_t bs_support_size{ UINT32_MAX }; + STT best_tt; + STT start_tt; + uint64_t dec_funcs[2]; + uint32_t bs_support[6]; + + uint32_t num_vars; + acd66_params const& ps; + acd66_stats* pst; + std::array permutations; +}; + +} // namespace acd + +ABC_NAMESPACE_CXX_HEADER_END + +#endif // _ACD66_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_operations.hpp b/src/map/if/acd/kitty_operations.hpp index bf8e38007..48a4b7c67 100644 --- a/src/map/if/acd/kitty_operations.hpp +++ b/src/map/if/acd/kitty_operations.hpp @@ -133,6 +133,24 @@ void swap_inplace( TT& tt, uint8_t var_index1, uint8_t var_index2 ) } } +template +inline void swap_inplace( static_truth_table& tt, uint8_t var_index1, uint8_t var_index2 ) +{ + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits = ( tt._bits & pmask[0] ) | ( ( tt._bits & pmask[1] ) << shift ) | ( ( tt._bits & pmask[2] ) >> shift ); +} + /*! \brief Extends smaller truth table to larger one The most significant variables will not be in the functional support of the diff --git a/src/map/if/acd/kitty_static_tt.hpp b/src/map/if/acd/kitty_static_tt.hpp index ab5a5d1c9..2b1613a6a 100644 --- a/src/map/if/acd/kitty_static_tt.hpp +++ b/src/map/if/acd/kitty_static_tt.hpp @@ -12,8 +12,116 @@ ABC_NAMESPACE_CXX_HEADER_START namespace kitty { +template +struct static_truth_table; + +/*! 
Truth table (for up to 6 variables) in which number of variables is known at compile time. + */ template -struct static_truth_table +struct static_truth_table +{ + /*! \cond PRIVATE */ + enum + { + NumBits = uint64_t( 1 ) << NumVars + }; + /*! \endcond */ + + /*! Constructs a new static truth table instance with the same number of variables. */ + inline static_truth_table construct() const + { + return static_truth_table(); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return NumVars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return 1u; } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return NumBits; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return &_bits; } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return ( &_bits ) + 1; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return &_bits; } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return &_bits; } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return ( &_bits ) + 1; } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return &_bits; } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return &_bits; } + + /*! \brief Constant everse end iterator to bits. + */ + inline auto crend() const noexcept { return ( &_bits ) + 1; } + + /*! \brief Assign other truth table if number of variables match. + + This replaces the current truth table with another truth table, if `other` + has the same number of variables. 
Otherwise, the truth table is not + changed. + + \param other Other truth table + */ + template + static_truth_table& operator=( const TT& other ) + { + if ( other.num_vars() == num_vars() ) + { + std::copy( other.begin(), other.end(), begin() ); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + If the truth table has less than 6 variables, it may not use all + the bits. This operation makes sure to zero out all non-valid + bits. + */ + inline void mask_bits() noexcept { _bits &= detail::masks[NumVars]; } + + /*! \cond PRIVATE */ +public: /* fields */ + uint64_t _bits = 0; + /*! \endcond */ +}; + +/*! Truth table (more than 6 variables) in which number of variables is known at compile time. + */ +template +struct static_truth_table { /*! \cond PRIVATE */ enum diff --git a/src/map/if/if.h b/src/map/if/if.h index f8c99fdf1..33621ac92 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -151,6 +151,7 @@ struct If_Par_t_ int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure + int fEnableStructN;// LUT structure using a new method float WireDelay; // wire delay // internal parameters int fSkipCutFilter;// skip cut filter @@ -551,6 +552,7 @@ extern int If_CutPerformCheck07( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck08( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck10( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck16( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); +extern int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, 
unsigned * pTruth, int nVars, int nLeaves, char * pStr ); diff --git a/src/map/if/ifDec66.c b/src/map/if/ifDec66.c new file mode 100644 index 000000000..932f5feb2 --- /dev/null +++ b/src/map/if/ifDec66.c @@ -0,0 +1,90 @@ +/**CFile**************************************************************** + + FileName [ifDec16.c] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [FPGA mapping based on priority cuts.] + + Synopsis [Fast checking procedures.] + + Author [Alan Mishchenko] + + Affiliation [UC Berkeley] + + Date [Ver. 1.0. Started - November 21, 2006.] + + Revision [$Id: ifDec16.c,v 1.00 2006/11/21 00:00:00 alanmi Exp $] + +***********************************************************************/ + +#include "if.h" +#include "bool/kit/kit.h" +#include "misc/vec/vecMem.h" + +ABC_NAMESPACE_IMPL_START + +//////////////////////////////////////////////////////////////////////// +/// FUNCTION DEFINITIONS /// +//////////////////////////////////////////////////////////////////////// + +/**Function************************************************************* + + Synopsis [Performs ACD into 66 cascade.] + + Description [] + + SideEffects [] + + SeeAlso [] + +***********************************************************************/ +int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeaves, char * pStr ) +{ + unsigned pTruth[IF_MAX_FUNC_LUTSIZE > 5 ? 
1 << (IF_MAX_FUNC_LUTSIZE - 5) : 1]; + int i, Length; + // stretch the truth table + assert( nVars >= 6 ); + memcpy( pTruth, pTruth0, sizeof(word) * Abc_TtWordNum(nVars) ); + Abc_TtStretch6( (word *)pTruth, nLeaves, p->pPars->nLutSize ); + + // if cutmin is disabled, minimize the function + if ( !p->pPars->fCutMin ) + nLeaves = Abc_TtMinBase( (word *)pTruth, NULL, nLeaves, nVars ); + + // quit if parameters are wrong + Length = strlen(pStr); + if ( Length != 2 ) + { + printf( "Wrong LUT struct (%s)\n", pStr ); + return 0; + } + for ( i = 0; i < Length; i++ ) + { + if ( pStr[i] != '6' ) + { + printf( "The LUT size (%d) should belong to {6}.\n", pStr[i] - '0' ); + return 0; + } + } + + if ( nLeaves > 11 ) + { + printf( "The cut size (%d) is too large for the LUT structure %s.\n", nLeaves, pStr ); + return 0; + } + + // consider easy case + if ( nLeaves <= 6 ) + return 1; + + // derive the decomposition + return (int)(acd66_evaluate( (word *)pTruth, nVars, 1 ) > 0 ); +} + +//////////////////////////////////////////////////////////////////////// +/// END OF FILE /// +//////////////////////////////////////////////////////////////////////// + + +ABC_NAMESPACE_IMPL_END \ No newline at end of file diff --git a/src/map/if/module.make b/src/map/if/module.make index 6651d465b..bd652f35b 100644 --- a/src/map/if/module.make +++ b/src/map/if/module.make @@ -7,6 +7,7 @@ SRC += src/map/if/ifCom.c \ src/map/if/ifDec08.c \ src/map/if/ifDec10.c \ src/map/if/ifDec16.c \ + src/map/if/ifDec66.c \ src/map/if/ifDec75.c \ src/map/if/ifDelay.c \ src/map/if/ifDsd.c \ From 2afaeac8234196ec427cda99cbbe5df82a6f51df Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 8 Feb 2024 11:20:19 +0100 Subject: [PATCH 02/19] Adding hash table to reduce computations --- src/map/if/acd/ac_wrapper.cpp | 6 +- src/map/if/acd/ac_wrapper.h | 2 +- src/map/if/acd/acd66.hpp | 2 +- src/map/if/ifDec66.c | 260 +++++++++++++++++++++++++++++++++- 4 files changed, 259 insertions(+), 11 deletions(-) diff --git 
a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 7268fcad4..441cb7008 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -70,18 +70,18 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, return 0; } -int acd66_evaluate( word * pTruth, unsigned nVars, int verify ) +int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) { using namespace acd; acd66_params ps; - ps.verify = static_cast( verify ); + ps.verify = false; acd66_impl acd( nVars, ps ); if ( acd.run( pTruth ) == 0 ) return 0; - if ( !verify ) + if ( !compute_decomposition ) return 1; int val = acd.compute_decomposition(); diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index 2b832c287..2e052c563 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -28,7 +28,7 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); -int acd66_evaluate( word * pTruth, unsigned nVars, int verify ); +int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index a1369db83..166402ca0 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -54,7 +54,7 @@ struct acd66_params uint32_t max_evaluations{ 3 }; /*! \brief Run verification before returning. */ - bool verify{ true }; + bool verify{ false }; }; /*! 
\brief Statistics for acd66 */ diff --git a/src/map/if/ifDec66.c b/src/map/if/ifDec66.c index 932f5feb2..3a836363d 100644 --- a/src/map/if/ifDec66.c +++ b/src/map/if/ifDec66.c @@ -1,6 +1,6 @@ /**CFile**************************************************************** - FileName [ifDec16.c] + FileName [ifDec66.c] SystemName [ABC: Logic synthesis and verification system.] @@ -8,13 +8,13 @@ Synopsis [Fast checking procedures.] - Author [Alan Mishchenko] + Author [Alessandro Tempia Calvino] - Affiliation [UC Berkeley] + Affiliation [EPFL] - Date [Ver. 1.0. Started - November 21, 2006.] + Date [Ver. 1.0. Started - Feb 8, 2024.] - Revision [$Id: ifDec16.c,v 1.00 2006/11/21 00:00:00 alanmi Exp $] + Revision [$Id: ifDec66.c,v 1.00 2008/02/08 00:00:00 tempia Exp $] ***********************************************************************/ @@ -24,10 +24,258 @@ ABC_NAMESPACE_IMPL_START +#define CLU_VAR_MAX 16 +#define CLU_MEM_MAX 1000 // 1 GB +#define CLU_UNUSED 0xff + +//////////////////////////////////////////////////////////////////////// +/// DECLARATIONS /// +//////////////////////////////////////////////////////////////////////// + +// decomposition +typedef struct If_Grp_t_ If_Grp_t; +struct If_Grp_t_ +{ + char nVars; + char nMyu; + char pVars[CLU_VAR_MAX]; +}; + +// hash table entry +typedef struct If_Hte_t_ If_Hte_t; +struct If_Hte_t_ +{ + If_Hte_t * pNext; + unsigned Group; + unsigned Counter; + word pTruth[1]; +}; + //////////////////////////////////////////////////////////////////////// /// FUNCTION DEFINITIONS /// //////////////////////////////////////////////////////////////////////// +static inline unsigned If_CluGrp2Uns2( If_Grp_t * pG ) +{ + char * pChar = (char *)pG; + unsigned Res = 0; + int i; + for ( i = 0; i < 8; i++ ) + Res |= ((pChar[i] & 15) << (i << 2)); + return Res; +} + +static inline void If_CluUns2Grp2( unsigned Group, If_Grp_t * pG ) +{ + char * pChar = (char *)pG; + int i; + for ( i = 0; i < 8; i++ ) + pChar[i] = ((Group >> (i << 2)) & 15); +} + 
+unsigned int If_CluPrimeCudd2( unsigned int p ) +{ + int i,pn; + + p--; + do { + p++; + if (p&1) { + pn = 1; + i = 3; + while ((unsigned) (i * i) <= p) { + if (p % i == 0) { + pn = 0; + break; + } + i += 2; + } + } else { + pn = 0; + } + } while (!pn); + return(p); + +} /* end of Cudd_Prime */ + +// hash table +static inline int If_CluWordNum2( int nVars ) +{ + return nVars <= 6 ? 1 : 1 << (nVars-6); +} + +int If_CluHashFindMedian2( If_Man_t * p, int t ) +{ + If_Hte_t * pEntry; + Vec_Int_t * vCounters; + int i, Max = 0, Total = 0, Half = 0; + vCounters = Vec_IntStart( 1000 ); + for ( i = 0; i < p->nTableSize[t]; i++ ) + { + for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[i]; pEntry; pEntry = pEntry->pNext ) + { + if ( Max < (int)pEntry->Counter ) + { + Max = pEntry->Counter; + Vec_IntSetEntry( vCounters, pEntry->Counter, 0 ); + } + Vec_IntAddToEntry( vCounters, pEntry->Counter, 1 ); + Total++; + } + } + for ( i = Max; i > 0; i-- ) + { + Half += Vec_IntEntry( vCounters, i ); + if ( Half > Total/2 ) + break; + } +/* + printf( "total = %d ", Total ); + printf( "half = %d ", Half ); + printf( "i = %d ", i ); + printf( "Max = %d.\n", Max ); +*/ + Vec_IntFree( vCounters ); + return Abc_MaxInt( i, 1 ); +} + +int If_CluHashKey2( word * pTruth, int nWords, int Size ) +{ + static unsigned BigPrimes[8] = {12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741}; + unsigned Value = 0; + int i; + if ( nWords < 4 ) + { + unsigned char * s = (unsigned char *)pTruth; + for ( i = 0; i < 8 * nWords; i++ ) + Value ^= BigPrimes[i % 7] * s[i]; + } + else + { + unsigned * s = (unsigned *)pTruth; + for ( i = 0; i < 2 * nWords; i++ ) + Value ^= BigPrimes[i % 7] * s[i]; + } + return Value % Size; +} + +unsigned * If_CluHashLookup2( If_Man_t * p, word * pTruth, int t ) +{ + If_Hte_t * pEntry, * pPrev; + int nWords, HashKey; + if ( p == NULL ) + return NULL; + nWords = If_CluWordNum2(p->pPars->nLutSize); + if ( p->pMemEntries == NULL ) + p->pMemEntries = 
Mem_FixedStart( sizeof(If_Hte_t) + sizeof(word) * (If_CluWordNum2(p->pPars->nLutSize) - 1) ); + if ( p->pHashTable[t] == NULL ) + { + // decide how large should be the table + int nEntriesMax1 = 4 * If_CluPrimeCudd2( Vec_PtrSize(p->vObjs) * p->pPars->nCutsMax ); + int nEntriesMax2 = (int)(((double)CLU_MEM_MAX * (1 << 20)) / If_CluWordNum2(p->pPars->nLutSize) / 8); +// int nEntriesMax2 = 10000; + // create table + p->nTableSize[t] = If_CluPrimeCudd2( Abc_MinInt(nEntriesMax1, nEntriesMax2)/2 ); + p->pHashTable[t] = ABC_CALLOC( void *, p->nTableSize[t] ); + } + // check if this entry exists + HashKey = If_CluHashKey2( pTruth, nWords, p->nTableSize[t] ); + for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[HashKey]; pEntry; pEntry = pEntry->pNext ) + if ( memcmp(pEntry->pTruth, pTruth, sizeof(word) * nWords) == 0 ) + { + pEntry->Counter++; + return &pEntry->Group; + } + // resize the hash table + if ( p->nTableEntries[t] >= 2 * p->nTableSize[t] ) + { + // collect useful entries + If_Hte_t * pPrev; + Vec_Ptr_t * vUseful = Vec_PtrAlloc( p->nTableEntries[t] ); + int i, Median = If_CluHashFindMedian2( p, t ); + for ( i = 0; i < p->nTableSize[t]; i++ ) + { + for ( pEntry = ((If_Hte_t **)p->pHashTable[t])[i]; pEntry; ) + { + if ( (int)pEntry->Counter > Median ) + { + Vec_PtrPush( vUseful, pEntry ); + pEntry = pEntry->pNext; + } + else + { + pPrev = pEntry->pNext; + Mem_FixedEntryRecycle( p->pMemEntries, (char *)pEntry ); + pEntry = pPrev; + } + } + } + // add useful entries + memset( p->pHashTable[t], 0, sizeof(void *) * p->nTableSize[t] ); + Vec_PtrForEachEntry( If_Hte_t *, vUseful, pEntry, i ) + { + HashKey = If_CluHashKey2( pEntry->pTruth, nWords, p->nTableSize[t] ); + pPrev = ((If_Hte_t **)p->pHashTable[t])[HashKey]; + if ( pPrev == NULL || pEntry->Counter >= pPrev->Counter ) + { + pEntry->pNext = pPrev; + ((If_Hte_t **)p->pHashTable[t])[HashKey] = pEntry; + } + else + { + while ( pPrev->pNext && pEntry->Counter < pPrev->pNext->Counter ) + pPrev = pPrev->pNext; + 
pEntry->pNext = pPrev->pNext; + pPrev->pNext = pEntry; + } + } + p->nTableEntries[t] = Vec_PtrSize( vUseful ); + Vec_PtrFree( vUseful ); + } + // create entry + p->nTableEntries[t]++; + pEntry = (If_Hte_t *)Mem_FixedEntryFetch( p->pMemEntries ); + memcpy( pEntry->pTruth, pTruth, sizeof(word) * nWords ); + pEntry->Group = CLU_UNUSED; + pEntry->Counter = 1; + // insert at the beginning +// pEntry->pNext = ((If_Hte_t **)p->pHashTable[t])[HashKey]; +// ((If_Hte_t **)p->pHashTable[t])[HashKey] = pEntry; + // insert at the end + pEntry->pNext = NULL; + for ( pPrev = ((If_Hte_t **)p->pHashTable[t])[HashKey]; pPrev && pPrev->pNext; pPrev = pPrev->pNext ); + if ( pPrev == NULL ) + ((If_Hte_t **)p->pHashTable[t])[HashKey] = pEntry; + else + pPrev->pNext = pEntry; + return &pEntry->Group; +} + +// returns if successful +int If_CluCheck66( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) +{ + If_Grp_t G1 = {0}; + unsigned * pHashed = NULL; + + if ( p && fHashing ) + { + pHashed = If_CluHashLookup2( p, pTruth0, 0 ); + if ( pHashed && *pHashed != CLU_UNUSED ) + If_CluUns2Grp2( *pHashed, &G1 ); + } + + /* new entry */ + if ( G1.nVars == 0 ) + { + G1.nVars = acd66_evaluate( pTruth0, nVars, 0 ); + } + + if ( pHashed ) + *pHashed = If_CluGrp2Uns2( &G1 ); + + return G1.nVars; +} + /**Function************************************************************* Synopsis [Performs ACD into 66 cascade.] 
@@ -79,7 +327,7 @@ int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeav return 1; // derive the decomposition - return (int)(acd66_evaluate( (word *)pTruth, nVars, 1 ) > 0 ); + return If_CluCheck66(p, (word*)pTruth, nVars, 1); } //////////////////////////////////////////////////////////////////////// From 3f80b202cd91b1966378333d6fccf07a7c94fd07 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 8 Feb 2024 14:57:42 +0100 Subject: [PATCH 03/19] C++11 compatible code --- src/map/if/acd/ac_decomposition.hpp | 35 ++++++++++--------- src/map/if/acd/acd66.hpp | 13 ++++---- src/map/if/acd/kitty_algorithm.hpp | 8 ++--- src/map/if/acd/kitty_dynamic_tt.hpp | 26 +++++++-------- src/map/if/acd/kitty_operations.hpp | 10 +++--- src/map/if/acd/kitty_operators.hpp | 23 ++++++++----- src/map/if/acd/kitty_static_tt.hpp | 52 ++++++++++++++--------------- 7 files changed, 88 insertions(+), 79 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 8aee0266d..d55941995 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -205,7 +205,8 @@ private: /* find a feasible AC decomposition */ for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 
128 : 0; @@ -213,8 +214,8 @@ private: /* check for feasible solution that improves the cost */ if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) { - best_tt = tt_p; - permutations = perm; + best_tt = std::get<0>( ret_tuple ); + permutations = std::get<1>( ret_tuple ); best_multiplicity = multiplicity; best_cost = multiplicity + additional_cost; best_free_set = i; @@ -240,7 +241,8 @@ private: for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; @@ -248,8 +250,8 @@ private: /* check for feasible solution that improves the cost */ if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) { - best_tt = tt_p; - permutations = perm; + best_tt = std::get<0>( ret_tuple ); + permutations = std::get<1>( ret_tuple ); best_multiplicity = multiplicity; best_cost = multiplicity + additional_cost; best_free_set = i; @@ -298,7 +300,7 @@ private: uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; /* supports up to 64 values of free set (256 for |FS| == 3)*/ - static_assert( free_set_size <= 3 ); + static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" ); /* extract iset functions */ auto it = std::begin( tt ); @@ -314,7 +316,7 @@ private: multiplicity = __builtin_popcountl( multiplicity_set[0] ); - if constexpr ( free_set_size == 3 ) + if ( free_set_size == 3 ) { multiplicity += __builtin_popcountl( multiplicity_set[1] ); multiplicity += __builtin_popcountl( multiplicity_set[2] ); @@ -330,7 +332,7 @@ private: uint32_t 
const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - static_assert( free_set_size == 5 || free_set_size == 4 ); + static_assert( free_set_size == 5 || free_set_size == 4, "Wrong free set size for method used, expected of 4 or 5" ); uint32_t size = 0; uint64_t prev = -1; @@ -466,7 +468,8 @@ private: { uint64_t val = *it & masks[best_free_set]; - if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + auto el = column_to_iset.find( val ); + if ( el != column_to_iset.end() ) { isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); } @@ -709,7 +712,7 @@ private: { if ( var == best_multiplicity ) { - if constexpr ( !enable_dcset ) + if ( !enable_dcset ) { /* sets must be equally populated */ if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) ) @@ -725,7 +728,7 @@ private: } /* var in DCSET */ - if constexpr ( enable_dcset ) + if ( enable_dcset ) { generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); } @@ -953,7 +956,7 @@ private: cost = 0; float sort_cost = 0; - if constexpr ( UseHeuristic ) + if ( UseHeuristic ) { sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) ); } @@ -971,15 +974,15 @@ private: return true; } - if constexpr ( UseHeuristic ) + if ( UseHeuristic ) { - std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) { return a.cost < b.cost; } ); } else { - std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) { return a.sort_cost < b.sort_cost; } ); } diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 166402ca0..67d460157 100644 --- a/src/map/if/acd/acd66.hpp +++ 
b/src/map/if/acd/acd66.hpp @@ -156,13 +156,14 @@ private: /* find AC decompositions with minimal multiplicity */ for ( uint32_t i = num_vars - 6; i <= 5 && i <= ps.max_free_set_vars; ++i ) { - auto [tt_p, perm, multiplicity] = enumerate_iset_combinations( i, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations( i, column_multiplicity_fn[i - 1] ); + uint32_t multiplicity = std::get<2>( ret_tuple ); /* check for feasible solution into "66" with one possible shared variable */ if ( multiplicity <= 2 || ( multiplicity <= 4 && i < 5 ) ) { - best_tt = tt_p; - permutations = perm; + best_tt = std::get<0>( ret_tuple ); + permutations = std::get<1>( ret_tuple ); best_multiplicity = multiplicity; best_free_set = i; @@ -197,7 +198,7 @@ private: uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; /* supports up to 64 values of free set (256 for |FS| == 3)*/ - static_assert( free_set_size <= 3 ); + static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" ); /* extract iset functions */ auto it = std::begin( tt ); @@ -213,7 +214,7 @@ private: multiplicity = __builtin_popcountl( multiplicity_set[0] ); - if constexpr ( free_set_size == 3 ) + if ( free_set_size == 3 ) { multiplicity += __builtin_popcountl( multiplicity_set[1] ); multiplicity += __builtin_popcountl( multiplicity_set[2] ); @@ -229,7 +230,7 @@ private: uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - static_assert( free_set_size == 5 || free_set_size == 4 ); + static_assert( free_set_size == 5 || free_set_size == 4, "Wrong free set size for method used, expected of 4 or 5" ); uint32_t size = 0; uint64_t prev = -1; diff --git a/src/map/if/acd/kitty_algorithm.hpp b/src/map/if/acd/kitty_algorithm.hpp index 78eead08a..a8c71dc07 100644 --- a/src/map/if/acd/kitty_algorithm.hpp +++ b/src/map/if/acd/kitty_algorithm.hpp @@ -22,9 +22,9 @@ namespace kitty \return new constructed truth table of same type and dimensions */ template -auto unary_operation( const TT& tt, Fn&& op ) +TT unary_operation( const TT& tt, Fn&& op ) { - auto result = tt.construct(); + TT result = tt.construct(); std::transform( tt.cbegin(), tt.cend(), result.begin(), op ); result.mask_bits(); return result; @@ -43,11 +43,11 @@ auto unary_operation( const TT& tt, Fn&& op ) \return new constructed truth table of same type and dimensions */ template -auto binary_operation( const TT& first, const TT& second, Fn&& op ) +TT binary_operation( const TT& first, const TT& second, Fn&& op ) { assert( first.num_vars() == second.num_vars() ); - auto result = first.construct(); + TT result = first.construct(); std::transform( first.cbegin(), first.cend(), second.cbegin(), result.begin(), op ); result.mask_bits(); return result; diff --git a/src/map/if/acd/kitty_dynamic_tt.hpp b/src/map/if/acd/kitty_dynamic_tt.hpp index 880943dfa..2913b6674 100644 --- a/src/map/if/acd/kitty_dynamic_tt.hpp +++ b/src/map/if/acd/kitty_dynamic_tt.hpp @@ -51,55 +51,55 @@ struct dynamic_truth_table /*! Returns number of variables. */ - inline auto num_vars() const noexcept { return _num_vars; } + inline uint32_t num_vars() const noexcept { return _num_vars; } /*! Returns number of blocks. */ - inline auto num_blocks() const noexcept { return _bits.size(); } + inline uint32_t num_blocks() const noexcept { return _bits.size(); } /*! 
Returns number of bits. */ - inline auto num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } + inline uint32_t num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } /*! \brief Begin iterator to bits. */ - inline auto begin() noexcept { return _bits.begin(); } + inline std::vector::iterator begin() noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() noexcept { return _bits.end(); } + inline std::vector::iterator end() noexcept { return _bits.end(); } /*! \brief Begin iterator to bits. */ - inline auto begin() const noexcept { return _bits.begin(); } + inline std::vector::const_iterator begin() const noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() const noexcept { return _bits.end(); } + inline std::vector::const_iterator end() const noexcept { return _bits.end(); } /*! \brief Reverse begin iterator to bits. */ - inline auto rbegin() noexcept { return _bits.rbegin(); } + inline std::vector::reverse_iterator rbegin() noexcept { return _bits.rbegin(); } /*! \brief Reverse end iterator to bits. */ - inline auto rend() noexcept { return _bits.rend(); } + inline std::vector::reverse_iterator rend() noexcept { return _bits.rend(); } /*! \brief Constant begin iterator to bits. */ - inline auto cbegin() const noexcept { return _bits.cbegin(); } + inline std::vector::const_iterator cbegin() const noexcept { return _bits.cbegin(); } /*! \brief Constant end iterator to bits. */ - inline auto cend() const noexcept { return _bits.cend(); } + inline std::vector::const_iterator cend() const noexcept { return _bits.cend(); } /*! \brief Constant reverse begin iterator to bits. */ - inline auto crbegin() const noexcept { return _bits.crbegin(); } + inline std::vector::const_reverse_iterator crbegin() const noexcept { return _bits.crbegin(); } /*! \brief Constant teverse end iterator to bits. 
*/ - inline auto crend() const noexcept { return _bits.crend(); } + inline std::vector::const_reverse_iterator crend() const noexcept { return _bits.crend(); } /*! \brief Assign other truth table. diff --git a/src/map/if/acd/kitty_operations.hpp b/src/map/if/acd/kitty_operations.hpp index 48a4b7c67..e0292bc05 100644 --- a/src/map/if/acd/kitty_operations.hpp +++ b/src/map/if/acd/kitty_operations.hpp @@ -31,7 +31,7 @@ inline TT unary_not_if( const TT& tt, bool cond ) #ifdef _MSC_VER #pragma warning( pop ) #endif - return unary_operation( tt, [mask]( auto a ) + return unary_operation( tt, [mask]( uint64_t a ) { return a ^ mask; } ); } @@ -39,7 +39,7 @@ inline TT unary_not_if( const TT& tt, bool cond ) template inline TT unary_not( const TT& tt ) { - return unary_operation( tt, []( auto a ) + return unary_operation( tt, []( uint64_t a ) { return ~a; } ); } @@ -48,14 +48,14 @@ template inline TT binary_and( const TT& first, const TT& second ) { - return binary_operation( first, second, std::bit_and<>() ); + return binary_operation( first, second, std::bit_and() ); } /*! \brief Bitwise OR of two truth tables */ template inline TT binary_or( const TT& first, const TT& second ) { - return binary_operation( first, second, std::bit_or<>() ); + return binary_operation( first, second, std::bit_or() ); } /*! \brief Swaps two variables in a truth table @@ -330,7 +330,7 @@ void print_hex( const TT& tt, std::ostream& os = std::cout ) auto const chunk_size = std::min( tt.num_vars() <= 1 ? 
1 : ( tt.num_bits() >> 2 ), 16 ); - for_each_block_reversed( tt, [&os, chunk_size]( auto word ) + for_each_block_reversed( tt, [&os, chunk_size]( uint64_t word ) { std::string chunk( chunk_size, '0' ); diff --git a/src/map/if/acd/kitty_operators.hpp b/src/map/if/acd/kitty_operators.hpp index b5f4688c2..7ccab7ca1 100644 --- a/src/map/if/acd/kitty_operators.hpp +++ b/src/map/if/acd/kitty_operators.hpp @@ -78,28 +78,33 @@ inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& s /*! \brief Operator for binary_or and assign */ template -inline void operator|=( static_truth_table& first, const static_truth_table& second ) +inline void operator|=( static_truth_table& first, const static_truth_table& second ) { // first = binary_or( first, second ); /* runtime improved version */ - if constexpr ( NumVars <= 6 ) - { - first._bits |= second._bits; - first.mask_bits(); - } - else if constexpr ( NumVars == 7 ) + first._bits |= second._bits; + first.mask_bits(); +} + +/*! \brief Operator for binary_or and assign */ +template +inline void operator|=( static_truth_table& first, const static_truth_table& second ) +{ + // first = binary_or( first, second ); + /* runtime improved version */ + if ( NumVars == 7 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; } - else if constexpr ( NumVars == 8 ) + else if ( NumVars == 8 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; first._bits[2] |= second._bits[2]; first._bits[3] |= second._bits[3]; } - else if constexpr ( NumVars == 9 ) + else if ( NumVars == 9 ) { first._bits[0] |= second._bits[0]; first._bits[1] |= second._bits[1]; diff --git a/src/map/if/acd/kitty_static_tt.hpp b/src/map/if/acd/kitty_static_tt.hpp index 2b1613a6a..5bb4bdac1 100644 --- a/src/map/if/acd/kitty_static_tt.hpp +++ b/src/map/if/acd/kitty_static_tt.hpp @@ -35,55 +35,55 @@ struct static_truth_table /*! Returns number of variables. 
*/ - inline auto num_vars() const noexcept { return NumVars; } + inline uint32_t num_vars() const noexcept { return NumVars; } /*! Returns number of blocks. */ - inline auto num_blocks() const noexcept { return 1u; } + inline uint32_t num_blocks() const noexcept { return 1u; } /*! Returns number of bits. */ - inline auto num_bits() const noexcept { return NumBits; } + inline uint32_t num_bits() const noexcept { return NumBits; } /*! \brief Begin iterator to bits. */ - inline auto begin() noexcept { return &_bits; } + inline uint64_t * begin() noexcept { return &_bits; } /*! \brief End iterator to bits. */ - inline auto end() noexcept { return ( &_bits ) + 1; } + inline uint64_t * end() noexcept { return ( &_bits ) + 1; } /*! \brief Begin iterator to bits. */ - inline auto begin() const noexcept { return &_bits; } + inline const uint64_t * begin() const noexcept { return &_bits; } /*! \brief End iterator to bits. */ - inline auto end() const noexcept { return ( &_bits ) + 1; } + inline const uint64_t * end() const noexcept { return ( &_bits ) + 1; } /*! \brief Reverse begin iterator to bits. */ - inline auto rbegin() noexcept { return &_bits; } + inline uint64_t * rbegin() noexcept { return &_bits; } /*! \brief Reverse end iterator to bits. */ - inline auto rend() noexcept { return ( &_bits ) + 1; } + inline uint64_t * rend() noexcept { return ( &_bits ) + 1; } /*! \brief Constant begin iterator to bits. */ - inline auto cbegin() const noexcept { return &_bits; } + inline const uint64_t * cbegin() const noexcept { return &_bits; } /*! \brief Constant end iterator to bits. */ - inline auto cend() const noexcept { return ( &_bits ) + 1; } + inline const uint64_t * cend() const noexcept { return ( &_bits ) + 1; } /*! \brief Constant reverse begin iterator to bits. */ - inline auto crbegin() const noexcept { return &_bits; } + inline const uint64_t * crbegin() const noexcept { return &_bits; } /*! \brief Constant everse end iterator to bits. 
*/ - inline auto crend() const noexcept { return ( &_bits ) + 1; } + inline const uint64_t * crend() const noexcept { return ( &_bits ) + 1; } /*! \brief Assign other truth table if number of variables match. @@ -154,55 +154,55 @@ struct static_truth_table /*! Returns number of variables. */ - inline auto num_vars() const noexcept { return NumVars; } + inline uint32_t num_vars() const noexcept { return NumVars; } /*! Returns number of blocks. */ - inline auto num_blocks() const noexcept { return NumBlocks; } + inline uint32_t num_blocks() const noexcept { return NumBlocks; } /*! Returns number of bits. */ - inline auto num_bits() const noexcept { return NumBits; } + inline uint32_t num_bits() const noexcept { return NumBits; } /*! \brief Begin iterator to bits. */ - inline auto begin() noexcept { return _bits.begin(); } + inline typename std::array::iterator begin() noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() noexcept { return _bits.end(); } + inline typename std::array::iterator end() noexcept { return _bits.end(); } /*! \brief Begin iterator to bits. */ - inline auto begin() const noexcept { return _bits.begin(); } + inline typename std::array::const_iterator begin() const noexcept { return _bits.begin(); } /*! \brief End iterator to bits. */ - inline auto end() const noexcept { return _bits.end(); } + inline typename std::array::const_iterator end() const noexcept { return _bits.end(); } /*! \brief Reverse begin iterator to bits. */ - inline auto rbegin() noexcept { return _bits.rbegin(); } + inline typename std::array::reverse_iterator rbegin() noexcept { return _bits.rbegin(); } /*! \brief Reverse end iterator to bits. */ - inline auto rend() noexcept { return _bits.rend(); } + inline typename std::array::reverse_iterator rend() noexcept { return _bits.rend(); } /*! \brief Constant begin iterator to bits. 
*/ - inline auto cbegin() const noexcept { return _bits.cbegin(); } + inline typename std::array::const_iterator cbegin() const noexcept { return _bits.cbegin(); } /*! \brief Constant end iterator to bits. */ - inline auto cend() const noexcept { return _bits.cend(); } + inline typename std::array::const_iterator cend() const noexcept { return _bits.cend(); } /*! \brief Constant reverse begin iterator to bits. */ - inline auto crbegin() const noexcept { return _bits.crbegin(); } + inline typename std::array::const_reverse_iterator crbegin() const noexcept { return _bits.crbegin(); } /*! \brief Constant teverse end iterator to bits. */ - inline auto crend() const noexcept { return _bits.crend(); } + inline typename std::array::const_reverse_iterator crend() const noexcept { return _bits.crend(); } /*! \brief Assign other truth table if number of variables match. From 9eb32f0766969b78dd34d3d5ef11e670a24d03e1 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 8 Feb 2024 15:11:58 +0100 Subject: [PATCH 04/19] Changing compilation flag for c++11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0cc979b75..3c81251cf 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ ifdef ABC_USE_LIBSTDCXX endif $(info $(MSG_PREFIX)Using CFLAGS=$(CFLAGS)) -CXXFLAGS += $(CFLAGS) -std=c++17 +CXXFLAGS += $(CFLAGS) -std=c++11 SRC := GARBAGE := core core.* *.stackdump ./tags $(PROG) arch_flags From 17afd93c78f13b3f903b78292cb0f5163e8831f0 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 8 Feb 2024 15:36:09 +0100 Subject: [PATCH 05/19] Extending ACD to work up to 11 variables --- src/base/abci/abc.c | 4 +- src/map/if/acd/ac_decomposition.hpp | 70 ++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index 3c5d3cbb8..2f6d6d545 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19839,9 +19839,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int 
argc, char ** argv ) Abc_Print( -1, "LUT size (%d) must be greater than the LUT decomposition size (%d).\n", pPars->nLutSize, pPars->nLutDecSize ); return 1; } - if ( pPars->nLutSize < 4 || pPars->nLutSize > 10 ) + if ( pPars->nLutSize < 4 || pPars->nLutSize > 11 ) { - Abc_Print( -1, "This feature only works for [4;10]-LUTs.\n" ); + Abc_Print( -1, "This feature only works for [4;11]-LUTs.\n" ); return 1; } } diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index d55941995..7791ba9d7 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -90,7 +90,7 @@ private: }; private: - static constexpr uint32_t max_num_vars = 10; + static constexpr uint32_t max_num_vars = 11; using STT = kitty::static_truth_table; public: @@ -287,7 +287,7 @@ private: best_tt._bits[i] = ptt[i]; } - local_extend_to( best_tt, num_vars ); + // local_extend_to( best_tt, num_vars ); } template @@ -382,7 +382,7 @@ private: uint32_t pos_new = pInvPerm[var_old + 1]; std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); std::swap( pComb[i], pComb[pos_new] ); - kitty::swap_inplace( tt, i, pos_new ); + swap_inplace_local( tt, i, pos_new ); for ( uint32_t j = i + 1; j < k; j++ ) { @@ -390,7 +390,7 @@ private: pos_new = pInvPerm[pComb[j - 1] + 1]; std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); std::swap( pComb[j], pComb[pos_new] ); - kitty::swap_inplace( tt, j, pos_new ); + swap_inplace_local( tt, j, pos_new ); } return true; @@ -653,7 +653,7 @@ private: } std::swap( permutations[i], permutations[k] ); - kitty::swap_inplace( best_tt, i, k ); + swap_inplace_local( best_tt, i, k ); ++k; } } @@ -1234,6 +1234,66 @@ private: return false; } + void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( num_vars > 6 ); + const uint32_t num_blocks = 1 << ( 
num_vars - 6 ); + + if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + /* Decomposition format for ABC * * The record is an array of unsigned chars where: From 7b74810047cd34ff4e0a37a984bc4eb60ae3412a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 16 Feb 2024 16:43:24 +0100 Subject: [PATCH 06/19] Changing policy of finding ACD 66 decomposition (faster and 100 percent coverage) --- src/map/if/acd/acd66.hpp | 77 ++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 42 
deletions(-) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 67d460157..5f925387e 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -156,20 +156,8 @@ private: /* find AC decompositions with minimal multiplicity */ for ( uint32_t i = num_vars - 6; i <= 5 && i <= ps.max_free_set_vars; ++i ) { - auto ret_tuple = enumerate_iset_combinations( i, column_multiplicity_fn[i - 1] ); - uint32_t multiplicity = std::get<2>( ret_tuple ); - - /* check for feasible solution into "66" with one possible shared variable */ - if ( multiplicity <= 2 || ( multiplicity <= 4 && i < 5 ) ) - { - best_tt = std::get<0>( ret_tuple ); - permutations = std::get<1>( ret_tuple ); - best_multiplicity = multiplicity; - best_free_set = i; - - if ( multiplicity <= 2 || check_shared_set() ) - return true; - } + if ( find_decomposition_bs( i, column_multiplicity_fn[i - 1] ) ) + return true; } best_multiplicity = UINT32_MAX; @@ -295,46 +283,57 @@ private: } template - std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn ) + bool find_decomposition_bs( uint32_t free_set_size, Fn&& fn ) { STT tt = start_tt; - /* TT with best cost */ - STT best_tt = tt; - uint32_t best_cost = UINT32_MAX; - /* works up to 16 input truth tables */ assert( num_vars <= 16 ); /* init combinations */ - uint32_t pComb[16], pInvPerm[16], bestPerm[16]; + uint32_t pComb[16], pInvPerm[16]; for ( uint32_t i = 0; i < num_vars; ++i ) { pComb[i] = pInvPerm[i] = i; } /* enumerate combinations */ + best_free_set = free_set_size; do { uint32_t cost = fn( tt ); - if ( cost < best_cost ) + if ( cost == 2 ) { best_tt = tt; - best_cost = cost; + best_multiplicity = cost; for ( uint32_t i = 0; i < num_vars; ++i ) { - bestPerm[i] = pComb[i]; + permutations[i] = pComb[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set2( tt ); + + if ( res > 0 ) + { + best_tt = 
tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + return true; } } } while ( combinations_next( free_set_size, pComb, pInvPerm, tt ) ); - std::array res_perm; - for ( uint32_t i = 0; i < num_vars; ++i ) - { - res_perm[i] = bestPerm[i]; - } - - return std::make_tuple( best_tt, res_perm, best_cost ); + return false; } bool check_shared_var( STT tt, uint32_t free_set_size, uint32_t shared_var, uint32_t multiplicity_limit ) @@ -448,7 +447,7 @@ private: return multiplicity <= multiplicity_limit; } - bool check_shared_set() + int check_shared_set2( STT const& tt ) { /* find one shared set variable */ for ( uint32_t i = best_free_set; i < num_vars; ++i ) @@ -456,27 +455,21 @@ private: /* check the multiplicity of cofactors */ if ( best_free_set < 4 ) { - if ( check_shared_var( best_tt, best_free_set, i, 2 ) ) + if ( check_shared_var( tt, best_free_set, i, 2 ) ) { - /* move shared variable as the most significative one */ - swap_inplace_local( best_tt, i, num_vars - 1 ); - std::swap( permutations[i], permutations[num_vars - 1] ); - return true; + return i; } } else { - if ( check_shared_var5( best_tt, best_free_set, i, 2 ) ) + if ( check_shared_var5( tt, best_free_set, i, 2 ) ) { - /* move shared variable as the most significative one */ - swap_inplace_local( best_tt, i, num_vars - 1 ); - std::swap( permutations[i], permutations[num_vars - 1] ); - return true; + return i; } } } - return false; + return -1; } void compute_decomposition_impl( bool verbose = false ) From 0e471e3ff8ca350fe550af6fb1c92a316db53378 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 20 Feb 2024 14:41:52 +0100 Subject: [PATCH 07/19] Performance improvements of ACD 66 --- src/map/if/acd/acd66.hpp | 225 ++++++++------------------------------- 1 file changed, 47 insertions(+), 178 
deletions(-) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 5f925387e..9377db5ec 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -145,18 +145,10 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; - /* array of functions to compute the column multiplicity */ - std::function column_multiplicity_fn[5] = { - [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, - [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; - /* find AC decompositions with minimal multiplicity */ for ( uint32_t i = num_vars - 6; i <= 5 && i <= ps.max_free_set_vars; ++i ) { - if ( find_decomposition_bs( i, column_multiplicity_fn[i - 1] ) ) + if ( find_decomposition_bs( i ) ) return true; } @@ -176,81 +168,40 @@ private: local_extend_to( start_tt, num_vars ); } - template - uint32_t column_multiplicity( STT tt ) + uint32_t column_multiplicity( STT const& tt, uint32_t free_set_size ) { - uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; - uint32_t multiplicity = 0; + assert( free_set_size <= 5 ); + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; - uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; - uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; - - /* supports up to 64 values of free set (256 for |FS| == 3)*/ - static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" ); - - /* extract iset functions */ - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) - { - multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); - *it >>= ( 1u << free_set_size ); - } - ++it; - } - - multiplicity = __builtin_popcountl( multiplicity_set[0] ); - - if ( free_set_size == 3 ) - { - multiplicity += __builtin_popcountl( multiplicity_set[1] ); - multiplicity += __builtin_popcountl( multiplicity_set[2] ); - multiplicity += __builtin_popcountl( multiplicity_set[3] ); - } - - return multiplicity; - } - - template - uint32_t column_multiplicity5( STT tt ) - { - uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - - static_assert( free_set_size == 5 || free_set_size == 4, "Wrong free set size for method used, expected of 4 or 5" ); - + uint64_t shift = UINT64_C( 1 ) << free_set_size; + uint64_t mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[4]; uint32_t size = 0; - uint64_t prev = -1; - std::array multiplicity_set; /* extract iset functions */ auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t sub = *it; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); - if ( fs_fn != prev ) + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) { - multiplicity_set[size++] = fs_fn; - prev = fs_fn; + if ( fs_fn == cofactors[k] ) + break; } - *it >>= ( 1u << free_set_size ); + if ( k == 4 ) + return 5; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; } ++it; } - std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); - - /* count unique */ - uint32_t multiplicity = 1; - for ( auto i = 1u; i < size; ++i ) - { - multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 
1 : 0; - } - - return multiplicity; + return size; } inline bool combinations_next( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) @@ -282,8 +233,7 @@ private: return true; } - template - bool find_decomposition_bs( uint32_t free_set_size, Fn&& fn ) + bool find_decomposition_bs( uint32_t free_set_size ) { STT tt = start_tt; @@ -301,7 +251,7 @@ private: best_free_set = free_set_size; do { - uint32_t cost = fn( tt ); + uint32_t cost = column_multiplicity( tt, free_set_size ); if ( cost == 2 ) { best_tt = tt; @@ -316,7 +266,7 @@ private: { /* look for a shared variable */ best_multiplicity = cost; - int res = check_shared_set2( tt ); + int res = check_shared_set( tt ); if ( res > 0 ) { @@ -336,136 +286,55 @@ private: return false; } - bool check_shared_var( STT tt, uint32_t free_set_size, uint32_t shared_var, uint32_t multiplicity_limit ) + bool check_shared_var( STT const& tt, uint32_t free_set_size, uint32_t shared_var ) { - uint64_t multiplicity_set[2][4] = { { 0u, 0u, 0u, 0u }, { 0u, 0u, 0u, 0u } }; - uint32_t multiplicity0 = 0, multiplicity1 = 0; + assert( free_set_size <= 5 ); + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; - uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; - uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; - - /* supports up to 64 values of free set (256 for |FS| == 3)*/ - assert( free_set_size <= 3 ); - + uint64_t shift = UINT64_C( 1 ) << free_set_size; + uint64_t mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[2][4]; + uint32_t size[2] = { 0, 0 }; uint32_t shared_var_shift = shared_var - free_set_size; /* extract iset functions */ - uint64_t iteration_counter = 0; + uint32_t iteration_counter = 0; auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t sub = *it; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - multiplicity_set[( iteration_counter >> shared_var_shift ) & 1][( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); - *it >>= ( 1u << free_set_size ); + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; ++iteration_counter; } ++it; } - multiplicity0 = __builtin_popcountl( multiplicity_set[0][0] ); - multiplicity1 = __builtin_popcountl( multiplicity_set[1][0] ); - - if ( free_set_size == 3 ) - { - multiplicity0 += __builtin_popcountl( multiplicity_set[0][1] ); - multiplicity0 += __builtin_popcountl( multiplicity_set[0][2] ); - multiplicity0 += __builtin_popcountl( multiplicity_set[0][3] ); - - multiplicity1 += __builtin_popcountl( multiplicity_set[1][1] ); - multiplicity1 += __builtin_popcountl( multiplicity_set[1][2] ); - multiplicity1 += __builtin_popcountl( multiplicity_set[1][3] ); - } - - if ( multiplicity0 > multiplicity_limit || multiplicity1 > multiplicity_limit ) - return false; - - best_multiplicity0 = multiplicity0; - best_multiplicity1 = multiplicity1; 
- return true; } - bool check_shared_var5( STT tt, uint32_t free_set_size, uint32_t shared_var, uint32_t multiplicity_limit ) - { - uint32_t const num_blocks = 1u << ( num_vars - 6 ); - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; - - assert( free_set_size == 5 || free_set_size == 4 ); - - uint32_t size[2] = { 0, 0 }; - uint64_t prev[2] = { UINT64_MAX, UINT64_MAX }; - std::array multiplicity_set[2]; - - uint32_t shared_var_shift = shared_var - free_set_size; - - /* extract iset functions */ - uint64_t iteration_counter = 0; - auto it = std::begin( tt ); - for ( auto i = 0u; i < num_blocks; ++i ) - { - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) - { - uint32_t fs_fn = static_cast( *it & masks[free_set_size] ); - uint32_t cofactor = ( iteration_counter >> shared_var_shift ) & 1; - if ( fs_fn != prev[cofactor] ) - { - multiplicity_set[cofactor][size[cofactor]++] = fs_fn; - prev[cofactor] = fs_fn; - } - *it >>= ( 1u << free_set_size ); - ++iteration_counter; - } - ++it; - } - - std::sort( multiplicity_set[0].begin(), multiplicity_set[0].begin() + size[0] ); - - /* count unique in 0 cofactor */ - uint32_t multiplicity = 1; - for ( auto i = 1u; i < size[0]; ++i ) - { - multiplicity += multiplicity_set[0][i] != multiplicity_set[0][i - 1] ? 1 : 0; - } - - if ( multiplicity > multiplicity_limit ) - return false; - - best_multiplicity0 = multiplicity; - - std::sort( multiplicity_set[1].begin(), multiplicity_set[1].begin() + size[1] ); - - /* count unique in 1 cofactor */ - multiplicity = 1; - for ( auto i = 1u; i < size[1]; ++i ) - { - multiplicity += multiplicity_set[1][i] != multiplicity_set[1][i - 1] ? 
1 : 0; - } - - best_multiplicity1 = multiplicity; - - return multiplicity <= multiplicity_limit; - } - - int check_shared_set2( STT const& tt ) + inline int check_shared_set( STT const& tt ) { /* find one shared set variable */ for ( uint32_t i = best_free_set; i < num_vars; ++i ) { /* check the multiplicity of cofactors */ - if ( best_free_set < 4 ) + if ( check_shared_var( tt, best_free_set, i ) ) { - if ( check_shared_var( tt, best_free_set, i, 2 ) ) - { - return i; - } - } - else - { - if ( check_shared_var5( tt, best_free_set, i, 2 ) ) - { - return i; - } + return i; } } From 0cd548f1cb771c195004157290e700d673d41a10 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 20 Feb 2024 17:28:50 +0100 Subject: [PATCH 08/19] Performance improvements to ACD --- src/map/if/acd/ac_decomposition.hpp | 35 ++++++++++++++++------------- src/map/if/acd/acd66.hpp | 8 ++----- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 7791ba9d7..cc50eb8fd 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -203,9 +203,10 @@ private: [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; /* find a feasible AC decomposition */ + // for ( uint32_t i = std::min( ps.lut_size - 1, ps.max_free_set_vars); i >= start; --i ) for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto ret_tuple = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations( i, offset, column_multiplicity_fn[i - 1] ); uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ @@ -231,7 +232,7 @@ private: return false; /* try without the delay profile */ - if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) + if ( best_multiplicity == UINT32_MAX ) { delay_profile = 0; if ( ps.support_reducing_only ) @@ -241,7 +242,7 @@ 
private: for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) { - auto ret_tuple = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + auto ret_tuple = enumerate_iset_combinations( i, 0, column_multiplicity_fn[i - 1] ); uint32_t multiplicity = std::get<2>( ret_tuple ); /* additional cost if not support reducing */ @@ -291,7 +292,7 @@ private: } template - uint32_t column_multiplicity( STT tt ) + uint32_t column_multiplicity( STT const& tt ) { uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; uint32_t multiplicity = 0; @@ -303,15 +304,14 @@ private: static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" ); /* extract iset functions */ - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t cof = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); - *it >>= ( 1u << free_set_size ); + multiplicity_set[( cof >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( cof & masks_bits[free_set_size] ); + cof >>= ( 1u << free_set_size ); } - ++it; } multiplicity = __builtin_popcountl( multiplicity_set[0] ); @@ -327,7 +327,7 @@ private: } template - uint32_t column_multiplicity5( STT tt ) + uint32_t column_multiplicity5( STT const& tt ) { uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; @@ -339,20 +339,19 @@ private: std::array multiplicity_set; /* extract iset functions */ - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t cof = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { - uint64_t fs_fn = *it & masks[free_set_size]; + uint64_t fs_fn = cof & masks[free_set_size]; if ( fs_fn != prev ) { multiplicity_set[size++] = static_cast( fs_fn ); prev = fs_fn; } - *it >>= ( 1u << free_set_size ); + cof >>= ( 1u << free_set_size ); } - ++it; } std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); @@ -397,13 +396,13 @@ private: } template - std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) + std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, uint32_t offset, Fn&& fn ) { STT tt = best_tt; /* TT with best cost */ STT best_tt = tt; - uint32_t best_cost = UINT32_MAX; + uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1; assert( free_set_size >= offset ); @@ -440,6 +439,12 @@ private: } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); std::array res_perm; + + if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) ) + { + return std::make_tuple( best_tt, res_perm, UINT32_MAX ); + } + for ( uint32_t i = 0; i < num_vars; ++i ) { res_perm[i] = permutations[bestPerm[i]]; diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 9377db5ec..976c1e353 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -179,10 +179,9 @@ private: uint32_t size = 0; /* extract iset functions */ - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { - uint64_t sub = *it; + uint64_t sub = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { uint32_t fs_fn = static_cast( sub & mask ); @@ -198,7 +197,6 @@ private: 
cofactors[size++] = fs_fn; sub >>= shift; } - ++it; } return size; @@ -299,10 +297,9 @@ private: /* extract iset functions */ uint32_t iteration_counter = 0; - auto it = std::begin( tt ); for ( auto i = 0u; i < num_blocks; ++i ) { - uint64_t sub = *it; + uint64_t sub = tt._bits[i]; for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) { uint32_t fs_fn = static_cast( sub & mask ); @@ -320,7 +317,6 @@ private: sub >>= shift; ++iteration_counter; } - ++it; } return true; From 13fd0d55c7b7e54cf0a7fc70ade6ffeae7397d5b Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 21 Feb 2024 09:47:16 +0100 Subject: [PATCH 09/19] Removing unnecessary structs --- src/map/if/acd/ac_wrapper.cpp | 7 ++---- src/map/if/acd/acd66.hpp | 42 ++++++++++++----------------------- 2 files changed, 16 insertions(+), 33 deletions(-) diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 441cb7008..f2e2f1015 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -74,9 +74,7 @@ int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) { using namespace acd; - acd66_params ps; - ps.verify = false; - acd66_impl acd( nVars, ps ); + acd66_impl acd( nVars, false ); if ( acd.run( pTruth ) == 0 ) return 0; @@ -97,8 +95,7 @@ int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition { using namespace acd; - acd66_params ps; - acd66_impl acd( nVars, ps ); + acd66_impl acd( nVars, false ); acd.run( pTruth ); int val = acd.compute_decomposition(); diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 976c1e353..7c8c7a821 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -44,25 +44,6 @@ ABC_NAMESPACE_CXX_HEADER_START namespace acd { -/*! \brief Parameters for acd66 */ -struct acd66_params -{ - /*! \brief Maximum size of the free set (1 < num < 6). */ - uint32_t max_free_set_vars{ 5 }; - - /*! \brief Number of configurations to test for decomposition. 
*/ - uint32_t max_evaluations{ 3 }; - - /*! \brief Run verification before returning. */ - bool verify{ false }; -}; - -/*! \brief Statistics for acd66 */ -struct acd66_stats -{ - uint32_t num_edges{ 0 }; -}; - class acd66_impl { private: @@ -71,8 +52,8 @@ private: using LTT = kitty::static_truth_table<6>; public: - explicit acd66_impl( uint32_t num_vars, acd66_params const& ps, acd66_stats* pst = nullptr ) - : num_vars( num_vars ), ps( ps ), pst( pst ) + explicit acd66_impl( uint32_t num_vars, bool verify = false ) + : num_vars( num_vars ), verify( verify ) { std::iota( permutations.begin(), permutations.end(), 0 ); } @@ -102,17 +83,23 @@ public: compute_decomposition_impl(); - if ( ps.verify && !verify_impl() ) + if ( verify && !verify_impl() ) { return 1; } - if ( pst ) + return 0; + } + + uint32_t get_num_edges() + { + if ( bs_support_size == UINT32_MAX ) { - pst->num_edges = bs_support_size + best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + return num_vars + 1 + ( best_multiplicity > 2 ? 1 : 0 ); } - return 0; + /* real value after support minimization */ + return bs_support_size + best_free_set + 1 + ( best_multiplicity > 2 ? 
1 : 0 ); } /* contains a 1 for BS variables */ @@ -146,7 +133,7 @@ private: best_free_set = UINT32_MAX; /* find AC decompositions with minimal multiplicity */ - for ( uint32_t i = num_vars - 6; i <= 5 && i <= ps.max_free_set_vars; ++i ) + for ( uint32_t i = num_vars - 6; i <= 5; ++i ) { if ( find_decomposition_bs( i ) ) return true; @@ -940,8 +927,7 @@ private: uint32_t bs_support[6]; uint32_t num_vars; - acd66_params const& ps; - acd66_stats* pst; + bool verify; std::array permutations; }; From eba56b088fe3c4b80e858cd7cc5d75193f4a0fb2 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 21 Feb 2024 17:13:29 +0100 Subject: [PATCH 10/19] Cleaning code and performance improvements --- src/map/if/acd/ac_decomposition.hpp | 2 +- src/map/if/acd/acd66.hpp | 126 +++++----------------------- 2 files changed, 20 insertions(+), 108 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index cc50eb8fd..8e1fc31b5 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -487,7 +487,7 @@ private: *it >>= ( 1u << best_free_set ); } - offset = ( offset + ( 64 >> best_free_set ) ) % 64; + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; ++it; } diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 7c8c7a821..5645f9341 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -66,7 +66,7 @@ public: /* truth table is too large for the settings */ if ( num_vars > max_num_vars || num_vars > 11 ) { - return -1; + return false; } /* convert to static TT */ @@ -132,7 +132,7 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; - /* find AC decompositions with minimal multiplicity */ + /* find ACD "66" for different number of variables in the free set */ for ( uint32_t i = num_vars - 6; i <= 5; ++i ) { if ( find_decomposition_bs( i ) ) @@ -160,8 +160,8 @@ private: assert( free_set_size <= 5 ); uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; - uint64_t shift = UINT64_C( 1 ) << free_set_size; - uint64_t mask = ( UINT64_C( 1 ) << shift ) - 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; uint32_t cofactors[4]; uint32_t size = 0; @@ -276,8 +276,8 @@ private: assert( free_set_size <= 5 ); uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; - uint64_t shift = UINT64_C( 1 ) << free_set_size; - uint64_t mask = ( UINT64_C( 1 ) << shift ) - 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; uint32_t cofactors[2][4]; uint32_t size[2] = { 0, 0 }; uint32_t shared_var_shift = shared_var - free_set_size; @@ -333,24 +333,23 @@ private: LTT isets1[2]; /* construct isets */ - STT tt = best_tt; uint32_t offset = 0; uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; /* limit analysis on 0 cofactor of the shared variable */ if ( has_shared_set ) num_blocks >>= 1; - auto it = std::begin( tt ); - uint64_t fs_fun[4] = { *it & masks[best_free_set], 0, 0, 0 }; + uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; for ( auto i = 0u; i < num_blocks; ++i ) { + uint64_t cof = best_tt._bits[i]; for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) { - uint64_t val = *it & masks[best_free_set]; - + uint64_t val = cof & mask; if ( val == fs_fun[0] ) { isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); @@ -360,24 +359,21 @@ private: isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); fs_fun[1] = val; } - - *it >>= ( 1u << best_free_set ); + cof >>= shift; } - - offset = ( offset + ( 64 >> best_free_set ) ) % 64; - ++it; + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; } /* continue on the 1 cofactor if shared set */ if ( 
has_shared_set ) { - fs_fun[2] = *it & masks[best_free_set]; + fs_fun[2] = best_tt._bits[num_blocks] & mask; for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) { + uint64_t cof = best_tt._bits[i]; for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) { - uint64_t val = *it & masks[best_free_set]; - + uint64_t val = cof & mask; if ( val == fs_fun[2] ) { isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); @@ -387,12 +383,9 @@ private: isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); fs_fun[3] = val; } - - *it >>= ( 1u << best_free_set ); + cof >>= shift; } - - offset = ( offset + ( 64 >> best_free_set ) ) % 64; - ++it; + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; } } @@ -445,7 +438,6 @@ private: /* u = 3 one set has multiplicity 1, use don't cares */ compute_functions3( isets0, isets1, fs_fun ); - compute_composition( fs_fun ); } inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) @@ -488,7 +480,6 @@ private: { if ( !has_var6( f, care, i ) ) { - adjust_truth_table_on_dc( f, care, i ); continue; } @@ -655,85 +646,6 @@ private: return false; } - bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) - { - assert( var_index < real_num_vars ); - assert( real_num_vars <= tt.num_vars() ); - assert( tt.num_vars() == care.num_vars() ); - - const uint32_t num_blocks = real_num_vars <= 6 ? 
1 : ( 1 << ( real_num_vars - 6 ) ); - if ( real_num_vars <= 6 || var_index < 6 ) - { - auto it_tt = std::begin( tt._bits ); - auto it_care = std::begin( care._bits ); - while ( it_tt != std::begin( tt._bits ) + num_blocks ) - { - if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) - { - return true; - } - ++it_tt; - ++it_care; - } - - return false; - } - - const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < num_blocks; i += 2 * step ) - { - for ( auto j = 0; j < step; ++j ) - { - if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) - { - return true; - } - } - } - - return false; - } - - template - bool has_var_support( const TT_type& tt, const TT_type& care, uint32_t real_num_vars, uint8_t var_index ) - { - assert( var_index < real_num_vars ); - assert( real_num_vars <= tt.num_vars() ); - assert( tt.num_vars() == care.num_vars() ); - - const uint32_t num_blocks = real_num_vars <= 6 ? 
1 : ( 1 << ( real_num_vars - 6 ) ); - if ( real_num_vars <= 6 || var_index < 6 ) - { - auto it_tt = std::begin( tt._bits ); - auto it_care = std::begin( care._bits ); - while ( it_tt != std::begin( tt._bits ) + num_blocks ) - { - if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) - { - return true; - } - ++it_tt; - ++it_care; - } - - return false; - } - - const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < num_blocks; i += 2 * step ) - { - for ( auto j = 0; j < step; ++j ) - { - if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) - { - return true; - } - } - } - - return false; - } - void adjust_truth_table_on_dc( LTT& tt, LTT& care, uint32_t var_index ) { uint64_t new_bits = tt._bits & care._bits; @@ -796,7 +708,7 @@ private: ++bytes; /* write support */ - for ( uint32_t i = best_free_set; i < best_free_set; ++i ) + for ( uint32_t i = 0; i < best_free_set; ++i ) { *pArray = (unsigned char)permutations[i]; pArray++; From f72000f5aebd1a222f8357ddc87ae3c3a471c3ff Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 21 Feb 2024 18:25:48 +0100 Subject: [PATCH 11/19] Adding ACD cascade 666, performance improvements --- src/base/abci/abc.c | 4 +- src/map/if/acd/ac_wrapper.cpp | 22 + src/map/if/acd/ac_wrapper.h | 2 + src/map/if/acd/acd66.hpp | 6 +- src/map/if/acd/acd666.hpp | 1256 +++++++++++++++++++++++++++++++++ src/map/if/ifDec66.c | 34 +- 6 files changed, 1316 insertions(+), 8 deletions(-) create mode 100644 src/map/if/acd/acd666.hpp diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index 2f6d6d545..f79e8de77 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19630,9 +19630,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->pLutStruct = argv[globalUtilOptind]; pPars->fEnableStructN = 1; globalUtilOptind++; - if ( 
strlen(pPars->pLutStruct) != 2 ) + if ( strlen(pPars->pLutStruct) != 2 && strlen(pPars->pLutStruct) != 3 ) { - Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2-char string (e.g. \"66\").\n" ); + Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2- or 3-char string (e.g. \"66\" or \"666\").\n" ); goto usage; } break; diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index f2e2f1015..be1e8783a 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -19,6 +19,7 @@ #include "ac_wrapper.h" #include "ac_decomposition.hpp" #include "acd66.hpp" +#include "acd666.hpp" ABC_NAMESPACE_IMPL_START @@ -108,4 +109,25 @@ int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition return 0; } +int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) +{ + using namespace acd; + + acd666_impl acd( nVars, false ); + + if ( acd.run( pTruth ) == 0 ) + return 0; + + if ( !compute_decomposition ) + return 1; + + int val = acd.compute_decomposition(); + if ( val != 0 ) + { + return 0; + } + + return 1; +} + ABC_NAMESPACE_IMPL_END diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index 2e052c563..03f17ed40 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -31,6 +31,8 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); +int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); + ABC_NAMESPACE_HEADER_END #endif \ No newline at end of file diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 5645f9341..807181a7a 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -52,7 +52,7 @@ private: using LTT = kitty::static_truth_table<6>; public: - explicit acd66_impl( 
uint32_t num_vars, bool verify = false ) + explicit acd66_impl( uint32_t const num_vars, bool const verify = false ) : num_vars( num_vars ), verify( verify ) { std::iota( permutations.begin(), permutations.end(), 0 ); @@ -838,8 +838,8 @@ private: uint64_t dec_funcs[2]; uint32_t bs_support[6]; - uint32_t num_vars; - bool verify; + uint32_t const num_vars; + bool const verify; std::array permutations; }; diff --git a/src/map/if/acd/acd666.hpp b/src/map/if/acd/acd666.hpp new file mode 100644 index 000000000..a1287f72c --- /dev/null +++ b/src/map/if/acd/acd666.hpp @@ -0,0 +1,1256 @@ +/**C++File************************************************************** + + FileName [acd666.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - Feb 8, 2024.] + +***********************************************************************/ +/*! + \file acd666.hpp + \brief Ashenhurst-Curtis decomposition for "666" cascade + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD666_H_ +#define _ACD666_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +ABC_NAMESPACE_CXX_HEADER_START + +namespace acd +{ + +class acd666_impl +{ +private: + static constexpr uint32_t max_num_vars = 16; + using STT = kitty::static_truth_table; + using LTT = kitty::static_truth_table<6>; + +public: + explicit acd666_impl( uint32_t const num_vars, bool const verify = false ) + : num_vars( num_vars ), verify( verify ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! 
\brief Runs ACD 666 */ + bool run( word* ptt ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 16 ) + { + return false; + } + + /* convert to static TT */ + init_truth_table( ptt ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition(); + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + uint32_t n = num_luts == 3 ? rm_support_size : num_vars; + compute_decomposition_impl( n ); + + if ( verify && !verify_impl() ) + { + return 1; + } + + return 0; + } + + uint32_t get_num_edges() + { + if ( support_sizes[0] == UINT32_MAX ) + { + return UINT32_MAX; + } + + uint32_t num_edges = support_sizes[0] + support_sizes[1] + 1 + ( shared_vars[0] < UINT32_MAX ? 1 : 0 ); + + if ( num_luts = 2 ) + return num_edges; + + /* real value after support minimization */ + return num_edges + support_sizes[2] + 1 + ( shared_vars[1] < UINT32_MAX ? 1 : 0 ); + } + + /* contains a 1 for BS variables */ + // unsigned get_profile() + // { + // unsigned profile = 0; + + // if ( support_sizes[0] == UINT32_MAX ) + // return -1; + + // for ( uint32_t i = 0; i < bs_support_size; ++i ) + // { + // profile |= 1 << permutations[best_free_set + bs_support[i]]; + // } + + // return profile; + // } + + // void get_decomposition( unsigned char* decompArray ) + // { + // if ( support_sizes[0] == UINT32_MAX ) + // return; + + // get_decomposition_abc( decompArray ); + // } + +private: + bool find_decomposition() + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = num_vars - 6; i <= 5; ++i ) + { + if ( find_decomposition_bs( start_tt, num_vars, i ) ) + { + num_luts = 2; + return true; + } + } + + /* find ACD "666" for different number of variables in the free set */ + bool dec_found = false; + uint32_t min_vars_free_set = num_vars <= 11 ? 
1 : num_vars - 11; + uint32_t max_vars_free_set = num_vars <= 11 ? num_vars - 7 : 5; + for ( uint32_t i = max_vars_free_set; i >= min_vars_free_set; --i ) + // for ( uint32_t i = min_vars_free_set; i <= max_vars_free_set; ++i ) + { + dec_found = find_decomposition_bs( start_tt, num_vars, i ); + if ( dec_found ) + break; + } + + if ( !dec_found ) + { + best_multiplicity = UINT32_MAX; + return false; + } + + /* compute functions for the top and reminder LUT */ + compute_decomposition_impl_top( num_vars ); + + /* find ACD "66" for the remainder function */ + for ( uint32_t i = rm_support_size - 6; i <= 5; ++i ) + { + if ( find_decomposition_bs( remainder, rm_support_size, i ) ) + { + num_luts = 3; + fix_permutations_remainder( rm_support_size ); + return true; + } + } + + best_multiplicity = UINT32_MAX; + return false; + } + + void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + start_tt._bits[i] = ptt[i]; + } + + local_extend_to( start_tt, num_vars ); + } + + uint32_t column_multiplicity( STT const& tt, uint32_t n, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( n > 6 ) ? 
( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == 4 ) + return 5; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + inline bool combinations_next( uint32_t n, uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == n - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + swap_inplace_local( tt, n, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + swap_inplace_local( tt, n, j, pos_new ); + } + + return true; + } + + bool find_decomposition_bs( STT tt, uint32_t n, uint32_t free_set_size ) + { + /* works up to 16 input truth tables */ + assert( n <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < n; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity( tt, n, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared 
variable */ + best_multiplicity = cost; + int res = check_shared_set( tt, n ); + + if ( res > 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, n, res, n - 1 ); + std::swap( permutations[res], permutations[n - 1] ); + return true; + } + } + } while ( combinations_next( n, free_set_size, pComb, pInvPerm, tt ) ); + + return false; + } + + inline bool check_shared_var( STT const& tt, uint32_t n, uint32_t free_set_size, uint32_t shared_var ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[2][4]; + uint32_t size[2] = { 0, 0 }; + uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + inline int check_shared_set( STT const& tt, uint32_t n ) + { + /* find one shared set variable */ + for ( uint32_t i = best_free_set; i < n; ++i ) + { + /* check the multiplicity of cofactors */ + if ( check_shared_var( tt, n, best_free_set, i ) ) + { + return i; + } + } + + return -1; + } + + void compute_decomposition_impl_top( uint32_t n, bool verbose = false ) + { + bool has_shared_set = best_multiplicity > 2; + + /* construct isets involved in multiplicity */ + STT isets0[2]; + STT isets1[2]; + + /* 
construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[0] ) + { + isets0[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + cof >>= shift; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = best_tt._bits[num_blocks] & mask; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + if ( val == fs_fun[2] ) + { + isets1[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + cof >>= shift; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions_top( isets0, isets1, fs_fun, n ); + + /* print functions */ + if ( verbose ) + { + std::cout << "RM function : "; + kitty::print_hex( remainder ); + std::cout << "\n"; + LTT f; + f._bits = dec_funcs[2]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + void compute_decomposition_impl( uint32_t n, bool verbose = false ) + { + bool has_shared_set = best_multiplicity > 2; + + /* construct isets involved in multiplicity */ + LTT 
isets0[2]; + LTT isets1[2]; + + /* construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + + /* limit analysis on 0 cofactor of the shared variable */ + if ( has_shared_set ) + num_blocks >>= 1; + + uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; + + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + + if ( val == fs_fun[0] ) + { + isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[1] = val; + } + + cof >>= shift; + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + } + + /* continue on the 1 cofactor if shared set */ + if ( has_shared_set ) + { + fs_fun[2] = best_tt._bits[num_blocks] & mask; + for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + + if ( val == fs_fun[2] ) + { + isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[3] = val; + } + + cof >>= shift; + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + } + } + + /* find the support minimizing combination with shared set */ + compute_functions( isets0, isets1, fs_fun, n ); + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + inline void compute_functions_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity 
< 3 ) + { + shared_vars[1] = UINT32_MAX; + remainder = isets0[0]; + rm_support_size = n - best_free_set; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + rm_support[i] = permutations[i + best_free_set]; + } + compute_composition( fs_fun, 2 ); + return; + } + + shared_vars[1] = permutations[n - 1]; + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4_top( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4_top( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3_top( isets0, isets1, fs_fun, n ); + } + + inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + /* u = 2 no support minimization */ + if ( best_multiplicity < 3 ) + { + shared_vars[0] = UINT32_MAX; + dec_funcs[0] = isets0[0]._bits; + support_sizes[0] = n - best_free_set; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + supports[0][i] = permutations[i + best_free_set]; + } + compute_composition( fs_fun, 1 ); + return; + } + + shared_vars[0] = permutations[n - 1]; + + /* u = 4 two possibilities */ + if ( best_multiplicity == 4 ) + { + compute_functions4( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 if both sets have multiplicity 2 there are no don't cares */ + if ( best_multiplicity0 == best_multiplicity1 ) + { + compute_functions4( isets0, isets1, fs_fun, n ); + return; + } + + /* u = 3 one set has multiplicity 1, use don't cares */ + compute_functions3( isets0, isets1, fs_fun, n ); + } + + inline void compute_functions4_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + STT f; + uint32_t const num_iset_vars = n - best_free_set; + uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); + + assert( num_iset_vars > 6 ); + for ( uint32_t i = 0; i < num_blocks; ++i ) + 
{ + f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; + } + + /* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + support_vars1 += has_var( f, num_iset_vars, i ) ? 1 : 0; + rm_support[i] = permutations[i + best_free_set]; + } + + /* use a different set */ + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; + } + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars2 += has_var( f, num_iset_vars, i ) ? 1 : 0; + } + + rm_support_size = support_vars2; + if ( support_vars2 > support_vars1 ) + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; + } + std::swap( fs_fun[3], fs_fun[4] ); + rm_support_size = support_vars1; + } + + /* move variables */ + if ( rm_support_size < num_iset_vars ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + if ( !has_var( f, num_iset_vars, i ) ) + { + continue; + } + + if ( support_vars1 < i ) + { + swap_inplace_local( f, num_iset_vars, support_vars1, i ); + } + + rm_support[support_vars1] = permutations[i + best_free_set]; + ++support_vars1; + } + } + + remainder = f; + compute_composition( fs_fun, 2 ); + } + + inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[1]; + LTT care; + + assert( n - best_free_set <= 6 ); + care._bits = masks[n - best_free_set]; + + /* count the number of support variables */ + uint32_t support_vars1 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars1 += has_var6( f, care, i ) ? 
1 : 0; + supports[0][i] = permutations[i + best_free_set]; + } + + /* use a different set */ + f = isets0[0] | isets1[0]; + + uint32_t support_vars2 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + support_vars2 += has_var6( f, care, i ) ? 1 : 0; + } + + support_sizes[0] = support_vars2; + if ( support_vars2 > support_vars1 ) + { + f = isets0[0] | isets1[1]; + std::swap( fs_fun[3], fs_fun[4] ); + support_sizes[0] = support_vars1; + } + + /* move variables */ + if ( support_sizes[0] < n - best_free_set ) + { + support_vars1 = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + continue; + } + + if ( support_vars1 < i ) + { + kitty::swap_inplace( f, support_vars1, i ); + } + + supports[0][support_vars1] = permutations[i + best_free_set]; + ++support_vars1; + } + } + + dec_funcs[0] = f._bits; + compute_composition( fs_fun, 1 ); + } + + inline void compute_functions3_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + STT f, care; + uint32_t const num_iset_vars = n - best_free_set; + uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); + + assert( num_iset_vars > 6 ); + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; + } + + assert( n - best_free_set <= 6 ); + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + care._bits[i] = ~( isets0[0]._bits[i] ); + } + fs_fun[1] = fs_fun[0]; + } + else + { + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + care._bits[i] = ~( isets1[0]._bits[i] ); + } + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < num_iset_vars; ++i ) + { + if ( !has_var_support( f, care, num_iset_vars, i ) ) + { + adjust_truth_table_on_dc( f, care, n, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + 
rm_support[support_vars] = permutations[i + best_free_set]; + ++support_vars; + } + + rm_support_size = support_vars; + remainder = f; + compute_composition( fs_fun, 2 ); + } + + inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) + { + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + LTT f = isets0[0] | isets1[0]; + LTT care; + + assert( n - best_free_set <= 6 ); + + /* init the care set */ + if ( best_multiplicity0 == 1 ) + { + care._bits = masks[n - best_free_set] & ( ~isets0[0]._bits ); + fs_fun[1] = fs_fun[0]; + } + else + { + care._bits = masks[n - best_free_set] & ( ~isets1[0]._bits ); + fs_fun[3] = fs_fun[2]; + } + + /* count the number of support variables */ + uint32_t support_vars = 0; + for ( uint32_t i = 0; i < n - best_free_set; ++i ) + { + if ( !has_var6( f, care, i ) ) + { + adjust_truth_table_on_dc6( f, care, i ); + continue; + } + + if ( support_vars < i ) + { + kitty::swap_inplace( f, support_vars, i ); + } + + supports[0][support_vars] = i; + ++support_vars; + } + + support_sizes[0] = support_vars; + dec_funcs[0] = f._bits; + compute_composition( fs_fun, 1 ); + } + + void compute_composition( uint64_t fs_fun[4], uint32_t index ) + { + dec_funcs[index] = fs_fun[0] << ( 1 << best_free_set ); + dec_funcs[index] |= fs_fun[1]; + + if ( best_multiplicity > 2 ) + { + dec_funcs[index] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); + dec_funcs[index] |= fs_fun[3] << ( 2 << best_free_set ); + } + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + supports[index][i] = permutations[i]; + } + support_sizes[index] = best_free_set; + } + + void fix_permutations_remainder( uint32_t n ) + { + for ( uint32_t i = 0; i < n; ++i ) + { + permutations[i] = rm_support[permutations[i]]; + } + } + + template + void local_extend_to( TT_type& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i 
< num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + void swap_inplace_local( STT& tt, uint32_t n, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + assert( n > 6 ); + const uint32_t num_blocks = 1 << ( n - 6 ); + + if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + 
{ + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + + inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) + { + if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) + { + return true; + } + + return false; + } + + inline bool has_var( const STT& tt, uint32_t n, uint8_t var_index ) + { + uint32_t const num_blocks = 1u << ( n - 6 ); + + if ( var_index < 6 ) + { + return std::any_of( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, + [var_index]( uint64_t word ) { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & kitty::detail::projections_neg[var_index] ) != + ( word & kitty::detail::projections_neg[var_index] ); } ); + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( tt._bits[i + j] != tt._bits[i + j + step] ) + { + return true; + } + } + } + return false; + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 
1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + void adjust_truth_table_on_dc6( LTT& tt, LTT& care, uint32_t var_index ) + { + uint64_t new_bits = tt._bits & care._bits; + tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); + } + + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t n, uint32_t var_index ) + { + assert( var_index < n ); + const uint32_t num_blocks = n <= 6 ? 
1 : ( 1 << ( n - 6 ) ); + + if ( n <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + uint64_t new_bits = *it_tt & *it_care; + *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); + + ++it_tt; + ++it_care; + } + return; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); + tt._bits[i + j + step] = tt._bits[i + j]; + care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; + care._bits[i + j + step] = care._bits[i + j]; + } + } + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. 
+ * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + // void get_decomposition_abc( unsigned char* decompArray ) + // { + // unsigned char* pArray = decompArray; + // unsigned char bytes = 2; + + // /* write number of LUTs */ + // pArray++; + // *pArray = 2; + // pArray++; + + // /* write BS LUT */ + // /* write fanin size */ + // *pArray = bs_support_size; + // pArray++; + // ++bytes; + + // /* write support */ + // for ( uint32_t i = 0; i < bs_support_size; ++i ) + // { + // *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; + // pArray++; + // ++bytes; + // } + + // /* write truth table */ + // uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); + // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + // { + // *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); + // pArray++; + // ++bytes; + // } + + // /* write top LUT */ + // /* write fanin size */ + // uint32_t support_size = best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + // *pArray = support_size; + // pArray++; + // ++bytes; + + // /* write support */ + // for ( uint32_t i = 0; i < best_free_set; ++i ) + // { + // *pArray = (unsigned char)permutations[i]; + // pArray++; + // ++bytes; + // } + + // *pArray = (unsigned char)num_vars; + // pArray++; + // ++bytes; + + // if ( best_multiplicity > 2 ) + // { + // *pArray = (unsigned char)permutations[num_vars - 1]; + // pArray++; + // ++bytes; + // } + + // /* write truth table */ + // tt_num_bytes = ( support_size <= 3 ) ? 
1 : ( 1 << ( support_size - 3 ) ); + // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + // { + // *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); + // pArray++; + // ++bytes; + // } + + // /* write numBytes */ + // *decompArray = bytes; + // } + + bool verify_impl() + { + /* create PIs */ + STT pis[max_num_vars]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + kitty::create_nth_var( pis[i], i ); + } + + STT bsi[6]; + STT bsf_sim; + for ( uint32_t lut_i = 0; lut_i < num_luts; ++lut_i ) + { + for ( uint32_t i = 0; i < support_sizes[lut_i]; ++i ) + { + bsi[i] = pis[supports[lut_i][i]]; + } + + STT top_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < support_sizes[lut_i]; ++j ) + { + pattern |= get_bit( bsi[j], i ) << j; + } + if ( lut_i != 0 ) + { + pattern |= get_bit( bsf_sim, i ) << support_sizes[lut_i]; + if ( shared_vars[lut_i - 1] < UINT32_MAX ) + { + pattern |= get_bit( pis[shared_vars[lut_i - 1]], i ) << ( support_sizes[lut_i] + 1 ); + } + } + if ( ( dec_funcs[lut_i] >> pattern ) & 1 ) + { + set_bit( top_sim, i ); + } + } + + bsf_sim = top_sim; + } + + /* extend function */ + local_extend_to( bsf_sim, num_vars ); + + for ( uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i ) + { + if ( bsf_sim._bits[i] != start_tt._bits[i] ) + { + std::cout << "Found incorrect decomposition\n"; + report_tt( bsf_sim ); + std::cout << " instead_of\n"; + report_tt( start_tt ); + return false; + } + } + + return true; + } + + uint32_t get_bit( const STT& tt, uint64_t index ) + { + return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; + } + + void set_bit( STT& tt, uint64_t index ) + { + tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); + } + + void report_tt( STT const& stt ) + { + kitty::dynamic_truth_table tt( num_vars ); + + std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) ); + kitty::print_hex( tt ); + std::cout << "\n"; + } 
+ +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + uint32_t best_multiplicity0{ UINT32_MAX }; + uint32_t best_multiplicity1{ UINT32_MAX }; + uint32_t rm_support_size{ UINT32_MAX }; + uint32_t num_luts{ 0 }; + + STT start_tt; + STT best_tt; + STT remainder; + + uint64_t dec_funcs[3]; + uint32_t supports[3][6]; + uint32_t support_sizes[3] = { UINT32_MAX, UINT32_MAX, UINT32_MAX }; + uint32_t rm_support[15]; + uint32_t shared_vars[2]; + + uint32_t const num_vars; + bool const verify; + std::array permutations; +}; + +} // namespace acd + +ABC_NAMESPACE_CXX_HEADER_END + +#endif // _ACD666_H_ \ No newline at end of file diff --git a/src/map/if/ifDec66.c b/src/map/if/ifDec66.c index 3a836363d..5c2dce53b 100644 --- a/src/map/if/ifDec66.c +++ b/src/map/if/ifDec66.c @@ -276,6 +276,31 @@ int If_CluCheck66( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) return G1.nVars; } +// returns if successful +int If_CluCheck666( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) +{ + If_Grp_t G1 = {0}; + unsigned * pHashed = NULL; + + if ( p && fHashing ) + { + pHashed = If_CluHashLookup2( p, pTruth0, 0 ); + if ( pHashed && *pHashed != CLU_UNUSED ) + If_CluUns2Grp2( *pHashed, &G1 ); + } + + /* new entry */ + if ( G1.nVars == 0 ) + { + G1.nVars = acd666_evaluate( pTruth0, nVars, 0 ); + } + + if ( pHashed ) + *pHashed = If_CluGrp2Uns2( &G1 ); + + return G1.nVars; +} + /**Function************************************************************* Synopsis [Performs ACD into 66 cascade.] 
@@ -302,7 +327,7 @@ int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeav // quit if parameters are wrong Length = strlen(pStr); - if ( Length != 2 ) + if ( Length != 2 && Length != 3 ) { printf( "Wrong LUT struct (%s)\n", pStr ); return 0; @@ -316,7 +341,7 @@ int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeav } } - if ( nLeaves > 11 ) + if ( ( Length == 2 && nLeaves > 11 ) || ( Length == 3 && nLeaves > 16 ) ) { printf( "The cut size (%d) is too large for the LUT structure %s.\n", nLeaves, pStr ); return 0; @@ -327,7 +352,10 @@ int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeav return 1; // derive the decomposition - return If_CluCheck66(p, (word*)pTruth, nVars, 1); + if ( Length == 2 ) + return If_CluCheck66(p, (word*)pTruth, nVars, 1); + else + return If_CluCheck666(p, (word*)pTruth, nVars, 1); } //////////////////////////////////////////////////////////////////////// From d3f140f1dfca176e7d79777217bef4dd25210be4 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 27 Feb 2024 17:36:24 +0100 Subject: [PATCH 12/19] Performance improvements --- src/map/if/acd/ac_decomposition.hpp | 86 +++++++++++++++++++++++++++-- src/map/if/acd/acd66.hpp | 3 +- src/map/if/acd/acd666.hpp | 5 +- 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 8e1fc31b5..3c72b792f 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -366,6 +366,40 @@ private: return multiplicity; } + uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == 2 ) + return 3; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) { uint32_t i; @@ -401,7 +435,7 @@ private: STT tt = best_tt; /* TT with best cost */ - STT best_tt = tt; + STT local_best_tt = tt; uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1; assert( free_set_size >= offset ); @@ -416,6 +450,12 @@ private: /* works up to 16 input truth tables */ assert( num_vars <= 16 ); + /* Search for column multiplicity of 2 */ + if ( free_set_size == ps.lut_size - 1 ) + { + return enumerate_iset_combinations2( free_set_size, offset ); + } + /* init combinations */ uint32_t pComb[16], pInvPerm[16], bestPerm[16]; for ( uint32_t i = 0; i < num_vars; ++i ) @@ -429,7 +469,7 @@ private: uint32_t cost = fn( tt ); if ( cost < best_cost ) { - best_tt = tt; + local_best_tt = tt; best_cost = cost; for ( uint32_t i = 0; i < num_vars; ++i ) { @@ -442,7 +482,7 @@ private: if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) ) { - return std::make_tuple( best_tt, res_perm, UINT32_MAX ); + return std::make_tuple( local_best_tt, res_perm, UINT32_MAX ); } for ( uint32_t i = 0; i < num_vars; ++i ) @@ -450,7 +490,45 @@ private: res_perm[i] = permutations[bestPerm[i]]; } - return std::make_tuple( best_tt, res_perm, best_cost ); + return std::make_tuple( local_best_tt, res_perm, best_cost ); + } + + inline 
std::tuple, uint32_t> enumerate_iset_combinations2( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* TT with best cost */ + STT local_best_tt = tt; + uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1; + + assert( free_set_size >= offset ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + std::array res_perm; + + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size ); + if ( cost <= 2 ) + { + local_best_tt = tt; + best_cost = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + res_perm[i] = permutations[pComb[i]]; + } + return std::make_tuple( local_best_tt, res_perm, best_cost ); + } + } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return std::make_tuple( local_best_tt, res_perm, UINT32_MAX ); } std::vector compute_isets( bool verbose = false ) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 807181a7a..52de4581b 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -162,6 +162,7 @@ private: uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; uint64_t const shift = UINT64_C( 1 ) << free_set_size; uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const limit = free_set_size < 5 ? 4 : 2; uint32_t cofactors[4]; uint32_t size = 0; @@ -178,7 +179,7 @@ private: if ( fs_fn == cofactors[k] ) break; } - if ( k == 4 ) + if ( k == limit ) return 5; if ( k == size ) cofactors[size++] = fs_fn; diff --git a/src/map/if/acd/acd666.hpp b/src/map/if/acd/acd666.hpp index a1287f72c..72ae68ec4 100644 --- a/src/map/if/acd/acd666.hpp +++ b/src/map/if/acd/acd666.hpp @@ -101,7 +101,7 @@ public: uint32_t num_edges = support_sizes[0] + support_sizes[1] + 1 + ( shared_vars[0] < UINT32_MAX ? 
1 : 0 ); - if ( num_luts = 2 ) + if ( num_luts == 2 ) return num_edges; /* real value after support minimization */ @@ -203,6 +203,7 @@ private: uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; uint64_t const shift = UINT64_C( 1 ) << free_set_size; uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const limit = free_set_size < 5 ? 4 : 2; uint32_t cofactors[4]; uint32_t size = 0; @@ -219,7 +220,7 @@ private: if ( fs_fn == cofactors[k] ) break; } - if ( k == 4 ) + if ( k == limit ) return 5; if ( k == size ) cofactors[size++] = fs_fn; From 44a65c23ed1604f4605a227b97ee6a14b08ed2bb Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 27 Feb 2024 17:47:43 +0100 Subject: [PATCH 13/19] Adding relaxation on the maximum free set constraint --- src/map/if/acd/ac_decomposition.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 3c72b792f..7cd6d9613 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -111,6 +111,12 @@ public: uint32_t late_arriving = __builtin_popcount( delay_profile ); + /* relax maximum number of free set variables if a function has more variables */ + if ( num_vars > ps.max_free_set_vars + ps.lut_size ) + { + ps.max_free_set_vars = num_vars - ps.lut_size; + } + /* return a high cost if too many late arriving variables */ if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars ) { @@ -1444,7 +1450,7 @@ private: std::vector> support_minimization_encodings; uint32_t num_vars; - ac_decomposition_params const& ps; + ac_decomposition_params ps; ac_decomposition_stats* pst; std::array permutations; }; From 75abcd376beb39ee163b5db9ec9d87802a4ead33 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 28 Feb 2024 09:51:32 +0100 Subject: [PATCH 14/19] Adding bindings to use ACD66 instead of generic ACD --- src/map/if/acd/ac_wrapper.cpp | 96 +++++++++++++------ 
src/map/if/acd/acd66.hpp | 169 ++++++++++++++++++++++++++++++++-- 2 files changed, 229 insertions(+), 36 deletions(-) diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index be1e8783a..7786f0e72 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -23,52 +23,92 @@ ABC_NAMESPACE_IMPL_START +static constexpr bool use_generic_acd = true; + int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace acd; - ac_decomposition_params ps; - ps.lut_size = lutSize; - ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ - ac_decomposition_stats st; - - ac_decomposition_impl acd( nVars, ps, &st ); - int val = acd.run( pTruth, *pdelay ); - - if ( val < 0 ) + if ( use_generic_acd ) { - *pdelay = 0; - return -1; + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + *cost = 2; + + return val; } + else + { + acd66_impl acd( nVars ); + int val = acd.run( pTruth, *pdelay ); - *pdelay = acd.get_profile(); - *cost = st.num_luts; + if ( val == 0 ) + { + *pdelay = 0; + return -1; + } - return val; + *pdelay = acd.get_profile(); + *cost = 2; + + return val; + } } int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) { using namespace acd; - ac_decomposition_params ps; - ps.lut_size = lutSize; - ac_decomposition_stats st; - - ac_decomposition_impl acd( nVars, ps, &st ); - acd.run( pTruth, *pdelay ); - int val = acd.compute_decomposition(); - - if ( val < 0 ) + if ( use_generic_acd ) { - *pdelay = 0; - return -1; + ac_decomposition_params ps; + ps.lut_size = lutSize; + 
ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + acd.run( pTruth, *pdelay ); + int val = acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + acd.get_decomposition( decomposition ); + return 0; } + else + { + acd66_impl acd( nVars ); + acd.run( pTruth, *pdelay ); + int val = acd.compute_decomposition(); - *pdelay = acd.get_profile(); + if ( val != 0 ) + { + *pdelay = 0; + return -1; + } - acd.get_decomposition( decomposition ); - return 0; + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; + } } int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 52de4581b..92caf3b57 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -76,6 +76,34 @@ public: return find_decomposition() ? 1 : 0; } + /*! \brief Runs ACD 66 */ + int run( word* ptt, unsigned delay_profile ) + { + assert( num_vars > 6 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars > 11 ) + { + return false; + } + + uint32_t late_arriving = __builtin_popcount( delay_profile ); + + /* too many late arriving variables */ + if ( late_arriving > 5 ) + return 0; + + /* convert to static TT */ + init_truth_table( ptt ); + best_tt = start_tt; + + /* permute late arriving variables to be the least significant */ + reposition_late_arriving_variables( delay_profile, late_arriving ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition_offset( late_arriving ) ? ( delay_profile == 0 ? 2 : 1 ) : 0; + } + int compute_decomposition() { if ( best_multiplicity == UINT32_MAX ) @@ -102,17 +130,17 @@ public: return bs_support_size + best_free_set + 1 + ( best_multiplicity > 2 ? 
1 : 0 ); } - /* contains a 1 for BS variables */ + /* contains a 1 for FS variables */ unsigned get_profile() { unsigned profile = 0; - if ( bs_support_size == UINT32_MAX ) + if ( best_multiplicity == UINT32_MAX ) return -1; - for ( uint32_t i = 0; i < bs_support_size; ++i ) + for ( uint32_t i = 0; i < best_free_set; ++i ) { - profile |= 1 << permutations[best_free_set + bs_support[i]]; + profile |= 1 << permutations[i]; } return profile; @@ -143,6 +171,22 @@ private: return false; } + bool find_decomposition_offset( uint32_t offset ) + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = std::max( num_vars - 6, offset ); i <= 5; ++i ) + { + if ( find_decomposition_bs_offset( i, offset ) ) + return true; + } + + best_multiplicity = UINT32_MAX; + return false; + } + void init_truth_table( word* ptt ) { uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); @@ -190,13 +234,13 @@ private: return size; } - inline bool combinations_next( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + inline bool combinations_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) { uint32_t i; for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) { - if ( i == 0 ) + if ( i == offset ) return false; } @@ -254,7 +298,7 @@ private: best_multiplicity = cost; int res = check_shared_set( tt ); - if ( res > 0 ) + if ( res >= 0 ) { best_tt = tt; for ( uint32_t i = 0; i < num_vars; ++i ) @@ -267,7 +311,96 @@ private: return true; } } - } while ( combinations_next( free_set_size, pComb, pInvPerm, tt ) ); + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_offset( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + best_free_set = free_set_size; + + /* special case */ + 
if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + else if ( cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); return false; } @@ -552,6 +685,26 @@ private: } } + inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 
) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + swap_inplace_local( best_tt, i, k ); + ++k; + } + } + template void local_extend_to( TT_type& tt, uint32_t real_num_vars ) { From 48b5f3b399a51b96bc379ea18052374e64e5fec2 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 29 Feb 2024 17:15:29 +0100 Subject: [PATCH 15/19] ACD66 performance improvements by avoiding unnecessary computation --- src/map/if/acd/acd66.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 92caf3b57..7d7a14faa 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -160,8 +160,10 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; + uint32_t max_free_set = num_vars == 11 ? 5 : 4; + /* find ACD "66" for different number of variables in the free set */ - for ( uint32_t i = num_vars - 6; i <= 5; ++i ) + for ( uint32_t i = num_vars - 6; i <= max_free_set; ++i ) { if ( find_decomposition_bs( i ) ) return true; @@ -176,8 +178,10 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; + uint32_t max_free_set = ( num_vars == 11 || offset == 5 ) ? 
5 : 4; + /* find ACD "66" for different number of variables in the free set */ - for ( uint32_t i = std::max( num_vars - 6, offset ); i <= 5; ++i ) + for ( uint32_t i = std::max( num_vars - 6, offset ); i <= max_free_set; ++i ) { if ( find_decomposition_bs_offset( i, offset ) ) return true; From fa8a2777659aa8c229b451ff494b5b639d73ec14 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 29 Feb 2024 17:16:49 +0100 Subject: [PATCH 16/19] Changing search space exploration of ACD to search for better implementation and prune unnecessary computations based on theoretical properties --- src/map/if/acd/ac_decomposition.hpp | 18 +++++++++++++----- src/map/if/acd/ac_wrapper.cpp | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 7cd6d9613..d573da2cd 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -58,7 +58,7 @@ struct ac_decomposition_params bool support_reducing_only{ true }; /*! \brief Use the first feasible decomposition found. */ - bool use_first{ true }; + bool use_first{ false }; /*! \brief If decomposition with delay profile fails, try without. 
*/ bool try_no_late_arrival{ false }; @@ -116,6 +116,10 @@ public: { ps.max_free_set_vars = num_vars - ps.lut_size; } + if ( late_arriving > ps.max_free_set_vars ) + { + ps.max_free_set_vars = late_arriving; + } /* return a high cost if too many late arriving variables */ if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars ) @@ -227,11 +231,13 @@ private: best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.use_first ) + if ( !ps.use_first ) { - break; + continue; } } + + break; } if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) @@ -263,11 +269,13 @@ private: best_cost = multiplicity + additional_cost; best_free_set = i; - if ( ps.use_first ) + if ( !ps.use_first ) { - break; + continue; } } + + break; } } diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 7786f0e72..7551335f8 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -46,7 +46,7 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, } *pdelay = acd.get_profile(); - *cost = 2; + *cost = st.num_luts; return val; } From 9bec2afd601082757c87229beb26ed320922f5ce Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 1 Mar 2024 10:04:48 +0100 Subject: [PATCH 17/19] Removing -z flag to execute delay-driven ACD --- src/base/abci/abc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index f79e8de77..a14a3a8bc 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF ) { switch 
( c ) { @@ -19570,6 +19570,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) goto usage; } pPars->nLutDecSize = atoi(argv[globalUtilOptind]); + pPars->fUserLutDec = 1; globalUtilOptind++; if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 ) goto usage; @@ -19678,9 +19679,6 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'y': pPars->fUserRecLib ^= 1; break; - case 'z': - pPars->fUserLutDec ^= 1; - break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -20008,7 +20006,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -20020,7 +20018,7 @@ usage: Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" ); Abc_Print( -2, "\t-X num : delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay ); Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea ); - Abc_Print( -2, "\t-Z num : the number of LUT inputs for LUT decomposition [default = %d]\n", pPars->nLutDecSize ); + Abc_Print( -2, "\t-Z num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" ); Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", Buffer ); Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); Abc_Print( -2, "\t-W float : sets wire delay between adjects LUTs [default = %f]\n", pPars->WireDelay ); @@ -20039,7 +20037,6 @@ usage: 
Abc_Print( -2, "\t-g : toggles delay optimization by SOP balancing [default = %s]\n", pPars->fDelayOpt? "yes": "no" ); Abc_Print( -2, "\t-x : toggles delay optimization by DSD balancing [default = %s]\n", pPars->fDsdBalance? "yes": "no" ); Abc_Print( -2, "\t-y : toggles delay optimization with recorded library [default = %s]\n", pPars->fUserRecLib? "yes": "no" ); - Abc_Print( -2, "\t-z : toggles delay optimization with LUT decomposition [default = %s]\n", pPars->fUserLutDec? "yes": "no" ); Abc_Print( -2, "\t-u : toggles delay optimization with SAT-based library [default = %s]\n", pPars->fUserSesLib? "yes": "no" ); Abc_Print( -2, "\t-o : toggles using buffers to decouple combinational outputs [default = %s]\n", pPars->fUseBuffs? "yes": "no" ); Abc_Print( -2, "\t-j : toggles enabling additional check [default = %s]\n", pPars->fEnableCheck07? "yes": "no" ); From cd407e2ba3bc0455501e4586a21a1acba1f0a3d6 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 1 Mar 2024 10:05:30 +0100 Subject: [PATCH 18/19] Activate use_first flag in acd_decompose --- src/map/if/acd/ac_wrapper.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 7551335f8..e47847f10 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -33,7 +33,8 @@ int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, { ac_decomposition_params ps; ps.lut_size = lutSize; - ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ + ps.use_first = false; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); ac_decomposition_stats st; ac_decomposition_impl acd( nVars, ps, &st ); @@ -76,6 +77,7 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, { ac_decomposition_params ps; ps.lut_size = lutSize; + ps.use_first = true; ac_decomposition_stats st; ac_decomposition_impl acd( nVars, ps, &st ); From 
3737a69d8d64cdbfafbfc0727bd291343d3ec25f Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 18 Mar 2024 10:01:59 +0100 Subject: [PATCH 19/19] Adding new ACD66 with support for multiple shared-set variables --- src/map/if/acd/ac_wrapper.cpp | 4 +- src/map/if/acd/acd66.hpp | 488 +++++++++++++++++++++++++++++++--- 2 files changed, 457 insertions(+), 35 deletions(-) diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index e47847f10..6ee265318 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -117,7 +117,7 @@ int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) { using namespace acd; - acd66_impl acd( nVars, false ); + acd66_impl acd( nVars, true, false ); if ( acd.run( pTruth ) == 0 ) return 0; @@ -138,7 +138,7 @@ int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition { using namespace acd; - acd66_impl acd( nVars, false ); + acd66_impl acd( nVars, true, false ); acd.run( pTruth ); int val = acd.compute_decomposition(); diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 7d7a14faa..5891f7b90 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -52,14 +52,14 @@ private: using LTT = kitty::static_truth_table<6>; public: - explicit acd66_impl( uint32_t const num_vars, bool const verify = false ) - : num_vars( num_vars ), verify( verify ) + explicit acd66_impl( uint32_t const num_vars, bool multiple_shared_set = false, bool const verify = false ) + : num_vars( num_vars ), multiple_ss( multiple_shared_set ), verify( verify ) { std::iota( permutations.begin(), permutations.end(), 0 ); } /*! \brief Runs ACD 66 */ - int run( word* ptt ) + bool run( word* ptt ) { assert( num_vars > 6 ); @@ -73,7 +73,7 @@ public: init_truth_table( ptt ); /* run ACD trying different bound sets and free sets */ - return find_decomposition() ? 1 : 0; + return find_decomposition(); } /*! 
\brief Runs ACD 66 */ @@ -123,11 +123,11 @@ public: { if ( bs_support_size == UINT32_MAX ) { - return num_vars + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + return num_vars + 1 + num_shared_vars; } /* real value after support minimization */ - return bs_support_size + best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); + return bs_support_size + best_free_set + 1 + num_shared_vars; } /* contains a 1 for FS variables */ @@ -160,6 +160,12 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; + /* use multiple shared set variables */ + if ( multiple_ss ) + { + return find_decomposition_bs_multi_ss( num_vars - 6 ); + } + uint32_t max_free_set = num_vars == 11 ? 5 : 4; /* find ACD "66" for different number of variables in the free set */ @@ -178,6 +184,12 @@ private: best_multiplicity = UINT32_MAX; best_free_set = UINT32_MAX; + /* use multiple shared set variables */ + if ( multiple_ss ) + { + return find_decomposition_bs_offset_multi_ss( std::max( num_vars - 6, offset ), offset ); + } + uint32_t max_free_set = ( num_vars == 11 || offset == 5 ) ? 5 : 4; /* find ACD "66" for different number of variables in the free set */ @@ -238,6 +250,40 @@ private: return size; } + uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size, uint32_t const limit ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[32]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast<uint32_t>( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return limit + 1; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + inline bool combinations_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) { uint32_t i; @@ -267,6 +313,33 @@ private: return true; } + inline bool combinations_next_simple( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, uint32_t size ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == size - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + } + + return true; + } + bool find_decomposition_bs( uint32_t free_set_size ) { STT tt = start_tt; @@ -312,6 +385,7 @@ private: /* move shared variable as the most significative one */ swap_inplace_local( best_tt, res, num_vars - 1 ); std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; return true; } } @@ -350,6 +424,7 @@ private: /* move shared variable as the most significative one */ swap_inplace_local( best_tt, res, num_vars - 1 ); std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; return true; } } @@ 
-401,6 +476,179 @@ private: /* move shared variable as the most significative one */ swap_inplace_local( best_tt, res, num_vars - 1 ); std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_multi_ss( uint32_t free_set_size ) + { + STT tt = start_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16], shared_set[4]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost <= 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 
4 + : 5; + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_offset_multi_ss( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + best_free_set = free_set_size; + uint32_t shared_set[4]; + + /* special case */ + if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 
4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 
4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; return true; } } @@ -462,9 +710,87 @@ private: return -1; } + bool check_shared_var_combined( STT const& tt, uint32_t free_set_size, uint32_t shared_vars[6], uint32_t num_shared_vars ) + { + assert( free_set_size <= 5 ); + assert( num_shared_vars <= 4 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t cofactors[16][2]; + uint32_t size[16] = { 0 }; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint32_t fs_fn = static_cast<uint32_t>( sub & mask ); + uint32_t p = 0; + for ( uint32_t k = 0; k < num_shared_vars; ++k ) + { + p += ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k; + } + + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + inline int check_shared_set_multi( STT const& tt, uint32_t target_num_ss, uint32_t* res_shared ) + { + /* init combinations */ + uint32_t pComb[6], pInvPerm[6]; + + /* search for a feasible shared set */ + for ( uint32_t i = target_num_ss; i < 6 - best_free_set; ++i ) + { + for ( uint32_t v = 0; v < 6; ++v ) + { + pComb[v] = pInvPerm[v] = v; + } + + do + { + /* check for combined shared set */ + if ( check_shared_var_combined( tt, best_free_set, pComb, i ) ) + { + for ( uint32_t j = 0; j < i; ++j ) + { + res_shared[j] = pComb[j]; + } + /* sort vars */ + std::sort( res_shared, res_shared + i ); + return i; + } + } while ( combinations_next_simple( i, pComb, pInvPerm, num_vars - best_free_set ) ); + } + + return -1; + } + void compute_decomposition_impl( bool verbose = false ) { - bool has_shared_set = best_multiplicity > 2; + if ( num_shared_vars > 1 ) + return compute_decomposition_impl_multi_ss( verbose ); + + bool has_shared_set = num_shared_vars > 0; /* construct isets involved in multiplicity */ LTT isets0[2]; @@ -545,6 +871,98 @@ private: } } + void compute_decomposition_impl_multi_ss( bool verbose = false ) + { + /* due to 
the high multiplicity value this method does not perform support minimization */ + + /* construct isets involved in multiplicity */ + LTT composition; + LTT bs; + + /* construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const num_groups = 1 << num_shared_vars; + uint32_t const next_group = 1 << ( num_vars - best_free_set - num_shared_vars ); + + uint64_t fs_fun[32] = { 0 }; + + uint32_t group_index = 0; + uint32_t set_index = 0; + fs_fun[0] = best_tt._bits[0] & mask; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = cof & mask; + /* move to next block */ + if ( set_index == next_group ) + { + group_index += 2; + set_index = 0; + fs_fun[group_index] = val; + } + /* gather encoding */ + if ( val != fs_fun[group_index] ) + { + bs._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[group_index + 1] = val; + } + cof >>= shift; + ++set_index; + } + offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; + } + + /* create composition function */ + for ( uint32_t i = 0; i < 2 * num_groups; ++i ) + { + composition._bits |= fs_fun[i] << ( i * shift ); + } + + /* minimize support BS */ + LTT care; + bs_support_size = 0; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + care._bits = masks[num_vars - best_free_set]; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( bs, care, i ) ) + { + continue; + } + + if ( bs_support_size < i ) + { + kitty::swap_inplace( bs, bs_support_size, i ); + } + + bs_support[bs_support_size] = i; + ++bs_support_size; + } + + /* assign functions */ + dec_funcs[0] = bs._bits; + dec_funcs[1] = composition._bits; + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + 
std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) { /* u = 2 no support minimization */ @@ -689,26 +1107,6 @@ private: } } - inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) - { - uint32_t k = 0; - for ( uint32_t i = 0; i < late_arriving; ++i ) - { - while ( ( ( delay_profile >> k ) & 1 ) == 0 ) - ++k; - - if ( permutations[i] == k ) - { - ++k; - continue; - } - - std::swap( permutations[i], permutations[k] ); - swap_inplace_local( best_tt, i, k ); - ++k; - } - } - template void local_extend_to( TT_type& tt, uint32_t real_num_vars ) { @@ -734,6 +1132,26 @@ private: } } + inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + swap_inplace_local( best_tt, i, k ); + ++k; + } + } + void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) { if ( var_index1 == var_index2 ) @@ -860,13 +1278,13 @@ private: /* write top LUT */ /* write fanin size */ - uint32_t support_size = best_free_set + 1 + ( best_multiplicity > 2 ? 
1 : 0 ); + uint32_t support_size = best_free_set + 1 + num_shared_vars; *pArray = support_size; pArray++; ++bytes; /* write support */ for ( uint32_t i = 0; i < best_free_set; ++i ) { *pArray = (unsigned char)permutations[i]; pArray++; @@ -877,9 +1295,9 @@ private: pArray++; ++bytes; - if ( best_multiplicity > 2 ) + for ( uint32_t i = 0; i < num_shared_vars; ++i ) { - *pArray = (unsigned char)permutations[num_vars - 1]; + *pArray = (unsigned char)permutations[num_vars - num_shared_vars + i]; pArray++; ++bytes; } @@ -938,9 +1356,11 @@ private: pattern |= get_bit( pis[j], i ) << j; } pattern |= get_bit( bsf_sim, i ) << best_free_set; - if ( best_multiplicity > 2 ) + + /* shared variables */ + for ( auto j = 0u; j < num_shared_vars; ++j ) { - pattern |= get_bit( pis[num_vars - 1], i ) << ( best_free_set + 1 ); + pattern |= get_bit( pis[num_vars - num_shared_vars + j], i ) << ( best_free_set + 1 + j ); } if ( ( dec_funcs[1] >> pattern ) & 1 ) @@ -991,12 +1411,14 @@ private: uint32_t best_multiplicity0{ UINT32_MAX }; uint32_t best_multiplicity1{ UINT32_MAX }; uint32_t bs_support_size{ UINT32_MAX }; + uint32_t num_shared_vars{ 0 }; STT best_tt; STT start_tt; uint64_t dec_funcs[2]; uint32_t bs_support[6]; uint32_t const num_vars; + bool const multiple_ss; bool const verify; std::array permutations; };