diff --git a/Makefile b/Makefile index 3976cf7b1..0cc979b75 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ MODULES := \ $(wildcard src/ext*) \ src/base/abc src/base/abci src/base/cmd src/base/io src/base/main src/base/exor \ src/base/ver src/base/wlc src/base/wln src/base/acb src/base/bac src/base/cba src/base/pla src/base/test \ - src/map/mapper src/map/mio src/map/super src/map/if \ + src/map/mapper src/map/mio src/map/super src/map/if src/map/if/acd \ src/map/amap src/map/cov src/map/scl src/map/mpm \ src/misc/extra src/misc/mvc src/misc/st src/misc/util src/misc/nm \ src/misc/vec src/misc/hash src/misc/tim src/misc/bzlib src/misc/zlib \ @@ -151,7 +151,7 @@ ifdef ABC_USE_LIBSTDCXX endif $(info $(MSG_PREFIX)Using CFLAGS=$(CFLAGS)) -CXXFLAGS += $(CFLAGS) +CXXFLAGS += $(CFLAGS) -std=c++17 SRC := GARBAGE := core core.* *.stackdump ./tags $(PROG) arch_flags diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index c8e2b1ef8..89785887d 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) { switch ( c ) { @@ -19563,6 +19563,17 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) if ( pPars->nAndDelay < 0 ) goto usage; break; + case 'Z': + if ( globalUtilOptind >= argc ) + { + Abc_Print( -1, "Command line switch \"-Z\" should be followed by a positive integer 3, 4, 5, or 6.\n" ); + goto usage; + } + pPars->nLutDecSize = atoi(argv[globalUtilOptind]); + globalUtilOptind++; + if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 ) + goto usage; + break; case 'D': if ( globalUtilOptind >= argc ) { @@ -19654,7 +19665,7 @@ int Abc_CommandIf( 
Abc_Frame_t * pAbc, int argc, char ** argv ) break; case 'z': pPars->fUserLutDec ^= 1; - break; + break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -19794,6 +19805,25 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->fCutMin = 1; } + if ( pPars->fUserLutDec ) + { + if ( pPars->nLutDecSize == 0 ) + { + Abc_Print( -1, "LUT decomposition size (%d) must be set.\n", pPars->nLutDecSize ); + return 1; + } + if ( pPars->nLutDecSize >= pPars->nLutSize ) + { + Abc_Print( -1, "LUT size (%d) must be greater than the LUT decomposition size (%d).\n", pPars->nLutSize, pPars->nLutDecSize ); + return 1; + } + if ( pPars->nLutSize < 4 || pPars->nLutSize > 10 ) + { + Abc_Print( -1, "This feature only works for [4;10]-LUTs.\n" ); + return 1; + } + } + // enable truth table computation if cut minimization is selected if ( pPars->fCutMin ) { @@ -19956,7 +19986,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -19968,6 +19998,7 @@ usage: Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" ); Abc_Print( -2, "\t-X num : delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay ); Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea ); + Abc_Print( -2, "\t-Z num : the number of LUT inputs for LUT decomposition [default = %d]\n", pPars->nLutDecSize ); Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", 
Buffer ); Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); Abc_Print( -2, "\t-W float : sets wire delay between adjects LUTs [default = %f]\n", pPars->WireDelay ); diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index e92a2282e..357d7d83f 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -427,28 +427,117 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC } /**Function************************************************************* - - Synopsis [Implements decomposed LUT-structure of the cut.] - - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover ) -{ - // get the truth table - // perform LUT-decomposition and return the LUT-structure - // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t - - // this is a placeholder, which takes the truth table and converts it into an AIG without LUT-decomposition + Synopsis [Implements decomposed LUT-structure of the cut.] 
+  Description [A cut with at most nLutDecSize leaves becomes the single node pNodeTop. A larger cut is decomposed by acd_decompose() and rebuilt as several nodes, the last of which is pNodeTop.]
+
+  SideEffects [Adds fanins to pNodeTop, sets its Level and pData, and may create new nodes in pNtkNew.]
+  SeeAlso []
+ ***********************************************************************/
+ void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop )
+ {
     extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory );
-    word * pTruth = If_CutTruthW(pIfMan, pCutBest);
     assert( !pIfMan->pPars->fUseTtPerm );
-    return Kit_TruthToHop( (Hop_Man_t *)pMan, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover );
-}
+
+    // get the truth table
+    word * pTruth = If_CutTruthW(pIfMan, pCutBest);
+    int v;
+    If_Obj_t * pIfLeaf;
+
+    if ( pCutBest->nLeaves <= pIfMan->pPars->nLutDecSize ) /* cut already fits one LUT of the decomposition size */
+    {
+        /* add fanins */
+        If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, v )
+            Abc_ObjAddFanin( pNodeTop, (Abc_Obj_t *)If_ObjCopy( pIfLeaf ) );
+
+        pNodeTop->Level = Abc_ObjLevelNew( pNodeTop );
+
+        pNodeTop->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover );
+        return;
+    }
+
+    // get the delay profile
+    unsigned delayProfile = pCutBest->decDelay;
+
+    // perform LUT-decomposition and return the LUT-structure
+    unsigned char decompArray[92]; /* [0] = number of bytes used, [1] = number of LUTs, then one record per LUT */
+    int val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray );
+    assert( val == 0 );
+
+    // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t
+    unsigned char bytes_check = decompArray[0];
+    assert( bytes_check <= 92 );
+
+    int byte_p = 2; /* read cursor: the first two bytes are the header */
+    unsigned char i, j, k, num_fanins, num_words, num_bytes;
+    int level, fanin;
+    word *tt;
+    Abc_Obj_t *pNewNodes[6]; /* one slot per LUT; sized to match the bound asserted on decompArray[1] below */
+
+    /* create intermediate LUTs*/
+    assert( decompArray[1] <= 6 );
+    Abc_Obj_t * pFanin;
+    for ( i = 0; i < decompArray[1]; ++i )
+    {
+        if ( i < decompArray[1] - 1 )
+        {
+            pNewNodes[i] = Abc_NtkCreateNode( pNtkNew );
+        }
+        else /* the last LUT of the structure reuses the node supplied by the caller */
+        {
+            pNewNodes[i] = pNodeTop;
+        }
+        num_fanins = decompArray[byte_p++]; /* record layout: fanin count, fanin indices, truth-table bytes */
+        level = 0;
+        for ( j = 0; j < num_fanins; ++j )
+        {
+            fanin = (int)decompArray[byte_p++];
+            if ( fanin < If_CutLeaveNum(pCutBest) ) /* index refers to a leaf of the cut */
+            {
+                pFanin = (Abc_Obj_t *)If_ObjCopy( If_CutLeaf(pIfMan, pCutBest, fanin) );
+            }
+            else /* index refers to a LUT created earlier in this loop */
+            {
+                assert( fanin - If_CutLeaveNum(pCutBest) < i );
+                pFanin = pNewNodes[fanin - If_CutLeaveNum(pCutBest)];
+            }
+            Abc_ObjAddFanin( pNewNodes[i], pFanin );
+            level = Abc_MaxInt( level, Abc_ObjLevel(pFanin) );
+        }
+
+        pNewNodes[i]->Level = level + (int)(Abc_ObjFaninNum(pNewNodes[i]) > 0);
+
+        /* extract the truth table */
+        tt = pIfMan->puTempW;
+        num_words = ( num_fanins <= 6 ) ? 1 : ( 1 << ( num_fanins - 6 ) );
+        num_bytes = ( num_fanins <= 3 ) ? 1 : ( 1 << ( Abc_MinInt( (int)num_fanins, 6 ) - 3 ) );
+        for ( j = 0; j < num_words; ++j )
+        {
+            tt[j] = 0;
+            for ( k = 0; k < num_bytes; ++k ) /* bytes are stored least-significant first */
+            {
+                tt[j] |= ( (word)(decompArray[byte_p++]) ) << ( k << 3 );
+            }
+        }
+
+        /* extend truth table if size < 5 */
+        assert( num_fanins != 1 );
+        if ( num_fanins == 2 ) /* 4-bit table: replicate to fill one byte */
+        {
+            tt[0] |= tt[0] << 4;
+        }
+        while ( num_bytes < 4 ) /* keep doubling until 32 bits are filled, since tt is read through an unsigned pointer below */
+        {
+            tt[0] |= tt[0] << ( num_bytes << 3 );
+            num_bytes <<= 1;
+        }
+
+        /* add node data */
+        pNewNodes[i]->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)tt, (int) num_fanins, vCover );
+    }
+
+    /* check correct read */
+    assert( byte_p == decompArray[0] );
+ }
 
 /**Function*************************************************************
 
@@ -488,13 +577,18 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t
     pNodeNew = Abc_NtkCreateNode( pNtkNew );
 //    if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays )
     if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm &&
-        !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->nGateSize )
+        !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib &&
!pIfMan->pPars->nGateSize ) If_CutRotatePins( pIfMan, pCutBest ); if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv ) { If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i ) Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) ); } + else if ( pIfMan->pPars->fUserLutDec ) + { + If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) + Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover); + } else { If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) @@ -550,8 +644,8 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t } else if ( pIfMan->pPars->fUserLutDec ) { - extern Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory ); - pNodeNew->pData = Abc_DecRecordToHop( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj, vCover ); + extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop ); + Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew ); } else { diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp new file mode 100644 index 000000000..8ba2fb20e --- /dev/null +++ b/src/map/if/acd/ac_decomposition.hpp @@ -0,0 +1,1306 @@ +/**C++File************************************************************** + + FileName [ac_decomposition.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] + +***********************************************************************/ +/*! 
+ \file ac_decomposition.hpp + \brief Ashenhurst-Curtis decomposition + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD_H_ +#define _ACD_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +namespace acd +{ + +/*! \brief Parameters for ac_decomposition */ +struct ac_decomposition_params +{ + /*! \brief LUT size for decomposition (3 < num < 7). */ + uint32_t lut_size{ 6 }; + + /*! \brief Maximum size of the free set (1 < num < 6). */ + uint32_t max_free_set_vars{ 4 }; + + /*! \brief Perform only support reducing (2-level) decompositions. */ + bool support_reducing_only{ true }; + + /*! \brief Use the first feasible decomposition found. */ + bool use_first{ true }; + + /*! \brief If decomposition with delay profile fails, try without. */ + bool try_no_late_arrival{ false }; +}; + +/*! \brief Statistics for ac_decomposition */ +struct ac_decomposition_stats +{ + uint32_t num_luts{ 0 }; + uint32_t num_edges{ 0 }; + uint32_t num_levels{ 0 }; +}; + +struct ac_decomposition_result +{ + kitty::dynamic_truth_table tt; + std::vector support; +}; + +class ac_decomposition_impl +{ +private: + struct encoding_column + { + uint64_t column[2]; + uint32_t cost; + uint32_t index; + float sort_cost; + }; + +private: + static constexpr uint32_t max_num_vars = 10; + using STT = kitty::static_truth_table; + +public: + explicit ac_decomposition_impl( uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr ) + : num_vars( num_vars ), ps( ps ), pst( pst ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! 
\brief Runs ACD using late arriving variables; returns -1 on failure, otherwise 2 if the delay profile is 0 and 1 if it is not */
+  int run( word* ptt, unsigned delay_profile )
+  {
+    /* truth table is too large for the settings */
+    if ( num_vars > max_num_vars )
+    {
+      return -1;
+    }
+
+    uint32_t late_arriving = __builtin_popcount( delay_profile );
+
+    /* fail if there are too many late arriving variables to fit the free set */
+    if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars )
+    {
+      return -1;
+    }
+
+    /* convert to static TT */
+    init_truth_table( ptt );
+
+    /* permute late arriving variables to be the least significant */
+    reposition_late_arriving_variables( delay_profile, late_arriving );
+
+    /* run ACD trying different bound sets and free sets */
+    if ( !find_decomposition( delay_profile, late_arriving ) ) /* delay_profile is passed by reference and may be modified */
+    {
+      return -1;
+    }
+
+    /* return number of levels */
+    return delay_profile == 0 ? 2 : 1;
+  }
+
+  int compute_decomposition() /* returns 0 on success, -1 when no decomposition is available */
+  {
+    if ( best_multiplicity == UINT32_MAX )
+      return -1;
+
+    /* compute isets */
+    std::vector isets = compute_isets();
+
+    generate_support_minimization_encodings();
+
+    /* solves exactly only for small multiplicities */
+    if ( best_multiplicity <= 4u )
+      solve_min_support_exact( isets );
+    else
+      solve_min_support_heuristic( isets );
+
+    /* unfeasible decomposition: the solvers leave best_bound_sets empty when they give up */
+    if ( best_bound_sets.empty() ) return -1; /* was assert-only, so NDEBUG builds reported success */
+
+    return 0;
+  }
+
+  unsigned get_profile() /* bit mask of the original variable indices placed in the free set */
+  {
+    unsigned profile = 0;
+
+    if ( best_free_set > num_vars )
+      return -1; /* converts to UINT_MAX; taken when best_free_set is out of range */
+
+    for ( uint32_t i = 0; i < best_free_set; ++i )
+    {
+      profile |= 1 << permutations[i];
+    }
+
+    return profile;
+  }
+
+  void get_decomposition( unsigned char* decompArray ) /* leaves decompArray untouched when best_free_set is out of range */
+  {
+    if ( best_free_set > num_vars )
+      return;
+
+    generate_decomposition();
+    get_decomposition_abc( decompArray );
+  }
+
+private:
+  bool find_decomposition( unsigned& delay_profile, uint32_t late_arriving )
+  {
+    best_multiplicity = UINT32_MAX;
+    best_free_set = UINT32_MAX;
+    uint32_t best_cost = UINT32_MAX;
+    uint32_t offset = static_cast( late_arriving );
+    uint32_t start = std::max( offset, 1u );
+ + /* perform only support reducing decomposition */ + if ( ps.support_reducing_only ) + { + start = std::max( start, num_vars - ps.lut_size ); + } + + /* array of functions to compute the column multiplicity */ + std::function column_multiplicity_fn[5] = { + [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); }, + [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } }; + + /* find a feasible AC decomposition */ + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) + { + auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, offset, column_multiplicity_fn[i - 1] ); + + /* additional cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; + + /* check for feasible solution that improves the cost */ + if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = multiplicity; + best_cost = multiplicity + additional_cost; + best_free_set = i; + + if ( ps.use_first ) + { + break; + } + } + } + + if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) ) + return false; + + /* try without the delay profile */ + if ( best_multiplicity == UINT32_MAX && ps.try_no_late_arrival ) + { + delay_profile = 0; + if ( ps.support_reducing_only ) + { + start = std::max( 1u, num_vars - ps.lut_size ); + } + + for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i ) + { + auto [tt_p, perm, multiplicity] = enumerate_iset_combinations_offset( i, 0, column_multiplicity_fn[i - 1] ); + + /* additional cost if not support reducing */ + uint32_t additional_cost = ( num_vars - i > 
ps.lut_size ) ? 128 : 0; + + /* check for feasible solution that improves the cost */ + if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = multiplicity; + best_cost = multiplicity + additional_cost; + best_free_set = i; + + if ( ps.use_first ) + { + break; + } + } + } + } + + if ( best_multiplicity == UINT32_MAX ) + return false; + + /* estimation on number of LUTs */ + if ( pst ) + { + pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3 + : best_multiplicity <= 8 ? 4 + : 5; + } + + return true; + } + + void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + best_tt._bits[i] = ptt[i]; + } + + local_extend_to( best_tt, num_vars ); + } + + template + uint32_t column_multiplicity( STT tt ) + { + uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; + uint32_t multiplicity = 0; + uint32_t const num_blocks = ( num_vars > 6 ) ? 
( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F }; + uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 }; + + /* supports up to 64 values of free set (256 for |FS| == 3)*/ + static_assert( free_set_size <= 3 ); + + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + multiplicity_set[( *it >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( *it & masks_bits[free_set_size] ); + *it >>= ( 1u << free_set_size ); + } + ++it; + } + + multiplicity = __builtin_popcountl( multiplicity_set[0] ); + + if constexpr ( free_set_size == 3 ) + { + multiplicity += __builtin_popcountl( multiplicity_set[1] ); + multiplicity += __builtin_popcountl( multiplicity_set[2] ); + multiplicity += __builtin_popcountl( multiplicity_set[3] ); + } + + return multiplicity; + } + + template + uint32_t column_multiplicity5( STT tt ) + { + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + + static_assert( free_set_size == 5 || free_set_size == 4 ); + + uint32_t size = 0; + uint64_t prev = -1; + std::array multiplicity_set; + + /* extract iset functions */ + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) + { + uint64_t fs_fn = *it & masks[free_set_size]; + if ( fs_fn != prev ) + { + multiplicity_set[size++] = static_cast( fs_fn ); + prev = fs_fn; + } + *it >>= ( 1u << free_set_size ); + } + ++it; + } + + std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size ); + + /* count unique */ + uint32_t multiplicity = 1; + for ( auto i = 1u; i < size; ++i ) + { + multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 
1 : 0; + } + + return multiplicity; + } + + inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) + { + if ( i == offset ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + kitty::swap_inplace( tt, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + kitty::swap_inplace( tt, j, pos_new ); + } + + return true; + } + + template + std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn ) + { + STT tt = best_tt; + + /* TT with best cost */ + STT best_tt = tt; + uint32_t best_cost = UINT32_MAX; + + assert( free_set_size >= offset ); + + /* special case */ + if ( free_set_size == offset ) + { + best_cost = fn( tt ); + return { tt, permutations, best_cost }; + } + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* init combinations */ + uint32_t pComb[16], pInvPerm[16], bestPerm[16]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + bestPerm[i] = pComb[i]; + } + } + } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + std::array res_perm; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + res_perm[i] = permutations[bestPerm[i]]; + } + + return std::make_tuple( best_tt, res_perm, best_cost ); + } + + std::vector compute_isets( bool verbose = false ) + { + /* construct isets involved 
in multiplicity */ + uint32_t isets_support = num_vars - best_free_set; + std::vector isets( best_multiplicity ); + + /* construct isets */ + std::unordered_map column_to_iset; + STT tt = best_tt; + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF }; + + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) + { + uint64_t val = *it & masks[best_free_set]; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= ( 1u << best_free_set ); + } + + offset = ( offset + ( 64 >> best_free_set ) ) % 64; + ++it; + } + + /* extend isets to cover the whole truth table */ + for ( STT& iset : isets ) + { + local_extend_to( iset, isets_support ); + } + + /* save free_set functions */ + std::vector free_set_tts( best_multiplicity ); + + for ( auto const& pair : column_to_iset ) + { + free_set_tts[pair.second]._bits[0] = pair.first; + local_extend_to( free_set_tts[pair.second], best_free_set ); + } + + /* print isets and free set*/ + if ( verbose ) + { + std::cout << "iSets\n"; + uint32_t i = 0; + for ( auto iset : isets ) + { + kitty::print_hex( iset ); + std::cout << " of func "; + kitty::print_hex( free_set_tts[i++] ); + std::cout << "\n"; + } + } + + best_free_set_tts = std::move( free_set_tts ); + + return isets; + } + + void generate_decomposition() + { + dec_result.clear(); + + uint32_t num_edges = 0; + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + ac_decomposition_result dec; + auto tt = best_bound_sets[i]; + auto care = best_care_sets[i]; + + /* compute and minimize support 
for bound set variables */ + uint32_t k = 0; + for ( uint32_t j = 0; j < num_vars - best_free_set; ++j ) + { + if ( !kitty::has_var( tt, j ) ) + continue; + + if ( !kitty::has_var( tt, care, j ) ) + { + /* fix truth table */ + adjust_truth_table_on_dc( tt, care, j ); + continue; + } + + if ( k < j ) + { + kitty::swap_inplace( tt, k, j ); + kitty::swap_inplace( care, k, j ); + } + dec.support.push_back( permutations[best_free_set + j] ); + ++k; + } + + dec.tt = kitty::shrink_to( tt, dec.support.size() ); + dec_result.push_back( dec ); + num_edges += dec.support.size() > 1 ? dec.support.size() : 0; + } + + /* compute the decomposition for the top-level LUT */ + compute_top_lut_decomposition(); + + if ( pst ) + { + pst->num_luts = dec_result.size(); + pst->num_edges = num_edges + dec_result.back().support.size(); + } + } + + void compute_top_lut_decomposition() + { + uint32_t top_vars = best_bound_sets.size() + best_free_set; + assert( top_vars <= ps.lut_size ); + + /* extend bound set functions with free_set_size LSB vars */ + kitty::dynamic_truth_table tt( top_vars ); + + /* compute support */ + dec_result.emplace_back(); + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + dec_result.back().support.push_back( permutations[i] ); + } + + /* create functions for bound set */ + std::vector bound_set_vars; + auto res_it = dec_result.begin(); + uint32_t offset = 0; + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + bound_set_vars.emplace_back( top_vars ); + kitty::create_nth_var( bound_set_vars[i], best_free_set + i ); + + /* add bound-set variables to the support, remove buffers (shared set) */ + if ( res_it->support.size() == 1 ) + { + dec_result.back().support.push_back( res_it->support.front() ); + /* it is a NOT */ + if ( ( res_it->tt._bits[0] & 1 ) == 1 ) + { + bound_set_vars[i] = ~bound_set_vars[i]; + } + dec_result.erase( res_it ); + ++offset; + } + else + { + dec_result.back().support.push_back( num_vars + i - offset ); + ++res_it; + } + } + + /* 
create composition function */ + for ( uint32_t i = 0; i < best_free_set_tts.size(); ++i ) + { + kitty::dynamic_truth_table free_set_tt = kitty::shrink_to( best_free_set_tts[i], top_vars ); + + /* find MUX assignments */ + for ( uint32_t j = 0; j < bound_set_vars.size(); ++j ) + { + /* AND with ONSET or OFFSET */ + if ( ( ( best_iset_onset[j] >> i ) & 1 ) ) + { + free_set_tt &= bound_set_vars[j]; + } + else if ( ( ( best_iset_offset[j] >> i ) & 1 ) ) + { + free_set_tt &= ~bound_set_vars[j]; + } + } + + tt |= free_set_tt; + } + + /* add top-level LUT to result */ + dec_result.back().tt = tt; + } + + inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + kitty::swap_inplace( best_tt, i, k ); + ++k; + } + } + + template + void print_perm( Iterator begin, uint32_t free_set ) + { + std::cout << "["; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + if ( i == free_set ) + { + std::cout << ", "; + } + std::cout << *begin << " "; + ++begin; + } + std::cout << "]\n"; + } + + void generate_support_minimization_encodings() + { + uint32_t count = 0; + + /* enable don't cares only if not a power of 2 */ + uint32_t num_combs = 2; + if ( __builtin_popcount( best_multiplicity ) == 1 ) + { + uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 }; + for ( uint32_t i = 0; i < 4; ++i ) + { + if ( ( best_multiplicity >> i ) == 2u ) + { + num_combs = num_combs_exact[i]; + } + } + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + else + { + /* combinations are 2*3^(mu - 1) */ + for ( uint32_t i = 1; i < best_multiplicity; ++i ) + { + num_combs = ( num_combs << 1 ) + num_combs; + } + support_minimization_encodings = std::vector>( 
num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + + assert( count == num_combs ); + } + + template + void generate_support_minimization_encodings_rec( uint32_t onset, uint32_t offset, uint32_t var, uint32_t& count ) + { + if ( var == best_multiplicity ) + { + if constexpr ( !enable_dcset ) + { + /* sets must be equally populated */ + if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) ) + { + return; + } + } + + support_minimization_encodings[count][0] = onset; + support_minimization_encodings[count][1] = offset; + ++count; + return; + } + + /* var in DCSET */ + if constexpr ( enable_dcset ) + { + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + } + + /* move var in ONSET */ + onset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + onset &= ~( 1 << var ); + + /* remove symmetries */ + if ( var == 0 ) + { + return; + } + + /* move var in OFFSET */ + offset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + offset &= ~( 1 << var ); + } + + void solve_min_support_exact( std::vector const& isets ) + { + std::vector matrix; + matrix.reserve( support_minimization_encodings.size() ); + best_bound_sets.clear(); + + /* create covering matrix */ + if ( !create_covering_matrix( isets, matrix, false ) ) + { + return; + } + + /* solve the covering problem */ + std::array solution = covering_solve_exact( matrix ); + + /* check for failed decomposition */ + if ( solution[0] == UINT32_MAX ) + { + return; + } + + /* compute best bound sets */ + uint32_t num_luts = 1 + solution[5]; + uint32_t num_levels = 2; + uint32_t num_edges = best_free_set + solution[5]; + uint32_t isets_support = num_vars - best_free_set; + best_care_sets.clear(); + best_iset_onset.clear(); + best_iset_offset.clear(); + for ( uint32_t i = 0; i < solution[5]; ++i ) + { + STT tt; + STT care; + + const uint32_t onset = 
support_minimization_encodings[matrix[solution[i]].index][0]; + const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1]; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + if ( ( ( onset >> j ) & 1 ) ) + { + tt |= isets[j]; + } + if ( ( ( offset >> j ) & 1 ) ) + { + care |= isets[j]; + } + } + + care |= tt; + num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 ); + + best_bound_sets.push_back( tt ); + best_care_sets.push_back( care ); + best_iset_onset.push_back( onset ); + best_iset_offset.push_back( offset ); + } + + if ( pst ) + { + pst->num_luts = num_luts; + pst->num_levels = num_levels; + pst->num_edges = num_edges; + } + } + + void solve_min_support_heuristic( std::vector const& isets ) + { + std::vector matrix; + matrix.reserve( support_minimization_encodings.size() ); + best_bound_sets.clear(); + + /* create covering matrix */ + if ( !create_covering_matrix( isets, matrix, true ) ) + { + return; + } + + /* solve the covering problem: heuristic pass + local search */ + std::array solution = covering_solve_heuristic( matrix ); + + /* check for failed decomposition */ + if ( solution[0] == UINT32_MAX ) + { + return; + } + + /* improve solution with local search */ + while ( covering_improve( matrix, solution ) ) + ; + + /* compute best bound sets */ + uint32_t num_luts = 1 + solution[5]; + uint32_t num_levels = 2; + uint32_t num_edges = best_free_set + solution[5]; + uint32_t isets_support = num_vars - best_free_set; + best_care_sets.clear(); + best_iset_onset.clear(); + best_iset_offset.clear(); + for ( uint32_t i = 0; i < solution[5]; ++i ) + { + STT tt; + STT care; + + const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0]; + const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1]; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + if ( ( ( onset >> j ) & 1 ) ) + { + tt |= isets[j]; + } + if ( ( ( offset >> j ) & 1 ) ) + { + care |= 
isets[j]; + } + } + + care |= tt; + num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 ); + + best_bound_sets.push_back( tt ); + best_care_sets.push_back( care ); + best_iset_onset.push_back( onset ); + best_iset_offset.push_back( offset ); + } + + if ( pst ) + { + pst->num_luts = num_luts; + pst->num_levels = num_levels; + pst->num_edges = num_edges; + } + } + + template + bool create_covering_matrix( std::vector const& isets, std::vector& matrix, bool sort ) + { + assert( best_multiplicity <= 16 ); + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + uint32_t iset_support = num_vars - best_free_set; + + /* insert dichotomies */ + for ( uint32_t i = 0; i < support_minimization_encodings.size(); ++i ) + { + uint32_t const onset = support_minimization_encodings[i][0]; + uint32_t const offset = support_minimization_encodings[i][1]; + + uint32_t ones_onset = __builtin_popcount( onset ); + uint32_t ones_offset = __builtin_popcount( offset ); + + /* filter columns that do not distinguish pairs */ + if ( ones_onset == 0 || ones_offset == 0 || ones_onset == best_multiplicity || ones_offset == best_multiplicity ) + { + continue; + } + + /* compute function and distinguishable seed dichotomies */ + uint64_t column[2] = { 0, 0 }; + STT tt; + STT care; + uint32_t pair_pointer = 0; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + auto onset_shift = ( onset >> j ); + auto offset_shift = ( offset >> j ); + if ( ( onset_shift & 1 ) ) + { + tt |= isets[j]; + } + + if ( ( offset_shift & 1 ) ) + { + care |= isets[j]; + } + + /* compute included seed dichotomies */ + for ( uint32_t k = j + 1; k < best_multiplicity; ++k ) + { + /* if are in diffent sets */ + if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) ) + { + column[pair_pointer >> 6u] |= UINT64_C( 1 ) << ( pair_pointer & 0x3F ); + } + + ++pair_pointer; + } + } + + care |= tt; + + /* compute cost */ + uint32_t cost = 0; + for ( 
uint32_t j = 0; j < iset_support; ++j ) + { + cost += has_var_support( tt, care, iset_support, j ) ? 1 : 0; + } + + /* discard solutions with support over LUT size */ + if ( cost > ps.lut_size ) + continue; + + /* buffers have zero cost */ + if ( cost == 1 ) + cost = 0; + + float sort_cost = 0; + if constexpr ( UseHeuristic ) + { + sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) ); + } + else + { + sort_cost = cost + ( ( combinations - ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) ) ) << num_vars ); /* seed dichotomies left uncovered by the two column words, shifted by num_vars, plus the support cost */ + } + + /* insert */ + matrix.emplace_back( encoding_column{ { column[0], column[1] }, cost, i, sort_cost } ); + } + + if ( !sort ) + { + return true; + } + + if constexpr ( UseHeuristic ) + { + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.cost < b.cost; + } ); + } + else + { + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.sort_cost < b.sort_cost; + } ); + } + + return true; + } + + std::array covering_solve_exact( std::vector& matrix ) + { + /* res[0] and res[1] receive the selected column indices and res[5], the last value, the size of the bound set; for multiplicity 3 or 4 res[0] stays UINT32_MAX when no covering pair is found */ + std::array res = { UINT32_MAX }; + uint32_t best_cost = UINT32_MAX; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + + assert( best_multiplicity <= 4 ); + + /* determine the number of needed loops*/ + if ( best_multiplicity <= 2 ) + { + res[5] = 1; + res[0] = 0; + } + else if ( best_multiplicity <= 4 ) + { + res[5] = 2; + for ( uint32_t i = 0; i + 1 < matrix.size(); ++i ) /* written this way so the bound cannot wrap around when the matrix is empty */ + { + for ( uint32_t j = i + 1; j < matrix.size(); ++j ) /* unordered pairs only: ( j, i ) with j < i was already visited as ( i, j ) with the same cost */ + { + /* filter by cost */ + if ( matrix[i].cost + matrix[j].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( matrix[i].column[0] | matrix[j].column[0] ) + __builtin_popcountl( matrix[i].column[1] | matrix[j].column[1] ) == combinations ) + { + res[0] = i; + res[1] = j; + best_cost = matrix[i].cost + matrix[j].cost; + } + } + } + } + + 
return res; + } + + std::array covering_solve_heuristic( std::vector& matrix ) + { + /* greedy cover: the selected columns are swapped to the front of matrix, their indices are stored from res[0] on, and res[5], the last value, contains the size of the bound set; res[0] stays UINT32_MAX on failure */ + std::array res = { UINT32_MAX }; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + uint64_t column0 = 0, column1 = 0; + + uint32_t best = 0; + float best_cost = std::numeric_limits::max(); + for ( uint32_t i = 0; i < matrix.size(); ++i ) + { + if ( matrix[i].sort_cost < best_cost ) + { + best = i; + best_cost = matrix[i].sort_cost; + } + } + + if ( matrix.empty() ) { return res; } /* select: an empty matrix has no column to pick, so report failure instead of indexing it */ + column0 = matrix[best].column[0]; + column1 = matrix[best].column[1]; + std::swap( matrix[0], matrix[best] ); + + /* get max number of BS's; also stop when the columns run out or when the index slots of res (all but its last entry) are full */ + uint32_t iter = 1; + + while ( iter < matrix.size() && iter + 1 < res.size() && iter < ps.lut_size - best_free_set && __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) != combinations ) + { + /* select column that minimizes the cost */ + best = 0; + best_cost = std::numeric_limits::max(); + for ( uint32_t i = iter; i < matrix.size(); ++i ) + { + float local_cost = 1.0f / ( __builtin_popcountl( matrix[i].column[0] & ~column0 ) + __builtin_popcountl( matrix[i].column[1] & ~column1 ) ); + if ( local_cost < best_cost ) + { + best = i; + best_cost = local_cost; + } + } + + column0 |= matrix[best].column[0]; + column1 |= matrix[best].column[1]; + std::swap( matrix[iter], matrix[best] ); + ++iter; + } + + if ( __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) == combinations ) + { + for ( uint32_t i = 0; i < iter; ++i ) + { + res[i] = i; + } + res[5] = iter; + } + + return res; + } + + bool covering_improve( std::vector& matrix, std::array& solution ) + { + /* performs one iteration of local search */ + uint32_t best_cost = 0, local_cost = 0; + uint32_t num_elements = solution[5]; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + bool improved = false; + + /* compute current cost */ + for ( uint32_t i = 0; i < num_elements; ++i ) + { + best_cost += matrix[solution[i]].cost; + } + + 
uint64_t column0, column1; + for ( uint32_t i = 0; i < num_elements; ++i ) + { + /* remove element i */ + local_cost = 0; + column0 = 0; + column1 = 0; + for ( uint32_t j = 0; j < num_elements; ++j ) + { + if ( j == i ) + continue; + local_cost += matrix[solution[j]].cost; + column0 |= matrix[solution[j]].column[0]; + column1 |= matrix[solution[j]].column[1]; + } + + /* search for a better replacement; the columns are 64-bit words, so the long popcount is used (the int variant would drop the upper 32 bits) */ + for ( uint32_t j = 0; j < matrix.size(); ++j ) + { + if ( __builtin_popcountl( column0 | matrix[j].column[0] ) + __builtin_popcountl( column1 | matrix[j].column[1] ) != combinations ) + continue; + if ( local_cost + matrix[j].cost < best_cost ) + { + solution[i] = j; + best_cost = local_cost + matrix[j].cost; + improved = true; + } + } + } + + return improved; + } + + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) + { + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + uint64_t new_bits = *it_tt & *it_care; + *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); + + ++it_tt; + ++it_care; + } + return; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); + tt._bits[i + j + step] = tt._bits[i + j]; + care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; + care._bits[i + j + step] = care._bits[i + j]; + } + } + } + + void local_extend_to( STT& tt, 
uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. 
The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. + * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + void get_decomposition_abc( unsigned char* decompArray ) + { + unsigned char* pArray = decompArray; + unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray++ = dec_result.size(); + + /* write LUTs */ + for ( ac_decomposition_result const& lut : dec_result ) + { + /* write fanin size*/ + *pArray++ = lut.support.size(); + ++bytes; + + /* write support */ + for ( uint32_t i : lut.support ) + { + *pArray++ = (unsigned char)i; + ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( lut.tt.num_vars() <= 3 ) ? 
1 : ( 1 << ( lut.tt.num_vars() - 3 ) ); + tt_num_bytes = std::min( tt_num_bytes, 8u ); + for ( uint32_t i = 0; i < lut.tt.num_blocks(); ++i ) + { + for ( uint32_t j = 0; j < tt_num_bytes; ++j ) + { + *pArray++ = (unsigned char)( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF ); + ++bytes; + } + } + } + + /* write numBytes */ + *decompArray = bytes; + } + +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + STT best_tt; + std::vector best_bound_sets; + std::vector best_care_sets; + std::vector best_free_set_tts; + std::vector best_iset_onset; + std::vector best_iset_offset; + std::vector dec_result; + + std::vector> support_minimization_encodings; + + uint32_t num_vars; + ac_decomposition_params const& ps; + ac_decomposition_stats* pst; + std::array permutations; +}; + +} // namespace acd + +#endif // _ACD_H_ \ No newline at end of file diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp new file mode 100644 index 000000000..baeee2fd6 --- /dev/null +++ b/src/map/if/acd/ac_wrapper.cpp @@ -0,0 +1,68 @@ +/**C++File************************************************************** + + FileName [ac_wrapper.cpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] 
+ +***********************************************************************/ + +#include "ac_wrapper.h" +#include "ac_decomposition.hpp" + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) /* returns the value of run(), or -1 with *pdelay reset to 0 when run() is negative; on success *pdelay receives get_profile() and *cost the LUT count */ +{ + using namespace acd; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); /* TODO: additional tests */ + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + *cost = st.num_luts; + + return val; +} + +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) /* returns 0 after writing the record into decomposition, or -1 with *pdelay reset to 0 when run() or compute_decomposition() is negative */ +{ + using namespace acd; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); + if ( val >= 0 ) { val = acd.compute_decomposition(); } /* a failed run leaves nothing to decompose, as in acd_evaluate */ + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; +} diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h new file mode 100644 index 000000000..ce39949fb --- /dev/null +++ b/src/map/if/acd/ac_wrapper.h @@ -0,0 +1,37 @@ +/**C++File************************************************************** + + FileName [ac_wrapper.h] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - November 20, 2023.] 
+ +***********************************************************************/ + +#pragma once +#ifndef __ACD_WRAPPER_H_ +#define __ACD_WRAPPER_H_ + +#include "misc/util/abc_global.h" +#include "map/if/if.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/map/if/acd/kitty_algorithm.hpp b/src/map/if/acd/kitty_algorithm.hpp new file mode 100644 index 000000000..6460a802c --- /dev/null +++ b/src/map/if/acd/kitty_algorithm.hpp @@ -0,0 +1,119 @@ +#ifndef _KITTY_ALGORITHM_H_ +#define _KITTY_ALGORITHM_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Perform bitwise unary operation on truth table + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto unary_operation( const TT& tt, Fn&& op ) +{ + auto result = tt.construct(); + std::transform( tt.cbegin(), tt.cend(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Perform bitwise binary operation on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. 
+ + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto binary_operation( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + auto result = first.construct(); + std::transform( first.cbegin(), first.cend(), second.cbegin(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Computes a predicate based on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. + + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a Boolean + + \return true or false based on the predicate + */ +template +bool binary_predicate( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + return std::equal( first.begin(), first.end(), second.begin(), op ); +} + +/*! \brief Assign computed values to bits + + The functor `op` computes bits which are assigned to the bits of the + truth table. + + \param tt Truth table + \param op Unary operation that takes no input and returns a word (`uint64_t`) +*/ +template +void assign_operation( TT& tt, Fn&& op ) +{ + std::generate( tt.begin(), tt.end(), op ); + tt.mask_bits(); +} + +/*! \brief Iterates through each block of a truth table + + The functor `op` is called for every block of the truth table. + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block( const TT& tt, Fn&& op ) +{ + std::for_each( tt.cbegin(), tt.cend(), op ); +} + +/*! 
\brief Iterates through each block of a truth table in reverse + order + + The functor `op` is called for every block of the truth table in + reverse order. + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block_reversed( const TT& tt, Fn&& op ) +{ + std::for_each( tt.crbegin(), tt.crend(), op ); +} + +} // namespace kitty + +#endif // _KITTY_ALGORITHM_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_constants.hpp b/src/map/if/acd/kitty_constants.hpp new file mode 100644 index 000000000..55cfcd650 --- /dev/null +++ b/src/map/if/acd/kitty_constants.hpp @@ -0,0 +1,91 @@ +#ifndef _KITTY_CONSTANTS_H_ +#define _KITTY_CONSTANTS_H_ +#pragma once + +#include +#include + +namespace kitty +{ + +namespace detail +{ + +static constexpr uint64_t projections[] = { + UINT64_C( 0xaaaaaaaaaaaaaaaa ), + UINT64_C( 0xcccccccccccccccc ), + UINT64_C( 0xf0f0f0f0f0f0f0f0 ), + UINT64_C( 0xff00ff00ff00ff00 ), + UINT64_C( 0xffff0000ffff0000 ), + UINT64_C( 0xffffffff00000000 ) }; + +static constexpr uint64_t projections_neg[] = { + UINT64_C( 0x5555555555555555 ), + UINT64_C( 0x3333333333333333 ), + UINT64_C( 0x0f0f0f0f0f0f0f0f ), + UINT64_C( 0x00ff00ff00ff00ff ), + UINT64_C( 0x0000ffff0000ffff ), + UINT64_C( 0x00000000ffffffff ) }; + +static constexpr uint64_t masks[] = { + UINT64_C( 0x0000000000000001 ), + UINT64_C( 0x0000000000000003 ), + UINT64_C( 0x000000000000000f ), + UINT64_C( 0x00000000000000ff ), + UINT64_C( 0x000000000000ffff ), + UINT64_C( 0x00000000ffffffff ), + UINT64_C( 0xffffffffffffffff ) }; + +static constexpr uint64_t permutation_masks[][3] = { + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 
0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } }; + +static constexpr uint64_t ppermutation_masks[][6][3] = { + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xa5a5a5a5a5a5a5a5 ), UINT64_C( 0x0a0a0a0a0a0a0a0a ), UINT64_C( 0x5050505050505050 ) }, + { UINT64_C( 0xaa55aa55aa55aa55 ), UINT64_C( 0x00aa00aa00aa00aa ), UINT64_C( 0x5500550055005500 ) }, + { UINT64_C( 0xaaaa5555aaaa5555 ), UINT64_C( 0x0000aaaa0000aaaa ), UINT64_C( 0x5555000055550000 ) }, + { UINT64_C( 0xaaaaaaaa55555555 ), UINT64_C( 0x00000000aaaaaaaa ), UINT64_C( 0x5555555500000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xcc33cc33cc33cc33 ), UINT64_C( 0x00cc00cc00cc00cc ), UINT64_C( 0x3300330033003300 ) }, + { UINT64_C( 0xcccc3333cccc3333 ), UINT64_C( 0x0000cccc0000cccc ), UINT64_C( 0x3333000033330000 ) }, + { UINT64_C( 0xcccccccc33333333 ), UINT64_C( 0x00000000cccccccc ), UINT64_C( 0x3333333300000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 0xf0f00f0ff0f00f0f ), UINT64_C( 0x0000f0f00000f0f0 ), UINT64_C( 0x0f0f00000f0f0000 ) }, + { 
UINT64_C( 0xf0f0f0f00f0f0f0f ), UINT64_C( 0x00000000f0f0f0f0 ), UINT64_C( 0x0f0f0f0f00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xff00ff0000ff00ff ), UINT64_C( 0x00000000ff00ff00 ), UINT64_C( 0x00ff00ff00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } } }; + +static constexpr int32_t hex_to_int[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; + +} // namespace detail + +} // namespace kitty + +#endif //_KITTY_CONSTANTS_H_ \ No newline at end 
of file diff --git a/src/map/if/acd/kitty_constructors.hpp b/src/map/if/acd/kitty_constructors.hpp new file mode 100644 index 000000000..43408b8cc --- /dev/null +++ b/src/map/if/acd/kitty_constructors.hpp @@ -0,0 +1,92 @@ +#ifndef _KITTY_CONSTRUCT_TT_H_ +#define _KITTY_CONSTRUCT_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Creates truth table with number of variables + + If some truth table instance is given, one can create a truth table with the + same type by calling the `construct()` method on it. This function helps if + only the number of variables is known and the base type and uniforms the + creation of static and dynamic truth tables. Note, however, that for static + truth tables `num_vars` must be consistent to the number of variables in the + truth table type. + + \param num_vars Number of variables +*/ +template +inline TT create( unsigned num_vars ) +{ + (void)num_vars; + TT tt; + assert( tt.num_vars() == num_vars ); + return tt; +} + +/*! \cond PRIVATE */ +template<> +inline dynamic_truth_table create( unsigned num_vars ) +{ + return dynamic_truth_table( num_vars ); +} +/*! \endcond */ + +/*! \brief Constructs projections (single-variable functions) + + \param tt Truth table + \param var_index Index of the variable, must be smaller than the truth table's number of variables + \param complement If true, realize inverse projection +*/ +template +void create_nth_var( TT& tt, uint8_t var_index, bool complement = false ) +{ + if ( tt.num_vars() <= 6 ) + { + /* assign from precomputed table */ + tt._bits[0] = complement ? ~detail::projections[var_index] : detail::projections[var_index]; + + /* mask if truth table does not require all bits */ + tt.mask_bits(); + return; + } + + if ( var_index < 6 ) + { + std::fill( std::begin( tt._bits ), std::end( tt._bits ), complement ? 
~detail::projections[var_index] : detail::projections[var_index] ); + } + else + { + const auto c = 1 << ( var_index - 6 ); + const auto zero = uint64_t( 0 ); + const auto one = ~zero; + auto block = uint64_t( 0u ); + + while ( block < tt.num_blocks() ) + { + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? one : zero; + } + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? zero : one; + } + } + } +} + +} // namespace kitty + +#endif // _KITTY_CONSTRUCT_TT_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_dynamic_tt.hpp b/src/map/if/acd/kitty_dynamic_tt.hpp new file mode 100644 index 000000000..f3ef0c7d9 --- /dev/null +++ b/src/map/if/acd/kitty_dynamic_tt.hpp @@ -0,0 +1,147 @@ +#ifndef _KITTY_DYNAMIC_TT_H_ +#define _KITTY_DYNAMIC_TT_H_ +#pragma once + +#include +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +/*! Truth table in which number of variables is known at runtime. + */ +struct dynamic_truth_table +{ + /*! Standard constructor. + + The number of variables provided to the truth table can be + computed at runtime. However, once the truth table is constructed + its number of variables cannot change anymore. + + The constructor computes the number of blocks and resizes the + vector accordingly. + + \param num_vars Number of variables + */ + explicit dynamic_truth_table( uint32_t num_vars ) + : _bits( ( num_vars <= 6 ) ? 1u : ( 1u << ( num_vars - 6 ) ) ), + _num_vars( num_vars ) + { + } + + /*! Empty constructor. + + Creates an empty truth table. It has 0 variables, but no bits, i.e., it is + different from a truth table for the constant function. This constructor is + only used for convenience, if algorithms require the existence of default + constructable classes. + */ + dynamic_truth_table() : _num_vars( 0 ) {} + + /*! Constructs a new dynamic truth table instance with the same number of variables. 
*/ + inline dynamic_truth_table construct() const + { + return dynamic_truth_table( _num_vars ); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return _num_vars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return _bits.size(); } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant teverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table. + + This replaces the current truth table with another truth table. The truth + table type has to be complete. The vector of bits is resized accordingly. 
+ + \param other Other truth table + */ + template + dynamic_truth_table& operator=( const TT& other ) + { + _bits.resize( other.num_blocks() ); + std::copy( other.begin(), other.end(), begin() ); + _num_vars = other.num_vars(); + + if ( _num_vars < 6 ) + { + mask_bits(); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + If the truth table has less than 6 variables, it may not use all + the bits. This operation makes sure to zero out all non-valid + bits. + */ + inline void mask_bits() noexcept + { + if ( _num_vars < 6 ) + { + _bits[0u] &= detail::masks[_num_vars]; + } + } + + /*! \cond PRIVATE */ +public: /* fields */ + std::vector _bits; + uint32_t _num_vars; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_DYNAMIC_TT_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_operations.hpp b/src/map/if/acd/kitty_operations.hpp new file mode 100644 index 000000000..fb504489a --- /dev/null +++ b/src/map/if/acd/kitty_operations.hpp @@ -0,0 +1,333 @@ +#ifndef _KITTY_OPERATIONS_TT_H_ +#define _KITTY_OPERATIONS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_algorithm.hpp" +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! Inverts all bits in a truth table, based on a condition */ +template +inline TT unary_not_if( const TT& tt, bool cond ) +{ +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4146 ) +#endif + const auto mask = -static_cast( cond ); +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + return unary_operation( tt, [mask]( auto a ) + { return a ^ mask; } ); +} + +/*! \brief Inverts all bits in a truth table */ +template +inline TT unary_not( const TT& tt ) +{ + return unary_operation( tt, []( auto a ) + { return ~a; } ); +} + +/*! 
\brief Bitwise AND of two truth tables */ +template + +inline TT binary_and( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_and<>() ); +} + +/*! \brief Bitwise OR of two truth tables */ +template +inline TT binary_or( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_or<>() ); +} + +/*! \brief Swaps two variables in a truth table + + The function swaps variable `var_index1` with `var_index2`. The + function will change `tt` in-place. If `tt` should not be changed, + one can use `swap` instead. + + \param tt Truth table + \param var_index1 First variable + \param var_index2 Second variable +*/ +template +void swap_inplace( TT& tt, uint8_t var_index1, uint8_t var_index2 ) +{ + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + if ( tt.num_vars() <= 6 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift ); + } + else if ( var_index2 <= 5 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::end( tt._bits ), std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) + { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & detail::projections[var_index1] ) >> shift; + const 
auto high_to_low = ( *( it + i + step ) << shift ) & detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } +} + +/*! \brief Extends smaller truth table to larger one + + The most significant variables will not be in the functional support of the + resulting truth table, but the method is helpful to align a truth table when + being used with another one. + + \param tt Larger truth table to create + \param from Smaller truth table to copy from +*/ +template +void extend_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() >= from.num_vars() ); + + if ( from.num_vars() < 6 ) + { + auto mask = *from.begin(); + + for ( auto i = from.num_vars(); i < std::min( 6, tt.num_vars() ); ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( from.cbegin(), from.cend(), it ); + } + } +} + +/*! \brief Extends smaller truth table to larger static one + + This is an out-of-place version of `extend_to_inplace` that has the truth + table as a return value. It only works for creating static truth tables. The + template parameter `NumVars` must be equal or larger to the number of + variables in `from`. 
+ + \param from Smaller truth table to copy from +*/ +template +inline static_truth_table extend_to( const TTFrom& from ) +{ + static_truth_table tt; + extend_to_inplace( tt, from ); + return tt; +} + +/*! \brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + return std::any_of( std::begin( tt._bits ), std::end( tt._bits ), + [var_index]( uint64_t word ) + { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & detail::projections_neg[var_index] ) != + ( word & detail::projections_neg[var_index] ); } ); + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( tt._bits[i + j] != tt._bits[i + j + step] ) + { + return true; + } + } + } + return false; +} + +/*! 
\brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param care Care set + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, const TT& care, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & detail::projections_neg[var_index] + & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + return false; +} + +/*! \brief Shrinks larger truth table to smaller one + + The function expects that the most significant bits, which are cut off, are + not in the functional support of the original function. Only then it is + ensured that the resulting function is equivalent. + + \param tt Smaller truth table to create + \param from Larger truth table to copy from +*/ +template +void shrink_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() <= from.num_vars() ); + + std::copy( from.begin(), from.begin() + tt.num_blocks(), tt.begin() ); + + if ( tt.num_vars() < 6 ) + { + tt.mask_bits(); + } +} + +/*! \brief Shrinks larger truth table to smaller dynamic one + + This is an out-of-place version of `shrink_to_inplace` that has the truth table as a + return value. It only works for creating dynamic tables. The parameter + `num_vars` must be equal or smaller to the number of variables in `from`. 
+ + \param from Larger truth table to copy from +*/ +template +inline dynamic_truth_table shrink_to( const TTFrom& from, unsigned num_vars ) +{ + auto tt = create( num_vars ); + shrink_to_inplace( tt, from ); + return tt; +} + +/*! \brief Prints truth table in hexadecimal representation + + The most-significant bit will be the first character of the string. + + \param tt Truth table + \param os Output stream +*/ +template +void print_hex( const TT& tt, std::ostream& os = std::cout ) +{ + auto const chunk_size = + std::min( tt.num_vars() <= 1 ? 1 : ( tt.num_bits() >> 2 ), 16 ); + + for_each_block_reversed( tt, [&os, chunk_size]( auto word ) + { + std::string chunk( chunk_size, '0' ); + + auto it = chunk.rbegin(); + while (word && it != chunk.rend()) { + auto hex = word & 0xf; + if (hex < 10) { + *it = '0' + static_cast(hex); + } else { + *it = 'a' + static_cast(hex - 10); + } + ++it; + word >>= 4; + } + os << chunk; } ); +} + +} //namespace kitty + +#endif // _KITTY_OPERATIONS_TT_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_operators.hpp b/src/map/if/acd/kitty_operators.hpp new file mode 100644 index 000000000..68a24cf2e --- /dev/null +++ b/src/map/if/acd/kitty_operators.hpp @@ -0,0 +1,122 @@ +#ifndef _KITTY_OPERATORS_TT_H_ +#define _KITTY_OPERATORS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" +#include "kitty_operations.hpp" + +namespace kitty +{ + +/*! \brief Operator for unary_not */ +inline dynamic_truth_table operator~( const dynamic_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! \brief Operator for unary_not */ +template +inline static_truth_table operator~( const static_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! 
\brief Operator for binary_and */ +inline dynamic_truth_table operator&( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*! \brief Operator for binary_and */ +template +inline static_truth_table operator&( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +inline void operator&=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +template +inline void operator&=( static_truth_table& first, const static_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_or */ +inline dynamic_truth_table operator|( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or */ +template +inline static_truth_table operator|( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or and assign */ +inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_or( first, second ); +} + +/*! 
\brief Operator for binary_or and assign */ +template +inline void operator|=( static_truth_table& first, const static_truth_table& second ) +{ + // first = binary_or( first, second ); + /* runtime improved version */ + if constexpr ( NumVars <= 6 ) + { + first._bits |= second._bits; + first.mask_bits(); + } + else if constexpr ( NumVars == 7 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + } + else if constexpr ( NumVars == 8 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + first._bits[2] |= second._bits[2]; + first._bits[3] |= second._bits[3]; + } + else if constexpr ( NumVars == 9 ) + { + first._bits[0] |= second._bits[0]; + first._bits[1] |= second._bits[1]; + first._bits[2] |= second._bits[2]; + first._bits[3] |= second._bits[3]; + first._bits[4] |= second._bits[4]; + first._bits[5] |= second._bits[5]; + first._bits[6] |= second._bits[6]; + first._bits[7] |= second._bits[7]; + } + else + { + for ( uint32_t i = 0; i < first.num_blocks(); ++i ) + { + first._bits[i] |= second._bits[i]; + } + } +} + +} // namespace kitty + +#endif // _KITTY_OPERATORS_TT_H_ \ No newline at end of file diff --git a/src/map/if/acd/kitty_static_tt.hpp b/src/map/if/acd/kitty_static_tt.hpp new file mode 100644 index 000000000..61593f3ff --- /dev/null +++ b/src/map/if/acd/kitty_static_tt.hpp @@ -0,0 +1,131 @@ +#ifndef _KITTY_STATIC_TT_H_ +#define _KITTY_STATIC_TT_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +template +struct static_truth_table +{ + /*! \cond PRIVATE */ + enum + { + NumBlocks = ( NumVars <= 6 ) ? 1u : ( 1u << ( NumVars - 6 ) ) + }; + + enum + { + NumBits = uint64_t( 1 ) << NumVars + }; + /*! \endcond */ + + /*! Standard constructor. + + The number of variables provided to the truth table must be known + at runtime. The number of blocks will be computed as a compile + time constant. + */ + static_truth_table() + { + _bits.fill( 0 ); + } + + /*! 
Constructs a new static truth table instance with the same number of variables. */ + inline static_truth_table construct() const + { + return static_truth_table(); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return NumVars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return NumBlocks; } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return NumBits; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant reverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table if number of variables match. + + This replaces the current truth table with another truth table, if `other` + has the same number of variables. Otherwise, the truth table is not + changed. 
+ + \param other Other truth table + */ + template + static_truth_table& operator=( const TT& other ) + { + if ( other.num_bits() == num_bits() ) + { + std::copy( other.begin(), other.end(), begin() ); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + We know that we will have at least 7 variables in this data + structure. + */ + inline void mask_bits() noexcept {} + + /*! \cond PRIVATE */ +public: /* fields */ + std::array _bits; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_STATIC_TT_H_ \ No newline at end of file diff --git a/src/map/if/acd/module.make b/src/map/if/acd/module.make new file mode 100644 index 000000000..33c59e830 --- /dev/null +++ b/src/map/if/acd/module.make @@ -0,0 +1 @@ +SRC += src/map/if/acd/ac_wrapper.cpp diff --git a/src/map/if/if.h b/src/map/if/if.h index 93cb0f6ca..f8c99fdf1 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -40,6 +40,7 @@ #include "opt/dau/dau.h" #include "misc/vec/vecHash.h" #include "misc/vec/vecWec.h" +#include "map/if/acd/ac_wrapper.h" ABC_NAMESPACE_HEADER_START @@ -112,6 +113,7 @@ struct If_Par_t_ int nStructType; // type of the structure int nAndDelay; // delay of AND-gate in LUT library units int nAndArea; // area of AND-gate in LUT library units + int nLutDecSize; // the LUT size for decomposition int fPreprocess; // preprossing int fArea; // area-oriented mapping int fFancy; // a fancy feature @@ -126,7 +128,6 @@ struct If_Par_t_ int fDsdBalance; // special delay optimization int fUserRecLib; // use recorded library int fUserSesLib; // use SAT-based synthesis - int fUserLutDec; // use LUT-based decomposition int fBidec; // use bi-decomposition int fUse34Spec; // use specialized matching int fUseBat; // use one specialized feature @@ -146,6 +147,7 @@ struct If_Par_t_ int fDeriveLuts; // enables deriving LUT structures int fDoAverage; // optimize average rather than maximum level int fHashMapping; // perform AIG hashing after mapping + int fUserLutDec; // 
use LUT-based decomposition int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure @@ -310,6 +312,7 @@ struct If_Cut_t_ unsigned fAndCut : 1; // matched with AND gate unsigned nLimit : 8; // the maximum number of leaves unsigned nLeaves : 8; // the number of leaves + unsigned decDelay: 16; // pin-to-pin decomposition delay int pLeaves[0]; }; @@ -552,6 +555,7 @@ extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern float If_CutDelayLutStruct( If_Man_t * p, If_Cut_t * pCut, char * pStr, float WireDelay ); +// extern int If_CutPerformAcd( If_Man_t * p, unsigned nVars, int lutSize, unsigned * pdelay, int use_late_arrival, unsigned * cost ); extern int If_CluCheckExt( void * p, word * pTruth, int nVars, int nLutLeaf, int nLutRoot, char * pLut0, char * pLut1, word * pFunc0, word * pFunc1 ); extern int If_CluCheckExt3( void * p, word * pTruth, int nVars, int nLutLeaf, int nLutLeaf2, int nLutRoot, @@ -566,6 +570,9 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); +extern int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); +extern int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ); +extern float If_LutDecPinRequired( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ extern If_DsdMan_t * If_DsdManAlloc( int nVars, int nLutSize ); extern void If_DsdManAllocIsops( If_DsdMan_t * p, int nLutSize ); @@ -693,6 
+700,8 @@ extern int If_ManCountSpecialPos( If_Man_t * p ); extern void If_CutTraverse( If_Man_t * p, If_Obj_t * pRoot, If_Cut_t * pCut, Vec_Ptr_t * vNodes ); extern void If_ObjPrint( If_Obj_t * pObj ); +extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); +extern int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index c03061af2..f7fcbca66 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -62,6 +62,7 @@ void If_ManSetDefaultPars( If_Par_t * pPars ) pPars->fPower = 0; pPars->fCutMin = 0; pPars->fBidec = 0; + pPars->fUserLutDec = 0; pPars->fVerbose = 0; } @@ -106,6 +107,7 @@ int If_ManPerformMappingComb( If_Man_t * p ) If_Obj_t * pObj; abctime clkTotal = Abc_Clock(); int i; + //p->vVisited2 = Vec_IntAlloc( 100 ); //p->vMarks = Vec_StrStart( If_ManObjNum(p) ); @@ -121,6 +123,7 @@ int If_ManPerformMappingComb( If_Man_t * p ) { // map for delay If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); + // map for delay second option p->pPars->fFancy = 1; If_ManResetOriginalRefs( p ); diff --git a/src/map/if/ifCut.c b/src/map/if/ifCut.c index f4f72d1c8..49850d313 100644 --- a/src/map/if/ifCut.c +++ b/src/map/if/ifCut.c @@ -604,10 +604,6 @@ static inline int If_ManSortCompare( If_Man_t * p, If_Cut_t * pC0, If_Cut_t * pC return -1; if ( pC0->nLeaves > pC1->nLeaves ) return 1; - if ( pC0->Delay < pC1->Delay - p->fEpsilon ) - return -1; - if ( pC0->Delay > pC1->Delay + p->fEpsilon ) - return 1; if ( pC0->fUseless < pC1->fUseless ) return -1; if ( pC0->fUseless > pC1->fUseless ) @@ -765,7 +761,7 @@ void If_CutSort( If_Man_t * p, If_Set_t * pCutSet, If_Cut_t * pCut ) if ( !pCut->fUseless && (p->pPars->fUseDsd || p->pPars->pFuncCell2 || p->pPars->fUseBat || - p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || 
p->pPars->fUserLutDec || + p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fEnableCheck07 || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->fUseDsdTune || p->pPars->fEnableCheck75 || p->pPars->fEnableCheck75u || p->pPars->fUseCheck1 || p->pPars->fUseCheck2) ) { diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index cb25e767e..3514327c1 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -411,6 +411,144 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) return DelayMax + 2; } } + +int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ) +{ + pCut->fUser = 1; + pCut->Cost = pCut->nLeaves > 1 ? 1 : 0; + pCut->decDelay = 0; + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + int LutSize = p->pPars->nLutDecSize; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + + if ( DelayMax < leaf_delay ) + { + DelayMax = leaf_delay; + nLeafMax = 1; + uLeafMask = (1 << i); + } + else if ( DelayMax == leaf_delay ) + { + nLeafMax++; + uLeafMask |= (1 << i); + } + } + if ( If_CutLeaveNum(pCut) <= LutSize ) + { + pCut->decDelay = ( 1 << LutSize ) - 1; + return DelayMax + 1; + } + + /* compute the decomposition */ + int use_late_arrival = 0; + unsigned cost = 1; + + if ( !fFirst ) + { + if ( optDelay ) + { + /* checks based on delay: must be better than the previous best cut */ + use_late_arrival = DelayMax + 2 >= If_ObjCutBest(pObj)->Delay; + } + else + { + /* checks based on delay: look at the required time */ + use_late_arrival = DelayMax + 2 > pObj->Required + 
p->fEpsilon; + } + } + + /* Too many late-arriving signals */ + if ( nLeafMax == LutSize ) + { + if ( use_late_arrival ) + { + /* unfeasible decomposition */ + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + else + { + /* remove critical signals as not needed */ + uLeafMask = 0; + } + } + + /* returns the delay of the decomposition */ + word *pTruth = If_CutTruthW( p, pCut ); + int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost, !use_late_arrival ); + + /* not feasible decomposition */ + pCut->decDelay = uLeafMask; + if ( val < 0 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + pCut->Cost = cost; + + return DelayMax + val; +} + +int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ) +{ + // pCut->fUser = 1; + + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + // int LutSize = p->pPars->pLutStruct[0] - '0'; + int i, leaf_delay; + int DelayMax = -1; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + leaf_delay += ( ( pCut->decDelay >> i ) & 1 ) == 0 ? 2 : 1; + DelayMax = Abc_MaxInt( leaf_delay, DelayMax ); + } + + return DelayMax; +} + +float If_LutDecPinRequired( If_Man_t * p, If_Cut_t * pCut, int i, float required ) +{ + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return required; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return 0; + } + + return ( ( pCut->decDelay >> i ) & 1 ) == 0 ? 
2 : 1; +} + /* int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) { diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index 4a5210e92..bdd3ae439 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -148,32 +148,6 @@ int * If_CutArrTimeProfile( If_Man_t * p, If_Cut_t * pCut ) return p->pArrTimeProfile; } - -/**Function************************************************************* - - Synopsis [Returns the node's delay if its cut it LUT-decomposed.] - - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -int If_CutDelayLutDec( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj ) -{ - // get the truth table - // get the cut leaves' arrival times - // run LUT-decomposition in the evaluation mode - // return expected arrival time at the output - - // this is a placeholder code, which is assume the cut has unit delay - int i, ArrTimes = 0; - for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) - ArrTimes = Abc_MaxInt( ArrTimes, (int)If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay ); - return ArrTimes + 1; -} - /**Function************************************************************* Synopsis [Finds the best cut for the given node.] 
@@ -192,7 +166,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep If_Cut_t * pCut0R, * pCut1R; int fFunc0R, fFunc1R; int i, k, v, iCutDsd, fChange; - int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUseDsdTune || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->pLutStruct || p->pPars->pFuncCell2 || p->pPars->fUseCheck1 || p->pPars->fUseCheck2; int fUseAndCut = (p->pPars->nAndDelay > 0) || (p->pPars->nAndArea > 0); assert( !If_ObjIsAnd(pObj->pFanin0) || pObj->pFanin0->pCutSet->nCuts > 0 ); @@ -235,7 +209,9 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep } } else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); + { + pCut->Delay = If_LutDecReEval( p, pCut ); + } else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 0 ) @@ -292,6 +268,8 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep if ( !If_CutMergeOrdered( p, pCut0, pCut1, pCut ) ) continue; } + if ( p->pPars->fUserLutDec && !fFirst && pCut->nLeaves > p->pPars->nLutDecSize ) + continue; if ( pObj->fSpec && pCut->nLeaves == (unsigned)p->pPars->nLutSize ) continue; p->nCutsMerged++; @@ -450,7 +428,12 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep else if ( p->pPars->fDsdBalance ) pCut->Delay = If_CutDsdBalanceEval( p, pCut, NULL ); else if ( p->pPars->fUserRecLib ) - pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + else if ( p->pPars->fUserLutDec ) + { + pCut->Delay = If_LutDecEval( p, pCut, pObj, Mode == 0, 
fFirst ); + pCut->fUseless = pCut->Delay == ABC_INFINITY; + } else if ( p->pPars->fUserSesLib ) { int Cost = 0; @@ -464,8 +447,6 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 0 ) @@ -537,7 +518,7 @@ void If_ObjPerformMappingChoice( If_Man_t * p, If_Obj_t * pObj, int Mode, int fP If_Set_t * pCutSet; If_Obj_t * pTemp; If_Cut_t * pCutTemp, * pCut; - int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUse34Spec; + int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fUserLutDec; assert( pObj->pEquiv != NULL ); // prepare diff --git a/src/map/if/ifTime.c b/src/map/if/ifTime.c index 9ceef1475..f20842384 100644 --- a/src/map/if/ifTime.c +++ b/src/map/if/ifTime.c @@ -211,6 +211,12 @@ void If_CutPropagateRequired( If_Man_t * p, If_Obj_t * pObj, If_Cut_t * pCut, fl pLeaf->Required = IF_MIN( pLeaf->Required, Required - pLutDelays[0] ); } } + else if ( p->pPars->fUserLutDec ) + { + Required = ObjRequired; + If_CutForEachLeaf( p, pCut, pLeaf, i ) + pLeaf->Required = IF_MIN( pLeaf->Required, Required - If_LutDecPinRequired( p, pCut, i, ObjRequired ) ); + } else { if ( pCut->fUser )