Performance improvements

This commit is contained in:
aletempiac 2024-02-27 17:36:24 +01:00
parent f72000f5ae
commit d3f140f1df
3 changed files with 87 additions and 7 deletions

View File

@ -366,6 +366,40 @@ private:
return multiplicity;
}
/*! \brief Counts distinct fixed-width chunks of `tt`, giving up once a third one shows up.
 *
 * Every 64-bit block of `tt` is cut into consecutive chunks of
 * `2^free_set_size` bits, starting from the least significant bits.
 * Presumably each chunk is one column (cofactor over the free-set variables)
 * of the decomposition chart -- confirm against the caller.
 *
 * \param tt Truth table whose first `num_blocks` words are scanned.
 * \param free_set_size Log2 of the chunk width in bits; must be at most 5.
 * \return The number of distinct chunk values seen when that number is at
 *         most 2, or 3 as soon as a third distinct value is encountered.
 */
uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size )
{
  assert( free_set_size <= 5 );

  uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
  uint64_t const chunk_bits = UINT64_C( 1 ) << free_set_size;
  uint64_t const chunk_mask = ( UINT64_C( 1 ) << chunk_bits ) - 1;
  int const chunks_per_block = 64 >> free_set_size;

  uint32_t distinct[4];
  uint32_t num_distinct = 0;

  for ( uint32_t block = 0; block < num_blocks; ++block )
  {
    uint64_t word = tt._bits[block];

    /* consume the block one chunk at a time, lowest bits first */
    for ( int chunk = 0; chunk < chunks_per_block; ++chunk, word >>= chunk_bits )
    {
      uint32_t const value = static_cast<uint32_t>( word & chunk_mask );

      /* position of `value` among the chunks recorded so far */
      uint32_t pos = 0;
      while ( pos < num_distinct && distinct[pos] != value )
      {
        ++pos;
      }

      /* two values are already recorded and this one matches neither */
      if ( pos == 2 )
      {
        return 3;
      }

      /* first occurrence of this value: record it */
      if ( pos == num_distinct )
      {
        distinct[num_distinct++] = value;
      }
    }
  }

  return num_distinct;
}
inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt )
{
uint32_t i;
@ -401,7 +435,7 @@ private:
STT tt = best_tt;
/* TT with best cost */
STT best_tt = tt;
STT local_best_tt = tt;
uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
assert( free_set_size >= offset );
@ -416,6 +450,12 @@ private:
/* works up to 16 input truth tables */
assert( num_vars <= 16 );
/* Search for column multiplicity of 2 */
if ( free_set_size == ps.lut_size - 1 )
{
return enumerate_iset_combinations2( free_set_size, offset );
}
/* init combinations */
uint32_t pComb[16], pInvPerm[16], bestPerm[16];
for ( uint32_t i = 0; i < num_vars; ++i )
@ -429,7 +469,7 @@ private:
uint32_t cost = fn( tt );
if ( cost < best_cost )
{
best_tt = tt;
local_best_tt = tt;
best_cost = cost;
for ( uint32_t i = 0; i < num_vars; ++i )
{
@ -442,7 +482,7 @@ private:
if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) )
{
return std::make_tuple( best_tt, res_perm, UINT32_MAX );
return std::make_tuple( local_best_tt, res_perm, UINT32_MAX );
}
for ( uint32_t i = 0; i < num_vars; ++i )
@ -450,7 +490,45 @@ private:
res_perm[i] = permutations[bestPerm[i]];
}
return std::make_tuple( best_tt, res_perm, best_cost );
return std::make_tuple( local_best_tt, res_perm, best_cost );
}
inline std::tuple<STT, std::array<uint32_t, max_num_vars>, uint32_t> enumerate_iset_combinations2( uint32_t free_set_size, uint32_t offset )
{
STT tt = best_tt;
/* TT with best cost */
STT local_best_tt = tt;
uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
assert( free_set_size >= offset );
/* init combinations */
uint32_t pComb[16], pInvPerm[16];
for ( uint32_t i = 0; i < num_vars; ++i )
{
pComb[i] = pInvPerm[i] = i;
}
/* enumerate combinations */
std::array<uint32_t, max_num_vars> res_perm;
do
{
uint32_t cost = column_multiplicity2( tt, free_set_size );
if ( cost <= 2 )
{
local_best_tt = tt;
best_cost = cost;
for ( uint32_t i = 0; i < num_vars; ++i )
{
res_perm[i] = permutations[pComb[i]];
}
return std::make_tuple( local_best_tt, res_perm, best_cost );
}
} while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );
return std::make_tuple( local_best_tt, res_perm, UINT32_MAX );
}
std::vector<STT> compute_isets( bool verbose = false )

View File

@ -162,6 +162,7 @@ private:
uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
uint64_t const shift = UINT64_C( 1 ) << free_set_size;
uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1;
uint32_t const limit = free_set_size < 5 ? 4 : 2;
uint32_t cofactors[4];
uint32_t size = 0;
@ -178,7 +179,7 @@ private:
if ( fs_fn == cofactors[k] )
break;
}
if ( k == 4 )
if ( k == limit )
return 5;
if ( k == size )
cofactors[size++] = fs_fn;

View File

@ -101,7 +101,7 @@ public:
uint32_t num_edges = support_sizes[0] + support_sizes[1] + 1 + ( shared_vars[0] < UINT32_MAX ? 1 : 0 );
if ( num_luts = 2 )
if ( num_luts == 2 )
return num_edges;
/* real value after support minimization */
@ -203,6 +203,7 @@ private:
uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1;
uint64_t const shift = UINT64_C( 1 ) << free_set_size;
uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1;
uint32_t const limit = free_set_size < 5 ? 4 : 2;
uint32_t cofactors[4];
uint32_t size = 0;
@ -219,7 +220,7 @@ private:
if ( fs_fn == cofactors[k] )
break;
}
if ( k == 4 )
if ( k == limit )
return 5;
if ( k == size )
cofactors[size++] = fs_fn;