diff --git a/Makefile b/Makefile index 3976cf7b1..d770c3faf 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ OS := $(shell uname -s) MODULES := \ $(wildcard src/ext*) \ + src/acd \ src/base/abc src/base/abci src/base/cmd src/base/io src/base/main src/base/exor \ src/base/ver src/base/wlc src/base/wln src/base/acb src/base/bac src/base/cba src/base/pla src/base/test \ src/map/mapper src/map/mio src/map/super src/map/if \ @@ -56,7 +57,7 @@ ARCHFLAGS := $(ARCHFLAGS) OPTFLAGS ?= -g -O -CFLAGS += -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) +CFLAGS += -std=c17 -Wall -Wno-unused-function -Wno-write-strings -Wno-sign-compare $(ARCHFLAGS) ifneq ($(findstring arm,$(shell uname -m)),) CFLAGS += -DABC_MEMALIGN=4 endif @@ -151,7 +152,7 @@ ifdef ABC_USE_LIBSTDCXX endif $(info $(MSG_PREFIX)Using CFLAGS=$(CFLAGS)) -CXXFLAGS += $(CFLAGS) +CXXFLAGS += $(CFLAGS) -std=c++17 SRC := GARBAGE := core core.* *.stackdump ./tags $(PROG) arch_flags diff --git a/src/acd/ac_decomposition.hpp b/src/acd/ac_decomposition.hpp new file mode 100644 index 000000000..4f94bcba4 --- /dev/null +++ b/src/acd/ac_decomposition.hpp @@ -0,0 +1,1500 @@ +/* mockturtle: C++ logic network library + * Copyright (C) 2018-2023 EPFL + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*! + \file ac_decomposition.hpp + \brief Ashenhurst-Curtis decomposition + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACD_H_ +#define _ACD_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_static_tt.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" + +namespace mockturtle +{ + +/*! \brief Parameters for ac_decomposition */ +struct ac_decomposition_params +{ + /*! \brief LUT size for decomposition. */ + uint32_t lut_size{ 6 }; + + /*! \brief Maximum number of iterations for covering. */ + uint32_t max_iter{ 5000 }; +}; + +/*! 
\brief Statistics for ac_decomposition */ +struct ac_decomposition_stats +{ + uint32_t num_luts{ 0 }; + uint32_t num_edges{ 0 }; + uint32_t num_levels{ 0 }; +}; + +struct ac_decomposition_result +{ + kitty::dynamic_truth_table tt; + std::vector support; +}; + +template +class ac_decomposition_impl +{ +private: + struct encoding_matrix + { + uint64_t column{ 0 }; + uint32_t cost{ 0 }; + uint32_t index{ 0 }; + uint32_t sort_cost{ 0 }; + }; + +private: + static constexpr uint32_t max_num_vars = 8; + using STT = kitty::static_truth_table; + +public: + explicit ac_decomposition_impl( TT const& tt, uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr ) + : num_vars( num_vars ), ps( ps ), pst( pst ), permutations( num_vars ) + { + tt_start = tt; + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! \brief Runs ACD using late arriving variables */ + int run( unsigned delay_profile ) + { + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars ) + { + return -1; + } + + uint32_t late_arriving = __builtin_popcount( delay_profile ); + + /* return a high cost if too many late arriving variables */ + if ( late_arriving > ps.lut_size / 2 || late_arriving > 3 ) + { + return -1; + } + + /* convert to static TT */ + best_tt = kitty::extend_to( tt_start ); + best_multiplicity = UINT32_MAX; + uint32_t best_cost = UINT32_MAX; + + /* permute late arriving variables to be the least significant */ + reposition_late_arriving_variables( delay_profile, late_arriving ); + + /* run ACD trying different bound sets and free sets */ + uint32_t free_set_size = late_arriving; + uint32_t offset = std::max( static_cast( late_arriving ), 1u ); + for ( uint32_t i = offset; i <= ps.lut_size / 2 && i <= 3; ++i ) + { + auto evaluate_fn = [&]( STT const& tt ) { return column_multiplicity( tt, i ); }; + auto [tt_p, perm, cost] = enumerate_iset_combinations_offset( i, offset, evaluate_fn, false ); + + /* add cost if not support 
reducing */ + uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0; + /* check for feasible solution that improves the cost */ + if ( cost <= ( 1 << ( ps.lut_size - i ) ) && cost + additional_cost < best_cost ) + { + best_tt = tt_p; + permutations = perm; + best_multiplicity = cost; + best_cost = cost + additional_cost; + free_set_size = i; + } + } + + if ( best_multiplicity == UINT32_MAX ) + return -1; + + /* compute isets */ + // std::vector isets = compute_isets( free_set_size ); + + // generate_support_minimization_encodings(); + // solve_min_support_exact( isets, free_set_size ); + + /* unfeasible decomposition */ + // if ( best_bound_sets.empty() ) + // { + // return -1; + // } + + /* pst defaults to nullptr in the constructor: record statistics only when a stats object was supplied */ + if ( pst != nullptr ) + { + pst->num_luts = ps.lut_size - free_set_size; + } + best_free_set = free_set_size; + + /* TODO generate decomposition only when returning the result */ + // dec_result = generate_decomposition( free_set_size ); + + /* TODO: change return value */ + return 0; + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + /* compute isets */ + std::vector isets = compute_isets( best_free_set ); + + generate_support_minimization_encodings(); + solve_min_support_exact( isets, best_free_set ); + + /* unfeasible decomposition */ + if ( best_bound_sets.empty() ) + { + return -1; + } + + return 0; + } + + unsigned get_profile() + { + unsigned profile = 0; + + if ( best_free_set > num_vars ) + return -1; + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + + return profile; + } + + std::vector get_result() + { + return dec_result; + } + + void get_decomposition( unsigned char *decompArray ) + { + if ( best_free_set > num_vars ) + return; + + dec_result = generate_decomposition( best_free_set ); + return get_decomposition_abc( decompArray ); + } + +private: + uint32_t column_multiplicity( STT tt, uint32_t free_set_size ) + { + uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u }; + uint32_t multiplicity = 0; 
+ uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + + /* supports up to 64 values of free set (256 for |FS| == 3)*/ + assert( free_set_size <= 3 ); + + /* extract iset functions */ + if ( free_set_size == 1 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 32; ++j ) + { + multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0x3 ); + *it >>= 2; + } + ++it; + } + } + else if ( free_set_size == 2 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 16; ++j ) + { + multiplicity_set[0] |= UINT64_C( 1 ) << ( *it & 0xF ); + *it >>= 4; + } + ++it; + } + } + else /* free set size 3 */ + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 8; ++j ) + { + multiplicity_set[( *it >> 6 ) & 0x3] |= UINT64_C( 1 ) << ( *it & 0x3F ); + *it >>= 8; + } + ++it; + } + } + + /* multiplicity_set holds 64-bit masks: use the long long builtin so no bits are dropped where long is only 32 bits wide */ + multiplicity = __builtin_popcountll( multiplicity_set[0] ); + + if ( free_set_size == 3 ) + { + multiplicity += __builtin_popcountll( multiplicity_set[1] ); + multiplicity += __builtin_popcountll( multiplicity_set[2] ); + multiplicity += __builtin_popcountll( multiplicity_set[3] ); + } + + return multiplicity; + } + + template + std::tuple, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, Fn&& fn, bool verbose = false ) + { + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* special case */ + STT tt = best_tt; + if ( num_vars <= free_set_size || free_set_size == 0 ) + { + return { tt, permutations, UINT32_MAX }; + } + + /* select k */ + // free_set_size = std::min( free_set_size, num_vars - free_set_size ); + + /* init permutation array */ + std::array perm, best_perm; + std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); + best_perm = perm; + + /* TT with best cost */ + STT best = tt; + uint32_t best_cost = UINT32_MAX; + + /* enumerate combinations */ + if ( free_set_size == 
1 ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t i = 1; i < num_vars; ++i ) + { + std::swap( perm[0], perm[i] ); + kitty::swap_inplace( tt, 0, i ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + } + else if ( free_set_size == 2 ) + { + for ( uint32_t i = 0; i < num_vars - 1; ++i ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t j = 2; j < num_vars - i; ++j ) + { + std::swap( perm[1], perm[j] ); + kitty::swap_inplace( tt, 1, j ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + + std::swap( perm[0], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, 0, num_vars - i - 1 ); + } + } + else if ( free_set_size == 3 ) + { + for ( uint32_t i = 0; i < num_vars - 2; ++i ) + { + for ( uint32_t j = i; j < num_vars - 2; ++j ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + + for ( uint32_t k = 3; k < num_vars - j; ++k ) + { + std::swap( perm[2], perm[k] ); + kitty::swap_inplace( tt, 2, k ); + + uint32_t cost = fn( tt ); + if 
( cost < best_cost ) + { + best = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size ); + } + } + + std::swap( perm[1], perm[num_vars - j - 1] ); + kitty::swap_inplace( tt, 1, num_vars - j - 1 ); + } + + std::swap( perm[0], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, 0, num_vars - i - 1 ); + } + } + + std::vector res_perm( num_vars ); + std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); + + return std::make_tuple( best, res_perm, best_cost ); + } + + template + std::tuple, uint32_t> enumerate_iset_combinations_offset( uint32_t free_set_size, uint32_t offset, Fn&& fn, bool verbose = false ) + { + STT tt = best_tt; + + /* TT with best cost */ + STT best_tt = tt; + uint32_t best_cost = UINT32_MAX; + + /* works up to 16 input truth tables */ + assert( num_vars <= 16 ); + + /* select k */ + free_set_size = std::min( free_set_size, num_vars - free_set_size ); + + /* special case */ + if ( num_vars <= free_set_size || free_set_size <= offset ) + { + if ( offset == free_set_size ) + { + best_cost = fn( tt ); + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << best_cost << " "; + print_perm( permutations.begin(), free_set_size ); + } + + return { tt, permutations, best_cost }; + } + else + { + return { tt, permutations, UINT32_MAX }; + } + } + + /* decrease combinations */ + free_set_size -= offset; + + /* init permutation array */ + std::array perm, best_perm; + std::copy( permutations.begin(), permutations.begin() + num_vars, perm.begin() ); + best_perm = perm; + + /* enumerate combinations */ + if ( free_set_size == 1 ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( 
uint32_t i = offset + 1; i < num_vars; ++i ) + { + std::swap( perm[offset], perm[i] ); + kitty::swap_inplace( tt, offset, i ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + } + else if ( free_set_size == 2 ) + { + for ( uint32_t i = 0; i < num_vars - 1 - offset; ++i ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( uint32_t j = offset + 2; j < num_vars - i; ++j ) + { + std::swap( perm[offset + 1], perm[j] ); + kitty::swap_inplace( tt, offset + 1, j ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + + std::swap( perm[offset], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, offset, num_vars - i - 1 ); + } + } + else if ( free_set_size == 3 ) + { + for ( uint32_t i = 0; i < num_vars - 2 - offset; ++i ) + { + for ( uint32_t j = i; j < num_vars - 2 - offset; ++j ) + { + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { + kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + + for ( uint32_t k = offset + 3; k < num_vars - j; ++k ) + { + std::swap( perm[offset + 2], perm[k] ); + kitty::swap_inplace( tt, offset + 2, k ); + + uint32_t cost = fn( tt ); + if ( cost < best_cost ) + { + best_tt = tt; + best_cost = cost; + best_perm = perm; + } + + if ( verbose ) + { 
+ kitty::print_hex( tt ); + std::cout << " " << cost << " "; + print_perm( perm.begin(), free_set_size + offset ); + } + } + + std::swap( perm[offset + 1], perm[num_vars - j - 1] ); + kitty::swap_inplace( tt, offset + 1, num_vars - j - 1 ); + } + + std::swap( perm[offset], perm[num_vars - i - 1] ); + kitty::swap_inplace( tt, offset, num_vars - i - 1 ); + } + } + + std::vector res_perm( num_vars ); + std::copy( best_perm.begin(), best_perm.begin() + num_vars, res_perm.begin() ); + + return std::make_tuple( best_tt, res_perm, best_cost ); + } + + std::vector compute_isets( uint32_t free_set_size, bool verbose = false ) + { + /* construct isets involved in multiplicity */ + uint32_t isets_support = num_vars - free_set_size; + std::vector isets( best_multiplicity ); + + /* construct isets */ + std::unordered_map column_to_iset; + STT tt = best_tt; + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + + if ( free_set_size == 1 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 32; ++j ) + { + uint64_t val = *it & 0x3; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 2] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 2; + } + + offset ^= 32; + ++it; + } + } + else if ( free_set_size == 2 ) + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 16; ++j ) + { + uint64_t val = *it & 0xF; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 4] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 4; + } + + offset = ( offset + 16 
) % 64; + ++it; + } + } + else /* free set size 3 */ + { + auto it = std::begin( tt ); + for ( auto i = 0u; i < num_blocks; ++i ) + { + for ( auto j = 0; j < 8; ++j ) + { + uint64_t val = *it & 0xFF; + + if ( auto el = column_to_iset.find( val ); el != column_to_iset.end() ) + { + isets[el->second]._bits[i / 8] |= UINT64_C( 1 ) << ( j + offset ); + } + else + { + isets[column_to_iset.size()]._bits[i / 8] |= UINT64_C( 1 ) << ( j + offset ); + column_to_iset[val] = column_to_iset.size(); + } + + *it >>= 8; + } + + offset = ( offset + 8 ) % 64; + ++it; + } + } + + /* extend isets to cover the whole truth table */ + for ( STT& iset : isets ) + { + local_extend_to( iset, isets_support ); + } + + /* save free_set functions */ + std::vector free_set_tts( best_multiplicity ); + + /* TODO: possible conflict */ + for ( auto const& pair : column_to_iset ) + { + free_set_tts[pair.second]._bits[0] = pair.first; + local_extend_to( free_set_tts[pair.second], free_set_size ); + } + + /* print isets and free set*/ + if ( verbose ) + { + std::cout << "iSets\n"; + uint32_t i = 0; + for ( auto iset : isets ) + { + kitty::print_hex( iset ); + std::cout << " of func "; + kitty::print_hex( free_set_tts[i++] ); + std::cout << "\n"; + } + } + + best_free_set_tts = std::move( free_set_tts ); + + return isets; + } + + std::vector generate_decomposition( uint32_t free_set_size ) + { + std::vector res; + + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + ac_decomposition_result dec; + auto tt = best_bound_sets[i]; + auto care = best_care_sets[i]; + + /* compute and minimize support for bound set variables */ + uint32_t k = 0; + for ( uint32_t j = 0; j < num_vars - free_set_size; ++j ) + { + if ( !kitty::has_var( tt, j ) ) + continue; + + if ( !kitty::has_var( tt, care, j ) ) + { + /* fix truth table */ + adjust_truth_table_on_dc( tt, care, j ); + continue; + } + + if ( k < j ) + { + kitty::swap_inplace( tt, k, j ); + kitty::swap_inplace( care, k, j ); + } + dec.support.push_back( 
permutations[free_set_size + j] ); + ++k; + } + + dec.tt = kitty::shrink_to( tt, dec.support.size() ); + res.push_back( dec ); + } + + /* compute the decomposition for the top-level LUT */ + compute_top_lut_decomposition( res, free_set_size ); + + return res; + } + + void compute_top_lut_decomposition( std::vector& res, uint32_t free_set_size ) + { + uint32_t top_vars = best_bound_sets.size() + free_set_size; + assert( top_vars <= ps.lut_size ); + + /* extend bound set functions with free_set_size LSB vars */ + kitty::dynamic_truth_table tt( top_vars ); + + /* compute support */ + res.emplace_back(); + for ( uint32_t i = 0; i < free_set_size; ++i ) + { + res.back().support.push_back( permutations[i] ); + } + + /* create functions for bound set */ + std::vector bound_set_vars; + auto res_it = res.begin(); + uint32_t offset = 0; + for ( uint32_t i = 0; i < best_bound_sets.size(); ++i ) + { + bound_set_vars.emplace_back( top_vars ); + kitty::create_nth_var( bound_set_vars[i], free_set_size + i ); + + /* add bound-set variables to the support, remove buffers */ + if ( res_it->support.size() == 1 ) + { + res.back().support.push_back( res_it->support.front() ); + /* it is a NOT */ + if ( ( res_it->tt._bits[0] & 1 ) == 1 ) + { + bound_set_vars[i] = ~bound_set_vars[i]; + } + /* erase invalidates res_it: continue from the iterator it returns, which refers to the entry that followed the removed one */ + res_it = res.erase( res_it ); + ++offset; + } + else + { + res.back().support.push_back( num_vars + i - offset ); + ++res_it; + } + } + + /* create composition function */ + for ( uint32_t i = 0; i < best_free_set_tts.size(); ++i ) + { + kitty::dynamic_truth_table free_set_tt = kitty::shrink_to( best_free_set_tts[i], top_vars ); + + /* find MUX assignments */ + for ( uint32_t j = 0; j < bound_set_vars.size(); ++j ) + { + /* AND with ONSET or OFFSET */ + if ( ( ( best_iset_onset[j] >> i ) & 1 ) ) + { + free_set_tt &= bound_set_vars[j]; + } + else if ( ( ( best_iset_offset[j] >> i ) & 1 ) ) + { + free_set_tt &= ~bound_set_vars[j]; + } + } + + tt |= free_set_tt; + } + + /* add top-level LUT to result */ + 
res.back().tt = tt; + } + + inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + kitty::swap_inplace( best_tt, i, k ); + ++k; + } + } + + template + void print_perm( Iterator begin, uint32_t free_set ) + { + std::cout << "["; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + if ( i == free_set ) + { + std::cout << ", "; + } + std::cout << *begin << " "; + ++begin; + } + std::cout << "]\n"; + } + + void generate_support_minimization_encodings() + { + uint32_t count = 0; + uint32_t num_combs_exact[4] = { 2, 6, 70, 12870 }; + + /* enable don't cares only if not a power of 2 */ + uint32_t num_combs = 3; + if ( __builtin_popcount( best_multiplicity ) == 1 ) + { + for ( uint32_t i = 0; i < 4; ++i ) + { + if ( ( best_multiplicity >> i ) == 2u ) + { + num_combs = num_combs_exact[i]; + } + } + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + else + { + for ( uint32_t i = 1; i < best_multiplicity; ++i ) + { + num_combs = ( num_combs << 1 ) + num_combs; + } + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + + assert( count == num_combs ); + + /* print combinations */ + // std::cout << "{ "; + // for ( auto const& entry : support_minimization_encodings ) + // { + // std::cout << "{ " << entry[0] << ", " << entry[1] << " }, "; + // } + // std::cout << "}\n"; + } + + template + void generate_support_minimization_encodings_rec( uint64_t onset, uint64_t offset, uint32_t var, uint32_t& count ) + { + if ( var == best_multiplicity ) + { + if constexpr ( !enable_dcset ) + { + /* sets must be equally populated */ + if ( __builtin_popcountl( 
onset ) != __builtin_popcountl( offset ) ) + { + return; + } + } + + support_minimization_encodings[count][0] = onset; + support_minimization_encodings[count][1] = offset; + ++count; + return; + } + + /* move var in DCSET */ + if constexpr ( enable_dcset ) + { + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + } + + /* move var in ONSET */ + onset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + onset &= ~( 1 << var ); + + /* move var in OFFSET */ + offset |= 1 << var; + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + offset &= ~( 1 << var ); + } + + void solve_min_support_exact( std::vector const& isets, uint32_t free_set_size ) + { + std::vector matrix; + matrix.reserve( support_minimization_encodings.size() ); + best_bound_sets.clear(); + + /* create covering matrix */ + if ( !create_covering_matrix( isets, matrix, free_set_size, best_multiplicity > 4 ) ) + { + return; + } + + /* solve the covering problem */ + std::array solution = covering_solve_exact( matrix, 100, ps.max_iter ); + + /* check for failed decomposition */ + if ( solution[0] == UINT32_MAX ) + { + return; + } + + /* compute best bound sets */ + uint32_t num_luts = 1 + solution[4]; + uint32_t num_levels = 2; + uint32_t num_edges = free_set_size + solution[4]; + uint32_t isets_support = num_vars - free_set_size; + best_care_sets.clear(); + best_iset_onset.clear(); + best_iset_offset.clear(); + for ( uint32_t i = 0; i < solution[4]; ++i ) + { + STT tt; + STT care; + + const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0]; + const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1]; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + if ( ( ( onset >> j ) & 1 ) ) + { + tt |= isets[j]; + } + if ( ( ( offset >> j ) & 1 ) ) + { + care |= isets[j]; + } + } + + care |= tt; + num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support 
) - 1 ); + + best_bound_sets.push_back( tt ); + best_care_sets.push_back( care ); + best_iset_onset.push_back( onset ); + best_iset_offset.push_back( offset ); + } + + if ( pst != nullptr ) + { + pst->num_luts = num_luts; + pst->num_levels = num_levels; + pst->num_edges = num_edges; + } + } + + bool create_covering_matrix( std::vector const& isets, std::vector& matrix, uint32_t free_set_size, bool sort ) + { + assert( best_multiplicity < 12 ); + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + uint64_t sol_existance = 0; + uint32_t iset_support = num_vars - free_set_size; + + /* insert dichotomies */ + for ( uint32_t i = 0; i < support_minimization_encodings.size(); ++i ) + { + uint32_t const onset = support_minimization_encodings[i][0]; + uint32_t const offset = support_minimization_encodings[i][1]; + + uint32_t ones_onset = __builtin_popcount( onset ); + uint32_t ones_offset = __builtin_popcount( offset ); + + /* filter columns that do not distinguish pairs */ + if ( ones_onset == 0 || ones_offset == 0 || ones_onset == best_multiplicity || ones_offset == best_multiplicity ) + { + continue; + } + + /* compute function and distinguishable seed dichotomies */ + uint64_t column = 0; + STT tt; + STT care; + uint32_t pair_pointer = 0; + for ( uint32_t j = 0; j < best_multiplicity; ++j ) + { + auto onset_shift = ( onset >> j ); + auto offset_shift = ( offset >> j ); + if ( ( onset_shift & 1 ) ) + { + tt |= isets[j]; + } + + if ( ( offset_shift & 1 ) ) + { + care |= isets[j]; + } + + /* compute included seed dichotomies */ + for ( uint32_t k = j + 1; k < best_multiplicity; ++k ) + { + /* if is are in diffent sets */ + if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) ) + { + column |= UINT64_C( 1 ) << ( pair_pointer ); + } + + ++pair_pointer; + } + } + + care |= tt; + + /* compute cost */ + uint32_t cost = 0; + for ( uint32_t j = 0; j < iset_support; ++j ) + { + cost += has_var_support( tt, care, 
iset_support, j ) ? 1 : 0; + } + + /* discard solutions with support over LUT size */ + if ( cost > ps.lut_size ) + continue; + + if ( cost > 1 ) + { + cost |= 1 << iset_support; + } + + uint32_t sort_cost = cost + ( ( combinations - __builtin_popcountl( column ) ) << num_vars ); + + /* insert */ + matrix.emplace_back( encoding_matrix{ column, cost, i, sort_cost } ); + + sol_existance |= column; + } + + /* necessary condition for the existance of a solution */ + if ( __builtin_popcountl( sol_existance ) != combinations ) + { + return false; + } + + if ( !sort ) + { + return true; + } + + std::sort( matrix.begin(), matrix.end(), [&]( auto const& a, auto const& b ) { + return a.sort_cost < b.sort_cost; + } ); + + /* print */ + // if ( best_multiplicity < 6 ) + // { + // for ( uint32_t i = 0; i < columns.size(); ++i ) + // { + // std::cout << indexes[i] << " " << costs[i] << " \t" << columns[i] << "\n"; + // } + // } + + return true; + } + + template + std::array covering_solve_exact( std::vector& matrix, uint32_t max_iter = 100, int32_t limit = 2000 ) + { + /* last value of res contains the size of the bound set */ + std::array res = { UINT32_MAX }; + uint32_t best_cost = UINT32_MAX; + uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2; + bool looping = true; + + assert( best_multiplicity <= 16 ); + + /* the search below combines 2, 3 or 4 columns; with fewer columns the unsigned bounds matrix.size() - 1/2/3 wrap around and the outer loop never terminates, so report "no solution" (res[0] == UINT32_MAX) right away */ + const uint32_t needed_columns = ( best_multiplicity <= 4 ) ? 2u : ( ( best_multiplicity <= 8 ) ? 3u : 4u ); + res[4] = needed_columns; + if ( matrix.size() < needed_columns ) + { + return res; + } + + /* determine the number of needed loops*/ + if ( best_multiplicity <= 4 ) + { + res[4] = 2; + for ( uint32_t i = 0; i < matrix.size() - 1; ++i ) + { + for ( uint32_t j = 1; j < matrix.size(); ++j ) + { + /* filter by cost */ + if ( matrix[i].cost + matrix[j].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( matrix[i].column | matrix[j].column ) == combinations ) + { + res[0] = i; + res[1] = j; + best_cost = matrix[i].cost + matrix[j].cost; + } + } + } + } + else if ( best_multiplicity <= 8 ) + { + res[4] = 3; + for ( uint32_t i = 0; i < matrix.size() - 2 && looping; ++i ) + { + /* limit */ + if constexpr ( 
limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + for ( uint32_t j = 1; j < matrix.size() - 1 && looping; ++j ) + { + uint64_t current_columns = matrix[i].column | matrix[j].column; + uint32_t current_cost = matrix[i].cost + matrix[j].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + /* bound */ + if ( current_cost >= best_cost ) + { + continue; + } + + for ( uint32_t k = 2; k < matrix.size() && looping; ++k ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + { + looping = false; + } + } + + /* filter by cost */ + if ( current_cost + matrix[k].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( current_columns | matrix[k].column ) == combinations ) + { + res[0] = i; + res[1] = j; + res[2] = k; + best_cost = current_cost + matrix[k].cost; + } + } + } + } + } + else + { + res[4] = 4; + for ( uint32_t i = 0; i < matrix.size() - 3 && looping; ++i ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + for ( uint32_t j = 1; j < matrix.size() - 2 && looping; ++j ) + { + uint64_t current_columns0 = matrix[i].column | matrix[j].column; + uint32_t current_cost0 = matrix[i].cost + matrix[j].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + /* bound */ + if ( current_cost0 >= best_cost ) + { + continue; + } + + for ( uint32_t k = 2; k < matrix.size() - 1 && looping; ++k ) + { + uint64_t current_columns1 = current_columns0 | matrix[k].column; + uint32_t current_cost1 = current_cost0 + matrix[k].cost; + + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit <= 0 || ( best_cost < 
UINT32_MAX && max_iter == 0 ) ) + { + looping = false; + } + } + + /* bound */ + if ( current_cost1 >= best_cost ) + { + continue; + } + + for ( uint32_t t = 3; t < matrix.size() && looping; ++t ) + { + /* limit */ + if constexpr ( limit_iter ) + { + if ( limit-- <= 0 || ( best_cost < UINT32_MAX && max_iter-- == 0 ) ) + { + looping = false; + } + } + + /* filter by cost */ + if ( current_cost1 + matrix[t].cost >= best_cost ) + continue; + + /* check validity */ + if ( __builtin_popcountl( current_columns1 | matrix[t].column ) == combinations ) + { + res[0] = i; + res[1] = j; + res[2] = k; + res[3] = t; + best_cost = current_cost1 + matrix[t].cost; + } + } + } + } + } + } + + return res; + } + + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) + { + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + uint64_t new_bits = *it_tt & *it_care; + *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); + + ++it_tt; + ++it_care; + } + return; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); + tt._bits[i + j + step] = tt._bits[i + j]; + care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; + care._bits[i + j + step] = care._bits[i + j]; + } + } + } + + void local_extend_to( STT& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = 
*tt.begin(); + + for ( auto i = real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) + { + assert( var_index < real_num_vars ); + assert( real_num_vars <= tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::begin( tt._bits ) + num_blocks ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] + & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < num_blocks; i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + + return false; + } + + void get_decomposition_abc( unsigned char *decompArray ) + { + unsigned char *pArray = decompArray; + unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray = dec_result.size(); + pArray++; + + /* write LUTs */ + for ( ac_decomposition_result const& lut : dec_result ) + { + /* write fanin size*/ + *pArray = lut.support.size(); + pArray++; ++bytes; + + /* write support */ + for ( uint32_t i : lut.support ) + { + *pArray = (unsigned char) i; + pArray++; ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( 
lut.tt.num_vars() <= 3 ) ? 1 : ( 1 << ( lut.tt.num_vars() - 3 ) ); + tt_num_bytes = std::min( tt_num_bytes, 8u ); + for ( uint32_t i = 0; i < lut.tt.num_blocks(); ++i ) + { + for ( uint32_t j = 0; j < tt_num_bytes; ++j ) + { + *pArray = (unsigned char) ( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF ); + pArray++; ++bytes; + } + } + } + + /* write numBytes */ + *decompArray = bytes; + } + +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + STT best_tt; + std::vector best_bound_sets; + std::vector best_care_sets; + std::vector best_free_set_tts; + std::vector best_iset_onset; + std::vector best_iset_offset; + std::vector dec_result; + + std::vector> support_minimization_encodings; + + TT tt_start; + uint32_t num_vars; + ac_decomposition_params const& ps; + ac_decomposition_stats* pst; + std::vector permutations; +}; + +} // namespace mockturtle + +#endif // _ACD_H_ \ No newline at end of file diff --git a/src/acd/ac_wrapper.cpp b/src/acd/ac_wrapper.cpp new file mode 100644 index 000000000..b7cee0dd7 --- /dev/null +++ b/src/acd/ac_wrapper.cpp @@ -0,0 +1,69 @@ +// #include "base/main/main.h" +#include "ac_wrapper.h" +#include "ac_decomposition.hpp" + +// ABC_NAMESPACE_IMPL_START + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ) +{ + using namespace mockturtle; + + int num_blocks = ( nVars <= 6 ) ? 
1 : ( 1 << ( nVars - 6 ) ); + + /* translate truth table into static table */ + kitty::dynamic_truth_table tt( nVars ); + for ( int i = 0; i < num_blocks; ++i ) + tt._bits[i] = pTruth[i]; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ac_decomposition_stats st; + + ac_decomposition_impl acd( tt, nVars, ps, &st ); + acd.run( *pdelay ); + int val = acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + *cost = st.num_luts; + + return 0; +} + +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) +{ + using namespace mockturtle; + + int num_blocks = ( nVars <= 6 ) ? 1 : ( 1 << ( nVars - 6 ) ); + + /* translate truth table into static table */ + kitty::dynamic_truth_table tt( nVars ); + for ( int i = 0; i < num_blocks; ++i ) + tt._bits[i] = pTruth[i]; + + ac_decomposition_params ps; + ps.lut_size = lutSize; + ac_decomposition_stats st; + + ac_decomposition_impl acd( tt, nVars, ps, &st ); + acd.run( *pdelay ); + int val = acd.compute_decomposition(); + + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } + + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; +} + +// ABC_NAMESPACE_IMPL_END \ No newline at end of file diff --git a/src/acd/ac_wrapper.h b/src/acd/ac_wrapper.h new file mode 100644 index 000000000..522a60b86 --- /dev/null +++ b/src/acd/ac_wrapper.h @@ -0,0 +1,23 @@ +// #pragma once +#ifndef __ACD_WRAPPER_H_ +#define __ACD_WRAPPER_H_ + +// #include "base/main/main.h" +#include "misc/util/abc_global.h" + +// ABC_NAMESPACE_HEADER_START + +#ifdef __cplusplus +extern "C" { +#endif + +int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); + +#ifdef __cplusplus +} +#endif + +// ABC_NAMESPACE_HEADER_END + +#endif \ No newline at end of file diff 
--git a/src/acd/kitty_algorithm.hpp b/src/acd/kitty_algorithm.hpp new file mode 100644 index 000000000..6460a802c --- /dev/null +++ b/src/acd/kitty_algorithm.hpp @@ -0,0 +1,119 @@ +#ifndef _KITTY_ALGORITHM_H_ +#define _KITTY_ALGORITHM_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Perform bitwise unary operation on truth table + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto unary_operation( const TT& tt, Fn&& op ) +{ + auto result = tt.construct(); + std::transform( tt.cbegin(), tt.cend(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Perform bitwise binary operation on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. + + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a word + + \return new constructed truth table of same type and dimensions + */ +template +auto binary_operation( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + auto result = first.construct(); + std::transform( first.cbegin(), first.cend(), second.cbegin(), result.begin(), op ); + result.mask_bits(); + return result; +} + +/*! \brief Computes a predicate based on two truth tables + + The dimensions of `first` and `second` must match. This is ensured + at compile-time for static truth tables, but at run-time for dynamic + truth tables. 
+ + \param first First truth table + \param second Second truth table + \param op Binary operation that takes as input two words (`uint64_t`) and returns a Boolean + + \return true or false based on the predicate + */ +template +bool binary_predicate( const TT& first, const TT& second, Fn&& op ) +{ + assert( first.num_vars() == second.num_vars() ); + + return std::equal( first.begin(), first.end(), second.begin(), op ); +} + +/*! \brief Assign computed values to bits + + The functor `op` computes bits which are assigned to the bits of the + truth table. + + \param tt Truth table + \param op Unary operation that takes no input and returns a word (`uint64_t`) +*/ +template +void assign_operation( TT& tt, Fn&& op ) +{ + std::generate( tt.begin(), tt.end(), op ); + tt.mask_bits(); +} + +/*! \brief Iterates through each block of a truth table + + The functor `op` is called for every block of the truth table. + + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block( const TT& tt, Fn&& op ) +{ + std::for_each( tt.cbegin(), tt.cend(), op ); +} + +/*! \brief Iterates through each block of a truth table in reverse + order + + The functor `op` is called for every block of the truth table in + reverse order. 
+ + \param tt Truth table + \param op Unary operation that takes as input a word (`uint64_t`) and returns void +*/ +template +void for_each_block_reversed( const TT& tt, Fn&& op ) +{ + std::for_each( tt.crbegin(), tt.crend(), op ); +} + +} // namespace kitty + +#endif // _KITTY_ALGORITHM_H_ \ No newline at end of file diff --git a/src/acd/kitty_constants.hpp b/src/acd/kitty_constants.hpp new file mode 100644 index 000000000..55cfcd650 --- /dev/null +++ b/src/acd/kitty_constants.hpp @@ -0,0 +1,91 @@ +#ifndef _KITTY_CONSTANTS_H_ +#define _KITTY_CONSTANTS_H_ +#pragma once + +#include +#include + +namespace kitty +{ + +namespace detail +{ + +static constexpr uint64_t projections[] = { + UINT64_C( 0xaaaaaaaaaaaaaaaa ), + UINT64_C( 0xcccccccccccccccc ), + UINT64_C( 0xf0f0f0f0f0f0f0f0 ), + UINT64_C( 0xff00ff00ff00ff00 ), + UINT64_C( 0xffff0000ffff0000 ), + UINT64_C( 0xffffffff00000000 ) }; + +static constexpr uint64_t projections_neg[] = { + UINT64_C( 0x5555555555555555 ), + UINT64_C( 0x3333333333333333 ), + UINT64_C( 0x0f0f0f0f0f0f0f0f ), + UINT64_C( 0x00ff00ff00ff00ff ), + UINT64_C( 0x0000ffff0000ffff ), + UINT64_C( 0x00000000ffffffff ) }; + +static constexpr uint64_t masks[] = { + UINT64_C( 0x0000000000000001 ), + UINT64_C( 0x0000000000000003 ), + UINT64_C( 0x000000000000000f ), + UINT64_C( 0x00000000000000ff ), + UINT64_C( 0x000000000000ffff ), + UINT64_C( 0x00000000ffffffff ), + UINT64_C( 0xffffffffffffffff ) }; + +static constexpr uint64_t permutation_masks[][3] = { + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } 
}; + +static constexpr uint64_t ppermutation_masks[][6][3] = { + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x9999999999999999 ), UINT64_C( 0x2222222222222222 ), UINT64_C( 0x4444444444444444 ) }, + { UINT64_C( 0xa5a5a5a5a5a5a5a5 ), UINT64_C( 0x0a0a0a0a0a0a0a0a ), UINT64_C( 0x5050505050505050 ) }, + { UINT64_C( 0xaa55aa55aa55aa55 ), UINT64_C( 0x00aa00aa00aa00aa ), UINT64_C( 0x5500550055005500 ) }, + { UINT64_C( 0xaaaa5555aaaa5555 ), UINT64_C( 0x0000aaaa0000aaaa ), UINT64_C( 0x5555000055550000 ) }, + { UINT64_C( 0xaaaaaaaa55555555 ), UINT64_C( 0x00000000aaaaaaaa ), UINT64_C( 0x5555555500000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xc3c3c3c3c3c3c3c3 ), UINT64_C( 0x0c0c0c0c0c0c0c0c ), UINT64_C( 0x3030303030303030 ) }, + { UINT64_C( 0xcc33cc33cc33cc33 ), UINT64_C( 0x00cc00cc00cc00cc ), UINT64_C( 0x3300330033003300 ) }, + { UINT64_C( 0xcccc3333cccc3333 ), UINT64_C( 0x0000cccc0000cccc ), UINT64_C( 0x3333000033330000 ) }, + { UINT64_C( 0xcccccccc33333333 ), UINT64_C( 0x00000000cccccccc ), UINT64_C( 0x3333333300000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xf00ff00ff00ff00f ), UINT64_C( 0x00f000f000f000f0 ), UINT64_C( 0x0f000f000f000f00 ) }, + { UINT64_C( 0xf0f00f0ff0f00f0f ), UINT64_C( 0x0000f0f00000f0f0 ), UINT64_C( 0x0f0f00000f0f0000 ) }, + { UINT64_C( 0xf0f0f0f00f0f0f0f ), UINT64_C( 0x00000000f0f0f0f0 ), UINT64_C( 0x0f0f0f0f00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 
0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xff0000ffff0000ff ), UINT64_C( 0x0000ff000000ff00 ), UINT64_C( 0x00ff000000ff0000 ) }, + { UINT64_C( 0xff00ff0000ff00ff ), UINT64_C( 0x00000000ff00ff00 ), UINT64_C( 0x00ff00ff00000000 ) } }, + { { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ), UINT64_C( 0x0000000000000000 ) }, + { UINT64_C( 0xffff00000000ffff ), UINT64_C( 0x00000000ffff0000 ), UINT64_C( 0x0000ffff00000000 ) } } }; + +static constexpr int32_t hex_to_int[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; + +} // namespace detail + +} // namespace kitty + +#endif //_KITTY_CONSTANTS_H_ \ No newline at end of file diff --git a/src/acd/kitty_constructors.hpp b/src/acd/kitty_constructors.hpp new file mode 100644 index 000000000..43408b8cc --- /dev/null +++ 
b/src/acd/kitty_constructors.hpp @@ -0,0 +1,92 @@ +#ifndef _KITTY_CONSTRUCT_TT_H_ +#define _KITTY_CONSTRUCT_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! \brief Creates truth table with number of variables + + If some truth table instance is given, one can create a truth table with the + same type by calling the `construct()` method on it. This function helps if + only the number of variables is known and the base type and uniforms the + creation of static and dynamic truth tables. Note, however, that for static + truth tables `num_vars` must be consistent to the number of variables in the + truth table type. + + \param num_vars Number of variables +*/ +template +inline TT create( unsigned num_vars ) +{ + (void)num_vars; + TT tt; + assert( tt.num_vars() == num_vars ); + return tt; +} + +/*! \cond PRIVATE */ +template<> +inline dynamic_truth_table create( unsigned num_vars ) +{ + return dynamic_truth_table( num_vars ); +} +/*! \endcond */ + +/*! \brief Constructs projections (single-variable functions) + + \param tt Truth table + \param var_index Index of the variable, must be smaller than the truth table's number of variables + \param complement If true, realize inverse projection +*/ +template +void create_nth_var( TT& tt, uint8_t var_index, bool complement = false ) +{ + if ( tt.num_vars() <= 6 ) + { + /* assign from precomputed table */ + tt._bits[0] = complement ? ~detail::projections[var_index] : detail::projections[var_index]; + + /* mask if truth table does not require all bits */ + tt.mask_bits(); + return; + } + + if ( var_index < 6 ) + { + std::fill( std::begin( tt._bits ), std::end( tt._bits ), complement ? 
~detail::projections[var_index] : detail::projections[var_index] ); + } + else + { + const auto c = 1 << ( var_index - 6 ); + const auto zero = uint64_t( 0 ); + const auto one = ~zero; + auto block = uint64_t( 0u ); + + while ( block < tt.num_blocks() ) + { + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? one : zero; + } + for ( auto i = 0; i < c; ++i ) + { + tt._bits[block++] = complement ? zero : one; + } + } + } +} + +} // namespace kitty + +#endif // _KITTY_CONSTRUCT_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_dynamic_tt.hpp b/src/acd/kitty_dynamic_tt.hpp new file mode 100644 index 000000000..f3ef0c7d9 --- /dev/null +++ b/src/acd/kitty_dynamic_tt.hpp @@ -0,0 +1,147 @@ +#ifndef _KITTY_DYNAMIC_TT_H_ +#define _KITTY_DYNAMIC_TT_H_ +#pragma once + +#include +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +/*! Truth table in which number of variables is known at runtime. + */ +struct dynamic_truth_table +{ + /*! Standard constructor. + + The number of variables provided to the truth table can be + computed at runtime. However, once the truth table is constructed + its number of variables cannot change anymore. + + The constructor computes the number of blocks and resizes the + vector accordingly. + + \param num_vars Number of variables + */ + explicit dynamic_truth_table( uint32_t num_vars ) + : _bits( ( num_vars <= 6 ) ? 1u : ( 1u << ( num_vars - 6 ) ) ), + _num_vars( num_vars ) + { + } + + /*! Empty constructor. + + Creates an empty truth table. It has 0 variables, but no bits, i.e., it is + different from a truth table for the constant function. This constructor is + only used for convenience, if algorithms require the existence of default + constructable classes. + */ + dynamic_truth_table() : _num_vars( 0 ) {} + + /*! Constructs a new dynamic truth table instance with the same number of variables. 
*/ + inline dynamic_truth_table construct() const + { + return dynamic_truth_table( _num_vars ); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return _num_vars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return _bits.size(); } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return uint64_t( 1 ) << _num_vars; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant reverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table. + + This replaces the current truth table with another truth table. The truth + table type has to be complete. The vector of bits is resized accordingly. 
+ + \param other Other truth table + */ + template + dynamic_truth_table& operator=( const TT& other ) + { + _bits.resize( other.num_blocks() ); + std::copy( other.begin(), other.end(), begin() ); + _num_vars = other.num_vars(); + + if ( _num_vars < 6 ) + { + mask_bits(); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + If the truth table has less than 6 variables, it may not use all + the bits. This operation makes sure to zero out all non-valid + bits. + */ + inline void mask_bits() noexcept + { + if ( _num_vars < 6 ) + { + _bits[0u] &= detail::masks[_num_vars]; + } + } + + /*! \cond PRIVATE */ +public: /* fields */ + std::vector _bits; + uint32_t _num_vars; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_DYNAMIC_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_operations.hpp b/src/acd/kitty_operations.hpp new file mode 100644 index 000000000..fb504489a --- /dev/null +++ b/src/acd/kitty_operations.hpp @@ -0,0 +1,333 @@ +#ifndef _KITTY_OPERATIONS_TT_H_ +#define _KITTY_OPERATIONS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "kitty_algorithm.hpp" +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" + +namespace kitty +{ + +/*! Inverts all bits in a truth table, based on a condition */ +template +inline TT unary_not_if( const TT& tt, bool cond ) +{ +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4146 ) +#endif + const auto mask = -static_cast( cond ); +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + return unary_operation( tt, [mask]( auto a ) + { return a ^ mask; } ); +} + +/*! \brief Inverts all bits in a truth table */ +template +inline TT unary_not( const TT& tt ) +{ + return unary_operation( tt, []( auto a ) + { return ~a; } ); +} + +/*! 
\brief Bitwise AND of two truth tables */ +template + +inline TT binary_and( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_and<>() ); +} + +/*! \brief Bitwise OR of two truth tables */ +template +inline TT binary_or( const TT& first, const TT& second ) +{ + return binary_operation( first, second, std::bit_or<>() ); +} + +/*! \brief Swaps two variables in a truth table + + The function swaps variable `var_index1` with `var_index2`. The + function will change `tt` in-place. If `tt` should not be changed, + one can use `swap` instead. + + \param tt Truth table + \param var_index1 First variable + \param var_index2 Second variable +*/ +template +void swap_inplace( TT& tt, uint8_t var_index1, uint8_t var_index2 ) +{ + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + if ( tt.num_vars() <= 6 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift ); + } + else if ( var_index2 <= 5 ) + { + const auto& pmask = detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::end( tt._bits ), std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) + { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & detail::projections[var_index1] ) >> shift; + const 
auto high_to_low = ( *( it + i + step ) << shift ) & detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::end( tt._bits ) ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } +} + +/*! \brief Extends smaller truth table to larger one + + The most significant variables will not be in the functional support of the + resulting truth table, but the method is helpful to align a truth table when + being used with another one. + + \param tt Larger truth table to create + \param from Smaller truth table to copy from +*/ +template +void extend_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() >= from.num_vars() ); + + if ( from.num_vars() < 6 ) + { + auto mask = *from.begin(); + + for ( auto i = from.num_vars(); i < std::min( 6, tt.num_vars() ); ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( from.cbegin(), from.cend(), it ); + } + } +} + +/*! \brief Extends smaller truth table to larger static one + + This is an out-of-place version of `extend_to_inplace` that has the truth + table as a return value. It only works for creating static truth tables. The + template parameter `NumVars` must be equal or larger to the number of + variables in `from`. 
+ + \param from Smaller truth table to copy from +*/ +template +inline static_truth_table extend_to( const TTFrom& from ) +{ + static_truth_table tt; + extend_to_inplace( tt, from ); + return tt; +} + +/*! \brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + return std::any_of( std::begin( tt._bits ), std::end( tt._bits ), + [var_index]( uint64_t word ) + { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & detail::projections_neg[var_index] ) != + ( word & detail::projections_neg[var_index] ); } ); + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( tt._bits[i + j] != tt._bits[i + j + step] ) + { + return true; + } + } + } + return false; +} + +/*! 
\brief Checks whether truth table depends on given variable index + + \param tt Truth table + \param care Care set + \param var_index Variable index +*/ +template +bool has_var( const TT& tt, const TT& care, uint8_t var_index ) +{ + assert( var_index < tt.num_vars() ); + assert( tt.num_vars() == care.num_vars() ); + + if ( tt.num_vars() <= 6 || var_index < 6 ) + { + auto it_tt = std::begin( tt._bits ); + auto it_care = std::begin( care._bits ); + while ( it_tt != std::end( tt._bits ) ) + { + if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & detail::projections_neg[var_index] + & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) + { + return true; + } + ++it_tt; + ++it_care; + } + + return false; + } + + const auto step = 1 << ( var_index - 6 ); + for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + { + for ( auto j = 0; j < step; ++j ) + { + if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) + { + return true; + } + } + } + return false; +} + +/*! \brief Shrinks larger truth table to smaller one + + The function expects that the most significant bits, which are cut off, are + not in the functional support of the original function. Only then it is + ensured that the resulting function is equivalent. + + \param tt Smaller truth table to create + \param from Larger truth table to copy from +*/ +template +void shrink_to_inplace( TT& tt, const TTFrom& from ) +{ + assert( tt.num_vars() <= from.num_vars() ); + + std::copy( from.begin(), from.begin() + tt.num_blocks(), tt.begin() ); + + if ( tt.num_vars() < 6 ) + { + tt.mask_bits(); + } +} + +/*! \brief Shrinks larger truth table to smaller dynamic one + + This is an out-of-place version of `shrink_to_inplace` that has the truth table as a + return value. It only works for creating dynamic tables. The parameter + `num_vars` must be equal to or smaller than the number of variables in `from`. 
+ + \param from Larger truth table to copy from +*/ +template +inline dynamic_truth_table shrink_to( const TTFrom& from, unsigned num_vars ) +{ + auto tt = create( num_vars ); + shrink_to_inplace( tt, from ); + return tt; +} + +/*! \brief Prints truth table in hexadecimal representation + + The most-significant bit will be the first character of the string. + + \param tt Truth table + \param os Output stream +*/ +template +void print_hex( const TT& tt, std::ostream& os = std::cout ) +{ + auto const chunk_size = + std::min( tt.num_vars() <= 1 ? 1 : ( tt.num_bits() >> 2 ), 16 ); + + for_each_block_reversed( tt, [&os, chunk_size]( auto word ) + { + std::string chunk( chunk_size, '0' ); + + auto it = chunk.rbegin(); + while (word && it != chunk.rend()) { + auto hex = word & 0xf; + if (hex < 10) { + *it = '0' + static_cast(hex); + } else { + *it = 'a' + static_cast(hex - 10); + } + ++it; + word >>= 4; + } + os << chunk; } ); +} + +} //namespace kitty + +#endif // _KITTY_OPERATIONS_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_operators.hpp b/src/acd/kitty_operators.hpp new file mode 100644 index 000000000..cf973ebe0 --- /dev/null +++ b/src/acd/kitty_operators.hpp @@ -0,0 +1,86 @@ +#ifndef _KITTY_OPERATORS_TT_H_ +#define _KITTY_OPERATORS_TT_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_static_tt.hpp" +#include "kitty_operations.hpp" + +namespace kitty +{ + +/*! \brief Operator for unary_not */ +inline dynamic_truth_table operator~( const dynamic_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! \brief Operator for unary_not */ +template +inline static_truth_table operator~( const static_truth_table& tt ) +{ + return unary_not( tt ); +} + +/*! \brief Operator for binary_and */ +inline dynamic_truth_table operator&( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*! 
\brief Operator for binary_and */ +template +inline static_truth_table operator&( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +inline void operator&=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_and and assign */ +template +inline void operator&=( static_truth_table& first, const static_truth_table& second ) +{ + first = binary_and( first, second ); +} + +/*! \brief Operator for binary_or */ +inline dynamic_truth_table operator|( const dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or */ +template +inline static_truth_table operator|( const static_truth_table& first, const static_truth_table& second ) +{ + return binary_or( first, second ); +} + +/*! \brief Operator for binary_or and assign */ +inline void operator|=( dynamic_truth_table& first, const dynamic_truth_table& second ) +{ + first = binary_or( first, second ); +} + +/*! \brief Operator for binary_or and assign */ +template +inline void operator|=( static_truth_table& first, const static_truth_table& second ) +{ + first = binary_or( first, second ); +} + +} // namespace kitty + +#endif // _KITTY_OPERATORS_TT_H_ \ No newline at end of file diff --git a/src/acd/kitty_static_tt.hpp b/src/acd/kitty_static_tt.hpp new file mode 100644 index 000000000..61593f3ff --- /dev/null +++ b/src/acd/kitty_static_tt.hpp @@ -0,0 +1,131 @@ +#ifndef _KITTY_STATIC_TT_H_ +#define _KITTY_STATIC_TT_H_ +#pragma once + +#include +#include + +#include "kitty_constants.hpp" + +namespace kitty +{ + +template +struct static_truth_table +{ + /*! \cond PRIVATE */ + enum + { + NumBlocks = ( NumVars <= 6 ) ? 1u : ( 1u << ( NumVars - 6 ) ) + }; + + enum + { + NumBits = uint64_t( 1 ) << NumVars + }; + /*! \endcond */ + + /*! 
Standard constructor. + + The number of variables provided to the truth table must be known + at compile time. The number of blocks will be computed as a compile + time constant. + */ + static_truth_table() + { + _bits.fill( 0 ); + } + + /*! Constructs a new static truth table instance with the same number of variables. */ + inline static_truth_table construct() const + { + return static_truth_table(); + } + + /*! Returns number of variables. + */ + inline auto num_vars() const noexcept { return NumVars; } + + /*! Returns number of blocks. + */ + inline auto num_blocks() const noexcept { return NumBlocks; } + + /*! Returns number of bits. + */ + inline auto num_bits() const noexcept { return NumBits; } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() noexcept { return _bits.end(); } + + /*! \brief Begin iterator to bits. + */ + inline auto begin() const noexcept { return _bits.begin(); } + + /*! \brief End iterator to bits. + */ + inline auto end() const noexcept { return _bits.end(); } + + /*! \brief Reverse begin iterator to bits. + */ + inline auto rbegin() noexcept { return _bits.rbegin(); } + + /*! \brief Reverse end iterator to bits. + */ + inline auto rend() noexcept { return _bits.rend(); } + + /*! \brief Constant begin iterator to bits. + */ + inline auto cbegin() const noexcept { return _bits.cbegin(); } + + /*! \brief Constant end iterator to bits. + */ + inline auto cend() const noexcept { return _bits.cend(); } + + /*! \brief Constant reverse begin iterator to bits. + */ + inline auto crbegin() const noexcept { return _bits.crbegin(); } + + /*! \brief Constant reverse end iterator to bits. + */ + inline auto crend() const noexcept { return _bits.crend(); } + + /*! \brief Assign other truth table if number of variables match. + + This replaces the current truth table with another truth table, if `other` + has the same number of variables. 
Otherwise, the truth table is not + changed. + + \param other Other truth table + */ + template + static_truth_table& operator=( const TT& other ) + { + if ( other.num_bits() == num_bits() ) + { + std::copy( other.begin(), other.end(), begin() ); + } + + return *this; + } + + /*! Masks the number of valid truth table bits. + + We know that we will have at least 7 variables in this data + structure. + */ + inline void mask_bits() noexcept {} + + /*! \cond PRIVATE */ +public: /* fields */ + std::array _bits; + /*! \endcond */ +}; + +} //namespace kitty + +#endif // _KITTY_STATIC_TT_H_ \ No newline at end of file diff --git a/src/acd/module.make b/src/acd/module.make new file mode 100644 index 000000000..b245d2c42 --- /dev/null +++ b/src/acd/module.make @@ -0,0 +1 @@ +SRC += src/acd/ac_wrapper.cpp diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index c8e2b1ef8..33b85e0bf 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19447,7 +19447,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyzuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYDEWSqaflepmrsdbgxyuojiktnczvh" ) ) != EOF ) { switch ( c ) { @@ -19652,9 +19652,6 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'y': pPars->fUserRecLib ^= 1; break; - case 'z': - pPars->fUserLutDec ^= 1; - break; case 'u': pPars->fUserSesLib ^= 1; break; @@ -19679,6 +19676,9 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) case 'c': pPars->fUseTtPerm ^= 1; break; + case 'z': + pPars->fAcd ^= 1; + break; case 'v': pPars->fVerbose ^= 1; break; @@ -19810,7 +19810,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->pLutLib = NULL; } // modify for delay optimization - if ( pPars->fDelayOpt || pPars->fDsdBalance || 
pPars->fDelayOptLut || pPars->fUserLutDec ) + if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fAcd ) { pPars->fTruth = 1; pPars->fCutMin = 1; @@ -19956,7 +19956,7 @@ usage: sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyzuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXY num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktnczvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -19985,7 +19985,6 @@ usage: Abc_Print( -2, "\t-g : toggles delay optimization by SOP balancing [default = %s]\n", pPars->fDelayOpt? "yes": "no" ); Abc_Print( -2, "\t-x : toggles delay optimization by DSD balancing [default = %s]\n", pPars->fDsdBalance? "yes": "no" ); Abc_Print( -2, "\t-y : toggles delay optimization with recorded library [default = %s]\n", pPars->fUserRecLib? "yes": "no" ); - Abc_Print( -2, "\t-z : toggles delay optimization with LUT decomposition [default = %s]\n", pPars->fUserLutDec? "yes": "no" ); Abc_Print( -2, "\t-u : toggles delay optimization with SAT-based library [default = %s]\n", pPars->fUserSesLib? "yes": "no" ); Abc_Print( -2, "\t-o : toggles using buffers to decouple combinational outputs [default = %s]\n", pPars->fUseBuffs? "yes": "no" ); Abc_Print( -2, "\t-j : toggles enabling additional check [default = %s]\n", pPars->fEnableCheck07? "yes": "no" ); @@ -19994,6 +19993,7 @@ usage: Abc_Print( -2, "\t-t : toggles optimizing average rather than maximum level [default = %s]\n", pPars->fDoAverage? "yes": "no" ); Abc_Print( -2, "\t-n : toggles computing DSDs of the cut functions [default = %s]\n", pPars->fUseDsd? 
"yes": "no" ); Abc_Print( -2, "\t-c : toggles computing truth tables in a new way [default = %s]\n", pPars->fUseTtPerm? "yes": "no" ); + Abc_Print( -2, "\t-z : toggles using ACD decomposition [default = %s]\n", pPars->fAcd? "yes": "no" ); Abc_Print( -2, "\t-v : toggles verbose output [default = %s]\n", pPars->fVerbose? "yes": "no" ); Abc_Print( -2, "\t-h : prints the command usage\n"); return 1; diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index e92a2282e..079cd0066 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -116,7 +116,7 @@ Abc_Ntk_t * Abc_NtkIf( Abc_Ntk_t * pNtk, If_Par_t * pPars ) pPars->pTimesReq = Abc_NtkGetCoRequiredFloats(pNtk); // update timing info to reflect logic level - if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec) && pNtk->pManTime ) + if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fAcd) && pNtk->pManTime ) { int c; if ( pNtk->AndGateDelay == 0.0 ) @@ -427,28 +427,143 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC } /**Function************************************************************* + Synopsis [Implements decomposed LUT-structure of the cut.] + Description [] + + SideEffects [] + SeeAlso [] + ***********************************************************************/ + void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop ) + { + extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory ); + assert( !pIfMan->pPars->fUseTtPerm ); - Synopsis [Implements decomposed LUT-structure of the cut.] 
- - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover ) -{ // get the truth table + word * pTruth = If_CutTruthW(pIfMan, pCutBest); + int v; + If_Obj_t * pIfLeaf; + + if ( pCutBest->nLeaves <= 6 ) + { + /* add fanins */ + If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, v ) + Abc_ObjAddFanin( pNodeTop, (Abc_Obj_t *)If_ObjCopy( pIfLeaf ) ); + + pNodeTop->Level = Abc_ObjLevelNew( pNodeTop ); + + pNodeTop->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover ); + return; + } + + // get the delay profile + unsigned delayProfile = pCutBest->acdDelay; + + // If_Obj_t * pLeaf; + // int i, leafDelay; + // int DelayMax = -1, nLeafMax = 0; + // unsigned uLeafMask = 0; + + // If_CutForEachLeaf( pIfMan, pCutBest, pLeaf, i ) + // { + // leafDelay = If_ObjCutBest(pLeaf)->Delay; + + // if ( DelayMax < leafDelay ) + // { + // DelayMax = leafDelay; + // nLeafMax = 1; + // uLeafMask = (1 << i); + // } + // else if ( DelayMax == leafDelay ) + // { + // nLeafMax++; + // uLeafMask |= (1 << i); + // } + // } + // perform LUT-decomposition and return the LUT-structure + unsigned char decompArray[92]; + int val = acd_decompose( pTruth, pCutBest->nLeaves, 6, &(delayProfile), decompArray ); + + assert( val == 0 ); + // assert( DelayMax + 2 >= pCutBest->Delay ); + // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t + unsigned char bytes_check = decompArray[0]; + assert( bytes_check <= 92 ); + + int byte_p = 2; + unsigned char i, j, k, num_fanins, num_words, num_bytes; + int level, fanin; + word *tt; + Abc_Obj_t *pNewNodes[6]; /* one slot per LUT: the assert below admits up to 6 (2 + 6*(1+6+8) = 92 bytes) */ + + /* create intermediate LUTs*/ + assert( decompArray[1] - 1 <= 5 ); + Abc_Obj_t * pFanin; + for ( i = 0; i < decompArray[1]; ++i ) + { + if ( i < decompArray[1] - 1 ) + { + pNewNodes[i] = 
Abc_NtkCreateNode( pNtkNew ); + } + else + { + pNewNodes[i] = pNodeTop; + } + num_fanins = decompArray[byte_p++]; + level = 0; + for ( j = 0; j < num_fanins; ++j ) + { + fanin = (int)decompArray[byte_p++]; + if ( fanin < If_CutLeaveNum(pCutBest) ) + { + pFanin = (Abc_Obj_t *)If_ObjCopy( If_CutLeaf(pIfMan, pCutBest, fanin) ); + } + else + { + assert( fanin - If_CutLeaveNum(pCutBest) < i ); + pFanin = pNewNodes[fanin - If_CutLeaveNum(pCutBest)]; + } + Abc_ObjAddFanin( pNewNodes[i], pFanin ); + level = Abc_MaxInt( level, Abc_ObjLevel(pFanin) ); + } + + pNewNodes[i]->Level = level + (int)(Abc_ObjFaninNum(pNewNodes[i]) > 0); + + /* extract the truth table */ + tt = pIfMan->puTempW; + num_words = ( num_fanins <= 6 ) ? 1 : ( 1 << ( num_fanins - 6 ) ); + num_bytes = ( num_fanins <= 3 ) ? 1 : ( 1 << ( Abc_MinInt( (int)num_fanins, 6 ) - 3 ) ); + for ( j = 0; j < num_words; ++j ) + { + tt[j] = 0; + for ( k = 0; k < num_bytes; ++k ) + { + tt[j] |= ( (word)(decompArray[byte_p++]) ) << ( k << 3 ); + } + } + + /* extend truth table if size < 5 */ + assert( num_fanins != 1 ); + if ( num_fanins == 2 ) + { + tt[0] |= tt[0] << 4; + } + while ( num_bytes < 4 ) + { + tt[0] |= tt[0] << ( num_bytes << 3 ); + num_bytes <<= 1; + } + + /* add node data */ + pNewNodes[i]->pData = Kit_TruthToHop( (Hop_Man_t *)pNtkNew->pManFunc, (unsigned *)tt, (int) num_fanins, vCover ); + } + + /* check correct read */ + assert( byte_p == decompArray[0] ); // this is a placeholder, which takes the truth table and converts it into an AIG without LUT-decomposition - extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory ); - word * pTruth = If_CutTruthW(pIfMan, pCutBest); - assert( !pIfMan->pPars->fUseTtPerm ); - return Kit_TruthToHop( (Hop_Man_t *)pMan, (unsigned *)pTruth, If_CutLeaveNum(pCutBest), vCover ); -} + } /**Function************************************************************* @@ -488,13 +603,18 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, 
If_Man_t * pIfMan, If_Obj_t pNodeNew = Abc_NtkCreateNode( pNtkNew ); // if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays ) if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm && - !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->nGateSize ) + !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fAcd && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) If_CutRotatePins( pIfMan, pCutBest ); if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv ) { If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i ) Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) ); } + else if ( pIfMan->pPars->fAcd ) + { + If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) + Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover); + } else { If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) @@ -548,10 +668,10 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t extern Hop_Obj_t * Abc_RecToHop3( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj ); pNodeNew->pData = Abc_RecToHop3( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj ); } - else if ( pIfMan->pPars->fUserLutDec ) + else if ( pIfMan->pPars->fAcd ) { - extern Hop_Obj_t * Abc_DecRecordToHop( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory ); - pNodeNew->pData = Abc_DecRecordToHop( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj, vCover ); + extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop ); + Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew ); } else { diff --git a/src/map/if/if.h b/src/map/if/if.h index 93cb0f6ca..156e8679f 100644 --- a/src/map/if/if.h +++ 
b/src/map/if/if.h @@ -40,6 +40,7 @@ #include "opt/dau/dau.h" #include "misc/vec/vecHash.h" #include "misc/vec/vecWec.h" +#include "acd/ac_wrapper.h" ABC_NAMESPACE_HEADER_START @@ -126,7 +127,6 @@ struct If_Par_t_ int fDsdBalance; // special delay optimization int fUserRecLib; // use recorded library int fUserSesLib; // use SAT-based synthesis - int fUserLutDec; // use LUT-based decomposition int fBidec; // use bi-decomposition int fUse34Spec; // use specialized matching int fUseBat; // use one specialized feature @@ -146,6 +146,7 @@ struct If_Par_t_ int fDeriveLuts; // enables deriving LUT structures int fDoAverage; // optimize average rather than maximum level int fHashMapping; // perform AIG hashing after mapping + int fAcd; // use ACD (Ashenhurst-Curtis decomposition) during mapping int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure @@ -280,6 +281,7 @@ struct If_Man_t_ int pDumpIns[16]; Vec_Str_t * vMarks; Vec_Int_t * vVisited2; + int useLimitAdc; // when set, ACD mapping skips merged cuts with more than 6 leaves // timing manager Tim_Man_t * pManTim; @@ -303,6 +305,7 @@ struct If_Cut_t_ int iCutFunc; // TT ID of the cut int uMaskFunc; // polarity bitmask unsigned uSign; // cut signature + unsigned acdDelay; // ACD pin-delay bitmask: bit i set = leaf i has delay 1, clear = delay 2 unsigned Cost : 12; // the user's cost of the cut (related to IF_COST_MAX) unsigned fCompl : 1; // the complemented attribute unsigned fUser : 1; // using the user's area and delay @@ -552,6 +555,7 @@ extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern float If_CutDelayLutStruct( If_Man_t * p, If_Cut_t * pCut, char * pStr, float WireDelay ); +// extern int If_CutPerformAcd( If_Man_t * p, unsigned nVars, int lutSize, unsigned * pdelay, int use_late_arrival, unsigned * cost ); extern int If_CluCheckExt( void * p, word * 
pTruth, int nVars, int nLutLeaf, int nLutRoot, char * pLut0, char * pLut1, word * pFunc0, word * pFunc1 ); extern int If_CluCheckExt3( void * p, word * pTruth, int nVars, int nLutLeaf, int nLutLeaf2, int nLutRoot, @@ -566,6 +570,9 @@ extern int If_CutSopBalancePinDelaysInt( Vec_Int_t * vCover, int * p extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); +extern int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ); +extern int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ); +extern float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ extern If_DsdMan_t * If_DsdManAlloc( int nVars, int nLutSize ); extern void If_DsdManAllocIsops( If_DsdMan_t * p, int nLutSize ); @@ -693,6 +700,8 @@ extern int If_ManCountSpecialPos( If_Man_t * p ); extern void If_CutTraverse( If_Man_t * p, If_Obj_t * pRoot, If_Cut_t * pCut, Vec_Ptr_t * vNodes ); extern void If_ObjPrint( If_Obj_t * pObj ); +extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost ); +extern int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index c03061af2..a8e482912 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -62,6 +62,7 @@ void If_ManSetDefaultPars( If_Par_t * pPars ) pPars->fPower = 0; pPars->fCutMin = 0; pPars->fBidec = 0; + pPars->fAcd = 0; pPars->fVerbose = 0; } @@ -106,9 +107,16 @@ int If_ManPerformMappingComb( If_Man_t * p ) If_Obj_t * pObj; abctime clkTotal = Abc_Clock(); int i; + p->useLimitAdc = 1; + //p->vVisited2 = Vec_IntAlloc( 100 ); //p->vMarks = Vec_StrStart( If_ManObjNum(p) ); + // if ( p->pPars->fAcd ) + // { 
+ // p->pPars->nLutSize = 6; + // } + // set arrival times and fanout estimates If_ManForEachCi( p, pObj, i ) { @@ -121,6 +129,16 @@ int If_ManPerformMappingComb( If_Man_t * p ) { // map for delay If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 1, "Delay" ); + + if ( p->pPars->fAcd ) + { + // p->pPars->nLutSize = oldLutSize; + p->useLimitAdc = 0; + If_ManPerformMappingRound( p, p->pPars->nCutsMax, 0, 1, 0, "Delay" ); + p->useLimitAdc = 1; + // p->pPars->nLutSize = 6; + } + // map for delay second option p->pPars->fFancy = 1; If_ManResetOriginalRefs( p ); diff --git a/src/map/if/ifCut.c b/src/map/if/ifCut.c index f4f72d1c8..8d1cccba0 100644 --- a/src/map/if/ifCut.c +++ b/src/map/if/ifCut.c @@ -604,10 +604,6 @@ static inline int If_ManSortCompare( If_Man_t * p, If_Cut_t * pC0, If_Cut_t * pC return -1; if ( pC0->nLeaves > pC1->nLeaves ) return 1; - if ( pC0->Delay < pC1->Delay - p->fEpsilon ) - return -1; - if ( pC0->Delay > pC1->Delay + p->fEpsilon ) - return 1; if ( pC0->fUseless < pC1->fUseless ) return -1; if ( pC0->fUseless > pC1->fUseless ) @@ -765,7 +761,7 @@ void If_CutSort( If_Man_t * p, If_Set_t * pCutSet, If_Cut_t * pCut ) if ( !pCut->fUseless && (p->pPars->fUseDsd || p->pPars->pFuncCell2 || p->pPars->fUseBat || - p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || p->pPars->fEnableCheck07 || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->fUseDsdTune || p->pPars->fEnableCheck75 || p->pPars->fEnableCheck75u || p->pPars->fUseCheck1 || p->pPars->fUseCheck2) ) { diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index cb25e767e..fcd53e348 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -411,6 +411,132 @@ int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) return DelayMax + 2; } } + +int If_AcdEval( If_Man_t * p, If_Cut_t * pCut, int best_delay ) +{ + 
pCut->fUser = 1; + pCut->Cost = pCut->nLeaves > 1 ? 1 : 0; + pCut->acdDelay = 0; + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + // int LutSize = p->pPars->pLutStruct[0] - '0'; + int LutSize = 6; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + + if ( DelayMax < leaf_delay ) + { + DelayMax = leaf_delay; + nLeafMax = 1; + uLeafMask = (1 << i); + } + else if ( DelayMax == leaf_delay ) + { + nLeafMax++; + uLeafMask |= (1 << i); + } + } + if ( If_CutLeaveNum(pCut) <= LutSize ) + { + pCut->acdDelay = ( 1 << LutSize ) - 1; + return DelayMax + 1; + } + // else if ( DelayMax + 1 >= best_delay ) + // { + // return DelayMax + 2; + // } + + /* compute the decomposition */ + int use_late_arrival = DelayMax + 2 >= best_delay; + unsigned cost = 1; + + /* TODO: have checks based on delay */ + if ( use_late_arrival && nLeafMax > LutSize / 2 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + /* remove from critical set */ + if ( !use_late_arrival ) + uLeafMask = 0; + + + word *pTruth = If_CutTruthW( p, pCut ); + int val = acd_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost ); + + /* not feasible decomposition */ + pCut->acdDelay = uLeafMask; + if ( val < 0 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + pCut->Cost = cost; + + return DelayMax + ( use_late_arrival ? 
1 : 2 ); +} + +int If_AcdReEval( If_Man_t * p, If_Cut_t * pCut ) +{ + // pCut->fUser = 1; + + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + // int LutSize = p->pPars->pLutStruct[0] - '0'; + int LutSize = 6; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + leaf_delay += ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; + DelayMax = Abc_MaxInt( leaf_delay, DelayMax ); + } + + return DelayMax; +} + +float If_AcdLeafProp( If_Man_t * p, If_Cut_t * pCut, int i, float required ) +{ + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return required; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return 0; + } + + return ( ( pCut->acdDelay >> i ) & 1 ) == 0 ? 2 : 1; +} + /* int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ) { diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index 4a5210e92..da83b5525 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -148,32 +148,6 @@ int * If_CutArrTimeProfile( If_Man_t * p, If_Cut_t * pCut ) return p->pArrTimeProfile; } - -/**Function************************************************************* - - Synopsis [Returns the node's delay if its cut it LUT-decomposed.] 
- - Description [] - - SideEffects [] - - SeeAlso [] - -***********************************************************************/ -int If_CutDelayLutDec( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj ) -{ - // get the truth table - // get the cut leaves' arrival times - // run LUT-decomposition in the evaluation mode - // return expected arrival time at the output - - // this is a placeholder code, which is assume the cut has unit delay - int i, ArrTimes = 0; - for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) - ArrTimes = Abc_MaxInt( ArrTimes, (int)If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay ); - return ArrTimes + 1; -} - /**Function************************************************************* Synopsis [Finds the best cut for the given node.] @@ -192,7 +166,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep If_Cut_t * pCut0R, * pCut1R; int fFunc0R, fFunc1R; int i, k, v, iCutDsd, fChange; - int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fAcd || p->pPars->fUseDsdTune || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->pLutStruct || p->pPars->pFuncCell2 || p->pPars->fUseCheck1 || p->pPars->fUseCheck2; int fUseAndCut = (p->pPars->nAndDelay > 0) || (p->pPars->nAndArea > 0); assert( !If_ObjIsAnd(pObj->pFanin0) || pObj->pFanin0->pCutSet->nCuts > 0 ); @@ -234,8 +208,10 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); + else if ( p->pPars->fAcd ) + { + pCut->Delay = If_AcdReEval( p, pCut ); + } else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 
0 ) @@ -292,6 +268,8 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep if ( !If_CutMergeOrdered( p, pCut0, pCut1, pCut ) ) continue; } + if ( p->pPars->fAcd && p->useLimitAdc && pCut->nLeaves > 6 ) + continue; if ( pObj->fSpec && pCut->nLeaves == (unsigned)p->pPars->nLutSize ) continue; p->nCutsMerged++; @@ -450,7 +428,12 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep else if ( p->pPars->fDsdBalance ) pCut->Delay = If_CutDsdBalanceEval( p, pCut, NULL ); else if ( p->pPars->fUserRecLib ) - pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + pCut->Delay = If_CutDelayRecCost3( p, pCut, pObj ); + else if ( p->pPars->fAcd ) + { + pCut->Delay = If_AcdEval( p, pCut, fFirst ? ABC_INFINITY : (int) If_ObjCutBest(pObj)->Delay ); + pCut->fUseless = pCut->Delay == ABC_INFINITY; + } else if ( p->pPars->fUserSesLib ) { int Cost = 0; @@ -464,8 +447,6 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) - pCut->Delay = If_CutDelayLutDec( p, pCut, pObj ); else if ( p->pPars->fDelayOptLut ) pCut->Delay = If_CutLutBalanceEval( p, pCut ); else if( p->pPars->nGateSize > 0 ) @@ -537,7 +518,7 @@ void If_ObjPerformMappingChoice( If_Man_t * p, If_Obj_t * pObj, int Mode, int fP If_Set_t * pCutSet; If_Obj_t * pTemp; If_Cut_t * pCutTemp, * pCut; - int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUse34Spec; + int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fAcd; assert( pObj->pEquiv != NULL ); // prepare diff --git a/src/map/if/ifTime.c b/src/map/if/ifTime.c index 9ceef1475..9bce5bc43 100644 --- a/src/map/if/ifTime.c +++ b/src/map/if/ifTime.c @@ -211,6 +211,12 @@ void 
If_CutPropagateRequired( If_Man_t * p, If_Obj_t * pObj, If_Cut_t * pCut, fl pLeaf->Required = IF_MIN( pLeaf->Required, Required - pLutDelays[0] ); } } + else if ( p->pPars->fAcd ) + { + Required = ObjRequired; + If_CutForEachLeaf( p, pCut, pLeaf, i ) + pLeaf->Required = IF_MIN( pLeaf->Required, Required - If_AcdLeafProp( p, pCut, i, ObjRequired ) ); + } else { if ( pCut->fUser )