From da0f14f5fe40eefa65a7eb536a26353683e943cd Mon Sep 17 00:00:00 2001 From: Lakira Ashley Date: Sun, 27 Nov 2022 23:22:51 +1030 Subject: [PATCH] Add bitstream compression option Signed-off-by: Lakira Ashley --- CMakeLists.txt | 4 +- Makefile | 5 +- lib/include/prjxray/xilinx/configuration.h | 257 +++++++++++++++++---- lib/xilinx/configuration.cc | 108 +++++++-- tools/xc7frames2bit.cc | 3 +- utils/fasm2frames.py | 2 +- 6 files changed, 311 insertions(+), 68 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ec89c6b..5fa4b525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.5.0) project(prjxray) option(PRJXRAY_BUILD_TESTING "" OFF) +set(CMAKE_CXX_STANDARD 14) + # Add sanitizers-cmake package set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/third_party/sanitizers-cmake/cmake" ${CMAKE_MODULE_PATH}) find_package(Sanitizers) @@ -36,8 +38,6 @@ target_include_directories(yaml-cpp PUBLIC $ ) -# Set the CXX standard and compile time for our code only. -set(CMAKE_CXX_STANDARD 14) add_compile_options(-Wall -Werror) add_subdirectory(lib) diff --git a/Makefile b/Makefile index d66c2c10..79529bb3 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ SHELL = bash ALL_EXCLUDE = third_party .git env build docs/env INSTALL_DIR ?= +ENV_DIR ?= env # Skip this check if the ALLOW_ROOT var is defined # E.g. when running in GH action custom runners CI @@ -20,9 +21,9 @@ endif endif # Tools + Environment -IN_ENV = if [ -e env/bin/activate ]; then . env/bin/activate; fi; source utils/environment.python.sh; +IN_ENV = if [ -e $(ENV_DIR)/bin/activate ]; then . $(ENV_DIR)/bin/activate; fi; source utils/environment.python.sh; env: - python3 -mvenv env + python3 -mvenv $(ENV_DIR) # Install project dependencies $(IN_ENV) python -mpip install -r requirements.txt # Install project's documentation dependencies diff --git a/lib/include/prjxray/xilinx/configuration.h b/lib/include/prjxray/xilinx/configuration.h index 0a883547..ff08a390 100644 --- a/lib/include/prjxray/xilinx/configuration.h +++ b/lib/include/prjxray/xilinx/configuration.h @@ -27,7 +27,16 @@ class Configuration { public: using FrameMap = std::map>; - using PacketData = std::vector; + + struct PacketData { + struct Frame { + typename ArchType::FrameAddress address; + std::vector repeats; + std::vector data; + }; + + std::vector frames; + }; // Returns a configuration, i.e. collection of frame addresses // and corresponding data from a collection of configuration packets. @@ -50,7 +59,8 @@ class Configuration { // which allows for bigger payload compared to type 1. static PacketData createType2ConfigurationPacketData( const typename Frames::Frames2Data& frames, - absl::optional& part); + absl::optional& part, + bool compressed = false); Configuration(const typename ArchType::Part& part, std::map typename Configuration::PacketData Configuration::createType2ConfigurationPacketData( const typename Frames::Frames2Data& frames, - absl::optional& part) { - PacketData packet_data; - // Certain configuration frames blocks are separated by Zero Frames, - // i.e. frames with words with all zeroes. For Series-7, US and US+ - // there zero frames separator consists of two frames. - static const int kZeroFramesSeparatorWords = - ArchType::words_per_frame * 2; - for (auto& frame : frames) { - std::copy(frame.second.begin(), frame.second.end(), - std::back_inserter(packet_data)); + absl::optional& part, + bool compressed) { + PacketData result; + if (!compressed) { + result.frames.push_back(typename PacketData::Frame{0U, {}, {}}); + std::vector& packet_data = result.frames.back().data; + // Certain configuration frames blocks are separated by Zero + // Frames, i.e. frames with words with all zeroes. For Series-7, + // US and US+ there zero frames separator consists of two + // frames. + static const int kZeroFramesSeparatorWords = + ArchType::words_per_frame * 2; + for (auto& frame : frames) { + std::copy(frame.second.begin(), frame.second.end(), + std::back_inserter(packet_data)); - auto next_address = part->GetNextFrameAddress(frame.first); - if (next_address && - (next_address->block_type() != frame.first.block_type() || - next_address->is_bottom_half_rows() != - frame.first.is_bottom_half_rows() || - next_address->row() != frame.first.row())) { - packet_data.insert(packet_data.end(), - kZeroFramesSeparatorWords, 0); + auto next_address = + part->GetNextFrameAddress(frame.first); + if (next_address && + (next_address->block_type() != + frame.first.block_type() || + next_address->is_bottom_half_rows() != + frame.first.is_bottom_half_rows() || + next_address->row() != frame.first.row())) { + packet_data.insert(packet_data.end(), + kZeroFramesSeparatorWords, + 0); + } + } + packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords, + 0); + } else { + // First write takes priority. + // FDRI writes must be padded with a trailing zero-frame. + // FDRI writes followed by MFWRs must only write to a single + // frame. + // Frame writes can be joined, so long as the frame written + // to with the trailing zero-frame has already been written + // to, or is meant to be a zero-frame. + + using Frame = typename PacketData::Frame; + + auto similar_address = + [](const typename ArchType::FrameAddress& a, + const typename ArchType::FrameAddress& b) -> bool { + return a.block_type() == b.block_type() && + a.is_bottom_half_rows() == + b.is_bottom_half_rows() && + a.row() == b.row(); + }; + + for (const auto& frame : frames) { + result.frames.push_back( + Frame{frame.first, {}, frame.second}); + } + + auto dedup = [](auto begin, auto end, auto compare, + auto merge) { + while (begin != end) { + auto mid = std::stable_partition( + begin + 1, end, [&](const Frame& f) { + return !compare(*begin, f); + }); + for (auto it = mid; it != end; ++it) + merge(*begin, *it); + end = mid; + if (begin != end) + ++begin; + } + return begin; + }; + + auto can_merge = [&](const Frame& a, const Frame& b) -> bool { + return b.repeats.empty() && + similar_address(a.address, b.address) && + a.data == b.data; + }; + + auto merge = [](Frame& dst, Frame& src) { + dst.repeats.push_back(src.address); + }; + + result.frames.erase( + dedup(result.frames.begin(), result.frames.end(), can_merge, + merge), + result.frames.end()); + + std::set deduped_frames; + + auto zero_frames_between = + [&](const typename ArchType::FrameAddress& a, + const typename ArchType::FrameAddress& b, + size_t max) -> size_t { + if (a >= b) + return 0; + auto next = part->GetNextFrameAddress(a); + for (size_t result = 1; + result <= max && next && *next <= b && + deduped_frames.count(*next) > 0U; + ++result, + next = part->GetNextFrameAddress(*next)) { + if (*next == b) + return result; + } + return 0; + }; + + // Merge contiguous frames + Frame* previous = nullptr; + absl::optional + previous_next_address; + for (auto& frame : result.frames) { + if (!frame.repeats.empty()) { + if (previous) + deduped_frames.insert( + previous->repeats.begin(), + previous->repeats.end()); + previous = &frame; + } else { + if (previous_next_address) { + const size_t between = + zero_frames_between( + *previous_next_address, + frame.address, 2U); + if (between > 0U) { + previous->data.resize( + previous->data.size() + + (ArchType:: + words_per_frame * + between), + 0U); + previous_next_address = + frame.address; + } + } + if (previous_next_address && + *previous_next_address == frame.address) { + previous->data.insert( + previous->data.end(), + frame.data.begin(), + frame.data.end()); + frame.data.clear(); + } else { + if (previous) + deduped_frames.insert( + previous->repeats.begin(), + previous->repeats.end()); + previous = &frame; + } + } + if (previous) + previous_next_address = + part->GetNextFrameAddress(frame.address); + } + + result.frames.erase( + std::remove_if( + result.frames.begin(), result.frames.end(), + [](const Frame& frame) { return frame.data.empty(); }), + result.frames.end()); + + for (auto& frame : result.frames) { + if (frame.repeats.empty()) { + frame.data.resize(frame.data.size() + + ArchType::words_per_frame, + 0U); + } } } - packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords, 0); - return packet_data; + return result; } template <> @@ -241,6 +398,8 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // Internal state machine for writes. bool start_new_write = false; + bool start_dup_write = false; + typename ArchType::FrameAddress last_write_frame_address = 0; typename ArchType::FrameAddress current_frame_address = 0; Configuration::FrameMap frames; @@ -272,6 +431,8 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // for the next FDIR. if (command_register == 0x1) { start_new_write = true; + } else if (command_register == 0x2) { + start_dup_write = true; } break; case ArchType::ConfRegType::IDCODE: @@ -293,38 +454,44 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // Per UG470, the command present in the CMD // register is executed each time the FAR - // register is laoded with a new value. As we - // only care about WCFG commands, just check - // that here. CTRL1 is completely undocumented - // but looking at generated bitstreams, bit 21 - // is used when per-frame CRC is enabled. - // Setting this bit seems to inhibit the - // re-execution of CMD during a FAR write. In - // practice, this is used so FAR writes can be - // added in the bitstream to show progress + // register is loaded with a new value. As we + // only care about WCFG and MFWR commands, just + // check that here. CTRL1 is completely + // undocumented but looking at generated + // bitstreams, bit 21 is used when per-frame CRC + // is enabled. Setting this bit seems to inhibit + // the re-execution of CMD during a FAR write. + // In practice, this is used so FAR writes can + // be added in the bitstream to show progress // markers without impacting the actual write // operation. - if (bit_field_get(ctl1_register, 21, 21) == 0 && - command_register == 0x1) { - start_new_write = true; + if (bit_field_get(ctl1_register, 21, 21) == 0) { + if (command_register == 0x1) { + start_new_write = true; + } else if (command_register == 0x2) { + start_dup_write = true; + } } break; case ArchType::ConfRegType::FDRI: { if (start_new_write) { - current_frame_address = - frame_address_register; + last_write_frame_address = + current_frame_address = + frame_address_register; start_new_write = false; } // Number of words in configuration frames - // depend on tje architecture. Writes to this + // depend on the architecture. Writes to this // register can be multiples of that number to // do auto-incrementing block writes. for (size_t ii = 0; ii < packet.data().size(); ii += ArchType::words_per_frame) { - frames[current_frame_address] = - packet.data().subspan( - ii, ArchType::words_per_frame); + frames.insert( + {current_frame_address, + packet.data().subspan( + ii, + ArchType::words_per_frame)}); auto next_address = part.GetNextFrameAddress( @@ -351,6 +518,16 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, } break; } + case ArchType::ConfRegType::MFWR: { + if (start_dup_write) { + current_frame_address = + frame_address_register; + start_dup_write = false; + frames.insert( + {current_frame_address, + frames[last_write_frame_address]}); + } + } break; default: break; } diff --git a/lib/xilinx/configuration.cc b/lib/xilinx/configuration.cc index 881d9cd2..d2b2c3b6 100644 --- a/lib/xilinx/configuration.cc +++ b/lib/xilinx/configuration.cc @@ -31,9 +31,12 @@ template <> Configuration::PacketData Configuration::createType2ConfigurationPacketData( const Frames::Frames2Data& frames, - absl::optional& part) { + absl::optional& part, + bool compressed) { // Generate a single type 2 packet that writes everything at once. - PacketData packet_data; + PacketData result; + result.frames.push_back(typename PacketData::Frame{0U, {}, {}}); + std::vector& packet_data = result.frames.back().data; for (auto& frame : frames) { std::copy(frame.second.begin(), frame.second.end(), std::back_inserter(packet_data)); @@ -44,7 +47,7 @@ Configuration::createType2ConfigurationPacketData( packet_data.insert(packet_data.begin(), packet_data_size & 0xFFFF); packet_data.insert(packet_data.begin(), (packet_data_size >> 16) & 0xFFFF); - return packet_data; + return result; } template <> @@ -219,7 +222,7 @@ void Configuration::createConfigurationPackage( // Frame data write out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, {packet_data})); + ConfigurationRegister::FDRI, {packet_data.frames.back().data})); // NOP packets for (int i = 0; i < 24; i++) { @@ -391,24 +394,85 @@ void Configuration::createConfigurationPackage( out_packets.emplace_back(new NopPacket()); out_packets.emplace_back(new NopPacket()); out_packets.emplace_back(new NopPacket()); - out_packets.emplace_back( - new ConfigurationPacketWithPayload<1, ConfigurationRegister>( - ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FAR, {0x0})); - out_packets.emplace_back( - new ConfigurationPacketWithPayload<1, ConfigurationRegister>( - ConfigurationPacket::Opcode::Write, - ConfigurationRegister::CMD, - {static_cast(xc7series::Command::WCFG)})); - out_packets.emplace_back(new NopPacket()); // Frame data write - out_packets.emplace_back(new ConfigurationPacket( - TYPE1, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, {})); - out_packets.emplace_back(new ConfigurationPacket( - TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + for (const auto& frame : packet_data.frames) { + out_packets.emplace_back(new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket::Opcode::Write, + ConfigurationRegister::CMD, + {static_cast(xc7series::Command::WCFG)})); + out_packets.emplace_back(new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket::Opcode::Write, + ConfigurationRegister::FAR, {frame.address})); + + out_packets.emplace_back( + new NopPacket()); + + if (frame.data.size() < 0b111'1111'1111) { + out_packets.emplace_back( + new ConfigurationPacket( + TYPE1, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, frame.data)); + } else { + out_packets.emplace_back( + new ConfigurationPacket( + TYPE1, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, {})); + out_packets.emplace_back( + new ConfigurationPacket( + TYPE2, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, frame.data)); + } + + if (!frame.repeats.empty()) { + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::CMD, + {static_cast( + xc7series::Command::MFW)})); + + for (size_t i = 0; i < 12; ++i) + out_packets.emplace_back( + new NopPacket()); + + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 8, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::MFWR, + {0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U})); + + for (const auto& addr : frame.repeats) { + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode:: + Write, + ConfigurationRegister::FAR, {addr})); + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 4, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode:: + Write, + ConfigurationRegister::MFWR, + {0U, 0U, 0U, 0U})); + } + } + } // Finalization sequence out_packets.emplace_back( @@ -569,7 +633,7 @@ void Configuration::createConfigurationPackage( ConfigurationRegister::FDRI, {})); out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + ConfigurationRegister::FDRI, packet_data.frames.back().data)); // Finalization sequence out_packets.emplace_back( @@ -730,7 +794,7 @@ void Configuration::createConfigurationPackage( ConfigurationRegister::FDRI, {})); out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + ConfigurationRegister::FDRI, packet_data.frames.back().data)); // Finalization sequence out_packets.emplace_back( diff --git a/tools/xc7frames2bit.cc b/tools/xc7frames2bit.cc index 78863019..8c407483 100644 --- a/tools/xc7frames2bit.cc +++ b/tools/xc7frames2bit.cc @@ -14,6 +14,7 @@ #include #include +DEFINE_bool(compressed, false, "Attempt to deduplicate bitstream frames"); DEFINE_string(part_name, "", "Name of the 7-series part"); DEFINE_string(part_file, "", "Definition file for target 7-series part"); DEFINE_string( @@ -62,7 +63,7 @@ struct Frames2BitWriter { configuration_packet_data( xilinx::Configuration:: createType2ConfigurationPacketData( - frames.getFrames(), part)); + frames.getFrames(), part, FLAGS_compressed)); // Put together a configuration package typename ArchType::ConfigurationPackage configuration_package; diff --git a/utils/fasm2frames.py b/utils/fasm2frames.py index 82ddc355..b7c4123c 100755 --- a/utils/fasm2frames.py +++ b/utils/fasm2frames.py @@ -182,7 +182,7 @@ def run( fasm.parse_fasm_string('\n'.join(roi_j['required_features']))) # Get required extra features for the part - required_features = db.get_required_fasm_features(part) + required_features = db.get_required_fasm_features(part.split('t')[0] + 't') extra_features += list( fasm.parse_fasm_string('\n'.join(required_features)))