Add bitstream compression option

Signed-off-by: Lakira Ashley <lakridesagain@gmail.com>
This commit is contained in:
Lakira Ashley 2022-11-27 23:22:51 +10:30 committed by Hans Baier
parent 38e8b02f2c
commit da0f14f5fe
6 changed files with 311 additions and 68 deletions

View File

@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.5.0)
project(prjxray)
option(PRJXRAY_BUILD_TESTING "" OFF)
set(CMAKE_CXX_STANDARD 14)
# Add sanitizers-cmake package
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/third_party/sanitizers-cmake/cmake" ${CMAKE_MODULE_PATH})
find_package(Sanitizers)
@ -36,8 +38,6 @@ target_include_directories(yaml-cpp PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/yaml-cpp/include>
)
# Set the CXX standard and compile time for our code only.
set(CMAKE_CXX_STANDARD 14)
add_compile_options(-Wall -Werror)
add_subdirectory(lib)

View File

@ -9,6 +9,7 @@ SHELL = bash
ALL_EXCLUDE = third_party .git env build docs/env
INSTALL_DIR ?=
ENV_DIR ?= env
# Skip this check if the ALLOW_ROOT var is defined
# E.g. when running in GH action custom runners CI
@ -20,9 +21,9 @@ endif
endif
# Tools + Environment
IN_ENV = if [ -e env/bin/activate ]; then . env/bin/activate; fi; source utils/environment.python.sh;
IN_ENV = if [ -e $(ENV_DIR)/bin/activate ]; then . $(ENV_DIR)/bin/activate; fi; source utils/environment.python.sh;
env:
python3 -mvenv env
python3 -mvenv $(ENV_DIR)
# Install project dependencies
$(IN_ENV) python -mpip install -r requirements.txt
# Install project's documentation dependencies

View File

@ -27,7 +27,16 @@ class Configuration {
public:
using FrameMap = std::map<typename ArchType::FrameAddress,
absl::Span<const uint32_t>>;
using PacketData = std::vector<uint32_t>;
// Payload of the frame-data section of a bitstream, split into one or
// more write bursts. The uncompressed path produces a single Frame
// holding all words; the compressed path produces one Frame per
// distinct burst.
struct PacketData {
// A single burst of frame data. Member order matters: Frames are
// built with positional aggregate initialisation
// (address, repeats, data).
struct Frame {
// Frame address at which this burst starts.
typename ArchType::FrameAddress address;
// Addresses of further frames that carry identical data to this
// one; when non-empty the writer re-emits the data to each of
// these addresses via MFWR instead of repeating it.
std::vector<typename ArchType::FrameAddress> repeats;
// Configuration words of the burst, including any zero-frame
// padding appended by createType2ConfigurationPacketData.
std::vector<uint32_t> data;
};
// Bursts in the order they are to be written.
std::vector<Frame> frames;
};
// Returns a configuration, i.e. collection of frame addresses
// and corresponding data from a collection of configuration packets.
@ -50,7 +59,8 @@ class Configuration {
// which allows for bigger payload compared to type 1.
static PacketData createType2ConfigurationPacketData(
const typename Frames<ArchType>::Frames2Data& frames,
absl::optional<typename ArchType::Part>& part);
absl::optional<typename ArchType::Part>& part,
bool compressed = false);
Configuration(const typename ArchType::Part& part,
std::map<typename ArchType::FrameAddress,
@ -79,29 +89,176 @@ template <typename ArchType>
typename Configuration<ArchType>::PacketData
Configuration<ArchType>::createType2ConfigurationPacketData(
const typename Frames<ArchType>::Frames2Data& frames,
absl::optional<typename ArchType::Part>& part) {
PacketData packet_data;
// Certain configuration frames blocks are separated by Zero Frames,
// i.e. frames with words with all zeroes. For Series-7, US and US+
// there zero frames separator consists of two frames.
static const int kZeroFramesSeparatorWords =
ArchType::words_per_frame * 2;
for (auto& frame : frames) {
std::copy(frame.second.begin(), frame.second.end(),
std::back_inserter(packet_data));
absl::optional<typename ArchType::Part>& part,
bool compressed) {
PacketData result;
if (!compressed) {
result.frames.push_back(typename PacketData::Frame{0U, {}, {}});
std::vector<uint32_t>& packet_data = result.frames.back().data;
// Certain configuration frame blocks are separated by zero
// frames, i.e. frames whose words are all zero. For Series-7,
// US and US+ the zero-frame separator consists of two
// frames.
static const int kZeroFramesSeparatorWords =
ArchType::words_per_frame * 2;
for (auto& frame : frames) {
std::copy(frame.second.begin(), frame.second.end(),
std::back_inserter(packet_data));
auto next_address = part->GetNextFrameAddress(frame.first);
if (next_address &&
(next_address->block_type() != frame.first.block_type() ||
next_address->is_bottom_half_rows() !=
frame.first.is_bottom_half_rows() ||
next_address->row() != frame.first.row())) {
packet_data.insert(packet_data.end(),
kZeroFramesSeparatorWords, 0);
auto next_address =
part->GetNextFrameAddress(frame.first);
if (next_address &&
(next_address->block_type() !=
frame.first.block_type() ||
next_address->is_bottom_half_rows() !=
frame.first.is_bottom_half_rows() ||
next_address->row() != frame.first.row())) {
packet_data.insert(packet_data.end(),
kZeroFramesSeparatorWords,
0);
}
}
packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords,
0);
} else {
// First write takes priority.
// FDRI writes must be padded with a trailing zero-frame.
// FDRI writes followed by MFWRs must only write to a single
// frame.
// Frame writes can be joined, so long as the frame written
// to with the trailing zero-frame has already been written
// to, or is meant to be a zero-frame.
using Frame = typename PacketData::Frame;
auto similar_address =
[](const typename ArchType::FrameAddress& a,
const typename ArchType::FrameAddress& b) -> bool {
return a.block_type() == b.block_type() &&
a.is_bottom_half_rows() ==
b.is_bottom_half_rows() &&
a.row() == b.row();
};
for (const auto& frame : frames) {
result.frames.push_back(
Frame{frame.first, {}, frame.second});
}
auto dedup = [](auto begin, auto end, auto compare,
auto merge) {
while (begin != end) {
auto mid = std::stable_partition(
begin + 1, end, [&](const Frame& f) {
return !compare(*begin, f);
});
for (auto it = mid; it != end; ++it)
merge(*begin, *it);
end = mid;
if (begin != end)
++begin;
}
return begin;
};
auto can_merge = [&](const Frame& a, const Frame& b) -> bool {
return b.repeats.empty() &&
similar_address(a.address, b.address) &&
a.data == b.data;
};
auto merge = [](Frame& dst, Frame& src) {
dst.repeats.push_back(src.address);
};
result.frames.erase(
dedup(result.frames.begin(), result.frames.end(), can_merge,
merge),
result.frames.end());
std::set<typename ArchType::FrameAddress> deduped_frames;
auto zero_frames_between =
[&](const typename ArchType::FrameAddress& a,
const typename ArchType::FrameAddress& b,
size_t max) -> size_t {
if (a >= b)
return 0;
auto next = part->GetNextFrameAddress(a);
for (size_t result = 1;
result <= max && next && *next <= b &&
deduped_frames.count(*next) > 0U;
++result,
next = part->GetNextFrameAddress(*next)) {
if (*next == b)
return result;
}
return 0;
};
// Merge contiguous frames
Frame* previous = nullptr;
absl::optional<typename ArchType::FrameAddress>
previous_next_address;
for (auto& frame : result.frames) {
if (!frame.repeats.empty()) {
if (previous)
deduped_frames.insert(
previous->repeats.begin(),
previous->repeats.end());
previous = &frame;
} else {
if (previous_next_address) {
const size_t between =
zero_frames_between(
*previous_next_address,
frame.address, 2U);
if (between > 0U) {
previous->data.resize(
previous->data.size() +
(ArchType::
words_per_frame *
between),
0U);
previous_next_address =
frame.address;
}
}
if (previous_next_address &&
*previous_next_address == frame.address) {
previous->data.insert(
previous->data.end(),
frame.data.begin(),
frame.data.end());
frame.data.clear();
} else {
if (previous)
deduped_frames.insert(
previous->repeats.begin(),
previous->repeats.end());
previous = &frame;
}
}
if (previous)
previous_next_address =
part->GetNextFrameAddress(frame.address);
}
result.frames.erase(
std::remove_if(
result.frames.begin(), result.frames.end(),
[](const Frame& frame) { return frame.data.empty(); }),
result.frames.end());
for (auto& frame : result.frames) {
if (frame.repeats.empty()) {
frame.data.resize(frame.data.size() +
ArchType::words_per_frame,
0U);
}
}
}
packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords, 0);
return packet_data;
return result;
}
template <>
@ -241,6 +398,8 @@ Configuration<ArchType>::InitWithPackets(const typename ArchType::Part& part,
// Internal state machine for writes.
bool start_new_write = false;
bool start_dup_write = false;
typename ArchType::FrameAddress last_write_frame_address = 0;
typename ArchType::FrameAddress current_frame_address = 0;
Configuration<ArchType>::FrameMap frames;
@ -272,6 +431,8 @@ Configuration<ArchType>::InitWithPackets(const typename ArchType::Part& part,
// for the next FDRI.
if (command_register == 0x1) {
start_new_write = true;
} else if (command_register == 0x2) {
start_dup_write = true;
}
break;
case ArchType::ConfRegType::IDCODE:
@ -293,38 +454,44 @@ Configuration<ArchType>::InitWithPackets(const typename ArchType::Part& part,
// Per UG470, the command present in the CMD
// register is executed each time the FAR
// register is laoded with a new value. As we
// only care about WCFG commands, just check
// that here. CTRL1 is completely undocumented
// but looking at generated bitstreams, bit 21
// is used when per-frame CRC is enabled.
// Setting this bit seems to inhibit the
// re-execution of CMD during a FAR write. In
// practice, this is used so FAR writes can be
// added in the bitstream to show progress
// register is loaded with a new value. As we
// only care about WCFG and MFWR commands, just
// check that here. CTRL1 is completely
// undocumented but looking at generated
// bitstreams, bit 21 is used when per-frame CRC
// is enabled. Setting this bit seems to inhibit
// the re-execution of CMD during a FAR write.
// In practice, this is used so FAR writes can
// be added in the bitstream to show progress
// markers without impacting the actual write
// operation.
if (bit_field_get(ctl1_register, 21, 21) == 0 &&
command_register == 0x1) {
start_new_write = true;
if (bit_field_get(ctl1_register, 21, 21) == 0) {
if (command_register == 0x1) {
start_new_write = true;
} else if (command_register == 0x2) {
start_dup_write = true;
}
}
break;
case ArchType::ConfRegType::FDRI: {
if (start_new_write) {
current_frame_address =
frame_address_register;
last_write_frame_address =
current_frame_address =
frame_address_register;
start_new_write = false;
}
// Number of words in configuration frames
// depend on tje architecture. Writes to this
// depend on the architecture. Writes to this
// register can be multiples of that number to
// do auto-incrementing block writes.
for (size_t ii = 0; ii < packet.data().size();
ii += ArchType::words_per_frame) {
frames[current_frame_address] =
packet.data().subspan(
ii, ArchType::words_per_frame);
frames.insert(
{current_frame_address,
packet.data().subspan(
ii,
ArchType::words_per_frame)});
auto next_address =
part.GetNextFrameAddress(
@ -351,6 +518,16 @@ Configuration<ArchType>::InitWithPackets(const typename ArchType::Part& part,
}
break;
}
case ArchType::ConfRegType::MFWR: {
if (start_dup_write) {
current_frame_address =
frame_address_register;
start_dup_write = false;
frames.insert(
{current_frame_address,
frames[last_write_frame_address]});
}
} break;
default:
break;
}

View File

@ -31,9 +31,12 @@ template <>
Configuration<Spartan6>::PacketData
Configuration<Spartan6>::createType2ConfigurationPacketData(
const Frames<Spartan6>::Frames2Data& frames,
absl::optional<Spartan6::Part>& part) {
absl::optional<Spartan6::Part>& part,
bool compressed) {
// Generate a single type 2 packet that writes everything at once.
PacketData packet_data;
PacketData result;
result.frames.push_back(typename PacketData::Frame{0U, {}, {}});
std::vector<uint32_t>& packet_data = result.frames.back().data;
for (auto& frame : frames) {
std::copy(frame.second.begin(), frame.second.end(),
std::back_inserter(packet_data));
@ -44,7 +47,7 @@ Configuration<Spartan6>::createType2ConfigurationPacketData(
packet_data.insert(packet_data.begin(), packet_data_size & 0xFFFF);
packet_data.insert(packet_data.begin(),
(packet_data_size >> 16) & 0xFFFF);
return packet_data;
return result;
}
template <>
@ -219,7 +222,7 @@ void Configuration<Spartan6>::createConfigurationPackage(
// Frame data write
out_packets.emplace_back(new ConfigurationPacket<ConfigurationRegister>(
TYPE2, ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, {packet_data}));
ConfigurationRegister::FDRI, {packet_data.frames.back().data}));
// NOP packets
for (int i = 0; i < 24; i++) {
@ -391,24 +394,85 @@ void Configuration<Series7>::createConfigurationPackage(
out_packets.emplace_back(new NopPacket<ConfigurationRegister>());
out_packets.emplace_back(new NopPacket<ConfigurationRegister>());
out_packets.emplace_back(new NopPacket<ConfigurationRegister>());
out_packets.emplace_back(
new ConfigurationPacketWithPayload<1, ConfigurationRegister>(
ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FAR, {0x0}));
out_packets.emplace_back(
new ConfigurationPacketWithPayload<1, ConfigurationRegister>(
ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::CMD,
{static_cast<uint32_t>(xc7series::Command::WCFG)}));
out_packets.emplace_back(new NopPacket<ConfigurationRegister>());
// Frame data write
out_packets.emplace_back(new ConfigurationPacket<ConfigurationRegister>(
TYPE1, ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, {}));
out_packets.emplace_back(new ConfigurationPacket<ConfigurationRegister>(
TYPE2, ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, packet_data));
for (const auto& frame : packet_data.frames) {
out_packets.emplace_back(new ConfigurationPacketWithPayload<
1, ConfigurationRegister>(
ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::CMD,
{static_cast<uint32_t>(xc7series::Command::WCFG)}));
out_packets.emplace_back(new ConfigurationPacketWithPayload<
1, ConfigurationRegister>(
ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FAR, {frame.address}));
out_packets.emplace_back(
new NopPacket<ConfigurationRegister>());
if (frame.data.size() < 0b111'1111'1111) {
out_packets.emplace_back(
new ConfigurationPacket<ConfigurationRegister>(
TYPE1,
ConfigurationPacket<
ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, frame.data));
} else {
out_packets.emplace_back(
new ConfigurationPacket<ConfigurationRegister>(
TYPE1,
ConfigurationPacket<
ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, {}));
out_packets.emplace_back(
new ConfigurationPacket<ConfigurationRegister>(
TYPE2,
ConfigurationPacket<
ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, frame.data));
}
if (!frame.repeats.empty()) {
out_packets.emplace_back(
new ConfigurationPacketWithPayload<
1, ConfigurationRegister>(
ConfigurationPacket<
ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::CMD,
{static_cast<uint32_t>(
xc7series::Command::MFW)}));
for (size_t i = 0; i < 12; ++i)
out_packets.emplace_back(
new NopPacket<ConfigurationRegister>());
out_packets.emplace_back(
new ConfigurationPacketWithPayload<
8, ConfigurationRegister>(
ConfigurationPacket<
ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::MFWR,
{0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U}));
for (const auto& addr : frame.repeats) {
out_packets.emplace_back(
new ConfigurationPacketWithPayload<
1, ConfigurationRegister>(
ConfigurationPacket<
ConfigurationRegister>::Opcode::
Write,
ConfigurationRegister::FAR, {addr}));
out_packets.emplace_back(
new ConfigurationPacketWithPayload<
4, ConfigurationRegister>(
ConfigurationPacket<
ConfigurationRegister>::Opcode::
Write,
ConfigurationRegister::MFWR,
{0U, 0U, 0U, 0U}));
}
}
}
// Finalization sequence
out_packets.emplace_back(
@ -569,7 +633,7 @@ void Configuration<UltraScale>::createConfigurationPackage(
ConfigurationRegister::FDRI, {}));
out_packets.emplace_back(new ConfigurationPacket<ConfigurationRegister>(
TYPE2, ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, packet_data));
ConfigurationRegister::FDRI, packet_data.frames.back().data));
// Finalization sequence
out_packets.emplace_back(
@ -730,7 +794,7 @@ void Configuration<UltraScalePlus>::createConfigurationPackage(
ConfigurationRegister::FDRI, {}));
out_packets.emplace_back(new ConfigurationPacket<ConfigurationRegister>(
TYPE2, ConfigurationPacket<ConfigurationRegister>::Opcode::Write,
ConfigurationRegister::FDRI, packet_data));
ConfigurationRegister::FDRI, packet_data.frames.back().data));
// Finalization sequence
out_packets.emplace_back(

View File

@ -14,6 +14,7 @@
#include <prjxray/xilinx/bitstream_writer.h>
#include <prjxray/xilinx/configuration.h>
DEFINE_bool(compressed, false, "Attempt to deduplicate bitstream frames");
DEFINE_string(part_name, "", "Name of the 7-series part");
DEFINE_string(part_file, "", "Definition file for target 7-series part");
DEFINE_string(
@ -62,7 +63,7 @@ struct Frames2BitWriter {
configuration_packet_data(
xilinx::Configuration<ArchType>::
createType2ConfigurationPacketData(
frames.getFrames(), part));
frames.getFrames(), part, FLAGS_compressed));
// Put together a configuration package
typename ArchType::ConfigurationPackage configuration_package;

View File

@ -182,7 +182,7 @@ def run(
fasm.parse_fasm_string('\n'.join(roi_j['required_features'])))
# Get required extra features for the part
required_features = db.get_required_fasm_features(part)
required_features = db.get_required_fasm_features(part.split('t')[0] + 't')
extra_features += list(
fasm.parse_fasm_string('\n'.join(required_features)))