From 28a2b5321101d0e862da7280e0a9c311a3c9d6cb Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Tue, 3 Feb 2026 23:50:06 +0800
Subject: [PATCH 1/8] Use new FST writer API

---
 CMakeLists.txt                              |    2 +-
 Makefile.in                                 |   10 +-
 include/fstcpp/.clang-format                |   22 +
 include/fstcpp/fstcpp.h                     |  237 +
 include/fstcpp/fstcpp_assertion.h           |  117 +
 include/fstcpp/fstcpp_file.h                |   83 +
 include/fstcpp/fstcpp_stream_write_helper.h |  367 +
 include/fstcpp/fstcpp_variable_info.cpp     |   34 +
 include/fstcpp/fstcpp_variable_info.h       |  808 +++
 include/fstcpp/fstcpp_writer.cpp            |  890 +++
 include/fstcpp/fstcpp_writer.h              |  445 ++
 include/gtkwave/fastlz.c                    |  549 --
 include/gtkwave/fastlz.h                    |  109 -
 include/gtkwave/fst_config.h                |   12 -
 include/gtkwave/fst_win_unistd.h            |   52 -
 include/gtkwave/fstapi.c                    | 7004 -------------------
 include/gtkwave/fstapi.h                    |  548 --
 include/gtkwave/lz4.c                       | 2789 --------
 include/gtkwave/lz4.h                       |  868 ---
 include/verilated.mk.in                     |    8 +
 include/verilated_fst_c.cpp                 |  150 +-
 include/verilated_fst_c.h                   |    8 +-
 test_regress/t/t_dist_copyright.py          |    2 +-
 test_regress/t/t_dist_cppstyle.py           |    2 +-
 24 files changed, 3097 insertions(+), 12019 deletions(-)
 create mode 100644 include/fstcpp/.clang-format
 create mode 100644 include/fstcpp/fstcpp.h
 create mode 100644 include/fstcpp/fstcpp_assertion.h
 create mode 100644 include/fstcpp/fstcpp_file.h
 create mode 100644 include/fstcpp/fstcpp_stream_write_helper.h
 create mode 100644 include/fstcpp/fstcpp_variable_info.cpp
 create mode 100644 include/fstcpp/fstcpp_variable_info.h
 create mode 100644 include/fstcpp/fstcpp_writer.cpp
 create mode 100644 include/fstcpp/fstcpp_writer.h
 delete mode 100644 include/gtkwave/fastlz.c
 delete mode 100644 include/gtkwave/fastlz.h
 delete mode 100644 include/gtkwave/fst_config.h
 delete mode 100644 include/gtkwave/fst_win_unistd.h
 delete mode 100644 include/gtkwave/fstapi.c
 delete mode 100644 include/gtkwave/fstapi.h
 delete mode 100644 include/gtkwave/lz4.c
 delete mode 100644 include/gtkwave/lz4.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 614e5cac9..a05e61f03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,6 +182,6 @@ install(
     PATTERN "include/*.cpp"
     PATTERN "include/*.vlt"
     PATTERN "include/*.sv"
-    PATTERN "include/gtkwave/*.[chv]*"
+    PATTERN "include/fstcpp/*.[chv]*"
     PATTERN "include/vltstd/*.[chv]*"
 )
diff --git a/Makefile.in b/Makefile.in
index c77a1c338..a23c3b5a4 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -260,7 +260,7 @@ VL_INST_INC_SRCDIR_FILES = \
   include/*.[chv]* \
   include/*.vlt \
   include/*.sv \
-  include/gtkwave/*.[chv]* \
+  include/fstcpp/*.[chv]* \
   include/vltstd/*.[chv]* \
 
 VL_INST_DATA_SRCDIR_FILES = \
@@ -311,7 +311,7 @@ installman: $(VL_INST_MAN_FILES)
 	done
 
 installdata:
-	$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/include/gtkwave
+	$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/include/fstcpp
 	$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/include/vltstd
 	for p in $(VL_INST_INC_BLDDIR_FILES) ; do \
 		$(INSTALL_DATA) $$p $(DESTDIR)$(pkgdatadir)/$$p; \
@@ -356,7 +356,7 @@ uninstall:
 	-rm $(DESTDIR)$(pkgdatadir)/verilator-config.cmake
 	-rm $(DESTDIR)$(pkgdatadir)/verilator-config-version.cmake
 	-rmdir $(DESTDIR)$(pkgdatadir)/bin
-	-rmdir $(DESTDIR)$(pkgdatadir)/include/gtkwave
+	-rmdir $(DESTDIR)$(pkgdatadir)/include/fstcpp
 	-rmdir $(DESTDIR)$(pkgdatadir)/include/vltstd
 	-rmdir $(DESTDIR)$(pkgdatadir)/include
 	-rmdir $(DESTDIR)$(pkgdatadir)/examples/make_hello_binary
@@ -414,7 +414,7 @@ CPPCHECK_FLAGS += --cppcheck-build-dir=$(CPPCHECK_CACHE)
 CPPCHECK_FLAGS += -DVL_DEBUG=1 -DVL_CPPCHECK=1 -DINFILTER_PIPE=1 -D__GNUC__=1
 CPPCHECK_FLAGS += -j$(CPPCHECK_JOBS)
 CPPCHECK_INC = -I$(srcdir)/include
-CPPCHECK_INC += -I$(srcdir)/include/gtkwave
+CPPCHECK_INC += -I$(srcdir)/include/fstcpp
 CPPCHECK_INC += -I$(srcdir)/include/vltstd
 CPPCHECK_INC += -I$(srcdir)/src/obj_dbg
 CPPCHECK_INC += -I$(srcdir)/src
@@ -678,7 +678,7 @@ FASTCOV_OPT += --dump-statistic
 FASTCOV_OPT += --exclude-glob
 FASTCOV_OPT += '/usr/*'
 FASTCOV_OPT += '*examples/*'
-FASTCOV_OPT += '*include/gtkwave/*'
+FASTCOV_OPT += '*include/fstcpp/*'
 FASTCOV_OPT += '*src/obj_dbg/*'
 FASTCOV_OPT += '*src/obj_opt/*.yy.cpp'
 FASTCOV_OPT += '*src/obj_opt/V3Ast*'
diff --git a/include/fstcpp/.clang-format b/include/fstcpp/.clang-format
new file mode 100644
index 000000000..85adf0428
--- /dev/null
+++ b/include/fstcpp/.clang-format
@@ -0,0 +1,22 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+
+AccessModifierOffset: -4
+AlignAfterOpenBracket: BlockIndent
+AlignEscapedNewlines: Left
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortFunctionsOnASingleLine: Inline
+BinPackArguments: false
+BinPackParameters: false
+BreakBeforeBraces: Attach
+ColumnLimit: 100
+ContinuationIndentWidth: 4
+DerivePointerAlignment: false
+IncludeBlocks: Preserve
+IndentCaseLabels: false
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+PointerAlignment: Right
+TabWidth: 4
+UseTab: ForContinuationAndIndentation
diff --git a/include/fstcpp/fstcpp.h b/include/fstcpp/fstcpp.h
new file mode 100644
index 000000000..6808b5139
--- /dev/null
+++ b/include/fstcpp/fstcpp.h
@@ -0,0 +1,237 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+// C system headers
+// C++ standard library headers
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+// Other libraries' .h files.
+// Your project's .h files.
+
+// Remove these when we upgrade to C++20
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wc++17-attribute-extensions"
+#pragma GCC diagnostic ignored "-Wc++20-attribute-extensions"
+
+namespace fst {
+
+typedef uint32_t Handle;
+typedef uint32_t EnumHandle;
+using string_view_pair = std::pair<const char *, std::size_t>;
+
+[[maybe_unused]]
+static inline string_view_pair make_string_view_pair(const char *data) {
+	if (not data) {
+		return {nullptr, 0};
+	}
+	return {data, std::strlen(data)};
+}
+
+[[maybe_unused]]
+static inline string_view_pair make_string_view_pair(const char *data, std::size_t size) {
+	return {data, size};
+}
+
+enum class WriterPackType : uint8_t {
+	ZLIB = 0,    // not supported
+	FASTLZ = 1,  // not supported
+	LZ4 = 2,
+	// Usually only for testing; prefer LZ4 for normal use.
+	// This will turn off compression for geometry/hierarchy/wave data
+	NO_COMPRESSION = 3,
+};
+
+enum class FileType : uint8_t {
+	VERILOG = 0,
+	VHDL,
+	VERILOG_VHDL,
+};
+
+enum class EncodingType : uint8_t {
+	BINARY = 0,   // 1 bit per bit to represent 0,1
+	VERILOG = 1,  // 2 bits per bit to represent X,Z
+	VHDL = 2,     // 4 bits per bit to represent H,U,W,L,-,?
+};
+
+[[maybe_unused]]
+static inline constexpr unsigned bitPerEncodedBit(EncodingType type) {
+	return 1 << static_cast<uint8_t>(type);
+}
+
+[[maybe_unused]]
+static const char* kEncodedBitToCharTable = (
+	"01" // Binary
+	"xzhu" // Verilog
+	"wl-?      " // Vhdl (padded with ' ' to 16 entries, one per 4-bit code)
+);
+
+struct Hierarchy {
+	enum class ScopeType : uint8_t {
+		VCD_MODULE = 0,
+		VCD_TASK = 1,
+		VCD_FUNCTION = 2,
+		VCD_BEGIN = 3,
+		VCD_FORK = 4,
+		VCD_GENERATE = 5,
+		VCD_STRUCT = 6,
+		VCD_UNION = 7,
+		VCD_CLASS = 8,
+		VCD_INTERFACE = 9,
+		VCD_PACKAGE = 10,
+		VCD_PROGRAM = 11,
+		VHDL_ARCHITECTURE = 12,
+		VHDL_PROCEDURE = 13,
+		VHDL_FUNCTION = 14,
+		VHDL_RECORD = 15,
+		VHDL_PROCESS = 16,
+		VHDL_BLOCK = 17,
+		VHDL_FORGENERATE = 18,
+		VHDL_IFGENERATE = 19,
+		VHDL_GENERATE = 20,
+		VHDL_PACKAGE = 21,
+		SV_ARRAY = 22,
+	};
+
+	enum class ScopeControlType : uint8_t {
+		GEN_ATTR_BEGIN = 252,
+		GEN_ATTR_END = 253,
+		VCD_SCOPE = 254,
+		VCD_UPSCOPE = 255,
+	};
+
+	enum class VarType : uint8_t {
+		VCD_EVENT = 0,
+		VCD_INTEGER = 1,
+		VCD_PARAMETER = 2,
+		VCD_REAL = 3,
+		VCD_REAL_PARAMETER = 4,
+		VCD_REG = 5,
+		VCD_SUPPLY0 = 6,
+		VCD_SUPPLY1 = 7,
+		VCD_TIME = 8,
+		VCD_TRI = 9,
+		VCD_TRIAND = 10,
+		VCD_TRIOR = 11,
+		VCD_TRIREG = 12,
+		VCD_TRI0 = 13,
+		VCD_TRI1 = 14,
+		VCD_WAND = 15,
+		VCD_WIRE = 16,
+		VCD_WOR = 17,
+		VCD_PORT = 18,
+		VCD_SPARRAY = 19,
+		VCD_REALTIME = 20,
+		GEN_STRING = 21,
+		SV_BIT = 22,
+		SV_LOGIC = 23,
+		SV_INT = 24,
+		SV_SHORTINT = 25,
+		SV_LONGINT = 26,
+		SV_BYTE = 27,
+		SV_ENUM = 28,
+		SV_SHORTREAL = 29,
+	};
+
+	enum class VarDirection : uint8_t {
+		MIN = 0,
+
+		IMPLICIT = 0,
+		INPUT = 1,
+		OUTPUT = 2,
+		INOUT = 3,
+		BUFFER = 4,
+		LINKAGE = 5,
+
+		MAX = 5,
+	};
+
+	enum class AttrType : uint8_t {
+		MIN = 0,
+		MISC = 0,
+		ARRAY = 1,
+		ENUM = 2,
+		PACK = 3,
+		MAX = 3,
+	};
+
+	enum class AttrSubType : uint8_t {
+		// For AttrType::MISC
+		MISC_MIN = 0,
+		MISC_COMMENT = 0,
+		MISC_ENVVAR = 1,
+		MISC_SUPVAR = 2,
+		MISC_PATHNAME = 3,
+		MISC_SOURCESTEM = 4,
+		MISC_SOURCEISTEM = 5,
+		MISC_VALUELIST = 6,
+		MISC_ENUMTABLE = 7,
+		MISC_UNKNOWN = 8,
+		MISC_MAX = 8,
+
+		// For AttrType::ARRAY
+		ARRAY_MIN = 0,
+		ARRAY_NONE = 0,
+		ARRAY_UNPACKED = 1,
+		ARRAY_PACKED = 2,
+		ARRAY_SPARSE = 3,
+		ARRAY_MAX = 3,
+
+		// For AttrType::ENUM
+		ENUM_MIN = 0,
+		ENUM_SV_INTEGER = 0,
+		ENUM_SV_BIT = 1,
+		ENUM_SV_LOGIC = 2,
+		ENUM_SV_INT = 3,
+		ENUM_SV_SHORTINT = 4,
+		ENUM_SV_LONGINT = 5,
+		ENUM_SV_BYTE = 6,
+		ENUM_SV_UNSIGNED_INTEGER = 7,
+		ENUM_SV_UNSIGNED_BIT = 8,
+		ENUM_SV_UNSIGNED_LOGIC = 9,
+		ENUM_SV_UNSIGNED_INT = 10,
+		ENUM_SV_UNSIGNED_SHORTINT = 11,
+		ENUM_SV_UNSIGNED_LONGINT = 12,
+		ENUM_SV_UNSIGNED_BYTE = 13,
+		ENUM_REG = 14,
+		ENUM_TIME = 15,
+		ENUM_MAX = 15,
+
+		// For AttrType::PACK
+		PACK_MIN = 0,
+		PACK_NONE = 0,
+		PACK_UNPACKED = 1,
+		PACK_PACKED = 2,
+		PACK_SPARSE = 3,
+		PACK_MAX = 3,
+	};
+
+	enum class SupplementalVarType : uint8_t {};
+
+	enum class SupplementalDataType : uint8_t {};
+};
+
+struct Header {
+	uint64_t start_time = uint64_t(-1);
+	uint64_t end_time = 0;
+	int64_t timezero = 0;
+	// Match the original fstapi.c. Just for information, not used in FST.
+	uint64_t writer_memory_use = 1ull << 27;
+	uint64_t num_scopes = 0;
+	uint64_t num_vars = 0;     // #CreateVar calls, including aliases
+	uint64_t num_handles = 0;  // #unique handles, excluding aliases, shall be <= num_vars
+	uint64_t num_value_change_data_blocks = 0;
+	char writer[128]{};
+	char date[26]{};
+	FileType filetype = FileType::VERILOG;
+	int8_t timescale = -9;
+};
+
+static constexpr uint64_t kInvalidTime = uint64_t(-1);
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_assertion.h b/include/fstcpp/fstcpp_assertion.h
new file mode 100644
index 000000000..b8567d116
--- /dev/null
+++ b/include/fstcpp/fstcpp_assertion.h
@@ -0,0 +1,117 @@
+// SPDX-FileCopyrightText: 2025 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+// C system headers
+// C++ standard library headers
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+// Other libraries' .h files.
+// Your project's .h files.
+
+#define FST_CHECK(a)                    \
+	if (!(a)) [[unlikely]] {            \
+		std::ostringstream oss;         \
+		oss << "FST_CHECK failed: " #a; \
+		const auto e = oss.str();       \
+		std::cerr << e << std::endl;    \
+		throw std::runtime_error(e);    \
+	}
+
+#define FST_CHECK_EQ(a, b)                           \
+	if ((a) != (b)) [[unlikely]] {                   \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_EQ failed: " #a " != " #b; \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+#define FST_CHECK_NE(a, b)                           \
+	if ((a) == (b)) [[unlikely]] {                   \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_NE failed: " #a " == " #b; \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+#define FST_CHECK_GT(a, b)                           \
+	if ((a) <= (b)) [[unlikely]] {                   \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_GT failed: " #a " <= " #b; \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+#define FST_CHECK_GE(a, b)                           \
+	if ((a) < (b)) [[unlikely]] {                    \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_GE failed: " #a " < " #b;  \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+#define FST_CHECK_LT(a, b)                           \
+	if ((a) >= (b)) [[unlikely]] {                   \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_LT failed: " #a " >= " #b; \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+#define FST_CHECK_LE(a, b)                           \
+	if ((a) > (b)) [[unlikely]] {                    \
+		std::ostringstream oss;                      \
+		oss << "FST_CHECK_LE failed: " #a " > " #b;  \
+		oss << " (" << (a) << " vs. " << (b) << ")"; \
+		const auto e = oss.str();                    \
+		std::cerr << e << std::endl;                 \
+		throw std::runtime_error(e);                 \
+	}
+
+// We turn on all DCHECKs to CHECKs temporarily for better safety.
+#if 1
+#	define FST_DCHECK(a) FST_CHECK(a)
+#	define FST_DCHECK_EQ(a, b) FST_CHECK_EQ(a, b)
+#	define FST_DCHECK_NE(a, b) FST_CHECK_NE(a, b)
+#	define FST_DCHECK_GT(a, b) FST_CHECK_GT(a, b)
+#	define FST_DCHECK_GE(a, b) FST_CHECK_GE(a, b)
+#	define FST_DCHECK_LT(a, b) FST_CHECK_LT(a, b)
+#	define FST_DCHECK_LE(a, b) FST_CHECK_LE(a, b)
+#else
+#	define FST_DCHECK(a)
+#	define FST_DCHECK_EQ(a, b)
+#	define FST_DCHECK_NE(a, b)
+#	define FST_DCHECK_GT(a, b)
+#	define FST_DCHECK_GE(a, b)
+#	define FST_DCHECK_LT(a, b)
+#	define FST_DCHECK_LE(a, b)
+#endif
+
+// Compatibility layer for unreachable code hint
+#if defined(__cplusplus) && __cplusplus >= 202302L
+// Prefer the standard library version if available
+#	include <utility>
+#	define FST_UNREACHABLE std::unreachable()
+#elif defined(__GNUC__) || defined(__clang__)
+// --- GCC / Clang ---
+#	define FST_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+// --- MSVC ---
+#	define FST_UNREACHABLE __assume(0)
+#else
+#	include <cstdlib>  // --- Fallback --- (std::abort is declared in <cstdlib>)
+#	define FST_UNREACHABLE std::abort()
+#endif
diff --git a/include/fstcpp/fstcpp_file.h b/include/fstcpp/fstcpp_file.h
new file mode 100644
index 000000000..a65a68d87
--- /dev/null
+++ b/include/fstcpp/fstcpp_file.h
@@ -0,0 +1,83 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+// C system headers
+// C++ standard library headers
+#include <cstdint>
+// Other libraries' .h files.
+// Your project's .h files.
+
+namespace fst {
+
+// Original block types from fstapi.h
+// FST_BL_HDR = 0,
+// FST_BL_VCDATA = 1,
+// FST_BL_BLACKOUT = 2,
+// FST_BL_GEOM = 3,
+// FST_BL_HIER = 4,
+// FST_BL_VCDATA_DYN_ALIAS = 5,
+// FST_BL_HIER_LZ4 = 6,
+// FST_BL_HIER_LZ4DUO = 7,
+// FST_BL_VCDATA_DYN_ALIAS2 = 8,
+// FST_BL_ZWRAPPER = 254,
+// FST_BL_SKIP = 255
+enum class BlockType : uint8_t {
+	HEADER = 0,
+	WAVE_DATA_VERSION1 = 1,  // not implemented
+	BLACKOUT = 2,
+	GEOMETRY = 3,
+	HIERARCHY_GZ_COMPRESSED = 4,  // not implemented
+	WAVE_DATA_VERSION2 = 5,       // not implemented
+	HIERARCHY_LZ4_COMPRESSED = 6,
+	HIERARCHY_LZ4_COMPRESSED_TWICE = 7,  // not implemented
+	WAVE_DATA_VERSION3 = 8,
+
+	ZWRAPPER = 254,  // not implemented
+	SKIP = 255       // not implemented
+};
+
+constexpr unsigned kSharedBlockHeaderSize = 1 /* BlockType */ + 8 /* size (u64) */;
+
+struct HeaderInfo {  // Field sizes/offsets (bytes) of the FST header; total asserted to be 321
+	struct Size {
+		static constexpr unsigned start_time = 8;  // 8 bytes, so end_time lands at offset 8
+		static constexpr unsigned end_time = 8;
+		static constexpr unsigned real_endianness = 8;
+		static constexpr unsigned writer_memory_use = 8;
+		static constexpr unsigned num_scopes = 8;
+		static constexpr unsigned num_vars = 8;
+		static constexpr unsigned num_handles = 8;
+		static constexpr unsigned num_wave_data_blocks = 8;
+		static constexpr unsigned timescale = 1;
+		static constexpr unsigned writer = 128;
+		static constexpr unsigned date = 26;
+		static constexpr unsigned reserved = 93;
+		static constexpr unsigned filetype = 1;
+		static constexpr unsigned timezero = 8;
+	};
+	struct Offset {  // Each offset = previous field's offset + previous field's size
+		static constexpr unsigned start_time = 0;
+		static constexpr unsigned end_time = start_time + Size::start_time;
+		static constexpr unsigned real_endianness = end_time + Size::end_time;
+		static constexpr unsigned writer_memory_use = real_endianness + Size::real_endianness;
+		static constexpr unsigned num_scopes = writer_memory_use + Size::writer_memory_use;
+		static constexpr unsigned num_vars = num_scopes + Size::num_scopes;
+		static constexpr unsigned num_handles = num_vars + Size::num_vars;
+		static constexpr unsigned num_wave_data_blocks = num_handles + Size::num_handles;
+		static constexpr unsigned timescale = num_wave_data_blocks + Size::num_wave_data_blocks;
+		static constexpr unsigned writer = timescale + Size::timescale;
+		static constexpr unsigned date = writer + Size::writer;
+		static constexpr unsigned reserved = date + Size::date;
+		static constexpr unsigned filetype = reserved + Size::reserved;
+		static constexpr unsigned timezero = filetype + Size::filetype;
+	};
+	static constexpr unsigned total_size = Offset::timezero + Size::timezero;
+	static constexpr double kEndianessMagicIdentifier = 2.7182818284590452354;
+	static_assert(total_size == 321, "Total size of HeaderInfo must be 321 bytes");
+};
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_stream_write_helper.h b/include/fstcpp/fstcpp_stream_write_helper.h
new file mode 100644
index 000000000..70621e0a1
--- /dev/null
+++ b/include/fstcpp/fstcpp_stream_write_helper.h
@@ -0,0 +1,367 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+// C system headers
+// C++ standard library headers
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <vector>
+// Other libraries' .h files.
+// Your project's .h files.
+#include "fstcpp/fstcpp.h"
+#include "fstcpp/fstcpp_file.h"
+
+namespace fst {
+
+namespace platform {
+
+// For C++14
+// Can remove once C++23 is required
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+// clang-format off
+template <typename U> U to_big_endian(U u) { return u; }
+#else
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 1>) { return u; }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 2>) { return __builtin_bswap16(u); }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 4>) { return __builtin_bswap32(u); }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 8>) { return __builtin_bswap64(u); }
+// clang-format on
+template <typename U>
+U to_big_endian(U u) {
+	return platform::to_big_endian(u, std::integral_constant<size_t, sizeof(U)>());
+}
+#endif
+
+}  // namespace platform
+
+struct StreamWriteHelper {
+	std::ostream *os;
+
+	StreamWriteHelper(std::ostream &os_) : os(&os_) {}
+	StreamWriteHelper(std::ostream *os_) : os(os_) {}
+
+	// Write the entire uint, big-endian
+	// We do not provide little-endian version since FST only uses big-endian
+	template <typename U>
+	StreamWriteHelper &writeUInt(U u) {
+		u = platform::to_big_endian(u);
+		os->write(reinterpret_cast<const char *>(&u), sizeof(u));
+		return *this;
+	}
+
+	// Write the uint, big-endian, left-aligned but only (bitwidth+7)/8 bytes
+	// This is a very special case for value changes
+	// For example, if the value is 10-bits (e.g. logic [9:0] in Verilog),
+	// then the first byte will be [9-:8], then {[1:0], 6'b0}.
+	template <typename U>
+	StreamWriteHelper &writeUIntPartialForValueChange(U u, size_t bitwidth) {
+		// Shift left to align the MSB to the MSB of the uint
+		u <<= sizeof(u) * 8 - bitwidth;
+		// Write the first (bitwidth+7)/8 bytes
+		u = platform::to_big_endian(u);
+		os->write(reinterpret_cast<const char *>(&u), (bitwidth + 7) / 8);
+		return *this;
+	}
+
+	StreamWriteHelper &writeLEB128(uint64_t v) {
+		// Just reuse the logic from fstapi.c, is there a better way?
+		uint64_t nxt;
+		unsigned char buf[10]; /* ceil(64/7) = 10 */
+		unsigned char *pnt = buf;
+		int len;
+		while ((nxt = v >> 7)) {
+			*(pnt++) = ((unsigned char)v) | 0x80;
+			v = nxt;
+		}
+		*(pnt++) = (unsigned char)v;
+		len = pnt - buf;
+		os->write(reinterpret_cast<const char *>(buf), len);
+		return *this;
+	}
+
+	StreamWriteHelper &writeLEB128Signed(int64_t v) {
+		// Just reuse the logic from fstapi.c, is there a better way?
+		unsigned char buf[15]; /* ceil(64/7) = 10 + sign byte padded way up */
+		unsigned char byt;
+		unsigned char *pnt = buf;
+		int more = 1;
+		int len;
+		do {
+			byt = v | 0x80;
+			v >>= 7;
+
+			if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) {
+				more = 0;
+				byt &= 0x7f;
+			}
+
+			*(pnt++) = byt;
+		} while (more);
+		len = pnt - buf;
+		os->write(reinterpret_cast<const char *>(buf), len);
+		return *this;
+	}
+
+	template <typename F>
+	StreamWriteHelper &writeFloat(F f) {
+		// Always write in native endianness
+		os->write(reinterpret_cast<const char *>(&f), sizeof(f));
+		return *this;
+	}
+
+	StreamWriteHelper &writeBlockHeader(fst::BlockType block_type, uint64_t block_length) {
+		return (
+			this  //
+				->writeUInt(static_cast<uint8_t>(block_type))
+				.writeUInt(
+					block_length + 8
+				)  // The 8 is required by FST, which is the size of this uint64_t
+		);
+	}
+
+	// Write the string, non-null-terminated
+	StreamWriteHelper &writeString(const fst::string_view_pair str) {
+		os->write(str.first, str.second);
+		return *this;
+	}
+
+	// Write the string, null-terminated
+	StreamWriteHelper &writeString0(const fst::string_view_pair str) {
+		os->write(str.first, str.second).put('\0');
+		return *this;
+	}
+	StreamWriteHelper &writeString(const std::string &str) {
+		return writeString0(fst::make_string_view_pair(str.c_str(), str.size()));
+	}
+	StreamWriteHelper &writeString(const char *str) {
+		return writeString0(fst::make_string_view_pair(str));
+	}
+
+	StreamWriteHelper &write(const char *ptr, size_t size) {
+		os->write(ptr, size);
+		return *this;
+	}
+
+	StreamWriteHelper &write(const uint8_t *ptr, size_t size) {
+		os->write(reinterpret_cast<const char *>(ptr), size);
+		return *this;
+	}
+
+	StreamWriteHelper &seek(std::streamoff pos, std::ios_base::seekdir dir) {
+		os->seekp(pos, dir);
+		return *this;
+	}
+
+	StreamWriteHelper &fill(char fill_char, size_t size) {
+		if (size > 32) {
+			// optimize large fills
+			constexpr unsigned kChunkSize = 16;
+			char buf[kChunkSize];
+			std::memset(buf, fill_char, kChunkSize);
+			for (size_t i = 0; i < size / kChunkSize; ++i) {
+				os->write(buf, kChunkSize);
+			}
+			size %= kChunkSize;
+		}
+		for (size_t i = 0; i < size; ++i) {
+			os->put(fill_char);
+		}
+		return *this;
+	}
+
+	// Handy functions for writing variable-length data: you can
+	// cascade multiple write() calls after beginOffset(), then
+	// call endOffset() to get the total number of bytes written.
+
+	// (1)
+	// std::streamoff diff;
+	// h
+	// .beginOffset(diff)
+	// .write(...)
+	// ... do other stuff ...
+	// .endOffset(&diff); <-- diff will be set to the number of bytes written
+	// (2)
+	// std::streamoff pos, diff;
+	// h
+	// .beginOffset(pos)
+	// .write(...)
+	// ... do other stuff ...
+	// .endOffset(&diff, pos); <-- diff will be set to the number of bytes written
+
+	// The API takes a pointer on purpose, to prevent you from passing (pos, diff)
+	// as the arguments of endOffset(), which is a common mistake.
+
+	StreamWriteHelper &beginOffset(std::streamoff &pos) {
+		pos = os->tellp();
+		return *this;
+	}
+
+	StreamWriteHelper &endOffset(std::streamoff *diff) {
+		// diff shall store previous position before calling this function
+		*diff = os->tellp() - *diff;
+		return *this;
+	}
+
+	StreamWriteHelper &endOffset(std::streamoff *diff, std::streamoff pos) {
+		*diff = os->tellp() - pos;
+		return *this;
+	}
+};
+
+struct StreamVectorWriteHelper {
+	std::vector<uint8_t> &vec;
+
+	StreamVectorWriteHelper(std::vector<uint8_t> &vec_) : vec(vec_) {}
+
+	template <typename T>
+	StreamVectorWriteHelper &write(T u) {
+		const size_t s = sizeof(u);
+		vec.resize(vec.size() + s);
+		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		return *this;
+	}
+
+	template <typename T>
+	StreamVectorWriteHelper &fill(T u, size_t count) {
+		const size_t s = sizeof(u) * count;
+		vec.resize(vec.size() + s);
+		for (size_t i = 0; i < count; ++i) {
+			std::memcpy(vec.data() + vec.size() - s + i * sizeof(u), &u, sizeof(u));
+		}
+		return *this;
+	}
+
+	template <typename T>
+	StreamVectorWriteHelper &write(T *u, size_t size) {
+		const size_t s = sizeof(*u) * size;  // byte count: element size (not pointer size) * count
+		vec.resize(vec.size() + s);
+		std::memcpy(vec.data() + vec.size() - s, u, s);
+		return *this;
+	}
+
+	template <typename E>
+	StreamVectorWriteHelper &writeU8Enum(E e) {
+		vec.push_back(static_cast<uint8_t>(e));
+		return *this;
+	}
+
+	// Write the entire uint, big-endian
+	// We do not provide little-endian version since FST only uses big-endian
+	template <typename U>
+	StreamVectorWriteHelper &writeUIntBE(U u) {
+		u = platform::to_big_endian(u);
+		const size_t s = sizeof(u);
+		vec.resize(vec.size() + s);
+		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		return *this;
+	}
+
+	// Write the uint, big-endian, left-aligned but only (bitwidth+7)/8 bytes
+	// This is a very special case for value changes
+	// For example, if the value is 10-bits (e.g. logic [9:0] in Verilog),
+	// then the first byte will be [9-:8], then {[1:0], 6'b0}.
+	template <typename U>
+	StreamVectorWriteHelper &writeUIntPartialForValueChange(U u, size_t bitwidth) {
+		// Shift left to align the MSB to the MSB of the uint
+		u <<= sizeof(u) * 8 - bitwidth;
+		// Write the first (bitwidth+7)/8 bytes
+		u = platform::to_big_endian(u);
+		const size_t s = (bitwidth + 7) / 8;
+		vec.resize(vec.size() + s);
+		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		return *this;
+	}
+
+	StreamVectorWriteHelper &writeLEB128(uint64_t v) {
+		// Just reuse the logic from fstapi.c, is there a better way?
+		uint64_t nxt;
+		unsigned char buf[10]; /* ceil(64/7) = 10 */
+		unsigned char *pnt = buf;
+		int len;
+		while ((nxt = v >> 7)) {
+			*(pnt++) = ((unsigned char)v) | 0x80;
+			v = nxt;
+		}
+		*(pnt++) = (unsigned char)v;
+		len = pnt - buf;
+
+		const size_t cur = vec.size();
+		vec.resize(cur + len);
+		std::memcpy(vec.data() + cur, buf, len);
+		return *this;
+	}
+
+	StreamVectorWriteHelper &writeLEB128Signed(int64_t v) {
+		// Just reuse the logic from fstapi.c, is there a better way?
+		unsigned char buf[15]; /* ceil(64/7) = 10 + sign byte padded way up */
+		unsigned char byt;
+		unsigned char *pnt = buf;
+		int more = 1;
+		int len;
+		do {
+			byt = v | 0x80;
+			v >>= 7;
+
+			if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) {
+				more = 0;
+				byt &= 0x7f;
+			}
+
+			*(pnt++) = byt;
+		} while (more);
+		len = pnt - buf;
+
+		const size_t cur = vec.size();
+		vec.resize(cur + len);
+		std::memcpy(vec.data() + cur, buf, len);
+		return *this;
+	}
+
+	StreamVectorWriteHelper &writeBlockHeader(fst::BlockType block_type, uint64_t block_length) {
+		return (
+			this  //
+				->writeUIntBE(static_cast<uint8_t>(block_type))
+				.writeUIntBE(
+					block_length + 8
+				)  // The 8 is required by FST, which is the size of this uint64_t
+		);
+	}
+
+	// Write the string, non-null-terminated
+	StreamVectorWriteHelper &writeString(const fst::string_view_pair str) {
+		if (str.second != 0) {
+			const size_t len = str.second;
+			const size_t cur = vec.size();
+			vec.resize(cur + len);
+			std::memcpy(vec.data() + cur, str.first, len);
+		}
+		return *this;
+	}
+
+	// Write the string, null-terminated
+	StreamVectorWriteHelper &writeString0(const fst::string_view_pair str) {
+		if (str.second != 0) {
+			const size_t len = str.second;
+			const size_t cur = vec.size();
+			vec.resize(cur + len + 1);
+			std::memcpy(vec.data() + cur, str.first, len);
+			vec[cur + len] = '\0';
+		} else {
+			vec.push_back('\0');
+		}
+		return *this;
+	}
+	StreamVectorWriteHelper &writeString(const std::string &str) {
+		return writeString0(fst::make_string_view_pair(str.c_str(), str.size()));
+	}
+	StreamVectorWriteHelper &writeString(const char *str) {
+		return writeString0(fst::make_string_view_pair(str));
+	}
+};
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_variable_info.cpp b/include/fstcpp/fstcpp_variable_info.cpp
new file mode 100644
index 000000000..d748c2e29
--- /dev/null
+++ b/include/fstcpp/fstcpp_variable_info.cpp
@@ -0,0 +1,34 @@
+// SPDX-FileCopyrightText: 2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+// direct include
+#include "fstcpp/fstcpp_variable_info.h"
+// C system headers
+// C++ standard library headers
+#include <algorithm>
+// Other libraries' .h files.
+// Your project's .h files.
+
+namespace fst {
+
+// Pre-C++17: ODR-used static constexpr members (e.g. passed by reference to std::max) need these
+constexpr uint64_t VariableInfo::kCapacityBaseShift;
+constexpr uint64_t VariableInfo::kCapacityBase;
+
+void VariableInfo::reallocate(uint64_t new_size) {
+	// Allocate new memory
+	const uint32_t new_capacity_log2 =
+		std::max(platform::clog2(new_size), kCapacityBaseShift) - kCapacityBaseShift;
+	uint8_t *new_data = new uint8_t[kCapacityBase << new_capacity_log2];
+	// Copy old data to new memory
+	if (data != nullptr) {
+		const uint64_t old_size = size();
+		std::copy_n(data, old_size, new_data);
+		delete[] data;
+	}
+	data = new_data;
+	capacity_log2(new_capacity_log2);
+}
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_variable_info.h b/include/fstcpp/fstcpp_variable_info.h
new file mode 100644
index 000000000..a6d64fc8e
--- /dev/null
+++ b/include/fstcpp/fstcpp_variable_info.h
@@ -0,0 +1,808 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+#include "fstcpp/fstcpp.h"
+// C system headers
+// C++ standard library headers
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <vector>
+// Other libraries' .h files.
+// Your project's .h files.
+#include "fstcpp/fstcpp_assertion.h"
+#include "fstcpp/fstcpp_stream_write_helper.h"
+
+namespace fst {
+
+namespace platform {
+
+// Can be replaced with std::bit_width when C++20 is available
+inline uint64_t clog2(uint64_t x) {  // ceil(log2(x)); returns 0 for x <= 1
+	return x <= 1 ? 0 : 64 - __builtin_clzll(x - 1);  // __builtin_clzll(0) is undefined, so x <= 1 is handled apart
+}
+
+inline constexpr uint32_t gen_mask_safe(unsigned width) {
+	// all-ones shifted right by (32 - width): valid for width in 1..32, including 32
+	return ~uint32_t(0) >> (32 - width);
+}
+
+inline uint32_t read_field(const uint32_t src, unsigned width, unsigned offset) {
+	const uint32_t shifted = src >> offset;  // bring the field down to bit 0
+	return shifted & gen_mask_safe(width);  // drop everything above `width` bits
+}
+
+inline void write_field(uint32_t &dst, const uint32_t src, unsigned width, unsigned offset) {
+	dst &= ~(gen_mask_safe(width) << offset);  // clear the old field
+	dst |= (src & gen_mask_safe(width)) << offset;  // insert src truncated to `width` bits
+}
+
+}  // namespace platform
+
+class VariableInfo final {
+	static constexpr uint64_t kCapacityBaseShift = 5;
+	static constexpr uint64_t kCapacityBase = 1 << kCapacityBaseShift;
+
+	// To maximize cache efficiency, we compact the data members into 16 bytes.
+	// Several small fields are packed into a single 32-bit integer (`misc`).
+	// The standard does not guarantee the layout of bitfields (the `int x : N;` syntax),
+	// so the fields are accessed through platform::read_field / platform::write_field.
+
+	// begin of data members
+	// 1. 8B pointer (assume 64-bit architecture), its size can be:
+	//   - 0 if data is nullptr
+	//   - `kCapacityBase * pow(2, capacity_log2)` if data is not nullptr
+	//   - If we want more bits, we can use the `kCapacityBaseShift` LSB for other purposes.
+	uint8_t *data = nullptr;
+	// 2. 4B size. The same as vector.size(), but we only need 32b.
+	uint32_t size_ = 0;
+	// 3. 4B misc. Highly compacted information for max cache efficiency.
+	//    - 6b capacity_log2
+	//    - 2b last_encoding_type
+	//    - 23b bitwidth
+	//    - 1b is_real
+
+	// Note: optimization possibility (not implemented)
+	//    - real is always 64-bit double, so we can use 24 bits to encode
+	//      is_real and bitwidth together, and bitwidth = ((1 << 24) - 1) is a special
+	//      value to indicate that the variable is a real.
+	//    - Currently bitwidth is whatever you pass to Writer::createVar.
+	//    - Not implemented since nobody needs 16M-bit over 8M-bit bitwidth IMO.
+	static constexpr uint32_t kIsRealWidth = 1;
+	static constexpr uint32_t kBitwidthWidth = 23;
+	static constexpr uint32_t kLastEncodingTypeWidth = 2;
+	static constexpr uint32_t kCapacityLog2Width = 6;
+
+	static constexpr uint32_t kIsRealOffset = 0;
+	static constexpr uint32_t kBitwidthOffset = kIsRealOffset + kIsRealWidth;
+	static constexpr uint32_t kLastEncodingTypeOffset = kBitwidthOffset + kBitwidthWidth;
+	static constexpr uint32_t kCapacityLog2Offset =
+		kLastEncodingTypeOffset + kLastEncodingTypeWidth;
+	uint32_t misc = 0;
+	// end of data members
+
+	void capacity_log2(uint32_t capacity_log2_) {
+		platform::write_field(misc, capacity_log2_, kCapacityLog2Width, kCapacityLog2Offset);
+	}
+	uint64_t capacity() const {  // 64-bit result: kCapacityBase << log2 must not be truncated
+		if (data == nullptr) {
+			return 0;
+		}
+		return kCapacityBase << platform::read_field(misc, kCapacityLog2Width, kCapacityLog2Offset);
+	}
+
+	inline bool need_reallocate(uint64_t new_size) const { return capacity() < new_size; }
+	// This function is cold, so we don't inline it
+	void reallocate(uint64_t new_size);
+
+	inline void size(uint64_t s) { size_ = s; }  // stored in the 32-bit size_ member
+
+public:
+	static constexpr uint32_t kMaxSupportedBitwidth = 0x7fffff;  // largest value of the 23-bit field
+	inline uint64_t size() const { return size_; }
+	inline uint32_t bitwidth() const {
+		return platform::read_field(misc, kBitwidthWidth, kBitwidthOffset);
+	}
+	inline bool is_real() const {
+		return bool(platform::read_field(misc, kIsRealWidth, kIsRealOffset));
+	}
+	inline void last_written_encode_type(EncodingType encoding_) {
+		platform::write_field(
+			misc, static_cast<uint32_t>(encoding_), kLastEncodingTypeWidth, kLastEncodingTypeOffset
+		);
+	}
+	inline EncodingType last_written_encode_type() const {
+		return static_cast<EncodingType>(
+			platform::read_field(misc, kLastEncodingTypeWidth, kLastEncodingTypeOffset)
+		);
+	}
+	uint64_t last_written_bytes() const;
+
+	template <typename Callable, typename... Args>
+	auto dispatchHelper(Callable &&callable, Args &&...args) const;
+
+	VariableInfo(uint32_t bitwidth_, bool is_real_ = false);
+	~VariableInfo() {
+		if (data_ptr() != nullptr) {
+			// don't delete data directly for better abstraction
+			// we might use the LSB of data in the future as LSB is
+			// always aligned to kCapacityBase
+			delete[] data_ptr();
+		}
+	}
+	VariableInfo(VariableInfo &&rhs) noexcept {
+		data = rhs.data;
+		rhs.data = nullptr;
+		misc = rhs.misc;
+		size_ = rhs.size_;
+		// rhs keeps its misc/size_ but no longer owns a buffer, so its destructor frees nothing
+	}
+
+	uint32_t emitValueChange(uint64_t current_time_index, const uint64_t val);
+	uint32_t emitValueChange(
+		uint64_t current_time_index, const uint32_t *val, EncodingType encoding
+	);
+	uint32_t emitValueChange(
+		uint64_t current_time_index, const uint64_t *val, EncodingType encoding
+	);
+
+	void keepOnlyTheLatestValue() {  // move the newest record to the front, drop the rest
+		const auto last_written_bytes_ = last_written_bytes();
+		// memmove: source and destination coincide when only one record is stored
+		std::memmove(data_ptr(), data_ptr() + size() - last_written_bytes_, last_written_bytes_);
+		size(last_written_bytes_);
+	}
+	void dumpInitialBits(std::vector<uint8_t> &buf) const;
+	void dumpValueChanges(std::vector<uint8_t> &buf) const;
+
+	// We only need to make this class compatible with vector
+	// delete copy constructor and assignment operator
+	VariableInfo(const VariableInfo &) = delete;
+	VariableInfo &operator=(const VariableInfo &) = delete;
+	VariableInfo &operator=(VariableInfo &&) = delete;
+
+	void resize(size_t new_size) {  // grows the buffer if needed, then sets the size
+		if (need_reallocate(new_size)) {
+			reallocate(new_size);
+		}
+		size(new_size);
+	}
+	void add_size(size_t added_size) { resize(size() + added_size); }
+	uint8_t *data_ptr() { return data; }
+};
+static_assert(
+	sizeof(VariableInfo) != 12,  // 4B pointer + 4B size_ + 4B misc, i.e. a 32-bit pointer
+	"We don't support 32-bit architecture, comment out the assertions and take the risk"
+);
+static_assert(sizeof(VariableInfo) == 16, "VariableInfo should be small");  // 8B pointer + 4B size_ + 4B misc
+
+namespace detail {
+
+constexpr size_t kEmitTimeIndexAndEncodingSize = sizeof(uint64_t) + sizeof(fst::EncodingType);  // record header bytes
+
+// EmitReaderHelper and EmitWriterHelper are very optimized for emit functions
+// User must ensure the pointer points to the valid memory region
+struct EmitReaderHelper {
+	const uint8_t *ptr;
+	EmitReaderHelper(const uint8_t *ptr_) : ptr(ptr_) {}
+
+	// Consumes the record header: the 64-bit time index first, then the encoding tag
+	std::pair<uint64_t, fst::EncodingType> readTimeIndexAndEncoding() {
+		const uint64_t when = read<uint64_t>();
+		return std::make_pair(when, read<fst::EncodingType>());
+	}
+
+	// Copies one T out of the buffer and advances the cursor past it
+	template <typename T>
+	T read() {
+		T value;
+		std::memcpy(&value, ptr, sizeof(T));
+		ptr += sizeof(T);
+		return value;
+	}
+
+	void skip(size_t count) { ptr += count; }
+
+	// Copies the i-th T (counted from the cursor) without moving the cursor
+	template <typename T>
+	T peek(size_t i = 0) {
+		T value;
+		std::memcpy(&value, ptr + i * sizeof(T), sizeof(T));
+		return value;
+	}
+};
+
+struct EmitWriterHelper {
+	uint8_t *ptr;
+
+	EmitWriterHelper(uint8_t *ptr_) : ptr(ptr_) {}
+
+	// Emits the record header: 64-bit time index followed by the encoding tag
+	EmitWriterHelper &writeTimeIndexAndEncoding(uint64_t time_index, fst::EncodingType encoding) {
+		return write(time_index).write(encoding);
+	}
+
+	// Copies the raw bytes of `u` to the cursor and advances past them
+	template <typename T>
+	EmitWriterHelper &write(T u) {
+		std::memcpy(ptr, &u, sizeof(T));
+		ptr += sizeof(T);
+		return *this;
+	}
+
+	// Writes `count` consecutive copies of `u`
+	template <typename T>
+	EmitWriterHelper &fill(T u, size_t count) {
+		while (count-- > 0) {
+			write(u);
+		}
+		return *this;
+	}
+
+	// Writes `size` elements read from the array starting at `u`
+	template <typename T>
+	EmitWriterHelper &write(T *u, size_t size) {
+		for (T *const end = u + size; u != end; ++u) {
+			std::memcpy(ptr, u, sizeof(T));
+			ptr += sizeof(T);
+		}
+		return *this;
+	}
+};
+
+class VariableInfoDouble {  // typed view used by VariableInfo::dispatchHelper for real variables
+	VariableInfo &info;
+
+public:
+	VariableInfoDouble(VariableInfo &info_) : info(info_) {}
+
+public:
+	inline size_t computeBytesNeeded(EncodingType encoding) const {
+		(void)encoding;  // a double record has the same size in every encoding
+		return kEmitTimeIndexAndEncodingSize + sizeof(double);
+	}
+
+	inline EmitWriterHelper emitValueChangeCommonPart(
+		uint64_t current_time_index, EncodingType encoding
+	) {
+		if (current_time_index + 1 == 0) {  // time index of all ones: discard everything stored so far
+			info.resize(0);
+		}
+		// For Double, value is always 8 bytes (sizeof(double) or uint64_t)
+		const size_t added_size = computeBytesNeeded(encoding);
+		const size_t old_size = info.size();
+		info.add_size(added_size);
+
+		EmitWriterHelper wh(info.data_ptr() + old_size);
+		wh.writeTimeIndexAndEncoding(current_time_index, encoding);
+		return wh;  // cursor now points at the first value byte
+	}
+
+public:
+	void construct() {
+		const size_t needed = computeBytesNeeded(EncodingType::BINARY);
+		info.resize(needed);
+		EmitWriterHelper wh(info.data_ptr());
+		// Initial value is NaN; write<double> copies its 8 bytes (no type-punning cast needed)
+		const double nan_val = std::numeric_limits<double>::quiet_NaN();
+		wh.writeTimeIndexAndEncoding(0, EncodingType::BINARY).write<double>(nan_val);
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint64_t val) {
+		auto wh = emitValueChangeCommonPart(current_time_index, EncodingType::BINARY);
+		// `val` already holds the raw bit pattern of the double (bit_cast'ed by the
+		// caller), so store it as uint64_t; do not use write<double> here.
+		// Keep this per-change path free of console output.
+		wh.write<uint64_t>(val);
+	}
+
+	// Double variables should not use these array-based emitValueChange overloads.
+	// We implement them to satisfy the VariableInfo::dispatchHelper template instantiation.
+	void emitValueChange(uint64_t, const uint32_t *, EncodingType) {
+		throw std::runtime_error("emitValueChange(uint32_t*) not supported for Double");
+	}
+	void emitValueChange(uint64_t, const uint64_t *, EncodingType) {
+		throw std::runtime_error("emitValueChange(uint64_t*) not supported for Double");
+	}
+
+	void dumpInitialBits(std::vector<uint8_t> &buf) const {
+		FST_DCHECK_GT(info.size(), kEmitTimeIndexAndEncodingSize);
+		EmitReaderHelper rh(info.data_ptr());
+		StreamVectorWriteHelper wh(buf);
+		(void)rh.readTimeIndexAndEncoding();  // skip the header of record [0]
+		auto v = rh.read<double>();
+		wh.write<double>(v);
+	}
+
+	void dumpValueChanges(std::vector<uint8_t> &buf) const {
+		StreamVectorWriteHelper wh(buf);
+		EmitReaderHelper rh(info.data_ptr());
+		const uint8_t *tail = info.data_ptr() + info.size();
+
+		bool first = true;
+		uint64_t prev_time_index = 0;
+
+		while (true) {
+			if (rh.ptr == tail) break;
+			FST_CHECK_GT(tail, rh.ptr);
+			const auto time_index = rh.read<uint64_t>();
+			const auto enc = rh.read<EncodingType>();
+			const auto num_byte = sizeof(double);
+			if (first) {
+				// Note: [0] is initial value, which is already dumped in dumpInitialBits()
+				first = false;
+			} else {
+				FST_CHECK(enc == EncodingType::BINARY);
+				const uint64_t delta_time_index = time_index - prev_time_index;
+				prev_time_index = time_index;
+				// Double shall be treated as non-binary
+				const bool has_non_binary = true;
+				wh  //
+					.writeLEB128((delta_time_index << 1) | has_non_binary)
+					.write<double>(rh.peek<double>());
+			}
+			rh.skip(num_byte);  // step over the value, whether or not it was emitted
+		}
+	}
+};
+
+template <typename T>
+class VariableInfoScalarInt {  // typed view for values that fit in one T per encoding plane
+	VariableInfo &info;
+
+public:
+	VariableInfoScalarInt(VariableInfo &info_) : info(info_) {}
+
+public:
+	inline size_t computeBytesNeeded(EncodingType encoding) const {
+		return kEmitTimeIndexAndEncodingSize + sizeof(T) * bitPerEncodedBit(encoding);
+	}
+
+	// The returning address points to the first byte of the value
+	inline EmitWriterHelper emitValueChangeCommonPart(
+		uint64_t current_time_index, EncodingType encoding
+	) {
+		if (current_time_index + 1 == 0) {
+			// This is the first value change, we need to remove everything
+			// and then add the new value
+			info.resize(0);
+		}
+		const size_t added_size = computeBytesNeeded(encoding);
+		const size_t old_size = info.size();
+		info.add_size(added_size);
+		EmitWriterHelper wh(info.data_ptr() + old_size);
+		wh.writeTimeIndexAndEncoding(current_time_index, encoding);
+		return wh;
+	}
+
+public:
+	void construct() {
+		info.resize(computeBytesNeeded(EncodingType::VERILOG));
+		EmitWriterHelper wh(info.data_ptr());
+		wh.writeTimeIndexAndEncoding(0, EncodingType::VERILOG).write(T(0)).write(T(-1));  // plane0 = 0, plane1 = all ones
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint64_t val) {
+		auto wh = emitValueChangeCommonPart(current_time_index, EncodingType::BINARY);
+		wh.template write<T>(val);  // converted to T before it is stored
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint32_t *val, EncodingType encoding) {
+		auto wh = emitValueChangeCommonPart(current_time_index, encoding);
+		for (unsigned i = 0; i < bitPerEncodedBit(encoding); ++i) {
+			// C++17: replace this with if constexpr
+			if (sizeof(T) == 8) {
+				uint64_t v = val[1];  // high bits
+				v <<= 32;
+				v |= val[0];  // low bits
+				wh.template write<uint64_t>(v);
+				val += 2;
+			} else {
+				wh.template write<T>(val[0]);
+				val += 1;
+			}
+		}
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint64_t *val, EncodingType encoding) {
+		auto wh = emitValueChangeCommonPart(current_time_index, encoding);
+		for (unsigned i = 0; i < bitPerEncodedBit(encoding); ++i) {
+			wh.template write<T>(val[i]);
+		}
+	}
+
+	void dumpInitialBits(std::vector<uint8_t> &buf) const {
+		// FST requires initial bits present
+		FST_DCHECK_GT(info.size(), kEmitTimeIndexAndEncodingSize);
+		EmitReaderHelper rh(info.data_ptr());
+		const auto time_index_enc = rh.readTimeIndexAndEncoding();
+		const auto enc = time_index_enc.second;
+		const auto bitwidth = info.bitwidth();
+
+		switch (enc) {
+		case EncodingType::BINARY: {
+			auto v0 = rh.read<T>();
+			for (unsigned i = bitwidth; i-- > 0;) {
+				const char c = ((v0 >> i) & T(1)) ? '1' : '0';
+				buf.push_back(c);
+			}
+			break;
+		}
+
+		case EncodingType::VERILOG: {
+			auto v0 = rh.read<T>();
+			auto v1 = rh.read<T>();
+			for (unsigned i = bitwidth; i-- > 0;) {
+				const T b1 = ((v1 >> i) & T(1));
+				const T b0 = ((v0 >> i) & T(1));
+				const char c = kEncodedBitToCharTable[(b1 << 1) | b0];
+				buf.push_back(c);
+			}
+			break;
+		}
+		// Not supporting VHDL now
+		// LCOV_EXCL_START
+		default:
+		case EncodingType::VHDL: {
+			auto v0 = rh.read<T>();
+			auto v1 = rh.read<T>();
+			auto v2 = rh.read<T>();
+			for (unsigned i = bitwidth; i-- > 0;) {
+				const T b2 = ((v2 >> i) & T(1));
+				const T b1 = ((v1 >> i) & T(1));
+				const T b0 = ((v0 >> i) & T(1));
+				const char c = kEncodedBitToCharTable[(b2 << 2) | (b1 << 1) | b0];
+				buf.push_back(c);
+			}
+			break;
+		}
+		}
+		// LCOV_EXCL_STOP
+	}
+
+	void dumpValueChanges(std::vector<uint8_t> &buf) const {
+		StreamVectorWriteHelper h(buf);
+		EmitReaderHelper rh(info.data_ptr());
+		const uint8_t *tail = info.data_ptr() + info.size();
+		const auto bitwidth = info.bitwidth();
+		bool first = true;
+		uint64_t prev_time_index = 0;
+		if (bitwidth == 1) {
+			while (true) {
+				if (rh.ptr == tail) {
+					break;
+				}
+				FST_DCHECK_GT(tail, rh.ptr);
+				const auto time_index = rh.read<uint64_t>();
+				const auto enc = rh.read<EncodingType>();
+				const auto num_element = bitPerEncodedBit(enc);
+				const auto num_byte = num_element * sizeof(T);
+				if (first) {
+					// Note: [0] is initial value, which is already dumped in dumpInitialBits()
+					first = false;
+				} else {
+					unsigned val = 0;  // bit i = bit 0 of plane i, the same index dumpInitialBits() uses
+					for (unsigned i = 0; i < num_element; ++i) {
+						val |= (unsigned(rh.peek<T>(i)) & 1u) << i;
+					}
+					uint64_t delta_time_index = time_index - prev_time_index;
+					prev_time_index = time_index;
+					switch (val) {
+						// clang-format off
+					case 0: delta_time_index = (delta_time_index<<2) | (0<<1) | 0; break; // '0'
+					case 1: delta_time_index = (delta_time_index<<2) | (1<<1) | 0; break; // '1'
+					case 2: delta_time_index = (delta_time_index<<4) | (0<<1) | 1; break; // 'X'
+					case 3: delta_time_index = (delta_time_index<<4) | (1<<1) | 1; break; // 'Z'
+					// Not supporting VHDL now
+					// LCOV_EXCL_START
+					case 4: delta_time_index = (delta_time_index<<4) | (2<<1) | 1; break; // 'H'
+					case 5: delta_time_index = (delta_time_index<<4) | (3<<1) | 1; break; // 'U'
+					case 6: delta_time_index = (delta_time_index<<4) | (4<<1) | 1; break; // 'W'
+					case 7: delta_time_index = (delta_time_index<<4) | (5<<1) | 1; break; // 'L'
+					case 8: delta_time_index = (delta_time_index<<4) | (6<<1) | 1; break; // '-'
+					case 9: delta_time_index = (delta_time_index<<4) | (7<<1) | 1; break; // '?'
+					default: break;
+					// LCOV_EXCL_STOP
+						// clang-format on
+					}
+					h.writeLEB128(delta_time_index);
+				}
+				rh.skip(num_byte);
+			}
+		} else {
+			while (true) {
+				if (rh.ptr == tail) {
+					break;
+				}
+				FST_CHECK_GT(tail, rh.ptr);
+				const auto time_index = rh.read<uint64_t>();
+				const auto enc = rh.read<EncodingType>();
+				const auto num_element = bitPerEncodedBit(enc);
+				const auto num_byte = num_element * sizeof(T);
+				if (first) {
+					first = false;
+				} else {
+					FST_CHECK(enc == EncodingType::BINARY);  // TODO
+					const bool has_non_binary = enc != EncodingType::BINARY;
+					const uint64_t delta_time_index = time_index - prev_time_index;
+					prev_time_index = time_index;
+					h  //
+						.writeLEB128((delta_time_index << 1) | has_non_binary)
+						.writeUIntPartialForValueChange(rh.peek<T>(), bitwidth);
+				}
+				rh.skip(num_byte);
+			}
+		}
+	}
+};
+
+class VariableInfoLongInt {  // typed view for values stored as several 64-bit words per plane
+	VariableInfo &info;
+	unsigned num_words() const { return (info.bitwidth() + 63) / 64; }  // 64-bit words per plane, rounded up
+
+public:
+	VariableInfoLongInt(VariableInfo &info_) : info(info_) {}
+
+public:
+	inline size_t computeBytesNeeded(EncodingType encoding) const {
+		return (
+			kEmitTimeIndexAndEncodingSize +
+			num_words() * sizeof(uint64_t) * bitPerEncodedBit(encoding)
+		);
+	}
+
+	inline EmitWriterHelper emitValueChangeCommonPart(
+		uint64_t current_time_index, EncodingType encoding
+	) {
+		if (current_time_index + 1 == 0) {  // time index of all ones: discard everything stored so far
+			info.resize(0);
+		}
+		const size_t added_size = computeBytesNeeded(encoding);
+		const size_t old_size = info.size();
+		info.add_size(added_size);
+
+		EmitWriterHelper wh(info.data_ptr() + old_size);
+		wh.writeTimeIndexAndEncoding(current_time_index, encoding);
+		return wh;  // cursor now points at the first value byte
+	}
+
+public:
+	void construct() {
+		const size_t nw = num_words();
+		info.resize(computeBytesNeeded(EncodingType::VERILOG));
+		EmitWriterHelper wh(info.data_ptr());
+		wh  //
+			.writeTimeIndexAndEncoding(0, EncodingType::VERILOG)
+			.fill(uint64_t(0), nw)
+			.fill(uint64_t(-1), nw);
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint64_t val) {
+		const unsigned nw = num_words();
+		auto wh = emitValueChangeCommonPart(current_time_index, EncodingType::BINARY);
+		wh.write(val).fill(uint64_t(0), nw - 1);  // val is word 0, the remaining words are zero
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint32_t *val, EncodingType encoding) {
+		const unsigned nw32 = (info.bitwidth() + 31) / 32;
+		const unsigned bpb = bitPerEncodedBit(encoding);
+
+		auto wh = emitValueChangeCommonPart(current_time_index, encoding);
+
+		for (unsigned i = 0; i < bpb; ++i) {
+			for (unsigned j = 0; j < nw32 / 2; ++j) {
+				uint64_t v = val[1];  // high bits
+				v <<= 32;
+				v |= val[0];  // low bits
+				wh.write(v);
+				val += 2;
+			}
+			if (nw32 % 2 != 0) {  // odd number of 32-bit words: the last one fills a 64-bit word alone
+				uint64_t v = val[0];
+				wh.write(v);
+				val += 1;
+			}
+		}
+	}
+
+	void emitValueChange(uint64_t current_time_index, const uint64_t *val, EncodingType encoding) {
+		const unsigned nw_encoded = num_words() * bitPerEncodedBit(encoding);
+		auto wh = emitValueChangeCommonPart(current_time_index, encoding);
+		wh.write(val, nw_encoded);
+	}
+
+	void dumpInitialBits(std::vector<uint8_t> &buf) const {
+		FST_DCHECK_GT(info.size(), kEmitTimeIndexAndEncodingSize);
+		EmitReaderHelper rh(info.data_ptr());
+		const auto time_index_enc = rh.readTimeIndexAndEncoding();
+		const auto enc = time_index_enc.second;
+		const unsigned nw = num_words();
+		switch (enc) {
+		case EncodingType::BINARY: {
+			for (unsigned word_index = nw; word_index-- > 0;) {
+				const uint64_t v0 = rh.peek<uint64_t>(word_index);
+				const unsigned num_bit =
+					(word_index * 64 + 64 > info.bitwidth()) ? (info.bitwidth() % 64) : 64;
+				for (unsigned bit_index = num_bit; bit_index-- > 0;) {
+					const char c = ((v0 >> bit_index) & uint64_t(1)) ? '1' : '0';
+					buf.push_back(c);
+				}
+			}
+			break;
+		}
+		case EncodingType::VERILOG: {
+			for (unsigned word_index = nw; word_index-- > 0;) {
+				const uint64_t v0 = rh.peek<uint64_t>(nw * 0 + word_index);
+				const uint64_t v1 = rh.peek<uint64_t>(nw * 1 + word_index);
+				const unsigned num_bit =
+					(word_index * 64 + 64 > info.bitwidth()) ? (info.bitwidth() % 64) : 64;
+				for (unsigned bit_index = num_bit; bit_index-- > 0;) {
+					const bool b0 = ((v0 >> bit_index) & uint64_t(1));
+					const bool b1 = ((v1 >> bit_index) & uint64_t(1));
+					const char c = kEncodedBitToCharTable[(b1 << 1) | b0];
+					buf.push_back(c);
+				}
+			}
+			break;
+		}
+		default:
+		case EncodingType::VHDL: {
+			// Not supporting VHDL now
+			// LCOV_EXCL_START
+			for (unsigned word_index = nw; word_index-- > 0;) {
+				const uint64_t v0 = rh.peek<uint64_t>(nw * 0 + word_index);
+				const uint64_t v1 = rh.peek<uint64_t>(nw * 1 + word_index);
+				const uint64_t v2 = rh.peek<uint64_t>(nw * 2 + word_index);
+				const unsigned num_bit =
+					(word_index * 64 + 64 > info.bitwidth()) ? (info.bitwidth() % 64) : 64;
+				for (unsigned bit_index = num_bit; bit_index-- > 0;) {
+					const bool b0 = ((v0 >> bit_index) & uint64_t(1));
+					const bool b1 = ((v1 >> bit_index) & uint64_t(1));
+					const bool b2 = ((v2 >> bit_index) & uint64_t(1));
+					const char c = kEncodedBitToCharTable[(b2 << 2) | (b1 << 1) | b0];
+					buf.push_back(c);
+				}
+			}
+			break;
+			// LCOV_EXCL_STOP
+		}
+			rh.skip(sizeof(uint64_t) * nw * bitPerEncodedBit(enc));  // NOTE(review): follows the last break inside the switch; looks unreachable
+		}
+	}
+
+	void dumpValueChanges(std::vector<uint8_t> &buf) const {
+		StreamVectorWriteHelper h(buf);
+		EmitReaderHelper rh(info.data_ptr());
+		const uint8_t *tail = info.data_ptr() + info.size();
+		const unsigned nw = num_words();
+		const unsigned bitwidth = info.bitwidth();  // Local copy for lambda capture/usage if needed
+
+		bool first = true;
+		uint64_t prev_time_index = 0;
+
+		while (true) {
+			if (rh.ptr == tail) break;
+			FST_DCHECK_GT(tail, rh.ptr);
+			const auto time_index = rh.read<uint64_t>();
+			const auto enc = rh.read<EncodingType>();
+			const auto num_element = bitPerEncodedBit(enc);
+			const auto num_byte = num_element * nw * sizeof(uint64_t);
+			if (first) {
+				// Note: [0] is initial value, which is already dumped in dumpInitialBits()
+				first = false;
+			} else {
+				FST_CHECK(enc == EncodingType::BINARY);  // TODO
+				const bool has_non_binary = enc != EncodingType::BINARY;
+				const uint64_t delta_time_index = time_index - prev_time_index;
+				prev_time_index = time_index;
+				h.writeLEB128((delta_time_index << 1) | has_non_binary);
+				if (bitwidth % 64 != 0) {
+					const unsigned remaining = bitwidth % 64;  // valid bits in the top word
+					uint64_t hi64 = rh.peek<uint64_t>(nw - 1);
+					// write from nw-1 to 1
+					for (unsigned j = nw - 1; j > 0; --j) {
+						uint64_t lo64 = rh.peek<uint64_t>(j - 1);
+						h.writeUIntBE((hi64 << (64 - remaining)) | (lo64 >> remaining));  // 64 bits straddling words j and j-1
+						hi64 = lo64;
+					}
+					// write 0
+					h.writeUIntPartialForValueChange(hi64, remaining);
+				} else {
+					// write from nw-1 to 0
+					for (unsigned j = nw; j-- > 0;) {
+						h.writeUIntBE(rh.peek<uint64_t>(j));
+					}
+				}
+			}
+			rh.skip(num_byte);  // step over the value words, whether or not they were emitted
+		}
+	}
+};
+
+}  // namespace detail
+
+template <typename Callable, typename... Args>
+auto VariableInfo::dispatchHelper(Callable &&callable, Args &&...args) const {
+	// Picks the typed view matching this variable and invokes
+	// callable(view, args...), returning whatever the callable returns.
+	// The views keep a mutable reference, hence the const_cast on a const method.
+	VariableInfo &self = const_cast<VariableInfo &>(*this);
+	if (this->is_real()) {
+		return callable(
+			detail::VariableInfoDouble(self), std::forward<Args>(args)...
+		);
+	}
+	// Decision: the branch miss is too expensive for large design, so we only use 3 types of
+	// int. Dedicated uint16_t / uint32_t views are intentionally not dispatched to.
+	const uint32_t width = this->bitwidth();
+	// up to 8 bits: one byte per plane
+	if (width <= 8) {
+		return callable(
+			detail::VariableInfoScalarInt<uint8_t>(self),
+			std::forward<Args>(args)...
+		);
+	}
+	// 9..64 bits: one 64-bit word per plane
+	if (width <= 64) {
+		return callable(
+			detail::VariableInfoScalarInt<uint64_t>(self),
+			std::forward<Args>(args)...
+		);
+	}
+	// wider than 64 bits: multi-word storage
+	return callable(
+		detail::VariableInfoLongInt(self),
+		std::forward<Args>(args)...
+	);
+}
+
+inline VariableInfo::VariableInfo(uint32_t bitwidth_, bool is_real_) {
+	platform::write_field(misc, bitwidth_, kBitwidthWidth, kBitwidthOffset);
+	platform::write_field(misc, is_real_, kIsRealWidth, kIsRealOffset);
+	dispatchHelper([](auto obj) { obj.construct(); });  // stores a BINARY record for reals, VERILOG otherwise
+	// Record the encoding construct() actually used so last_written_bytes() matches the stored record
+	last_written_encode_type(is_real_ ? EncodingType::BINARY : EncodingType::VERILOG);
+}
+
+inline uint32_t VariableInfo::emitValueChange(uint64_t current_time_index, const uint64_t val) {
+	const uint64_t size_before = size();
+	dispatchHelper([current_time_index, val](auto v) { v.emitValueChange(current_time_index, val); });
+	last_written_encode_type(EncodingType::BINARY);  // the single-word overload always stores BINARY
+	return size() - size_before;  // change in stored size, narrowed to uint32_t
+}
+
+inline uint32_t VariableInfo::emitValueChange(
+	uint64_t current_time_index, const uint32_t *val, EncodingType encoding
+) {
+	const uint64_t size_before = size();
+	dispatchHelper([&](auto view) { view.emitValueChange(current_time_index, val, encoding); });
+	last_written_encode_type(encoding);  // remembered for last_written_bytes()
+	return size() - size_before;  // change in stored size, narrowed to uint32_t
+}
+
+inline uint32_t VariableInfo::emitValueChange(
+	uint64_t current_time_index, const uint64_t *val, EncodingType encoding
+) {
+	const uint64_t size_before = size();
+	dispatchHelper([&](auto view) { view.emitValueChange(current_time_index, val, encoding); });
+	last_written_encode_type(encoding);  // remembered for last_written_bytes()
+	return size() - size_before;  // change in stored size, narrowed to uint32_t
+}
+
+inline void VariableInfo::dumpInitialBits(std::vector<uint8_t> &buf) const {
+	dispatchHelper([&buf](auto view) { view.dumpInitialBits(buf); });  // forwarded to the typed view
+}
+
+inline void VariableInfo::dumpValueChanges(std::vector<uint8_t> &buf) const {
+	dispatchHelper([&buf](auto view) { view.dumpValueChanges(buf); });  // forwarded to the typed view
+}
+
+inline uint64_t VariableInfo::last_written_bytes() const {  // size of one record in the last written encoding
+	const EncodingType last_enc = last_written_encode_type();
+	return dispatchHelper([last_enc](auto view) { return view.computeBytesNeeded(last_enc); });
+}
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_writer.cpp b/include/fstcpp/fstcpp_writer.cpp
new file mode 100644
index 000000000..6df611869
--- /dev/null
+++ b/include/fstcpp/fstcpp_writer.cpp
@@ -0,0 +1,890 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+// direct include
+#include "fstcpp/fstcpp_writer.h"
+// C system headers
+// C++ standard library headers
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+// Other libraries' .h files.
+#include <lz4.h>
+#include <zlib.h>
+// Your project's .h files.
+#include "fstcpp/fstcpp.h"
+#include "fstcpp/fstcpp_assertion.h"
+#include "fstcpp/fstcpp_stream_write_helper.h"
+#include "fstcpp/fstcpp_variable_info.h"
+
+using namespace std;
+
+// AT(x) accesses a vector at index x. In debug builds it is bounds-checked (.at(x) throws when
+// the index is out of range); in release builds (NDEBUG) it is a plain, unchecked operator[].
+// It should normally be needed only on very hot code paths.
+#ifndef NDEBUG
+#	define AT(x) .at(x)
+#else
+#	define AT(x) [x]
+#endif
+
+namespace fst {
+
+namespace detail {
+
+void BlackoutData::emitDumpActive(uint64_t current_timestamp, bool enable) {
+	StreamVectorWriteHelper h(buffer);  // appends one record: activity byte + LEB128 time delta
+	h.writeUIntBE<uint8_t>(enable).writeLEB128(current_timestamp - previous_timestamp);
+	previous_timestamp = current_timestamp;  // the next delta is relative to this record
+	++count;
+}
+ValueChangeData::ValueChangeData() {
+	variable_infos.reserve(1024);  // reserve room for 1024 variables up front
+}
+
+ValueChangeData::~ValueChangeData() = default;  // defaulted here, out of line
+
+void ValueChangeData::keepOnlyTheLatestValue() {
+	// Let every variable trim its own history first, then shrink the timestamp list.
+	for (VariableInfo &info : variable_infos) info.keepOnlyTheLatestValue();
+	FST_CHECK(not timestamps.empty());
+	// Collapse the timestamp list down to its newest entry.
+	const uint64_t newest = timestamps.back();
+	timestamps.assign(1, newest);
+}
+
+}  // namespace detail
+
+void Writer::open(const string_view_pair name) {
+	FST_CHECK(not main_fst_file_.is_open());
+	main_fst_file_.open(string(name.first, name.second), ios::binary);  // NOTE(review): result unchecked
+	// Reserve space for the header block, which is written last, in close(). Value-change data
+	// is flushed after this reserved region; geometry and hierarchy are appended in close().
+	main_fst_file_.seekp(kSharedBlockHeaderSize + HeaderInfo::total_size, ios_base::beg);
+}
+
+void Writer::close() {
+	if (not main_fst_file_.is_open()) return;  // nothing to do when no file is open
+	// Finalize header fields
+	if (header_.date[0] == '\0') {
+		// date is not set yet, set to the current date
+		setDate();
+	}
+	if (header_.start_time == kInvalidTime) {
+		header_.start_time = 0;  // no start time was ever recorded: fall back to 0
+	}
+	flushValueChangeData_(value_change_data_, main_fst_file_);  // write out pending wave data
+	appendGeometry_(main_fst_file_);
+	appendHierarchy_(main_fst_file_);
+	appendBlackout_(main_fst_file_);
+	// Note: writeHeader_ seeks back to offset 0, so it must run
+	// after all append operations
+	writeHeader_(header_, main_fst_file_);
+	main_fst_file_.close();
+}
+
+/////////////////////////////////////////
+// Hierarchy / variable API
+/////////////////////////////////////////
+void Writer::setScope(
+	Hierarchy::ScopeType scopetype,
+	const string_view_pair scopename,
+	const string_view_pair scopecomp
+) {
+	FST_CHECK(not hierarchy_finalized_);  // scopes may only be added before finalization
+	StreamVectorWriteHelper h(hierarchy_buffer_);
+	h  // record layout: VCD_SCOPE tag, scope type, then both strings via writeString0
+		.writeU8Enum(Hierarchy::ScopeControlType::VCD_SCOPE)
+		.writeU8Enum(scopetype)
+		.writeString0(scopename)
+		.writeString0(scopecomp);
+	++header_.num_scopes;  // this counter is later written into the file header
+}
+
+void Writer::upscope() {
+	FST_CHECK(not hierarchy_finalized_);  // hierarchy edits are only allowed before finalization
+	// TODO: shall we inline it?
+	StreamVectorWriteHelper h(hierarchy_buffer_);
+	h.writeU8Enum(Hierarchy::ScopeControlType::VCD_UPSCOPE);  // a single tag byte, no payload
+}
+
+Handle Writer::createVar(
+	Hierarchy::VarType vartype,
+	Hierarchy::VarDirection vardir,
+	uint32_t bitwidth,
+	const string_view_pair name,
+	Handle alias_handle
+) {
+	FST_CHECK(not hierarchy_finalized_);
+	FST_CHECK_LE(bitwidth, VariableInfo::kMaxSupportedBitwidth);
+	// write hierarchy entry: type, direction, name, length, alias
+	StreamVectorWriteHelper h(hierarchy_buffer_);
+
+	// determine real/string handling like original C implementation
+	bool is_real = false;
+	switch (vartype) {
+	case Hierarchy::VarType::VCD_REAL:
+	case Hierarchy::VarType::VCD_REAL_PARAMETER:
+	case Hierarchy::VarType::VCD_REALTIME:
+	case Hierarchy::VarType::SV_SHORTREAL:
+		is_real = true;
+		bitwidth = 8;  // recast to double size
+		break;
+	case Hierarchy::VarType::GEN_STRING:
+		bitwidth = 0;  // strings are recorded with a bitwidth of 0
+		break;
+	default:
+		break;
+	}
+	if (alias_handle > header_.num_handles) {
+		// sanitize: a handle that was never handed out is treated as "no alias"
+		alias_handle = 0;
+	}
+	const bool is_alias = alias_handle != 0;
+	// This counter is incremented whether alias or non-alias
+	++header_.num_vars;
+	if (not is_alias) {
+		// This counter is incremented only for non-alias variables
+		++header_.num_handles;
+		alias_handle = header_.num_handles;  // the new variable's own handle (handles start at 1)
+	}
+
+	h  // an alias entry stores the aliased handle, a non-alias entry stores 0
+		.writeU8Enum(vartype)
+		.writeU8Enum(vardir)
+		.writeString0(name)
+		.writeLEB128(bitwidth)
+		.writeLEB128(is_alias ? alias_handle : 0);
+
+	// Only a non-alias variable gets a geometry entry and its own VariableInfo slot
+	if (not is_alias) {
+		StreamVectorWriteHelper g(geometry_buffer_);
+		// Geometry length: uint32_t(-1) for bitwidth 0, 0 for reals, otherwise the bitwidth
+		const uint32_t geom_len = (bitwidth == 0 ? uint32_t(-1) : is_real ? uint32_t(0) : bitwidth);
+		g.writeLEB128(geom_len);
+		value_change_data_.variable_infos.emplace_back(bitwidth, is_real);
+	}
+
+	return alias_handle;  // the aliased handle for aliases, the new handle otherwise
+}
+
+// LCOV_EXCL_START
+Handle Writer::createVar2(
+	Hierarchy::VarType vartype,
+	Hierarchy::VarDirection vardir,
+	uint32_t bitwidth,
+	const string_view_pair name,
+	Handle alias_handle,
+	const string_view_pair type,
+	Hierarchy::SupplementalVarType svt,
+	Hierarchy::SupplementalDataType sdt
+) {
+	FST_CHECK(not hierarchy_finalized_);
+	(void)vartype;  // not implemented yet: every argument is deliberately unused
+	(void)vardir;
+	(void)bitwidth;
+	(void)name;
+	(void)alias_handle;
+	(void)type;
+	(void)svt;
+	(void)sdt;
+	throw runtime_error("TODO");  // always throws; callers cannot use this API yet
+	return 0;  // unreachable
+}
+// LCOV_EXCL_STOP
+
+/////////////////////////////////////////
+// Waveform API
+/////////////////////////////////////////
+void Writer::emitTimeChange(uint64_t tim) {
+	finalizeHierarchy_();
+
+	if (value_change_data_usage_ > value_change_data_flush_threshold_ or flush_pending_) {
+		flushValueChangeData_(value_change_data_, main_fst_file_);  // flush before the new time
+	}
+
+	// Update header: start_time tracks the minimum seen, end_time the most recent value
+	header_.start_time = min(header_.start_time, tim);
+	header_.end_time = tim;
+
+	if (value_change_data_.timestamps.empty() or value_change_data_.timestamps.back() != tim) {
+		value_change_data_.timestamps.push_back(tim);  // skip a repeat of the latest timestamp
+	}
+}
+
+void Writer::emitDumpActive(bool enable) {
+	// TODO: this API is not fully understood, need to check
+	FST_CHECK(not value_change_data_.timestamps.empty());  // needs at least one emitted time
+	blackout_data_.emitDumpActive(value_change_data_.timestamps.back(), enable);
+}
+
+template <typename T, typename... U>
+uint64_t emitValueHelperStaticDispatch_(
+	VariableInfo *var_info, const uint64_t time_index, U &&...val
+) {  // casts var_info to the caller-chosen T* and forwards the remaining arguments unchanged
+	return static_cast<T *>(var_info)->emitValueChange(time_index, std::forward<U>(val)...);
+}
+
+template <typename... T>
+void Writer::emitValueChangeHelper_(Handle handle, T &&...val) {
+	// Let data prefetch go first
+	auto &var_info = value_change_data_.variable_infos AT(handle - 1);  // slot index is handle - 1
+	__builtin_prefetch(var_info.data_ptr() + var_info.size() - 1, 1, 0);  // compiler builtin
+
+	finalizeHierarchy_();
+
+	// Original implementation: virtual, but vtable is too costly, we switch to if-else static
+	// dispatch
+	value_change_data_usage_ +=  // time index passed below is the index of the latest timestamp
+		var_info.emitValueChange(value_change_data_.timestamps.size() - 1, std::forward<T>(val)...);
+}
+
+void Writer::emitValueChange(Handle handle, const uint32_t *val, EncodingType encoding) {
+	emitValueChangeHelper_(handle, val, encoding);  // uint32_t-array overload, forwarded as is
+}
+
+void Writer::emitValueChange(Handle handle, const uint64_t *val, EncodingType encoding) {
+	emitValueChangeHelper_(handle, val, encoding);  // uint64_t-array overload, forwarded as is
+}
+
+void Writer::emitValueChange(Handle handle, uint64_t val) {
+	emitValueChangeHelper_(handle, val);  // single uint64_t value overload, forwarded as is
+}
+
+void Writer::emitValueChange(Handle handle, const char *val) {
+	finalizeHierarchy_();
+	auto &var_info = value_change_data_.variable_infos AT(handle - 1);
+
+	// For real handles, val points at the 8 raw bytes of an IEEE 754 double. Copy them out with
+	// memcpy: the buffer may be unaligned, so dereferencing a casted uint64_t* would be UB.
+	if (var_info.is_real()) {
+		uint64_t raw_bits;
+		memcpy(&raw_bits, val, sizeof(raw_bits));
+		emitValueChange(handle, raw_bits);
+		return;
+	}
+
+	// For normal integer handles, const char* is "01xz..." (1B per bit)
+	const uint32_t bitwidth = var_info.bitwidth();
+	FST_DCHECK_NE(bitwidth, 0);
+
+	val += bitwidth;  // walk backwards from one past the last character
+	thread_local static vector<uint64_t> packed_value_buffer;
+	const unsigned num_words = (bitwidth + 63) / 64;
+	packed_value_buffer.assign(num_words, 0);  // every word starts at 0
+	for (unsigned i = 0; i < num_words; ++i) {
+		const char *start = val - std::min((i + 1) * 64, bitwidth);
+		const char *end = val - 64 * i;
+		for (const char *p = start; p < end; ++p) {
+			// No checking for invalid characters, follow original C implementation
+			packed_value_buffer[i] <<= 1;
+			packed_value_buffer[i] |= (*p - '0');
+		}
+	}
+
+	if (bitwidth <= 64) {
+		emitValueChange(handle, packed_value_buffer.front());
+	} else {
+		emitValueChange(handle, packed_value_buffer.data(), EncodingType::BINARY);
+	}
+}
+
+/////////////////////////////////////////
+// File flushing functions
+/////////////////////////////////////////
+void Writer::writeHeader_(const Header &header, ostream &os) {
+	StreamWriteHelper h(os);
+	// Fallback writer name for an unset header.writer; const because it is only ever read.
+	static const char kDefaultWriterName[sizeof(header.writer)] = "fstcppWriter";
+	const char *writer_name = header.writer[0] == '\0' ? kDefaultWriterName : header.writer;
+	// Actual write: the header block always lives at the very beginning of the file
+	h  //
+		.seek(streamoff(0), ios_base::beg)
+		.writeBlockHeader(BlockType::HEADER, HeaderInfo::total_size)
+		.writeUInt(header.start_time)
+		.writeUInt(header.end_time)
+		.writeFloat(HeaderInfo::kEndianessMagicIdentifier)
+		.writeUInt(header.writer_memory_use)
+		.writeUInt(header.num_scopes)
+		.writeUInt(header.num_vars)
+		.writeUInt(header.num_handles)
+		.writeUInt(header.num_value_change_data_blocks)
+		.writeUInt(header.timescale)
+		.write(writer_name, sizeof(header.writer))
+		.write(header.date, sizeof(header.date))
+		.fill('\0', HeaderInfo::Size::reserved)
+		.writeUInt(static_cast<uint8_t>(header.filetype))
+		.writeUInt(header.timezero);
+
+	FST_DCHECK_EQ(os.tellp(), HeaderInfo::total_size + kSharedBlockHeaderSize);
+}
+
+namespace {  // compression helpers
+
+// These API pass compressed_data to avoid frequent reallocations
+void compressUsingLz4(const vector<uint8_t> &uncompressed_data, vector<uint8_t> &compressed_data) {
+	const int uncompressed_size = uncompressed_data.size();
+	const int compressed_bound = LZ4_compressBound(uncompressed_size);
+	compressed_data.resize(compressed_bound);
+	const auto *src = reinterpret_cast<const char *>(uncompressed_data.data());
+	auto *dst = reinterpret_cast<char *>(compressed_data.data());
+	const int compressed_size = LZ4_compress_default(src, dst, uncompressed_size, compressed_bound);
+	// LZ4 returns 0 on failure (e.g. input above LZ4_MAX_INPUT_SIZE). An empty result would
+	// otherwise be picked by selectSmaller() and written as valid data, so fail loudly like zlib.
+	if (compressed_size <= 0) throw runtime_error("Failed to compress data with LZ4");
+	compressed_data.resize(compressed_size);
+}
+
+void compressUsingZlib(
+	const vector<uint8_t> &uncompressed_data, vector<uint8_t> &compressed_data, int level
+) {
+	// Deflate uncompressed_data into compressed_data at the given zlib level.
+	// The output is first sized to zlib's worst-case bound and trimmed afterwards.
+	const uLong src_len = uncompressed_data.size();
+	uLongf dst_len = compressBound(src_len);
+	compressed_data.resize(dst_len);
+
+	const Bytef *const src = reinterpret_cast<const Bytef *>(uncompressed_data.data());
+	Bytef *const dst = reinterpret_cast<Bytef *>(compressed_data.data());
+	// compress2 overwrites dst_len with the number of bytes it actually produced.
+	const int rc = compress2(dst, &dst_len, src, src_len, level);
+	if (rc == Z_OK) {
+		compressed_data.resize(dst_len);
+		return;
+	}
+
+	// Every status other than Z_OK is surfaced to the caller as an exception.
+	throw runtime_error("Failed to compress data with zlib, error code: " + to_string(rc));
+}
+
+pair<const uint8_t *, size_t> selectSmaller(
+	const vector<uint8_t> &compressed_data, const vector<uint8_t> &uncompressed_data
+) {
+	// Returns a non-owning (pointer, length) view of the strictly smaller buffer.
+	// On equal sizes the uncompressed buffer is returned, so the compressed form is only
+	// selected when it actually saves space.
+	// The view is only valid while the chosen vector is alive and unmodified.
+	const bool compressed_wins = compressed_data.size() < uncompressed_data.size();
+	const vector<uint8_t> &chosen = compressed_wins ? compressed_data : uncompressed_data;
+
+	pair<const uint8_t *, size_t> view(chosen.data(), chosen.size());
+	return view;
+}
+
+}  // namespace
+
+// AppendHierarchy_ and AppendGeometry_ shares a very similar structure
+// But they are slightly different in the original C implementation...
+void Writer::appendGeometry_(ostream &os) {
+	if (geometry_buffer_.empty()) {
+		// skip the geometry block if there is no data
+		return;
+	}
+	vector<uint8_t> geometry_buffer_compressed_;
+	compressUsingZlib(geometry_buffer_, geometry_buffer_compressed_, 9);  // zlib level 9
+	// TODO: Replace with structured binding in C++17
+	const auto selected_pair = selectSmaller(geometry_buffer_compressed_, geometry_buffer_);
+	const auto selected_data = selected_pair.first;
+	const auto selected_size = selected_pair.second;
+
+	StreamWriteHelper h(os);
+	h  //
+		.seek(0, ios_base::end)
+		// 16 is for the uncompressed_size and header_.num_handles
+		.writeBlockHeader(BlockType::GEOMETRY, selected_size + 16)
+		.writeUInt<uint64_t>(geometry_buffer_.size())
+		// I don't know why the original C implementation write num_handles again here
+		// but we have to follow it
+		.writeUInt(header_.num_handles)
+		.write(selected_data, selected_size);  // raw bytes if compression did not shrink them
+}
+
+void Writer::appendHierarchy_(ostream &os) {
+	if (hierarchy_buffer_.empty()) {
+		// skip the hierarchy block if there is no data
+		return;
+	}
+
+	// Compress hierarchy_buffer_ with LZ4 via the shared helper used for wave data.
+	vector<uint8_t> hierarchy_buffer_compressed_;
+	compressUsingLz4(hierarchy_buffer_, hierarchy_buffer_compressed_);
+	const size_t compressed_size = hierarchy_buffer_compressed_.size();
+	// The input is non-empty here, so an empty result can only mean that LZ4 failed
+	// (LZ4_compress_default returns 0 on error). Do not write an undecodable block.
+	if (compressed_size == 0) {
+		throw runtime_error("Failed to compress hierarchy data with LZ4");
+	}
+
+	StreamWriteHelper h(os);
+	h  //
+		.seek(0, ios_base::end)
+		// +8 is for the uint64_t uncompressed_size that precedes the compressed bytes
+		.writeBlockHeader(BlockType::HIERARCHY_LZ4_COMPRESSED, compressed_size + 8)
+		.writeUInt<uint64_t>(hierarchy_buffer_.size())
+		.write(hierarchy_buffer_compressed_.data(), compressed_size);
+}
+
+void Writer::appendBlackout_(ostream &os) {
+	if (blackout_data_.count == 0) {
+		// skip the blackout block if there is no data
+		return;
+	}
+	const vector<uint8_t> &blackout_data = blackout_data_.buffer;
+	const auto begin_of_blackout_block = os.tellp();
+	StreamWriteHelper h(os);
+	h  //
+		.seek(kSharedBlockHeaderSize, ios_base::cur)  // skip the block header for now
+		// The payload starts with the NUMBER of blackout records (not the byte size of the
+		// buffer), LEB128 encoded, so the block size is only known after writing.
+		.writeLEB128(blackout_data_.count)
+		.write(blackout_data.data(), blackout_data.size());
+
+	const auto size_of_blackout_block = os.tellp() - begin_of_blackout_block;
+	h  //
+	   // go back to the beginning of the block
+		.seek(begin_of_blackout_block, ios_base::beg)
+		// and write the block header
+		.writeBlockHeader(BlockType::BLACKOUT, size_of_blackout_block - kSharedBlockHeaderSize);
+}
+
+void detail::ValueChangeData::writeInitialBits(vector<uint8_t> &os) const {
+	// Append every variable's initial bits to `os`, in variable_infos order.
+	// Only raw bytes are produced here; this function neither compresses nor prints anything.
+	// Each variable decides its own byte layout through dumpInitialBits().
+	for (const VariableInfo &info : variable_infos) {
+		info.dumpInitialBits(os);
+	}
+}
+
+vector<vector<uint8_t>> detail::ValueChangeData::computeWaveData() const {
+	// Produces one byte buffer per variable; buffer k belongs to variable_infos[k].
+	// Each buffer is filled by that variable's dumpValueChanges().
+	vector<vector<uint8_t>> per_variable(variable_infos.size());
+	auto slot = per_variable.begin();
+	for (const VariableInfo &info : variable_infos) info.dumpValueChanges(*slot++);
+	return per_variable;
+}
+
+vector<int64_t> detail::ValueChangeData::uniquifyWaveData(vector<vector<uint8_t>> &data) {
+	// Deduplicates wave data by byte content. For each index i the returned positions[i] is:
+	//  - = 0: data[i] is empty, or it is the first occurrence of its content
+	//  - < 0: data[i] repeats the content of an earlier entry k, stored as -(k + 1);
+	//         data[i] itself is cleared in that case
+	using Bytes = vector<uint8_t>;
+
+	// Hash and equality look through the pointer, so the map is keyed by content.
+	struct ContentHash {
+		size_t operator()(const Bytes *bytes) const {
+			size_t acc = 0;
+			for (const uint8_t b : *bytes) {
+				acc ^= b + 0x9e3779b9 + (acc << 6) + (acc >> 2);
+			}
+			return acc;
+		}
+	};
+	struct ContentEqual {
+		bool operator()(const Bytes *lhs, const Bytes *rhs) const {
+			return *lhs == *rhs;
+		}
+	};
+	// Maps content -> index of its first occurrence. Keys point into `data`.
+	unordered_map<const Bytes *, int64_t, ContentHash, ContentEqual> first_seen;
+
+	vector<int64_t> positions(data.size(), 0);
+	for (size_t i = 0; i != data.size(); ++i) {
+		Bytes &current = data[i];
+		if (current.empty()) continue;  // empty entries are never registered or aliased
+		const auto result = first_seen.emplace(&current, static_cast<int64_t>(i));
+		if (result.second) continue;  // newly inserted: first occurrence, stays 0
+		// Duplicate: point at the first occurrence and drop the bytes to save memory.
+		// Only entries that were NOT inserted are cleared, so stored keys stay intact.
+		positions[i] = -(result.first->second + 1);
+		current.clear();
+	}
+	return positions;
+}
+
+uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
+	ostream &os,
+	const vector<vector<uint8_t>> &data,
+	vector<int64_t> &positions,
+	WriterPackType pack_type
+) {
+	// After this function, positions[i] is:
+	//  - = 0: If variable i has no wave data
+	//  - < 0: The negative alias value produced by uniquifyWaveData(),
+	//  unchanged
+	//  - > 0: The size (in bytes) of the wave data block for *previous* variable,
+	//         the previous block size of the first block is 1 (required by FST spec).
+	StreamWriteHelper h(os);
+	int64_t previous_size = 1;
+	uint64_t written_count = 0;  // number of wave data blocks written; this is the return value
+	vector<uint8_t> compressed_data;  // scratch buffer reused across iterations
+	for (size_t i = 0; i < positions.size(); ++i) {
+		if (positions[i] < 0) {
+			// duplicate (negative index), do nothing
+		} else if (data[i].empty()) {
+			// no change (empty data), positions[i] remains 0
+		} else {
+			// try to compress; blocks of at most 32 bytes are always written raw
+			const uint8_t *selected_data;
+			size_t selected_size;
+			if (pack_type == WriterPackType::NO_COMPRESSION or data[i].size() <= 32) {
+				selected_data = data[i].data();
+				selected_size = data[i].size();
+			} else {
+				compressUsingLz4(data[i], compressed_data);
+				const auto selected_pair = selectSmaller(compressed_data, data[i]);
+				selected_data = selected_pair.first;
+				selected_size = selected_pair.second;
+			}
+			const bool is_compressed = selected_data != data[i].data();  // did LZ4 output win?
+
+			// non-empty unique data, write it
+			written_count++;
+			streamoff bytes_written;
+			h  //
+				.beginOffset(bytes_written)
+				// FST spec: 0 means no compression, >0 for the size of the original data
+				.writeLEB128(is_compressed ? data[i].size() : 0)
+				.write(selected_data, selected_size)
+				.endOffset(&bytes_written);
+			positions[i] = previous_size;  // size of the previously written block
+			previous_size = bytes_written;  // this block's size, consumed by the next block
+		}
+	}
+	return written_count;
+}
+
+void detail::ValueChangeData::writeEncodedPositions(
+	const vector<int64_t> &encoded_positions, ostream &os
+) {
+	// Encode positions with the specified run/varint rules and write them to os.
+	StreamWriteHelper h(os);
+
+	size_t i = 0;
+	const size_t n = encoded_positions.size();
+
+	// arbitrary positive value for prev_negative
+	// so that first negative is always != prev_negative
+	int64_t prev_negative = 1;
+
+	// Please refer to the comments in
+	// encodePositionsAndwriteUniqueWaveData() for the meaning of zero, negative and positive
+	// positions.
+	while (i < n) {
+		if (encoded_positions[i] == 0) {
+			// zero: handle zero run-length
+			size_t run = 0;
+			while (i < n && encoded_positions[i] == 0) {
+				++run;
+				++i;
+			}
+			// encode as (run << 1) | 0 and write as unsigned LEB128 (low bit 0 marks a run)
+			h.writeLEB128(run << 1);
+		} else {
+			// non-zero
+			int64_t value_to_encode = 0;
+			int64_t cur = encoded_positions[i];
+			if (cur < 0) {
+				if (cur == prev_negative) {
+					value_to_encode = 0;  // same alias as the previous negative entry
+				} else {
+					value_to_encode = cur;
+					prev_negative = cur;
+				}
+			} else {
+				value_to_encode = cur;
+			}
+
+			// encode as (value << 1) | 1 in signed LEB128; written as value * 2 + 1 because
+			// left-shifting a negative value is undefined behavior before C++20
+			h.writeLEB128Signed(value_to_encode * 2 + 1);
+			++i;
+		}
+	}
+}
+
+void detail::ValueChangeData::writeTimestamps(vector<uint8_t> &os) const {
+	// Serialize the timestamps as LEB128-encoded deltas appended to `os`.
+	// The running base starts at 0, so the first delta equals timestamps[0] itself.
+	StreamVectorWriteHelper h(os);
+	uint64_t base = 0;
+	for (const uint64_t stamp : timestamps) {
+		const uint64_t step = stamp - base;
+		h.writeLEB128(step);
+		base = stamp;
+	}
+}
+
+void Writer::flushValueChangeDataConstPart_(
+	const detail::ValueChangeData &vcd, ostream &os, WriterPackType pack_type
+) {
+	// 0. setup
+	StreamWriteHelper h(os);
+
+	// 1. write Block Header & Global Fields (start/end/mem_req placeholder)
+	// FST_BL_VCDATA_DYN_ALIAS2 (8) maps to WaveDataVersion3 in fst_file.h
+	// The positions we cannot fill in yet
+	const auto p_tmp1 = [&]() {
+		streamoff start_pos, memory_usage_pos;
+		h                            //
+			.beginOffset(start_pos)  // record start position
+			.writeBlockHeader(BlockType::WAVE_DATA_VERSION3, 0 /* Length placeholder 0 */)
+			.writeUInt(vcd.timestamps.front())
+			.writeUInt(vcd.timestamps.back())
+			.beginOffset(memory_usage_pos)  // record memory usage position
+			.writeUInt<uint64_t>(0);        // placeholder for memory usage
+		return make_pair(start_pos, memory_usage_pos);
+	}();
+	const auto start_pos = p_tmp1.first;
+	const auto memory_usage_pos = p_tmp1.second;
+
+	// 2. Bits Section
+	{
+		vector<uint8_t> bits_data;
+		vcd.writeInitialBits(bits_data);
+		vector<uint8_t> bits_data_compressed;
+		const uint8_t *selected_data;
+		size_t selected_size;
+		if (pack_type == WriterPackType::NO_COMPRESSION or bits_data.size() < 32) {
+			selected_data = bits_data.data();
+			selected_size = bits_data.size();
+		} else {
+			compressUsingZlib(bits_data, bits_data_compressed, 4);
+			const auto selected_pair = selectSmaller(bits_data_compressed, bits_data);
+			selected_data = selected_pair.first;
+			selected_size = selected_pair.second;
+		}
+
+		h                                            //
+			.writeLEB128(bits_data.size())           // uncompressed length
+			.writeLEB128(selected_size)              // compressed length
+			.writeLEB128(vcd.variable_infos.size())  // bits count
+			.write(selected_data, selected_size);
+	}
+
+	// 3. Waves Section
+	// Note: We need positions for the next section
+	const auto p_tmp2 = [&, pack_type]() {
+		auto wave_data = vcd.computeWaveData();
+		const size_t memory_usage =
+			accumulate(wave_data.begin(), wave_data.end(), size_t(0), [](size_t a, const auto &b) {
+				return a + b.size();
+			});
+		auto positions = vcd.uniquifyWaveData(wave_data);
+		h
+			// Note: this is not a typo, I expect we shall write count here.
+			// but the spec indeed write vcd.variable_infos.size(),
+			// which is repeated 1 times in header block, 2 times in valuechange block
+			.writeLEB128(vcd.variable_infos.size())
+			.writeUInt(uint8_t('4'));
+		const uint64_t count = detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
+			os, wave_data, positions, pack_type
+		);
+		(void)count;
+		return make_pair(std::move(positions), memory_usage);  // move: do not copy the vector
+	}();
+	const auto &positions = p_tmp2.first;  // bind to the pair member instead of copying it
+	const auto memory_usage = p_tmp2.second;
+
+	// 4. Position Section
+	{
+		const auto pos_begin = os.tellp();
+		vcd.writeEncodedPositions(positions, os);
+		const uint64_t pos_size = os.tellp() - pos_begin;
+		h.writeUInt(pos_size);  // Length comes AFTER data for positions
+	}
+
+	// 5. Time Section
+	{
+		vector<uint8_t> time_data;
+		vcd.writeTimestamps(time_data);
+		vector<uint8_t> time_data_compressed;
+		const uint8_t *selected_data;
+		size_t selected_size;
+		if (pack_type == WriterPackType::NO_COMPRESSION) {
+			selected_data = time_data.data();
+			selected_size = time_data.size();
+		} else {
+			compressUsingZlib(time_data, time_data_compressed, 9);
+			const auto selected_pair = selectSmaller(time_data_compressed, time_data);
+			selected_data = selected_pair.first;
+			selected_size = selected_pair.second;
+		}
+		h                                                 //
+			.write(selected_data, selected_size)          // time data
+			.writeUInt(time_data.size())                  // uncompressed len
+			.writeUInt(selected_size)                     // compressed len
+			.writeUInt(uint64_t(vcd.timestamps.size()));  // count
+	}
+
+	// 6. Patch Block Length and Memory Required
+	streamoff end_pos;
+	h  //
+		.beginOffset(end_pos)
+		// Patch Block Length (after 1 byte Type)
+		.seek(start_pos + streamoff(1), ios_base::beg)
+		.writeUInt<uint64_t>(end_pos - start_pos - 1)
+		// Patch Memory Required
+		.seek(memory_usage_pos, ios_base::beg)
+		.writeUInt<uint64_t>(memory_usage)
+		// Restore position to end
+		.seek(end_pos, ios_base::beg);
+}
+
+namespace {  // Helper functions for createEnumTable
+
+void appendEscToString(const string_view_pair in, string &out) {
+	// Named C escapes for control/quote chars, printable non-space ASCII verbatim, octal otherwise.
+	for (size_t i = 0; i != in.second; ++i) {
+		const char c = in.first[i];
+		const char *named_escape = nullptr;
+		switch (c) {
+			// clang-format off
+		case '\a': named_escape = "\\a"; break;
+		case '\b': named_escape = "\\b"; break;
+		case '\f': named_escape = "\\f"; break;
+		case '\n': named_escape = "\\n"; break;
+		case '\r': named_escape = "\\r"; break;
+		case '\t': named_escape = "\\t"; break;
+		case '\v': named_escape = "\\v"; break;
+		case '\'': named_escape = "\\'"; break;
+		case '\"': named_escape = "\\\""; break;
+		case '\\': named_escape = "\\\\"; break;
+		case '?': named_escape = "\\?"; break;
+		default: break;
+		// clang-format on
+		}
+		if (named_escape != nullptr) {
+			out += named_escape;
+		} else if (c > ' ' && c <= '~') {
+			out += c;
+		} else {
+			const unsigned char byte = static_cast<unsigned char>(c);
+			out += '\\';
+			out += static_cast<char>('0' + (byte >> 6));
+			out += static_cast<char>('0' + ((byte >> 3) & 7));
+			out += static_cast<char>('0' + (byte & 7));
+		}
+	}
+}
+
+}  // namespace
+
+void Writer::setAttrBegin(
+	Hierarchy::AttrType attrtype,
+	Hierarchy::AttrSubType subtype,
+	const string_view_pair attrname,
+	uint64_t arg
+) {
+	FST_CHECK(not hierarchy_finalized_);  // attributes may only be added before finalization
+
+	StreamVectorWriteHelper h(hierarchy_buffer_);
+
+	if (attrtype > Hierarchy::AttrType::MAX) {  // out-of-range type: fall back to MISC/unknown
+		attrtype = Hierarchy::AttrType::MISC;
+		subtype = Hierarchy::AttrSubType::MISC_UNKNOWN;
+	}
+
+	switch (attrtype) {  // clamp an out-of-range subtype to the first subtype of its family
+		// clang-format off
+	case Hierarchy::AttrType::ARRAY:
+		if (
+			subtype < Hierarchy::AttrSubType::ARRAY_NONE ||
+			subtype > Hierarchy::AttrSubType::ARRAY_SPARSE
+		) {
+			subtype = Hierarchy::AttrSubType::ARRAY_NONE;
+		}
+		break;
+	case Hierarchy::AttrType::ENUM:
+		if (
+			subtype < Hierarchy::AttrSubType::ENUM_SV_INTEGER ||
+			subtype > Hierarchy::AttrSubType::ENUM_TIME
+		) {
+			subtype = Hierarchy::AttrSubType::ENUM_SV_INTEGER;
+		}
+		break;
+	case Hierarchy::AttrType::PACK:
+		if (
+			subtype < Hierarchy::AttrSubType::PACK_NONE ||
+			subtype > Hierarchy::AttrSubType::PACK_SPARSE
+		) {
+			subtype = Hierarchy::AttrSubType::PACK_NONE;
+		}
+		break;
+	// clang-format on
+	case Hierarchy::AttrType::MISC:  // MISC subtypes are passed through unchanged
+	default:
+		break;
+	}
+
+	h  // record layout: GEN_ATTR_BEGIN tag, type, subtype, name via writeString0, LEB128 arg
+		.writeU8Enum(Hierarchy::ScopeControlType::GEN_ATTR_BEGIN)
+		.writeU8Enum(attrtype)
+		.writeU8Enum(subtype)
+		.writeString0(attrname)
+		.writeLEB128(arg);
+}
+
+namespace {
+
+// overload for string += string_view_
+// Remove this once C++17 is required
+}  // namespace
+
+EnumHandle Writer::createEnumTable(
+	const string_view_pair name,
+	uint32_t min_valbits,
+	const vector<pair<string_view_pair, string_view_pair>> &literal_val_arr
+) {
+	// A table without a name or without entries is not recorded; 0 is returned instead of a
+	// freshly allocated handle.
+	if (name.second == 0 or literal_val_arr.empty()) {
+		return 0;
+	}
+
+	// The table is serialized into one attribute string of space-separated fields:
+	//   <name> <entry count> <literal 1> ... <literal N> <value 1> ... <value N>
+	// Literals and values go through appendEscToString.
+	string text;
+	text.reserve(256);
+	text.append(name.first, name.second);
+	text += ' ';
+	text += to_string(literal_val_arr.size());
+
+	// Each field is preceded by its separator, so there is no trailing space to strip.
+	for (const auto &entry : literal_val_arr) {
+		text += ' ';
+		appendEscToString(entry.first, text);
+	}
+	for (const auto &entry : literal_val_arr) {
+		const string_view_pair &value = entry.second;
+		text += ' ';
+		// Values shorter than min_valbits characters are left-padded with '0'.
+		if (min_valbits > 0 and value.second < min_valbits) {
+			text.append(min_valbits - value.second, '0');
+		}
+		appendEscToString(value, text);
+	}
+
+	// Allocate the next enum handle and publish the table as a MISC_ENUMTABLE attribute.
+	const EnumHandle handle = ++enum_count_;
+	setAttrBegin(
+		Hierarchy::AttrType::MISC,
+		Hierarchy::AttrSubType::MISC_ENUMTABLE,
+		make_string_view_pair(text.c_str(), text.size()),
+		handle
+	);
+	return handle;
+}
+
+}  // namespace fst
diff --git a/include/fstcpp/fstcpp_writer.h b/include/fstcpp/fstcpp_writer.h
new file mode 100644
index 000000000..8ec161e67
--- /dev/null
+++ b/include/fstcpp/fstcpp_writer.h
@@ -0,0 +1,445 @@
+// SPDX-FileCopyrightText: 2025-2026 Yu-Sheng Lin <johnjohnlys@gmail.com>
+// SPDX-FileCopyrightText: 2025-2026 Yoda Lee <lc85301@gmail.com>
+// SPDX-License-Identifier: MIT
+// Project: libfstwriter
+// Website: https://github.com/gtkwave/libfstwriter
+#pragma once
+// direct include
+#include "fstcpp/fstcpp.h"
+// C system headers
+// C++ standard library headers
+#include <algorithm>
+#include <cstdint>
+#include <ctime>
+#include <fstream>
+#include <string>
+#include <vector>
+#if __cplusplus >= 201703L
+#	include <string_view>
+#endif
+// Other libraries' .h files.
+// Your project's .h files.
+#include "fstcpp/fstcpp_assertion.h"
+#include "fstcpp/fstcpp_variable_info.h"
+
+namespace fst {
+
+class Writer;
+
+namespace detail {
+
+// We define BlackoutData here for better code inlining, no forward declaration
+struct BlackoutData {
+	std::vector<uint8_t> buffer;
+	uint64_t previous_timestamp = 0;
+	uint64_t count = 0;
+
+	void emitDumpActive(uint64_t current_timestamp, bool enable);
+};
+
+// We define ValueChangeData here for better code inlining, no forward declaration
+struct ValueChangeData {
+	std::vector<VariableInfo> variable_infos;
+	std::vector<uint64_t> timestamps;
+
+	ValueChangeData();
+	~ValueChangeData();
+
+	void writeInitialBits(std::vector<uint8_t> &os) const;
+	std::vector<std::vector<uint8_t>> computeWaveData() const;
+	static std::vector<int64_t> uniquifyWaveData(std::vector<std::vector<uint8_t>> &data);
+	static uint64_t encodePositionsAndwriteUniqueWaveData(
+		std::ostream &os,
+		const std::vector<std::vector<uint8_t>> &unique_data,
+		std::vector<int64_t> &positions,
+		WriterPackType pack_type
+	);
+	static void writeEncodedPositions(
+		const std::vector<int64_t> &encoded_positions, std::ostream &os
+	);
+	void writeTimestamps(std::vector<uint8_t> &os) const;
+	void keepOnlyTheLatestValue();
+};
+
+}  // namespace detail
+
+class Writer {
+	friend class WriterTest;
+
+public:
+	Writer() {}
+	Writer(const string_view_pair name) {
+		if (name.second != 0) open(name);
+	}
+	~Writer() { close(); }
+
+	Writer(const Writer &) = delete;
+	Writer(Writer &&) = delete;
+	Writer &operator=(const Writer &) = delete;
+	Writer &operator=(Writer &&) = delete;
+
+	// File control
+	void open(const string_view_pair name);
+	void close();
+
+	//////////////////////////////
+	// Header manipulation API
+	//////////////////////////////
+	const Header &getHeader() const;
+	void setTimecale(int8_t timescale) { header_.timescale = timescale; }  // Stores the timescale in the header. NOTE(review): "Timecale" [sic] is the public name; renaming it would change the API.
+	void setWriter(const string_view_pair Writer) {
+		const auto len = std::min(Writer.second, sizeof(header_.writer));
+		std::copy_n(Writer.first, len, header_.writer);
+		if (len != sizeof(header_.writer)) {
+			header_.writer[len] = '\0';
+		}
+	}
+	void setDate(const string_view_pair date_str) {
+		const auto len = date_str.second;
+		FST_CHECK_EQ(len, sizeof(header_.date) - 1);
+		std::copy_n(date_str.first, len, header_.date);
+		header_.date[len] = '\0';
+	}
+	void setTimezero(int64_t timezero) { header_.timezero = timezero; }
+
+	//////////////////////////////
+	// Change scope API
+	//////////////////////////////
+	void setScope(
+		Hierarchy::ScopeType scopetype,
+		const string_view_pair scopename,
+		const string_view_pair scopecomp
+	);
+	void upscope();
+
+	//////////////////////////////
+	// Attribute / Misc API
+	//////////////////////////////
+	void setAttrBegin(
+		Hierarchy::AttrType attrtype,
+		Hierarchy::AttrSubType subtype,
+		const string_view_pair attrname,
+		uint64_t arg
+	);
+	inline void setAttrEnd() {
+		hierarchy_buffer_.push_back(
+			static_cast<uint8_t>(Hierarchy::ScopeControlType::GEN_ATTR_END)
+		);
+	}
+	EnumHandle createEnumTable(
+		const string_view_pair name,
+		uint32_t min_valbits,
+		const std::vector<std::pair<string_view_pair, string_view_pair>> &literal_val_arr
+	);
+	inline void emitEnumTableRef(EnumHandle handle) {
+		setAttrBegin(
+			Hierarchy::AttrType::MISC,
+			Hierarchy::AttrSubType::MISC_ENUMTABLE,
+			make_string_view_pair(nullptr, 0),
+			handle
+		);
+	}
+	inline void setWriterPackType(WriterPackType pack_type) {
+		FST_CHECK(pack_type != WriterPackType::ZLIB and pack_type != WriterPackType::FASTLZ);
+		pack_type_ = pack_type;
+	}
+
+	//////////////////////////////
+	// Create variable API
+	//////////////////////////////
+	Handle createVar(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		const string_view_pair name,
+		uint32_t alias_handle
+	);
+	Handle createVar2(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		const string_view_pair name,
+		uint32_t alias_handle,
+		const string_view_pair type,
+		Hierarchy::SupplementalVarType svt,
+		Hierarchy::SupplementalDataType sdt
+	);
+
+	//////////////////////////////
+	// Waveform API
+	//////////////////////////////
+	void emitTimeChange(uint64_t tim);
+	void emitDumpActive(bool enable);
+	void emitValueChange(
+		Handle handle, const uint32_t *val, EncodingType encoding = EncodingType::BINARY
+	);
+	void emitValueChange(
+		Handle handle, const uint64_t *val, EncodingType encoding = EncodingType::BINARY
+	);
+	// Pass by value for small integers
+	void emitValueChange(Handle handle, uint64_t val);
+	// Add support for C-string value changes (e.g. fst string values)
+	// Note: This function is mainly for GtkWave compatibility.
+	// It is very dirty and inefficient, users should avoid using it.
+	// - For double handles, const char* is interpreted as a double* (8B)
+	// - For normal integer handles, const char* is "01xz..." (1B per bit)
+	// We only ensure that this function works where Verilator uses it.
+	void emitValueChange(Handle handle, const char *val);
+
+	//////////////////////////////
+	// Alias version
+	//////////////////////////////
+	// Constructor
+	Writer(const char *name) : Writer(make_string_view_pair(name)) {}
+	Writer(const std::string &name) : Writer(make_string_view_pair(name.c_str(), name.size())) {}
+	// Open
+	inline void open(const char *name) { open(make_string_view_pair(name)); }
+	inline void open(const std::string &name) {
+		open(make_string_view_pair(name.c_str(), name.size()));
+	}
+	// setWriter
+	inline void setWriter(const char *Writer) {
+		if (Writer) setWriter(make_string_view_pair(Writer));
+	}
+	inline void setWriter(const std::string &Writer) {
+		setWriter(make_string_view_pair(Writer.c_str(), Writer.size()));
+	}
+	// setDate
+	inline void setDate(const char *date_str) {
+		if (date_str) setDate(make_string_view_pair(date_str));
+	}
+	inline void setDate(const std::string &date_str) {
+		setDate(make_string_view_pair(date_str.c_str(), date_str.size()));
+	}
+	inline void setDate(const std::tm *d) { setDate(make_string_view_pair(std::asctime(d))); }
+	inline void setDate() {
+		// set date to now
+		std::time_t t = std::time(nullptr);
+		setDate(std::localtime(&t));
+	}
+	// CreateVar(2)
+	inline Handle createVar(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		const char *name,
+		uint32_t alias_handle
+	) {
+		FST_CHECK_NE(name, static_cast<void *>(nullptr));
+		return createVar(vartype, vardir, bitwidth, make_string_view_pair(name), alias_handle);
+	}
+	inline Handle createVar(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		const std::string &name,
+		uint32_t alias_handle
+	) {
+		return createVar(
+			vartype,
+			vardir,
+			bitwidth,
+			make_string_view_pair(name.c_str(), name.size()),
+			alias_handle
+		);
+	}
+	// setScope
+	inline void setScope(
+		Hierarchy::ScopeType scopetype, const std::string &scopename, const std::string &scopecomp
+	) {
+		setScope(
+			scopetype,
+			make_string_view_pair(scopename.c_str(), scopename.size()),
+			make_string_view_pair(scopecomp.c_str(), scopecomp.size())
+		);
+	}
+	inline void setScope(
+		Hierarchy::ScopeType scopetype, const char *scopename, const char *scopecomp
+	) {
+		setScope(scopetype, make_string_view_pair(scopename), make_string_view_pair(scopecomp));
+	}
+	// setAttrBegin
+	inline void setAttrBegin(
+		Hierarchy::AttrType attrtype,
+		Hierarchy::AttrSubType subtype,
+		const char *attrname,
+		uint64_t arg
+	) {
+		setAttrBegin(attrtype, subtype, make_string_view_pair(attrname), arg);
+	}
+	// CreateEnumTable
+	EnumHandle createEnumTable(
+		const char *name,
+		uint32_t min_valbits,
+		const std::vector<std::pair<const char *, const char *>> &literal_val_arr
+	) {
+		std::vector<std::pair<string_view_pair, string_view_pair>> arr;
+		arr.reserve(literal_val_arr.size());
+		for (const auto &p : literal_val_arr) {
+			arr.emplace_back(make_string_view_pair(p.first), make_string_view_pair(p.second));
+		}
+		return createEnumTable(make_string_view_pair(name), min_valbits, arr);
+	}
+	// CreateVar2
+	inline Handle createVar2(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		const char *name,
+		uint32_t alias_handle,
+		const char *type,
+		Hierarchy::SupplementalVarType svt,
+		Hierarchy::SupplementalDataType sdt
+	) {
+		return createVar2(
+			vartype,
+			vardir,
+			bitwidth,
+			make_string_view_pair(name),
+			alias_handle,
+			make_string_view_pair(type),
+			svt,
+			sdt
+		);
+	}
+	// Flush value change data
+	inline void flushValueChangeData() { flush_pending_ = true; }
+
+#if __cplusplus >= 201703L
+	// All APIs with string_view_pair --> define a
+	// string_view version and forward to the string_view_pair version
+	inline Writer(std::string_view name)
+		: Writer(make_string_view_pair(name.data(), name.size())) {}
+	inline void open(std::string_view name) {
+		open(make_string_view_pair(name.data(), name.size()));
+	}
+	inline void setWriter(std::string_view Writer) {
+		setWriter(make_string_view_pair(Writer.data(), Writer.size()));
+	}
+	inline void setDate(std::string_view date_str) {
+		setDate(make_string_view_pair(date_str.data(), date_str.size()));
+	}
+
+	inline void setScope(
+		Hierarchy::ScopeType scopetype, std::string_view scopename, std::string_view scopecomp
+	) {
+		setScope(
+			scopetype,
+			make_string_view_pair(scopename.data(), scopename.size()),
+			make_string_view_pair(scopecomp.data(), scopecomp.size())
+		);
+	}
+
+	inline void setAttrBegin(
+		Hierarchy::AttrType attrtype,
+		Hierarchy::AttrSubType subtype,
+		std::string_view attrname,
+		uint64_t arg
+	) {
+		setAttrBegin(
+			attrtype, subtype, make_string_view_pair(attrname.data(), attrname.size()), arg
+		);
+	}
+
+	EnumHandle createEnumTable(
+		std::string_view name,
+		uint32_t min_valbits,
+		const std::vector<std::pair<std::string_view, std::string_view>> &literal_val_arr
+	) {
+		std::vector<std::pair<string_view_pair, string_view_pair>> arr;
+		arr.reserve(literal_val_arr.size());
+		for (const auto &p : literal_val_arr) {
+			arr.emplace_back(
+				make_string_view_pair(p.first.data(), p.first.size()),
+				make_string_view_pair(p.second.data(), p.second.size())
+			);
+		}
+		return createEnumTable(make_string_view_pair(name.data(), name.size()), min_valbits, arr);
+	}
+
+	inline Handle createVar(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		std::string_view name,
+		uint32_t alias_handle
+	) {
+		return createVar(
+			vartype, vardir, bitwidth, make_string_view_pair(name.data(), name.size()), alias_handle
+		);
+	}
+
+	inline Handle createVar2(
+		Hierarchy::VarType vartype,
+		Hierarchy::VarDirection vardir,
+		uint32_t bitwidth,
+		std::string_view name,
+		uint32_t alias_handle,
+		std::string_view type,
+		Hierarchy::SupplementalVarType svt,
+		Hierarchy::SupplementalDataType sdt
+	) {
+		return createVar2(
+			vartype,
+			vardir,
+			bitwidth,
+			make_string_view_pair(name.data(), name.size()),
+			alias_handle,
+			make_string_view_pair(type.data(), type.size()),
+			svt,
+			sdt
+		);
+	}
+#endif
+private:
+	// File/memory buffers
+	// 1. For hierarchy and geometry, we do not keep the data structure, instead we just
+	//    serialize them into buffers, and compress+write them at the end of file.
+	// 2. For header, we keep the data structure in memory since it is quite small
+	// 3. For wave data, we keep a complicated data structure in memory,
+	//    and flush them to file when necessary
+	std::ofstream main_fst_file_;
+	std::vector<uint8_t> hierarchy_buffer_;
+	std::vector<uint8_t> geometry_buffer_;
+	Header header_{};
+	detail::BlackoutData blackout_data_;
+	detail::ValueChangeData value_change_data_;
+	bool hierarchy_finalized_ = false;
+	WriterPackType pack_type_ = WriterPackType::LZ4;
+	uint64_t value_change_data_usage_ = 0;  // Note: this value is just an estimation
+	uint64_t value_change_data_flush_threshold_ = 128 << 20;  // 128MB
+	uint32_t enum_count_ = 0;
+	bool flush_pending_ = false;
+
+	// internal helpers
+	static void writeHeader_(const Header &header, std::ostream &os);
+	void appendGeometry_(std::ostream &os);
+	void appendHierarchy_(std::ostream &os);
+	void appendBlackout_(std::ostream &os);
+	// This function flushes the value change data to file and keeps only the latest value in
+	// memory. The const part is split from the non-const part purely for code clarity.
+	static void flushValueChangeDataConstPart_(
+		const detail::ValueChangeData &vcd, std::ostream &os, WriterPackType pack_type
+	);
+	inline void flushValueChangeData_(detail::ValueChangeData &vcd, std::ostream &os) {
+		if (vcd.timestamps.empty()) {
+			return;
+		}
+		flushValueChangeDataConstPart_(vcd, os, pack_type_);
+		vcd.keepOnlyTheLatestValue();
+		++header_.num_value_change_data_blocks;
+		value_change_data_usage_ = 0;
+		flush_pending_ = false;
+	}
+	void finalizeHierarchy_() {  // One-shot: marks the hierarchy finalized, sizes the flush threshold
+		if (hierarchy_finalized_) return;
+		hierarchy_finalized_ = true;
+		// As in the original FST code: 128MB, plus 128MB more for every further 1M handles.
+		// Clamp to >= 1 and widen to 64 bits so `- 1` cannot wrap and `<< 27` cannot truncate.
+		value_change_data_flush_threshold_ = (((std::max<uint64_t>(header_.num_handles, 1) - 1) >> 20) + 1) << 27;
+	}
+	template <typename... T>
+	void emitValueChangeHelper_(Handle handle, T &&...val);
+};
+
+}  // namespace fst
diff --git a/include/gtkwave/fastlz.c b/include/gtkwave/fastlz.c
deleted file mode 100644
index b52a799b9..000000000
--- a/include/gtkwave/fastlz.c
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
-  FastLZ - lightning-fast lossless compression library
-
-  Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
-  Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
-  Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
-
-  Permission is hereby granted, free of charge, to any person obtaining a copy
-  of this software and associated documentation files (the "Software"), to deal
-  in the Software without restriction, including without limitation the rights
-  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-  copies of the Software, and to permit persons to whom the Software is
-  furnished to do so, subject to the following conditions:
-
-  The above copyright notice and this permission notice shall be included in
-  all copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-  THE SOFTWARE.
-
-  SPDX-License-Identifier: MIT
-*/
-
-#include "fastlz.h"
-
-#if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
-
-/*
- * Always check for bound when decompressing.
- * Generally it is best to leave it defined.
- */
-#define FASTLZ_SAFE
-
-
-/*
- * Give hints to the compiler for branch prediction optimization.
- */
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#define FASTLZ_EXPECT_CONDITIONAL(c)    (__builtin_expect((c), 1))
-#define FASTLZ_UNEXPECT_CONDITIONAL(c)  (__builtin_expect((c), 0))
-#else
-#define FASTLZ_EXPECT_CONDITIONAL(c)    (c)
-#define FASTLZ_UNEXPECT_CONDITIONAL(c)  (c)
-#endif
-
-/*
- * Use inlined functions for supported systems.
- */
-#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C)
-#define FASTLZ_INLINE inline
-#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
-#define FASTLZ_INLINE __inline
-#else
-#define FASTLZ_INLINE
-#endif
-
-/*
- * Prevent accessing more than 8-bit at once, except on x86 architectures.
- */
-#if !defined(FASTLZ_STRICT_ALIGN)
-#define FASTLZ_STRICT_ALIGN
-#if defined(__i386__) || defined(__386)  /* GNU C, Sun Studio */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(__amd64) /* GNU C */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(_M_IX86) /* Intel, MSVC */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__386)
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(_X86_) /* MinGW */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__I86__) /* Digital Mars */
-#undef FASTLZ_STRICT_ALIGN
-#endif
-#endif
-
-/* prototypes */
-int fastlz_compress(const void* input, int length, void* output);
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
-
-#define MAX_COPY       32
-#define MAX_LEN       264  /* 256 + 8 */
-#define MAX_DISTANCE 8192
-
-#if !defined(FASTLZ_STRICT_ALIGN)
-#define FASTLZ_READU16(p) *((const flzuint16*)(p))
-#else
-#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
-#endif
-
-#define HASH_LOG  13
-#define HASH_SIZE (1<< HASH_LOG)
-#define HASH_MASK  (HASH_SIZE-1)
-#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; }
-
-#undef FASTLZ_LEVEL
-#define FASTLZ_LEVEL 1
-
-#undef FASTLZ_COMPRESSOR
-#undef FASTLZ_DECOMPRESSOR
-#define FASTLZ_COMPRESSOR fastlz1_compress
-#define FASTLZ_DECOMPRESSOR fastlz1_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-
-#undef FASTLZ_LEVEL
-#define FASTLZ_LEVEL 2
-
-#undef MAX_DISTANCE
-#define MAX_DISTANCE 8191
-#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1)
-
-#undef FASTLZ_COMPRESSOR
-#undef FASTLZ_DECOMPRESSOR
-#define FASTLZ_COMPRESSOR fastlz2_compress
-#define FASTLZ_DECOMPRESSOR fastlz2_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-
-int fastlz_compress(const void* input, int length, void* output)
-{
-  /* for short block, choose fastlz1 */
-  if(length < 65536)
-    return fastlz1_compress(input, length, output);
-
-  /* else... */
-  return fastlz2_compress(input, length, output);
-}
-
-int fastlz_decompress(const void* input, int length, void* output, int maxout)
-{
-  /* magic identifier for compression level */
-  int level = ((*(const flzuint8*)input) >> 5) + 1;
-
-  if(level == 1)
-    return fastlz1_decompress(input, length, output, maxout);
-  if(level == 2)
-    return fastlz2_decompress(input, length, output, maxout);
-
-  /* unknown level, trigger error */
-  return 0;
-}
-
-int fastlz_compress_level(int level, const void* input, int length, void* output)
-{
-  if(level == 1)
-    return fastlz1_compress(input, length, output);
-  if(level == 2)
-    return fastlz2_compress(input, length, output);
-
-  return 0;
-}
-
-#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
-
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output)
-{
-  const flzuint8* ip = (const flzuint8*) input;
-  const flzuint8* ip_bound = ip + length - 2;
-  const flzuint8* ip_limit = ip + length - 12;
-  flzuint8* op = (flzuint8*) output;
-
-  const flzuint8* htab[HASH_SIZE];
-  const flzuint8** hslot;
-  flzuint32 hval;
-
-  flzuint32 copy;
-
-  /* sanity check */
-  if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4))
-  {
-    if(length)
-    {
-      /* create literal copy only */
-      *op++ = length-1;
-      ip_bound++;
-      while(ip <= ip_bound)
-        *op++ = *ip++;
-      return length+1;
-    }
-    else
-      return 0;
-  }
-
-  /* initializes hash table */
-  for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
-    *hslot = ip;
-
-  /* we start with literal copy */
-  copy = 2;
-  *op++ = MAX_COPY-1;
-  *op++ = *ip++;
-  *op++ = *ip++;
-
-  /* main loop */
-  while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
-  {
-    const flzuint8* ref;
-    flzuint32 distance;
-
-    /* minimum match length */
-    flzuint32 len = 3;
-
-    /* comparison starting-point */
-    const flzuint8* anchor = ip;
-
-    /* check for a run */
-#if FASTLZ_LEVEL==2
-    if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
-    {
-      distance = 1;
-      /* ip += 3; */ /* scan-build, never used */
-      ref = anchor - 1 + 3;
-      goto match;
-    }
-#endif
-
-    /* find potential match */
-    HASH_FUNCTION(hval,ip);
-    hslot = htab + hval;
-    ref = htab[hval];
-
-    /* calculate distance to the match */
-    distance = anchor - ref;
-
-    /* update hash table */
-    *hslot = anchor;
-
-    /* is this a match? check the first 3 bytes */
-    if(distance==0 ||
-#if FASTLZ_LEVEL==1
-    (distance >= MAX_DISTANCE) ||
-#else
-    (distance >= MAX_FARDISTANCE) ||
-#endif
-    *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
-      goto literal;
-
-#if FASTLZ_LEVEL==2
-    /* far, needs at least 5-byte match */
-    if(distance >= MAX_DISTANCE)
-    {
-      if(*ip++ != *ref++ || *ip++!= *ref++)
-        goto literal;
-      len += 2;
-    }
-
-    match:
-#endif
-
-    /* last matched byte */
-    ip = anchor + len;
-
-    /* distance is biased */
-    distance--;
-
-    if(!distance)
-    {
-      /* zero distance means a run */
-      flzuint8 x = ip[-1];
-      while(ip < ip_bound)
-        if(*ref++ != x) break; else ip++;
-    }
-    else
-    for(;;)
-    {
-      /* safe because the outer check against ip limit */
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      if(*ref++ != *ip++) break;
-      while(ip < ip_bound)
-        if(*ref++ != *ip++) break;
-      break;
-    }
-
-    /* if we have copied something, adjust the copy count */
-    if(copy)
-      /* copy is biased, '0' means 1 byte copy */
-      *(op-copy-1) = copy-1;
-    else
-      /* back, to overwrite the copy count */
-      op--;
-
-    /* reset literal counter */
-    copy = 0;
-
-    /* length is biased, '1' means a match of 3 bytes */
-    ip -= 3;
-    len = ip - anchor;
-
-    /* encode the match */
-#if FASTLZ_LEVEL==2
-    if(distance < MAX_DISTANCE)
-    {
-      if(len < 7)
-      {
-        *op++ = (len << 5) + (distance >> 8);
-        *op++ = (distance & 255);
-      }
-      else
-      {
-        *op++ = (7 << 5) + (distance >> 8);
-        for(len-=7; len >= 255; len-= 255)
-          *op++ = 255;
-        *op++ = len;
-        *op++ = (distance & 255);
-      }
-    }
-    else
-    {
-      /* far away, but not yet in the another galaxy... */
-      if(len < 7)
-      {
-        distance -= MAX_DISTANCE;
-        *op++ = (len << 5) + 31;
-        *op++ = 255;
-        *op++ = distance >> 8;
-        *op++ = distance & 255;
-      }
-      else
-      {
-        distance -= MAX_DISTANCE;
-        *op++ = (7 << 5) + 31;
-        for(len-=7; len >= 255; len-= 255)
-          *op++ = 255;
-        *op++ = len;
-        *op++ = 255;
-        *op++ = distance >> 8;
-        *op++ = distance & 255;
-      }
-    }
-#else
-
-    if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2))
-      while(len > MAX_LEN-2)
-      {
-        *op++ = (7 << 5) + (distance >> 8);
-        *op++ = MAX_LEN - 2 - 7 -2;
-        *op++ = (distance & 255);
-        len -= MAX_LEN-2;
-      }
-
-    if(len < 7)
-    {
-      *op++ = (len << 5) + (distance >> 8);
-      *op++ = (distance & 255);
-    }
-    else
-    {
-      *op++ = (7 << 5) + (distance >> 8);
-      *op++ = len - 7;
-      *op++ = (distance & 255);
-    }
-#endif
-
-    /* update the hash at match boundary */
-    HASH_FUNCTION(hval,ip);
-    htab[hval] = ip++;
-    HASH_FUNCTION(hval,ip);
-    htab[hval] = ip++;
-
-    /* assuming literal copy */
-    *op++ = MAX_COPY-1;
-
-    continue;
-
-    literal:
-      *op++ = *anchor++;
-      ip = anchor;
-      copy++;
-      if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY))
-      {
-        copy = 0;
-        *op++ = MAX_COPY-1;
-      }
-  }
-
-  /* left-over as literal copy */
-  ip_bound++;
-  while(ip <= ip_bound)
-  {
-    *op++ = *ip++;
-    copy++;
-    if(copy == MAX_COPY)
-    {
-      copy = 0;
-      *op++ = MAX_COPY-1;
-    }
-  }
-
-  /* if we have copied something, adjust the copy length */
-  if(copy)
-    *(op-copy-1) = copy-1;
-  else
-    op--;
-
-#if FASTLZ_LEVEL==2
-  /* marker for fastlz2 */
-  *(flzuint8*)output |= (1 << 5);
-#endif
-
-  return op - (flzuint8*)output;
-}
-
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout)
-{
-  const flzuint8* ip = (const flzuint8*) input;
-  const flzuint8* ip_limit  = ip + length;
-  flzuint8* op = (flzuint8*) output;
-  flzuint8* op_limit = op + maxout;
-  flzuint32 ctrl = (*ip++) & 31;
-  int loop = 1;
-
-  do
-  {
-    const flzuint8* ref = op;
-    flzuint32 len = ctrl >> 5;
-    flzuint32 ofs = (ctrl & 31) << 8;
-
-    if(ctrl >= 32)
-    {
-#if FASTLZ_LEVEL==2
-      flzuint8 code;
-#endif
-      len--;
-      ref -= ofs;
-      if (len == 7-1)
-#if FASTLZ_LEVEL==1
-        len += *ip++;
-      ref -= *ip++;
-#else
-        do
-        {
-          code = *ip++;
-          len += code;
-        } while (code==255);
-      code = *ip++;
-      ref -= code;
-
-      /* match from 16-bit distance */
-      if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
-      if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
-      {
-        ofs = (*ip++) << 8;
-        ofs += *ip++;
-        ref = op - ofs - MAX_DISTANCE;
-      }
-#endif
-
-#ifdef FASTLZ_SAFE
-      if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit))
-        return 0;
-
-      if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
-        return 0;
-#endif
-
-      if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
-        ctrl = *ip++;
-      else
-        loop = 0;
-
-      if(ref == op)
-      {
-        /* optimize copy for a run */
-        flzuint8 b = ref[-1];
-        *op++ = b;
-        *op++ = b;
-        *op++ = b;
-        for(; len; --len)
-          *op++ = b;
-      }
-      else
-      {
-#if !defined(FASTLZ_STRICT_ALIGN)
-        const flzuint16* p;
-        flzuint16* q;
-#endif
-        /* copy from reference */
-        ref--;
-        *op++ = *ref++;
-        *op++ = *ref++;
-        *op++ = *ref++;
-
-#if !defined(FASTLZ_STRICT_ALIGN)
-        /* copy a byte, so that now it's word aligned */
-        if(len & 1)
-        {
-          *op++ = *ref++;
-          len--;
-        }
-
-        /* copy 16-bit at once */
-        q = (flzuint16*) op;
-        op += len;
-        p = (const flzuint16*) ref;
-        for(len>>=1; len > 4; len-=4)
-        {
-          *q++ = *p++;
-          *q++ = *p++;
-          *q++ = *p++;
-          *q++ = *p++;
-        }
-        for(; len; --len)
-          *q++ = *p++;
-#else
-        for(; len; --len)
-          *op++ = *ref++;
-#endif
-      }
-    }
-    else
-    {
-      ctrl++;
-#ifdef FASTLZ_SAFE
-      if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
-        return 0;
-      if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
-        return 0;
-#endif
-
-      *op++ = *ip++;
-      for(--ctrl; ctrl; ctrl--)
-        *op++ = *ip++;
-
-      loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
-      if(loop)
-        ctrl = *ip++;
-    }
-  }
-  while(FASTLZ_EXPECT_CONDITIONAL(loop));
-
-  return op - (flzuint8*)output;
-}
-
-#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
diff --git a/include/gtkwave/fastlz.h b/include/gtkwave/fastlz.h
deleted file mode 100644
index 1ce44a32a..000000000
--- a/include/gtkwave/fastlz.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
-  FastLZ - lightning-fast lossless compression library
-
-  Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
-  Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
-  Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
-
-  Permission is hereby granted, free of charge, to any person obtaining a copy
-  of this software and associated documentation files (the "Software"), to deal
-  in the Software without restriction, including without limitation the rights
-  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-  copies of the Software, and to permit persons to whom the Software is
-  furnished to do so, subject to the following conditions:
-
-  The above copyright notice and this permission notice shall be included in
-  all copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-  THE SOFTWARE.
-
-  SPDX-License-Identifier: MIT
-*/
-
-#ifndef FASTLZ_H
-#define FASTLZ_H
-
-#include <inttypes.h>
-
-#define flzuint8 uint8_t
-#define flzuint16 uint16_t
-#define flzuint32 uint32_t
-
-
-#define FASTLZ_VERSION 0x000100
-
-#define FASTLZ_VERSION_MAJOR     0
-#define FASTLZ_VERSION_MINOR     0
-#define FASTLZ_VERSION_REVISION  0
-
-#define FASTLZ_VERSION_STRING "0.1.0"
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/**
-  Compress a block of data in the input buffer and returns the size of
-  compressed block. The size of input buffer is specified by length. The
-  minimum input buffer size is 16.
-
-  The output buffer must be at least 5% larger than the input buffer
-  and can not be smaller than 66 bytes.
-
-  If the input is not compressible, the return value might be larger than
-  length (input buffer size).
-
-  The input buffer and the output buffer can not overlap.
-*/
-
-int fastlz_compress(const void* input, int length, void* output);
-
-/**
-  Decompress a block of compressed data and returns the size of the
-  decompressed block. If error occurs, e.g. the compressed data is
-  corrupted or the output buffer is not large enough, then 0 (zero)
-  will be returned instead.
-
-  The input buffer and the output buffer can not overlap.
-
-  Decompression is memory safe and guaranteed not to write the output buffer
-  more than what is specified in maxout.
- */
-
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
-
-/**
-  Compress a block of data in the input buffer and returns the size of
-  compressed block. The size of input buffer is specified by length. The
-  minimum input buffer size is 16.
-
-  The output buffer must be at least 5% larger than the input buffer
-  and can not be smaller than 66 bytes.
-
-  If the input is not compressible, the return value might be larger than
-  length (input buffer size).
-
-  The input buffer and the output buffer can not overlap.
-
-  Compression level can be specified in parameter level. At the moment,
-  only level 1 and level 2 are supported.
-  Level 1 is the fastest compression and generally useful for short data.
-  Level 2 is slightly slower but it gives better compression ratio.
-
-  Note that the compressed data, regardless of the level, can always be
-  decompressed using the function fastlz_decompress above.
-*/
-
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FASTLZ_H */
diff --git a/include/gtkwave/fst_config.h b/include/gtkwave/fst_config.h
deleted file mode 100644
index cd38760df..000000000
--- a/include/gtkwave/fst_config.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// This file specifically for FST usage
-// Originally generated from config.h.in by configure.
-// SPDX-FileCopyrightText: 2018-2026 Wilson Snyder
-// SPDX-License-Identifier: CC0-1.0
-
-/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). */
-#if !defined(__MINGW32__) && !defined(__FreeBSD__)
-# define HAVE_ALLOCA_H 1
-#endif
-
-/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
-#define HAVE_FSEEKO 1
diff --git a/include/gtkwave/fst_win_unistd.h b/include/gtkwave/fst_win_unistd.h
deleted file mode 100644
index 15ab2c1fc..000000000
--- a/include/gtkwave/fst_win_unistd.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2009-2018 Tony Bybell.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#ifndef WIN_UNISTD_H
-#define WIN_UNISTD_H
-
-#include <stdlib.h>
-#ifdef _WIN64
-#include <io.h>
-#else
-#include <sys/io.h>
-#endif
-
-#include <process.h>
-
-#define ftruncate _chsize_s
-#define unlink _unlink
-#define fileno _fileno
-#define lseek _lseeki64
-
-#ifdef _WIN64
-#define ssize_t __int64
-#define SSIZE_MAX 9223372036854775807i64
-#else
-#define ssize_t long
-#define SSIZE_MAX 2147483647L
-#endif
-
-#include "stdint.h"
-
-#endif //WIN_UNISTD_H
diff --git a/include/gtkwave/fstapi.c b/include/gtkwave/fstapi.c
deleted file mode 100644
index 1b71a5eba..000000000
--- a/include/gtkwave/fstapi.c
+++ /dev/null
@@ -1,7004 +0,0 @@
-/*
- * Copyright (c) 2009-2026 Tony Bybell.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * SPDX-License-Identifier: MIT
- */
-
-/*
- * possible disables:
- *
- * FST_DYNAMIC_ALIAS_DISABLE : dynamic aliases are not processed
- * FST_DYNAMIC_ALIAS2_DISABLE : new encoding for dynamic aliases is not generated
- * FST_WRITEX_DISABLE : fast write I/O routines are disabled
- *
- * possible enables:
- *
- * FST_DEBUG : not for production use, only enable for development
- * FST_REMOVE_DUPLICATE_VC : glitch removal (has writer performance impact)
- * HAVE_LIBPTHREAD -> FST_WRITER_PARALLEL : enables inclusion of parallel writer code
- *
- */
-
-#ifdef FST_INCLUDE_CONFIG
-#include <config.h>
-#endif
-
-#include "fstapi.h"
-#include "fastlz.h"
-#include "lz4.h"
-#include <errno.h>
-
-#ifndef HAVE_LIBPTHREAD
-#undef FST_WRITER_PARALLEL
-#endif
-
-#ifdef FST_WRITER_PARALLEL
-#include <pthread.h>
-#endif
-
-#ifdef __MINGW32__
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#endif
-
-#ifndef PATH_MAX
-#define PATH_MAX (4096)
-#endif
-
-#if defined(_MSC_VER)
-typedef int64_t fst_off_t;
-#else
-typedef off_t fst_off_t;
-#endif
-
-/* should be more than enough for fstWriterSetSourceStem() */
-#define FST_PATH_HASHMASK ((1UL << 16) - 1)
-typedef const void *Pcvoid_t;
-typedef void *Pvoid_t;
-typedef void **PPvoid_t;
-void JenkinsFree(void *base_i, uint32_t hashmask);
-void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint32_t hashmask);
-
-#ifndef FST_WRITEX_DISABLE
-#define FST_WRITEX_MAX (64 * 1024)
-#else
-#define fstWritex(a, b, c) fstFwrite((b), (c), 1, fv)
-#endif
-
-/* these defines have a large impact on writer speed when a model has a */
-/* huge number of symbols.  as a default, use 128MB and increment when  */
-/* every 1M signals are defined.                                        */
-#define FST_BREAK_SIZE (1UL << 27)
-#define FST_BREAK_ADD_SIZE (1UL << 22)
-#define FST_BREAK_SIZE_MAX (1UL << 31)
-#define FST_ACTIVATE_HUGE_BREAK (1000000)
-#define FST_ACTIVATE_HUGE_INC (1000000)
-
-#define FST_WRITER_STR "fstWriter"
-#define FST_ID_NAM_SIZ (512)
-#define FST_ID_NAM_ATTR_SIZ (65536 + 4096)
-#define FST_DOUBLE_ENDTEST (2.7182818284590452354)
-#define FST_HDR_SIM_VERSION_SIZE (128)
-#define FST_HDR_DATE_SIZE (119)
-#define FST_HDR_FILETYPE_SIZE (1)
-#define FST_HDR_TIMEZERO_SIZE (8)
-#define FST_GZIO_LEN (32768)
-#define FST_HDR_FOURPACK_DUO_SIZE (4 * 1024 * 1024)
-#define FST_ZWRAPPER_HDR_SIZE (1 + 8 + 8)
-
-#if defined(__APPLE__) && defined(__MACH__)
-#define FST_MACOSX
-#include <sys/sysctl.h>
-#endif
-
-#if defined(FST_MACOSX) || defined(__MINGW32__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \
-    defined(__NetBSD__)
-#define FST_UNBUFFERED_IO
-#endif
-
-#ifdef __GNUC__
-/* Boolean expression more often true than false */
-#define FST_LIKELY(x) __builtin_expect(!!(x), 1)
-/* Boolean expression more often false than true */
-#define FST_UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
-#define FST_LIKELY(x) (!!(x))
-#define FST_UNLIKELY(x) (!!(x))
-#endif
-
-#define FST_APIMESS "FSTAPI  | "
-
-/***********************/
-/***                 ***/
-/*** common function ***/
-/***                 ***/
-/***********************/
-
-#ifdef __MINGW32__
-#include <io.h>
-#ifndef HAVE_FSEEKO
-#define ftello _ftelli64
-#define fseeko _fseeki64
-#endif
-#endif
-
-/*
- * the recoded "extra" values...
- * note that FST_RCV_Q is currently unused and is for future expansion.
- * its intended use is as another level of escape such that any arbitrary
- * value can be stored as the value: { time_delta, 8 bits, FST_RCV_Q }.
- * this is currently not implemented so that the branchless decode is:
- * uint32_t shcnt = 2 << (vli & 1); tdelta = vli >> shcnt;
- */
-#define FST_RCV_X (1 | (0 << 1))
-#define FST_RCV_Z (1 | (1 << 1))
-#define FST_RCV_H (1 | (2 << 1))
-#define FST_RCV_U (1 | (3 << 1))
-#define FST_RCV_W (1 | (4 << 1))
-#define FST_RCV_L (1 | (5 << 1))
-#define FST_RCV_D (1 | (6 << 1))
-#define FST_RCV_Q (1 | (7 << 1))
-
-#define FST_RCV_STR "xzhuwl-?"
-/*                   01234567 */
-
-/*
- * report abort messages
- */
-static void chk_report_abort(const char *s)
-{
-    fprintf(stderr, "Triggered %s security check, exiting.\n", s);
-    abort();
-}
-
-/*
- * prevent old file overwrite when currently being read
- */
-static FILE *unlink_fopen(const char *nam, const char *mode)
-{
-    unlink(nam);
-    return (fopen(nam, mode));
-}
-
-/*
- * system-specific temp file handling
- */
-#ifdef __MINGW32__
-
-static FILE *tmpfile_open(char **nam)
-{
-    char *fname = NULL;
-    TCHAR szTempFileName[MAX_PATH];
-    TCHAR lpTempPathBuffer[MAX_PATH];
-    DWORD dwRetVal = 0;
-    UINT uRetVal = 0;
-    FILE *fh = NULL;
-
-    if (nam) /* cppcheck warning fix: nam is always defined, so this is not needed */
-    {
-        dwRetVal = GetTempPath(MAX_PATH, lpTempPathBuffer);
-        if ((dwRetVal > MAX_PATH) || (dwRetVal == 0)) {
-            fprintf(stderr,
-                    FST_APIMESS "GetTempPath() failed in " __FILE__ " line %d, exiting.\n",
-                    __LINE__);
-            exit(255);
-        } else {
-            uRetVal = GetTempFileName(lpTempPathBuffer, TEXT("FSTW"), 0, szTempFileName);
-            if (uRetVal == 0) {
-                fprintf(stderr,
-                        FST_APIMESS "GetTempFileName() failed in " __FILE__ " line %d, exiting.\n",
-                        __LINE__);
-                exit(255);
-            } else {
-                fname = strdup(szTempFileName);
-            }
-        }
-
-        if (fname) {
-            *nam = fname;
-            fh = unlink_fopen(fname, "w+b");
-        }
-    }
-
-    return (fh);
-}
-
-#else
-
-static FILE *tmpfile_open(char **nam)
-{
-    FILE *f = tmpfile(); /* replace with mkstemp() + fopen(), etc if this is not good enough */
-    if (nam) {
-        *nam = NULL;
-    }
-    return (f);
-}
-
-#endif
-
-static void tmpfile_close(FILE **f, char **nam)
-{
-    if (f) {
-        if (*f) {
-            fclose(*f);
-            *f = NULL;
-        }
-    }
-
-    if (nam) {
-        if (*nam) {
-            unlink(*nam);
-            free(*nam);
-            *nam = NULL;
-        }
-    }
-}
-
-/*****************************************/
-
-/*
- * to remove warn_unused_result compile time messages
- * (in the future there needs to be results checking)
- */
-static size_t fstFread(void *buf, size_t siz, size_t cnt, FILE *fp)
-{
-    return (fread(buf, siz, cnt, fp));
-}
-
-static size_t fstFwrite(const void *buf, size_t siz, size_t cnt, FILE *fp)
-{
-    return (fwrite(buf, siz, cnt, fp));
-}
-
-static int fstFtruncate(int fd, fst_off_t length)
-{
-    return (ftruncate(fd, length));
-}
-
-/*
- * realpath compatibility
- */
-static char *fstRealpath(const char *path, char *resolved_path)
-{
-#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __CYGWIN__ || defined HAVE_REALPATH
-#if (defined(__MACH__) && defined(__APPLE__))
-    if (!resolved_path) {
-        resolved_path =
-            (char *)malloc(PATH_MAX + 1); /* fixes bug on Leopard when resolved_path == NULL */
-    }
-#endif
-
-    return (realpath(path, resolved_path));
-
-#else
-#ifdef __MINGW32__
-    if (!resolved_path) {
-        resolved_path = (char *)malloc(PATH_MAX + 1);
-    }
-    return (_fullpath(resolved_path, path, PATH_MAX));
-#else
-    (void)path;
-    (void)resolved_path;
-    return (NULL);
-#endif
-#endif
-}
-
-/*
- * mmap compatibility
- */
-#if defined __MINGW32__
-#include <limits.h>
-#define fstMmap(__addr, __len, __prot, __flags, __fd, __off) fstMmap2((__len), (__fd), (__off))
-#define fstMunmap(__addr, __len) UnmapViewOfFile((LPCVOID)__addr)
-
-static void *fstMmap2(size_t __len, int __fd, fst_off_t __off)
-{
-    DWORD64 len64 = __len; /* Must be 64-bit for shift below */
-    HANDLE handle = CreateFileMapping((HANDLE)_get_osfhandle(__fd),
-                                      NULL,
-                                      PAGE_READWRITE,
-                                      (DWORD)(len64 >> 32),
-                                      (DWORD)__len,
-                                      NULL);
-    if (!handle) {
-        return NULL;
-    }
-
-    void *ptr = MapViewOfFileEx(handle,
-                                FILE_MAP_READ | FILE_MAP_WRITE,
-                                0,
-                                (DWORD)__off,
-                                (SIZE_T)__len,
-                                (LPVOID)NULL);
-    CloseHandle(handle);
-    return ptr;
-}
-#else
-#include <sys/mman.h>
-#if defined(__SUNPRO_C)
-#define FST_CADDR_T_CAST (caddr_t)
-#else
-#define FST_CADDR_T_CAST
-#endif
-#define fstMmap(__addr, __len, __prot, __flags, __fd, __off) \
-    (void *)mmap(FST_CADDR_T_CAST(__addr), (__len), (__prot), (__flags), (__fd), (__off))
-#define fstMunmap(__addr, __len) \
-    { \
-        if (__addr) \
-            munmap(FST_CADDR_T_CAST(__addr), (__len)); \
-    }
-#endif
-
-/*
- * regular and variable-length integer access functions
- */
-static uint32_t fstGetUint32(unsigned char *mem)
-{
-    uint32_t u32;
-    unsigned char *buf = (unsigned char *)(&u32);
-
-    memcpy(buf, mem, sizeof(uint32_t));
-
-    return (*(uint32_t *)buf);
-}
-
-static int fstWriterUint64(FILE *handle, uint64_t v)
-{
-    unsigned char buf[8];
-    int i;
-
-    for (i = 7; i >= 0; i--) {
-        buf[i] = v & 0xff;
-        v >>= 8;
-    }
-
-    fstFwrite(buf, 8, 1, handle);
-    return (8);
-}
-
-static uint64_t fstReaderUint64(FILE *f)
-{
-    uint64_t val = 0;
-    unsigned char buf[sizeof(uint64_t)];
-    unsigned int i;
-
-    fstFread(buf, sizeof(uint64_t), 1, f);
-    for (i = 0; i < sizeof(uint64_t); i++) {
-        val <<= 8;
-        val |= buf[i];
-    }
-
-    return (val);
-}
-
-static uint32_t fstGetVarint32(unsigned char *mem, int *skiplen)
-{
-    unsigned char *mem_orig = mem;
-    uint32_t rc = 0;
-    while (*mem & 0x80) {
-        mem++;
-    }
-
-    *skiplen = mem - mem_orig + 1;
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint32_t)(*mem & 0x7f);
-        if (mem == mem_orig) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static uint32_t fstGetVarint32Length(unsigned char *mem)
-{
-    unsigned char *mem_orig = mem;
-
-    while (*mem & 0x80) {
-        mem++;
-    }
-
-    return (mem - mem_orig + 1);
-}
-
-static uint32_t fstGetVarint32NoSkip(unsigned char *mem)
-{
-    unsigned char *mem_orig = mem;
-    uint32_t rc = 0;
-    while (*mem & 0x80) {
-        mem++;
-    }
-
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint32_t)(*mem & 0x7f);
-        if (mem == mem_orig) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static unsigned char *fstCopyVarint32ToLeft(unsigned char *pnt, uint32_t v)
-{
-    unsigned char *spnt;
-    uint32_t nxt = v;
-    int cnt = 1;
-    int i;
-
-    while ((nxt = nxt >>
-                  7)) /* determine len to avoid temp buffer copying to cut down on load-hit-store */
-    {
-        cnt++;
-    }
-
-    pnt -= cnt;
-    spnt = pnt;
-    cnt--;
-
-    for (i = 0; i < cnt; i++) /* now generate left to right as normal */
-    {
-        nxt = v >> 7;
-        *(spnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *spnt = (unsigned char)v;
-
-    return (pnt);
-}
-
-static unsigned char *fstCopyVarint64ToRight(unsigned char *pnt, uint64_t v)
-{
-    uint64_t nxt;
-
-    while ((nxt = v >> 7)) {
-        *(pnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *(pnt++) = (unsigned char)v;
-
-    return (pnt);
-}
-
-static uint64_t fstGetVarint64(unsigned char *mem, int *skiplen)
-{
-    unsigned char *mem_orig = mem;
-    uint64_t rc = 0;
-    while (*mem & 0x80) {
-        mem++;
-    }
-
-    *skiplen = mem - mem_orig + 1;
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint64_t)(*mem & 0x7f);
-        if (mem == mem_orig) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static uint32_t fstReaderVarint32(FILE *f)
-{
-    const int chk_len_max = 5; /* TALOS-2023-1783 */
-    int chk_len = chk_len_max;
-    unsigned char buf[chk_len_max];
-    unsigned char *mem = buf;
-    uint32_t rc = 0;
-    int ch;
-
-    do {
-        ch = fgetc(f);
-        *(mem++) = ch;
-    } while ((ch & 0x80) && (--chk_len));
-
-    if (ch & 0x80)
-        chk_report_abort("TALOS-2023-1783");
-    mem--;
-
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint32_t)(*mem & 0x7f);
-        if (mem == buf) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static uint32_t fstReaderVarint32WithSkip(FILE *f, uint32_t *skiplen)
-{
-    const int chk_len_max = 5; /* TALOS-2023-1783 */
-    int chk_len = chk_len_max;
-    unsigned char buf[chk_len_max];
-    unsigned char *mem = buf;
-    uint32_t rc = 0;
-    int ch;
-
-    do {
-        ch = fgetc(f);
-        *(mem++) = ch;
-    } while ((ch & 0x80) && (--chk_len));
-
-    if (ch & 0x80)
-        chk_report_abort("TALOS-2023-1783");
-    *skiplen = mem - buf;
-    mem--;
-
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint32_t)(*mem & 0x7f);
-        if (mem == buf) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static uint64_t fstReaderVarint64(FILE *f)
-{
-    const int chk_len_max = 16; /* TALOS-2023-1783 */
-    int chk_len = chk_len_max;
-    unsigned char buf[chk_len_max];
-    unsigned char *mem = buf;
-    uint64_t rc = 0;
-    int ch;
-
-    do {
-        ch = fgetc(f);
-        *(mem++) = ch;
-    } while ((ch & 0x80) && (--chk_len));
-
-    if (ch & 0x80)
-        chk_report_abort("TALOS-2023-1783");
-    mem--;
-
-    for (;;) {
-        rc <<= 7;
-        rc |= (uint64_t)(*mem & 0x7f);
-        if (mem == buf) {
-            break;
-        }
-        mem--;
-    }
-
-    return (rc);
-}
-
-static int fstWriterVarint(FILE *handle, uint64_t v)
-{
-    uint64_t nxt;
-    unsigned char buf[10]; /* ceil(64/7) = 10 */
-    unsigned char *pnt = buf;
-    int len;
-
-    while ((nxt = v >> 7)) {
-        *(pnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *(pnt++) = (unsigned char)v;
-
-    len = pnt - buf;
-    fstFwrite(buf, len, 1, handle);
-    return (len);
-}
-
-/* signed integer read/write routines are currently unused */
-static int64_t fstGetSVarint64(unsigned char *mem, int *skiplen)
-{
-    unsigned char *mem_orig = mem;
-    int64_t rc = 0;
-    const int64_t one = 1;
-    const int siz = sizeof(int64_t) * 8;
-    int shift = 0;
-    unsigned char byt;
-
-    do {
-        byt = *(mem++);
-        rc |= ((int64_t)(byt & 0x7f)) << shift;
-        shift += 7;
-
-    } while (byt & 0x80);
-
-    if ((shift < siz) && (byt & 0x40)) {
-        rc |= -(one << shift); /* sign extend */
-    }
-
-    *skiplen = mem - mem_orig;
-
-    return (rc);
-}
-
-#ifndef FST_DYNAMIC_ALIAS2_DISABLE
-static int fstWriterSVarint(FILE *handle, int64_t v)
-{
-    unsigned char buf[15]; /* ceil(64/7) = 10 + sign byte padded way up */
-    unsigned char byt;
-    unsigned char *pnt = buf;
-    int more = 1;
-    int len;
-
-    do {
-        byt = v | 0x80;
-        v >>= 7;
-
-        if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) {
-            more = 0;
-            byt &= 0x7f;
-        }
-
-        *(pnt++) = byt;
-    } while (more);
-
-    len = pnt - buf;
-    fstFwrite(buf, len, 1, handle);
-    return (len);
-}
-#endif
-
-/***********************/
-/***                 ***/
-/*** writer function ***/
-/***                 ***/
-/***********************/
-
-/*
- * private structs
- */
-struct fstBlackoutChain
-{
-    struct fstBlackoutChain *next;
-    uint64_t tim;
-    unsigned active : 1;
-};
-
-struct fstWriterContext
-{
-    FILE *handle;
-    FILE *hier_handle;
-    FILE *geom_handle;
-    FILE *valpos_handle;
-    FILE *curval_handle;
-    FILE *tchn_handle;
-
-    unsigned char *vchg_mem;
-
-    fst_off_t hier_file_len;
-
-    uint32_t *valpos_mem;
-    unsigned char *curval_mem;
-
-    unsigned char *outval_mem; /* for two-state / Verilator-style value changes */
-    uint32_t outval_alloc_siz;
-
-    char *filename;
-
-    fstHandle maxhandle;
-    fstHandle numsigs;
-    uint32_t maxvalpos;
-
-    unsigned vc_emitted : 1;
-    unsigned is_initial_time : 1;
-    unsigned fourpack : 1;
-    unsigned fastpack : 1;
-
-    int64_t timezero;
-    fst_off_t section_header_truncpos;
-    uint32_t tchn_cnt, tchn_idx;
-    uint64_t curtime;
-    uint64_t firsttime;
-    uint32_t vchg_siz;
-    uint32_t vchg_alloc_siz;
-
-    uint32_t secnum;
-    fst_off_t section_start;
-
-    uint32_t numscopes;
-    double nan; /* nan value for uninitialized doubles */
-
-    struct fstBlackoutChain *blackout_head;
-    struct fstBlackoutChain *blackout_curr;
-    uint32_t num_blackouts;
-
-    uint64_t dump_size_limit;
-
-    unsigned char filetype; /* default is 0, FST_FT_VERILOG */
-
-    unsigned compress_hier : 1;
-    unsigned repack_on_close : 1;
-    unsigned skip_writing_section_hdr : 1;
-    unsigned size_limit_locked : 1;
-    unsigned section_header_only : 1;
-    unsigned flush_context_pending : 1;
-    unsigned parallel_enabled : 1;
-    unsigned parallel_was_enabled : 1;
-
-    /* should really be semaphores, but are bytes to cut down on read-modify-write window size */
-    unsigned char already_in_flush; /* in case control-c handlers interrupt */
-    unsigned char already_in_close; /* in case control-c handlers interrupt */
-
-#ifdef FST_WRITER_PARALLEL
-    pthread_mutex_t mutex;
-    pthread_t thread;
-    pthread_attr_t thread_attr;
-    struct fstWriterContext *xc_parent;
-#endif
-    unsigned in_pthread : 1;
-
-    size_t fst_orig_break_size;
-    size_t fst_orig_break_add_size;
-
-    size_t fst_break_size;
-    size_t fst_break_add_size;
-
-    size_t fst_huge_break_size;
-
-    fstHandle next_huge_break;
-
-    Pvoid_t path_array;
-    uint32_t path_array_count;
-
-    unsigned fseek_failed : 1;
-
-    char *geom_handle_nam;
-    char *valpos_handle_nam;
-    char *curval_handle_nam;
-    char *tchn_handle_nam;
-
-    fstEnumHandle max_enumhandle;
-};
-
-static int fstWriterFseeko(struct fstWriterContext *xc, FILE *stream, fst_off_t offset, int whence)
-{
-    int rc = fseeko(stream, offset, whence);
-
-    if (rc < 0) {
-        xc->fseek_failed = 1;
-#ifdef FST_DEBUG
-        fprintf(stderr, FST_APIMESS "Seek to #%" PRId64 " (whence = %d) failed!\n", offset, whence);
-        perror("Why");
-#endif
-    }
-
-    return (rc);
-}
-
-static uint32_t fstWriterUint32WithVarint32(struct fstWriterContext *xc,
-                                            uint32_t *u,
-                                            uint32_t v,
-                                            const void *dbuf,
-                                            uint32_t siz)
-{
-    unsigned char *buf = xc->vchg_mem + xc->vchg_siz;
-    unsigned char *pnt = buf;
-    uint32_t nxt;
-    uint32_t len;
-
-    memcpy(pnt, u, sizeof(uint32_t));
-    pnt += 4;
-
-    while ((nxt = v >> 7)) {
-        *(pnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *(pnt++) = (unsigned char)v;
-    memcpy(pnt, dbuf, siz);
-
-    len = pnt - buf + siz;
-    return (len);
-}
-
-static uint32_t fstWriterUint32WithVarint32AndLength(struct fstWriterContext *xc,
-                                                     uint32_t *u,
-                                                     uint32_t v,
-                                                     const void *dbuf,
-                                                     uint32_t siz)
-{
-    unsigned char *buf = xc->vchg_mem + xc->vchg_siz;
-    unsigned char *pnt = buf;
-    uint32_t nxt;
-    uint32_t len;
-
-    memcpy(pnt, u, sizeof(uint32_t));
-    pnt += 4;
-
-    while ((nxt = v >> 7)) {
-        *(pnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *(pnt++) = (unsigned char)v;
-
-    v = siz;
-    while ((nxt = v >> 7)) {
-        *(pnt++) = ((unsigned char)v) | 0x80;
-        v = nxt;
-    }
-    *(pnt++) = (unsigned char)v;
-
-    memcpy(pnt, dbuf, siz);
-
-    len = pnt - buf + siz;
-    return (len);
-}
-
-/*
- * header bytes, write here so defines are set up before anything else
- * that needs to use them
- */
-static void fstWriterEmitHdrBytes(struct fstWriterContext *xc)
-{
-    char vbuf[FST_HDR_SIM_VERSION_SIZE];
-    char dbuf[FST_HDR_DATE_SIZE];
-    double endtest = FST_DOUBLE_ENDTEST;
-    time_t walltime;
-
-#define FST_HDR_OFFS_TAG (0)
-    fputc(FST_BL_HDR, xc->handle); /* +0 tag */
-
-#define FST_HDR_OFFS_SECLEN (FST_HDR_OFFS_TAG + 1)
-    fstWriterUint64(xc->handle, 329); /* +1 section length */
-
-#define FST_HDR_OFFS_START_TIME (FST_HDR_OFFS_SECLEN + 8)
-    fstWriterUint64(xc->handle, 0); /* +9 start time */
-
-#define FST_HDR_OFFS_END_TIME (FST_HDR_OFFS_START_TIME + 8)
-    fstWriterUint64(xc->handle, 0); /* +17 end time */
-
-#define FST_HDR_OFFS_ENDIAN_TEST (FST_HDR_OFFS_END_TIME + 8)
-    fstFwrite(&endtest, 8, 1, xc->handle); /* +25 endian test for reals */
-
-#define FST_HDR_OFFS_MEM_USED (FST_HDR_OFFS_ENDIAN_TEST + 8)
-    fstWriterUint64(xc->handle, xc->fst_break_size); /* +33 memory used by writer */
-
-#define FST_HDR_OFFS_NUM_SCOPES (FST_HDR_OFFS_MEM_USED + 8)
-    fstWriterUint64(xc->handle, 0); /* +41 scope creation count */
-
-#define FST_HDR_OFFS_NUM_VARS (FST_HDR_OFFS_NUM_SCOPES + 8)
-    fstWriterUint64(xc->handle, 0); /* +49 var creation count */
-
-#define FST_HDR_OFFS_MAXHANDLE (FST_HDR_OFFS_NUM_VARS + 8)
-    fstWriterUint64(xc->handle, 0); /* +57 max var idcode */
-
-#define FST_HDR_OFFS_SECTION_CNT (FST_HDR_OFFS_MAXHANDLE + 8)
-    fstWriterUint64(xc->handle, 0); /* +65 vc section count */
-
-#define FST_HDR_OFFS_TIMESCALE (FST_HDR_OFFS_SECTION_CNT + 8)
-    fputc((-9) & 255, xc->handle); /* +73 timescale 1ns */
-
-#define FST_HDR_OFFS_SIM_VERSION (FST_HDR_OFFS_TIMESCALE + 1)
-    memset(vbuf, 0, FST_HDR_SIM_VERSION_SIZE);
-    strcpy(vbuf, FST_WRITER_STR);
-    fstFwrite(vbuf, FST_HDR_SIM_VERSION_SIZE, 1, xc->handle); /* +74 version */
-
-#define FST_HDR_OFFS_DATE (FST_HDR_OFFS_SIM_VERSION + FST_HDR_SIM_VERSION_SIZE)
-    memset(dbuf, 0, FST_HDR_DATE_SIZE);
-    time(&walltime);
-    strcpy(dbuf, asctime(localtime(&walltime)));
-    fstFwrite(dbuf, FST_HDR_DATE_SIZE, 1, xc->handle); /* +202 date */
-
-    /* date size is deliberately overspecified at 119 bytes (originally 128) in order to provide
-     * backfill for new args */
-
-#define FST_HDR_OFFS_FILETYPE (FST_HDR_OFFS_DATE + FST_HDR_DATE_SIZE)
-    fputc(xc->filetype, xc->handle); /* +321 filetype */
-
-#define FST_HDR_OFFS_TIMEZERO (FST_HDR_OFFS_FILETYPE + FST_HDR_FILETYPE_SIZE)
-    fstWriterUint64(xc->handle, xc->timezero); /* +322 timezero */
-
-#define FST_HDR_LENGTH (FST_HDR_OFFS_TIMEZERO + FST_HDR_TIMEZERO_SIZE)
-    /* +330 next section starts here */
-    fflush(xc->handle);
-}
-
-/*
- * mmap functions
- */
-static void fstWriterMmapSanity(void *pnt, const char *file, int line, const char *usage)
-{
-    if (pnt == NULL
-#ifdef MAP_FAILED
-        || pnt == MAP_FAILED
-#endif
-    ) {
-        fprintf(stderr,
-                "fstMmap() assigned to %s failed: errno: %d, file %s, line %d.\n",
-                usage,
-                errno,
-                file,
-                line);
-#if !defined(__MINGW32__)
-        perror("Why");
-#else
-        LPSTR mbuf = NULL;
-        FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
-                          FORMAT_MESSAGE_IGNORE_INSERTS,
-                      NULL,
-                      GetLastError(),
-                      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-                      (LPSTR)&mbuf,
-                      0,
-                      NULL);
-        fprintf(stderr, "%s", mbuf);
-        LocalFree(mbuf);
-#endif
-        pnt = NULL;
-    }
-}
-
-static void fstWriterCreateMmaps(struct fstWriterContext *xc)
-{
-    fst_off_t curpos = ftello(xc->handle);
-
-    fflush(xc->hier_handle);
-
-    /* write out intermediate header */
-    fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_START_TIME, SEEK_SET);
-    fstWriterUint64(xc->handle, xc->firsttime);
-    fstWriterUint64(xc->handle, xc->curtime);
-    fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_NUM_SCOPES, SEEK_SET);
-    fstWriterUint64(xc->handle, xc->numscopes);
-    fstWriterUint64(xc->handle, xc->numsigs);
-    fstWriterUint64(xc->handle, xc->maxhandle);
-    fstWriterUint64(xc->handle, xc->secnum);
-    fstWriterFseeko(xc, xc->handle, curpos, SEEK_SET);
-    fflush(xc->handle);
-
-    /* do mappings */
-    if (!xc->valpos_mem) {
-        fflush(xc->valpos_handle);
-        errno = 0;
-        if (xc->maxhandle) {
-            fstWriterMmapSanity(xc->valpos_mem =
-                                    (uint32_t *)fstMmap(NULL,
-                                                        xc->maxhandle * 4 * sizeof(uint32_t),
-                                                        PROT_READ | PROT_WRITE,
-                                                        MAP_SHARED,
-                                                        fileno(xc->valpos_handle),
-                                                        0),
-                                __FILE__,
-                                __LINE__,
-                                "xc->valpos_mem");
-        }
-    }
-    if (!xc->curval_mem) {
-        fflush(xc->curval_handle);
-        errno = 0;
-        if (xc->maxvalpos) {
-            fstWriterMmapSanity(xc->curval_mem = (unsigned char *)fstMmap(NULL,
-                                                                          xc->maxvalpos,
-                                                                          PROT_READ | PROT_WRITE,
-                                                                          MAP_SHARED,
-                                                                          fileno(xc->curval_handle),
-                                                                          0),
-                                __FILE__,
-                                __LINE__,
-                                "xc->curval_handle");
-        }
-    }
-}
-
-static void fstDestroyMmaps(struct fstWriterContext *xc, int is_closing)
-{
-    (void)is_closing;
-
-    fstMunmap(xc->valpos_mem, xc->maxhandle * 4 * sizeof(uint32_t));
-    xc->valpos_mem = NULL;
-
-    fstMunmap(xc->curval_mem, xc->maxvalpos);
-    xc->curval_mem = NULL;
-}
-
-/*
- * set up large and small memory usages
- * crossover point in model is FST_ACTIVATE_HUGE_BREAK number of signals
- */
-static void fstDetermineBreakSize(struct fstWriterContext *xc)
-{
-#if defined(__linux__) || defined(FST_MACOSX)
-    int was_set = 0;
-
-#ifdef __linux__
-    FILE *f = fopen("/proc/meminfo", "rb");
-
-    if (f) {
-        char buf[257];
-        char *s;
-        while (!feof(f)) {
-            buf[0] = 0;
-            s = fgets(buf, 256, f);
-            if (s && *s) {
-                if (!strncmp(s, "MemTotal:", 9)) {
-                    size_t v = atol(s + 10);
-                    v *= 1024; /* convert to bytes */
-                    v /= 8; /* chop down to 1/8 physical memory */
-                    if (v > FST_BREAK_SIZE) {
-                        if (v > FST_BREAK_SIZE_MAX) {
-                            v = FST_BREAK_SIZE_MAX;
-                        }
-
-                        xc->fst_huge_break_size = v;
-                        was_set = 1;
-                        break;
-                    }
-                }
-            }
-        }
-
-        fclose(f);
-    }
-
-    if (!was_set) {
-        xc->fst_huge_break_size = FST_BREAK_SIZE;
-    }
-#else
-    int mib[2];
-    int64_t v;
-    size_t length;
-
-    mib[0] = CTL_HW;
-    mib[1] = HW_MEMSIZE;
-    length = sizeof(int64_t);
-    if (!sysctl(mib, 2, &v, &length, NULL, 0)) {
-        v /= 8;
-
-        if (v > (int64_t)FST_BREAK_SIZE) {
-            if (v > (int64_t)FST_BREAK_SIZE_MAX) {
-                v = FST_BREAK_SIZE_MAX;
-            }
-
-            xc->fst_huge_break_size = v;
-            was_set = 1;
-        }
-    }
-
-    if (!was_set) {
-        xc->fst_huge_break_size = FST_BREAK_SIZE;
-    }
-#endif
-#else
-    xc->fst_huge_break_size = FST_BREAK_SIZE;
-#endif
-
-    xc->fst_break_size = xc->fst_orig_break_size = FST_BREAK_SIZE;
-    xc->fst_break_add_size = xc->fst_orig_break_add_size = FST_BREAK_ADD_SIZE;
-    xc->next_huge_break = FST_ACTIVATE_HUGE_BREAK;
-}
-
-/*
- * file creation and close
- */
-fstWriterContext *fstWriterCreate(const char *nam, int use_compressed_hier)
-{
-    fstWriterContext *xc = (fstWriterContext *)calloc(1, sizeof(fstWriterContext));
-
-    xc->compress_hier = use_compressed_hier;
-    fstDetermineBreakSize(xc);
-
-    if ((!nam) || (!(xc->handle = unlink_fopen(nam, "w+b")))) {
-        free(xc);
-        xc = NULL;
-    } else {
-        int flen = strlen(nam);
-        char *hf = (char *)calloc(1, flen + 6);
-
-        memcpy(hf, nam, flen);
-        strcpy(hf + flen, ".hier");
-        xc->hier_handle = unlink_fopen(hf, "w+b");
-
-        xc->geom_handle = tmpfile_open(&xc->geom_handle_nam); /* .geom */
-        xc->valpos_handle = tmpfile_open(&xc->valpos_handle_nam); /* .offs */
-        xc->curval_handle = tmpfile_open(&xc->curval_handle_nam); /* .bits */
-        xc->tchn_handle = tmpfile_open(&xc->tchn_handle_nam); /* .tchn */
-        xc->vchg_alloc_siz = xc->fst_break_size + xc->fst_break_add_size;
-        xc->vchg_mem = (unsigned char *)malloc(xc->vchg_alloc_siz);
-
-        if (xc->hier_handle && xc->geom_handle && xc->valpos_handle && xc->curval_handle &&
-            xc->vchg_mem && xc->tchn_handle) {
-            xc->filename = strdup(nam);
-            xc->is_initial_time = 1;
-
-            fstWriterEmitHdrBytes(xc);
-            xc->nan = strtod("NaN", NULL);
-#ifdef FST_WRITER_PARALLEL
-            pthread_mutex_init(&xc->mutex, NULL);
-            pthread_attr_init(&xc->thread_attr);
-            pthread_attr_setdetachstate(&xc->thread_attr, PTHREAD_CREATE_DETACHED);
-#endif
-        } else {
-            fclose(xc->handle);
-            if (xc->hier_handle) {
-                fclose(xc->hier_handle);
-                unlink(hf);
-            }
-            tmpfile_close(&xc->geom_handle, &xc->geom_handle_nam);
-            tmpfile_close(&xc->valpos_handle, &xc->valpos_handle_nam);
-            tmpfile_close(&xc->curval_handle, &xc->curval_handle_nam);
-            tmpfile_close(&xc->tchn_handle, &xc->tchn_handle_nam);
-            free(xc->vchg_mem);
-            free(xc);
-            xc = NULL;
-        }
-
-        free(hf);
-    }
-
-    return (xc);
-}
-
-/*
- * generation and writing out of value change data sections
- */
-static void fstWriterEmitSectionHeader(fstWriterContext *xc)
-{
-    if (xc) {
-        unsigned long destlen;
-        unsigned char *dmem;
-        int rc;
-
-        destlen = xc->maxvalpos;
-        dmem = (unsigned char *)malloc(compressBound(destlen));
-        rc = compress2(dmem,
-                       &destlen,
-                       xc->curval_mem,
-                       xc->maxvalpos,
-                       4); /* was 9...which caused performance drag on traces with many signals */
-
-        fputc(FST_BL_SKIP,
-              xc->handle); /* temporarily tag the section, use FST_BL_VCDATA on finalize */
-        xc->section_start = ftello(xc->handle);
-#ifdef FST_WRITER_PARALLEL
-        if (xc->xc_parent)
-            xc->xc_parent->section_start = xc->section_start;
-#endif
-        xc->section_header_only = 1; /* indicates truncate might be needed */
-        fstWriterUint64(xc->handle, 0); /* placeholder = section length */
-        fstWriterUint64(xc->handle,
-                        xc->is_initial_time ? xc->firsttime
-                                            : xc->curtime); /* begin time of section */
-        fstWriterUint64(xc->handle, xc->curtime); /* end time of section (placeholder) */
-        fstWriterUint64(
-            xc->handle,
-            0); /* placeholder = amount of buffer memory required in reader for full vc traversal */
-        fstWriterVarint(xc->handle, xc->maxvalpos); /* maxvalpos = length of uncompressed data */
-
-        if ((rc == Z_OK) && (destlen < xc->maxvalpos)) {
-            fstWriterVarint(xc->handle, destlen); /* length of compressed data */
-        } else {
-            fstWriterVarint(xc->handle,
-                            xc->maxvalpos); /* length of (unable to be) compressed data */
-        }
-        fstWriterVarint(xc->handle, xc->maxhandle); /* max handle associated with this data (in case
-                                                       of dynamic facility adds) */
-
-        if ((rc == Z_OK) && (destlen < xc->maxvalpos)) {
-            fstFwrite(dmem, destlen, 1, xc->handle);
-        } else /* comparison between compressed / decompressed len tells if compressed */
-        {
-            fstFwrite(xc->curval_mem, xc->maxvalpos, 1, xc->handle);
-        }
-
-        free(dmem);
-    }
-}
-
-/*
- * only to be called directly by fst code...otherwise must
- * be synced up with time changes
- */
-#ifdef FST_WRITER_PARALLEL
-static void fstWriterFlushContextPrivate2(fstWriterContext *xc)
-#else
-static void fstWriterFlushContextPrivate(fstWriterContext *xc)
-#endif
-{
-#ifdef FST_DEBUG
-    int cnt = 0;
-#endif
-    unsigned int i;
-    unsigned char *vchg_mem;
-    FILE *f;
-    fst_off_t fpos, indxpos, endpos;
-    uint32_t prevpos;
-    int zerocnt;
-    unsigned char *scratchpad;
-    unsigned char *scratchpnt;
-    unsigned char *tmem;
-    fst_off_t tlen;
-    fst_off_t unc_memreq = 0; /* for reader */
-    unsigned char *packmem;
-    unsigned int packmemlen;
-    uint32_t *vm4ip;
-#ifdef FST_WRITER_PARALLEL
-    struct fstWriterContext *xc2 = xc->xc_parent;
-#else
-    struct fstWriterContext *xc2 = xc;
-#endif
-
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-    Pvoid_t PJHSArray = (Pvoid_t)NULL;
-    uint32_t hashmask = xc->maxhandle;
-    hashmask |= hashmask >> 1;
-    hashmask |= hashmask >> 2;
-    hashmask |= hashmask >> 4;
-    hashmask |= hashmask >> 8;
-    hashmask |= hashmask >> 16;
-#endif
-
-    if ((xc->vchg_siz <= 1) || (xc->already_in_flush))
-        return;
-    xc->already_in_flush = 1; /* should really do this with a semaphore */
-
-    xc->section_header_only = 0;
-    scratchpad = (unsigned char *)malloc(xc->vchg_siz);
-
-    vchg_mem = xc->vchg_mem;
-
-    f = xc->handle;
-    fstWriterVarint(f, xc->maxhandle); /* emit current number of handles */
-    fputc(xc->fourpack ? '4' : (xc->fastpack ? 'F' : 'Z'), f);
-    fpos = 1;
-
-    packmemlen = 1024; /* maintain a running "longest" allocation to */
-    packmem =
-        (unsigned char *)malloc(packmemlen); /* prevent continual malloc...free every loop iter */
-
-    for (i = 0; i < xc->maxhandle; i++) {
-        vm4ip = &(xc->valpos_mem[4 * i]);
-
-        if (vm4ip[2]) {
-            uint32_t offs = vm4ip[2];
-            uint32_t next_offs;
-            unsigned int wrlen;
-
-            vm4ip[2] = fpos;
-
-            scratchpnt = scratchpad + xc->vchg_siz; /* build this buffer backwards */
-            if (vm4ip[1] <= 1) {
-                if (vm4ip[1] == 1) {
-                    wrlen = fstGetVarint32Length(vchg_mem + offs +
-                                                 4); /* used to advance and determine wrlen */
-#ifndef FST_REMOVE_DUPLICATE_VC
-                    xc->curval_mem[vm4ip[0]] = vchg_mem[offs + 4 + wrlen]; /* checkpoint variable */
-#endif
-                    while (offs) {
-                        unsigned char val;
-                        uint32_t time_delta, rcv;
-                        next_offs = fstGetUint32(vchg_mem + offs);
-                        offs += 4;
-
-                        time_delta = fstGetVarint32(vchg_mem + offs, (int *)&wrlen);
-                        val = vchg_mem[offs + wrlen];
-                        offs = next_offs;
-
-                        switch (val) {
-                            case '0':
-                            case '1':
-                                rcv = ((val & 1) << 1) | (time_delta << 2);
-                                break; /* pack more delta bits in for 0/1 vchs */
-
-                            case 'x':
-                            case 'X':
-                                rcv = FST_RCV_X | (time_delta << 4);
-                                break;
-                            case 'z':
-                            case 'Z':
-                                rcv = FST_RCV_Z | (time_delta << 4);
-                                break;
-                            case 'h':
-                            case 'H':
-                                rcv = FST_RCV_H | (time_delta << 4);
-                                break;
-                            case 'u':
-                            case 'U':
-                                rcv = FST_RCV_U | (time_delta << 4);
-                                break;
-                            case 'w':
-                            case 'W':
-                                rcv = FST_RCV_W | (time_delta << 4);
-                                break;
-                            case 'l':
-                            case 'L':
-                                rcv = FST_RCV_L | (time_delta << 4);
-                                break;
-                            default:
-                                rcv = FST_RCV_D | (time_delta << 4);
-                                break;
-                        }
-
-                        scratchpnt = fstCopyVarint32ToLeft(scratchpnt, rcv);
-                    }
-                } else {
-                    /* variable length */
-                    /* fstGetUint32 (next_offs) + fstGetVarint32 (time_delta) + fstGetVarint32 (len)
-                     * + payload */
-                    unsigned char *pnt;
-                    uint32_t record_len;
-                    uint32_t time_delta;
-
-                    while (offs) {
-                        next_offs = fstGetUint32(vchg_mem + offs);
-                        offs += 4;
-                        pnt = vchg_mem + offs;
-                        offs = next_offs;
-                        time_delta = fstGetVarint32(pnt, (int *)&wrlen);
-                        pnt += wrlen;
-                        record_len = fstGetVarint32(pnt, (int *)&wrlen);
-                        pnt += wrlen;
-
-                        scratchpnt -= record_len;
-                        memcpy(scratchpnt, pnt, record_len);
-
-                        scratchpnt = fstCopyVarint32ToLeft(scratchpnt, record_len);
-                        scratchpnt = fstCopyVarint32ToLeft(
-                            scratchpnt,
-                            (time_delta << 1)); /* reserve | 1 case for future expansion */
-                    }
-                }
-            } else {
-                wrlen = fstGetVarint32Length(vchg_mem + offs +
-                                             4); /* used to advance and determine wrlen */
-#ifndef FST_REMOVE_DUPLICATE_VC
-                memcpy(xc->curval_mem + vm4ip[0],
-                       vchg_mem + offs + 4 + wrlen,
-                       vm4ip[1]); /* checkpoint variable */
-#endif
-                while (offs) {
-                    unsigned int idx;
-                    char is_binary = 1;
-                    unsigned char *pnt;
-                    uint32_t time_delta;
-
-                    next_offs = fstGetUint32(vchg_mem + offs);
-                    offs += 4;
-
-                    time_delta = fstGetVarint32(vchg_mem + offs, (int *)&wrlen);
-
-                    pnt = vchg_mem + offs + wrlen;
-                    offs = next_offs;
-
-                    for (idx = 0; idx < vm4ip[1]; idx++) {
-                        if ((pnt[idx] == '0') || (pnt[idx] == '1')) {
-                            continue;
-                        } else {
-                            is_binary = 0;
-                            break;
-                        }
-                    }
-
-                    if (is_binary) {
-                        unsigned char acc = 0;
-                        /* new algorithm */
-                        idx = ((vm4ip[1] + 7) & ~7);
-                        switch (vm4ip[1] & 7) {
-                            case 0:
-                                do {
-                                    acc = (pnt[idx + 7 - 8] & 1) << 0; /* fallthrough */
-                                    case 7:
-                                        acc |= (pnt[idx + 6 - 8] & 1) << 1; /* fallthrough */
-                                    case 6:
-                                        acc |= (pnt[idx + 5 - 8] & 1) << 2; /* fallthrough */
-                                    case 5:
-                                        acc |= (pnt[idx + 4 - 8] & 1) << 3; /* fallthrough */
-                                    case 4:
-                                        acc |= (pnt[idx + 3 - 8] & 1) << 4; /* fallthrough */
-                                    case 3:
-                                        acc |= (pnt[idx + 2 - 8] & 1) << 5; /* fallthrough */
-                                    case 2:
-                                        acc |= (pnt[idx + 1 - 8] & 1) << 6; /* fallthrough */
-                                    case 1:
-                                        acc |= (pnt[idx + 0 - 8] & 1) << 7;
-                                        *(--scratchpnt) = acc;
-                                        idx -= 8;
-                                } while (idx);
-                        }
-
-                        scratchpnt = fstCopyVarint32ToLeft(scratchpnt, (time_delta << 1));
-                    } else {
-                        scratchpnt -= vm4ip[1];
-                        memcpy(scratchpnt, pnt, vm4ip[1]);
-
-                        scratchpnt = fstCopyVarint32ToLeft(scratchpnt, (time_delta << 1) | 1);
-                    }
-                }
-            }
-
-            wrlen = scratchpad + xc->vchg_siz - scratchpnt;
-            unc_memreq += wrlen;
-            if (wrlen > 32) {
-                unsigned long destlen = wrlen;
-                unsigned char *dmem;
-                unsigned int rc;
-
-                if (!xc->fastpack) {
-                    if (wrlen <= packmemlen) {
-                        dmem = packmem;
-                    } else {
-                        free(packmem);
-                        dmem = packmem = (unsigned char *)malloc(compressBound(packmemlen = wrlen));
-                    }
-
-                    rc = compress2(dmem, &destlen, scratchpnt, wrlen, 4);
-                    if (rc == Z_OK) {
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        PPvoid_t pv = JenkinsIns(&PJHSArray, dmem, destlen, hashmask);
-                        if (*pv) {
-                            uint32_t pvi = (intptr_t)(*pv);
-                            vm4ip[2] = -pvi;
-                        } else {
-                            *pv = (void *)(intptr_t)(i + 1);
-#endif
-                            fpos += fstWriterVarint(f, wrlen);
-                            fpos += destlen;
-                            fstFwrite(dmem, destlen, 1, f);
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        }
-#endif
-                    } else {
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        PPvoid_t pv = JenkinsIns(&PJHSArray, scratchpnt, wrlen, hashmask);
-                        if (*pv) {
-                            uint32_t pvi = (intptr_t)(*pv);
-                            vm4ip[2] = -pvi;
-                        } else {
-                            *pv = (void *)(intptr_t)(i + 1);
-#endif
-                            fpos += fstWriterVarint(f, 0);
-                            fpos += wrlen;
-                            fstFwrite(scratchpnt, wrlen, 1, f);
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        }
-#endif
-                    }
-                } else {
-                    /* this is extremely conservative: fastlz needs +5% for worst case, lz4 needs
-                     * siz+(siz/255)+16 */
-                    if (((wrlen * 2) + 2) <= packmemlen) {
-                        dmem = packmem;
-                    } else {
-                        free(packmem);
-                        dmem = packmem = (unsigned char *)malloc(packmemlen = (wrlen * 2) + 2);
-                    }
-
-                    rc = (xc->fourpack) ? LZ4_compress_default((char *)scratchpnt,
-                                                               (char *)dmem,
-                                                               wrlen,
-                                                               packmemlen)
-                                        : fastlz_compress(scratchpnt, wrlen, dmem);
-                    if (rc < destlen) {
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        PPvoid_t pv = JenkinsIns(&PJHSArray, dmem, rc, hashmask);
-                        if (*pv) {
-                            uint32_t pvi = (intptr_t)(*pv);
-                            vm4ip[2] = -pvi;
-                        } else {
-                            *pv = (void *)(intptr_t)(i + 1);
-#endif
-                            fpos += fstWriterVarint(f, wrlen);
-                            fpos += rc;
-                            fstFwrite(dmem, rc, 1, f);
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        }
-#endif
-                    } else {
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        PPvoid_t pv = JenkinsIns(&PJHSArray, scratchpnt, wrlen, hashmask);
-                        if (*pv) {
-                            uint32_t pvi = (intptr_t)(*pv);
-                            vm4ip[2] = -pvi;
-                        } else {
-                            *pv = (void *)(intptr_t)(i + 1);
-#endif
-                            fpos += fstWriterVarint(f, 0);
-                            fpos += wrlen;
-                            fstFwrite(scratchpnt, wrlen, 1, f);
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                        }
-#endif
-                    }
-                }
-            } else {
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                PPvoid_t pv = JenkinsIns(&PJHSArray, scratchpnt, wrlen, hashmask);
-                if (*pv) {
-                    uint32_t pvi = (intptr_t)(*pv);
-                    vm4ip[2] = -pvi;
-                } else {
-                    *pv = (void *)(intptr_t)(i + 1);
-#endif
-                    fpos += fstWriterVarint(f, 0);
-                    fpos += wrlen;
-                    fstFwrite(scratchpnt, wrlen, 1, f);
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-                }
-#endif
-            }
-
-            /* vm4ip[3] = 0; ...redundant with clearing below */
-#ifdef FST_DEBUG
-            cnt++;
-#endif
-        }
-    }
-
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-    JenkinsFree(&PJHSArray, hashmask);
-#endif
-
-    free(packmem);
-    packmem = NULL; /* packmemlen = 0; */ /* scan-build */
-
-    prevpos = 0;
-    zerocnt = 0;
-    free(scratchpad);
-    scratchpad = NULL;
-
-    indxpos = ftello(f);
-    xc->secnum++;
-
-#ifndef FST_DYNAMIC_ALIAS2_DISABLE
-    if (1) {
-        uint32_t prev_alias = 0;
-
-        for (i = 0; i < xc->maxhandle; i++) {
-            vm4ip = &(xc->valpos_mem[4 * i]);
-
-            if (vm4ip[2]) {
-                if (zerocnt) {
-                    fpos += fstWriterVarint(f, (zerocnt << 1));
-                    zerocnt = 0;
-                }
-
-                if (vm4ip[2] & 0x80000000) {
-                    if (vm4ip[2] != prev_alias) {
-                        int32_t t_i32 =
-                            ((int32_t)(prev_alias = vm4ip[2])); /* vm4ip is generic unsigned data */
-                        int64_t t_i64 = (int64_t)t_i32; /* convert to signed */
-                        uint64_t t_u64 = (uint64_t)t_i64; /* sign extend through 64b */
-
-                        fpos += fstWriterSVarint(
-                            f,
-                            (int64_t)((t_u64 << 1) |
-                                      1)); /* all in this block was: fpos += fstWriterSVarint(f,
-                                              (((int64_t)((int32_t)(prev_alias = vm4ip[2]))) << 1) |
-                                              1); */
-                    } else {
-                        fpos += fstWriterSVarint(f, (0 << 1) | 1);
-                    }
-                } else {
-                    fpos += fstWriterSVarint(f, ((vm4ip[2] - prevpos) << 1) | 1);
-                    prevpos = vm4ip[2];
-                }
-                vm4ip[2] = 0;
-                vm4ip[3] = 0; /* clear out tchn idx */
-            } else {
-                zerocnt++;
-            }
-        }
-    } else
-#endif
-    {
-        for (i = 0; i < xc->maxhandle; i++) {
-            vm4ip = &(xc->valpos_mem[4 * i]);
-
-            if (vm4ip[2]) {
-                if (zerocnt) {
-                    fpos += fstWriterVarint(f, (zerocnt << 1));
-                    zerocnt = 0;
-                }
-
-                if (vm4ip[2] & 0x80000000) {
-                    fpos +=
-                        fstWriterVarint(f, 0); /* signal, note that using a *signed* varint would be
-                                                  more efficient than this byte escape! */
-                    fpos += fstWriterVarint(f, (-(int32_t)vm4ip[2]));
-                } else {
-                    fpos += fstWriterVarint(f, ((vm4ip[2] - prevpos) << 1) | 1);
-                    prevpos = vm4ip[2];
-                }
-                vm4ip[2] = 0;
-                vm4ip[3] = 0; /* clear out tchn idx */
-            } else {
-                zerocnt++;
-            }
-        }
-    }
-
-    if (zerocnt) {
-        /* fpos += */ fstWriterVarint(f, (zerocnt << 1)); /* scan-build */
-    }
-#ifdef FST_DEBUG
-    fprintf(stderr, FST_APIMESS "value chains: %d\n", cnt);
-#endif
-
-    xc->vchg_mem[0] = '!';
-    xc->vchg_siz = 1;
-
-    endpos = ftello(xc->handle);
-    fstWriterUint64(xc->handle,
-                    endpos - indxpos); /* write delta index position at very end of block */
-
-    /*emit time changes for block */
-    fflush(xc->tchn_handle);
-    tlen = ftello(xc->tchn_handle);
-    fstWriterFseeko(xc, xc->tchn_handle, 0, SEEK_SET);
-
-    errno = 0;
-    fstWriterMmapSanity(
-        tmem = (unsigned char *)
-            fstMmap(NULL, tlen, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(xc->tchn_handle), 0),
-        __FILE__,
-        __LINE__,
-        "tmem");
-    if (tmem) {
-        unsigned long destlen = tlen;
-        unsigned char *dmem = (unsigned char *)malloc(compressBound(destlen));
-        int rc = compress2(dmem, &destlen, tmem, tlen, 9);
-
-        if ((rc == Z_OK) && (((fst_off_t)destlen) < tlen)) {
-            fstFwrite(dmem, destlen, 1, xc->handle);
-        } else /* comparison between compressed / decompressed len tells if compressed */
-        {
-            fstFwrite(tmem, tlen, 1, xc->handle);
-            destlen = tlen;
-        }
-        free(dmem);
-        fstMunmap(tmem, tlen);
-        fstWriterUint64(xc->handle, tlen); /* uncompressed */
-        fstWriterUint64(xc->handle, destlen); /* compressed */
-        fstWriterUint64(xc->handle, xc->tchn_cnt); /* number of time items */
-    }
-
-    xc->tchn_cnt = xc->tchn_idx = 0;
-    fstWriterFseeko(xc, xc->tchn_handle, 0, SEEK_SET);
-    fstFtruncate(fileno(xc->tchn_handle), 0);
-
-    /* write block trailer */
-    endpos = ftello(xc->handle);
-    fstWriterFseeko(xc, xc->handle, xc->section_start, SEEK_SET);
-    fstWriterUint64(xc->handle, endpos - xc->section_start); /* write block length */
-    fstWriterFseeko(xc, xc->handle, 8, SEEK_CUR); /* skip begin time */
-    fstWriterUint64(xc->handle, xc->curtime); /* write end time for section */
-    fstWriterUint64(xc->handle,
-                    unc_memreq); /* amount of buffer memory required in reader for full traversal */
-    fflush(xc->handle);
-
-    fstWriterFseeko(xc,
-                    xc->handle,
-                    xc->section_start - 1,
-                    SEEK_SET); /* write out FST_BL_VCDATA over FST_BL_SKIP */
-
-#ifndef FST_DYNAMIC_ALIAS_DISABLE
-#ifndef FST_DYNAMIC_ALIAS2_DISABLE
-    fputc(FST_BL_VCDATA_DYN_ALIAS2, xc->handle);
-#else
-    fputc(FST_BL_VCDATA_DYN_ALIAS, xc->handle);
-#endif
-#else
-    fputc(FST_BL_VCDATA, xc->handle);
-#endif
-
-    fflush(xc->handle);
-
-    fstWriterFseeko(xc, xc->handle, endpos, SEEK_SET); /* seek to end of file */
-
-    xc2->section_header_truncpos = endpos; /* cache in case of need to truncate */
-    if (xc->dump_size_limit) {
-        if (endpos >= ((fst_off_t)xc->dump_size_limit)) {
-            xc2->skip_writing_section_hdr = 1;
-            xc2->size_limit_locked = 1;
-            xc2->is_initial_time = 1; /* to trick emit value and emit time change */
-#ifdef FST_DEBUG
-            fprintf(stderr, FST_APIMESS "<< dump file size limit reached, stopping dumping >>\n");
-#endif
-        }
-    }
-
-    if (!xc2->skip_writing_section_hdr) {
-        fstWriterEmitSectionHeader(xc); /* emit next section header */
-    }
-    fflush(xc->handle);
-
-    xc->already_in_flush = 0;
-}
-
-#ifdef FST_WRITER_PARALLEL
-static void *fstWriterFlushContextPrivate1(void *ctx)
-{
-    struct fstWriterContext *xc = (struct fstWriterContext *)ctx;
-    struct fstWriterContext *xc_parent;
-
-    pthread_mutex_lock(&(xc->xc_parent->mutex));
-    fstWriterFlushContextPrivate2(xc);
-
-#ifdef FST_REMOVE_DUPLICATE_VC
-    free(xc->curval_mem);
-#endif
-    free(xc->valpos_mem);
-    free(xc->vchg_mem);
-    tmpfile_close(&xc->tchn_handle, &xc->tchn_handle_nam);
-    xc_parent = xc->xc_parent;
-    free(xc);
-
-    xc_parent->in_pthread = 0;
-    pthread_mutex_unlock(&(xc_parent->mutex));
-
-    return (NULL);
-}
-
-static void fstWriterFlushContextPrivate(fstWriterContext *xc)
-{
-    if (xc->parallel_enabled) {
-        struct fstWriterContext *xc2 =
-            (struct fstWriterContext *)malloc(sizeof(struct fstWriterContext));
-        unsigned int i;
-
-        pthread_mutex_lock(&xc->mutex);
-        pthread_mutex_unlock(&xc->mutex);
-
-        xc->xc_parent = xc;
-        memcpy(xc2, xc, sizeof(struct fstWriterContext));
-
-        if (sizeof(size_t) < sizeof(uint64_t)) {
-            /* TALOS-2023-1777 for 32b overflow */
-            uint64_t chk_64 = xc->maxhandle * 4 * sizeof(uint32_t);
-            size_t chk_32 = xc->maxhandle * 4 * sizeof(uint32_t);
-            if (chk_64 != chk_32)
-                chk_report_abort("TALOS-2023-1777");
-        }
-
-        xc2->valpos_mem = (uint32_t *)malloc(xc->maxhandle * 4 * sizeof(uint32_t));
-        memcpy(xc2->valpos_mem, xc->valpos_mem, xc->maxhandle * 4 * sizeof(uint32_t));
-
-        /* curval mem is updated in the thread */
-#ifdef FST_REMOVE_DUPLICATE_VC
-        xc2->curval_mem = (unsigned char *)malloc(xc->maxvalpos);
-        memcpy(xc2->curval_mem, xc->curval_mem, xc->maxvalpos);
-#endif
-
-        xc->vchg_mem = (unsigned char *)malloc(xc->vchg_alloc_siz);
-        xc->vchg_mem[0] = '!';
-        xc->vchg_siz = 1;
-
-        for (i = 0; i < xc->maxhandle; i++) {
-            uint32_t *vm4ip = &(xc->valpos_mem[4 * i]);
-            vm4ip[2] = 0; /* zero out offset val */
-            vm4ip[3] = 0; /* zero out last time change val */
-        }
-
-        xc->tchn_cnt = xc->tchn_idx = 0;
-        xc->tchn_handle =
-            tmpfile_open(&xc->tchn_handle_nam); /* child thread will deallocate file/name */
-        fstWriterFseeko(xc, xc->tchn_handle, 0, SEEK_SET);
-        fstFtruncate(fileno(xc->tchn_handle), 0);
-
-        xc->section_header_only = 0;
-        xc->secnum++;
-
-        while (xc->in_pthread) {
-            pthread_mutex_lock(&xc->mutex);
-            pthread_mutex_unlock(&xc->mutex);
-        };
-
-        pthread_mutex_lock(&xc->mutex);
-        xc->in_pthread = 1;
-        pthread_mutex_unlock(&xc->mutex);
-
-        pthread_create(&xc->thread, &xc->thread_attr, fstWriterFlushContextPrivate1, xc2);
-    } else {
-        if (xc->parallel_was_enabled) /* conservatively block */
-        {
-            pthread_mutex_lock(&xc->mutex);
-            pthread_mutex_unlock(&xc->mutex);
-        }
-
-        xc->xc_parent = xc;
-        fstWriterFlushContextPrivate2(xc);
-    }
-}
-#endif
-
-/*
- * queues up a flush context operation
- */
-void fstWriterFlushContext(fstWriterContext *xc)
-{
-    if (xc) {
-        if (xc->tchn_idx > 1) {
-            xc->flush_context_pending = 1;
-        }
-    }
-}
-
-/*
- * close out FST file
- */
-void fstWriterClose(fstWriterContext *xc)
-{
-#ifdef FST_WRITER_PARALLEL
-    if (xc) {
-        pthread_mutex_lock(&xc->mutex);
-        pthread_mutex_unlock(&xc->mutex);
-    }
-#endif
-
-    if (xc && !xc->already_in_close && !xc->already_in_flush) {
-        unsigned char *tmem = NULL;
-        fst_off_t fixup_offs, tlen, hlen;
-
-        xc->already_in_close = 1; /* never need to zero this out as it is freed at bottom */
-
-        if (xc->section_header_only && xc->section_header_truncpos && (xc->vchg_siz <= 1) &&
-            (!xc->is_initial_time)) {
-            fstFtruncate(fileno(xc->handle), xc->section_header_truncpos);
-            fstWriterFseeko(xc, xc->handle, xc->section_header_truncpos, SEEK_SET);
-            xc->section_header_only = 0;
-        } else {
-            xc->skip_writing_section_hdr = 1;
-            if (!xc->size_limit_locked) {
-                if (FST_UNLIKELY(xc->is_initial_time)) /* simulation time never advanced so mock up
-                                                          the changes as time zero ones */
-                {
-                    fstHandle dupe_idx;
-
-                    fstWriterEmitTimeChange(xc, 0); /* emit some time change just to have one */
-                    for (dupe_idx = 0; dupe_idx < xc->maxhandle;
-                         dupe_idx++) /* now clone the values */
-                    {
-                        fstWriterEmitValueChange(xc,
-                                                 dupe_idx + 1,
-                                                 xc->curval_mem + xc->valpos_mem[4 * dupe_idx]);
-                    }
-                }
-                fstWriterFlushContextPrivate(xc);
-#ifdef FST_WRITER_PARALLEL
-                pthread_mutex_lock(&xc->mutex);
-                pthread_mutex_unlock(&xc->mutex);
-
-                while (xc->in_pthread) {
-                    pthread_mutex_lock(&xc->mutex);
-                    pthread_mutex_unlock(&xc->mutex);
-                };
-#endif
-            }
-        }
-        fstDestroyMmaps(xc, 1);
-        if (xc->outval_mem) {
-            free(xc->outval_mem);
-            xc->outval_mem = NULL;
-            xc->outval_alloc_siz = 0;
-        }
-
-        /* write out geom section */
-        fflush(xc->geom_handle);
-        tlen = ftello(xc->geom_handle);
-        errno = 0;
-        if (tlen) {
-            fstWriterMmapSanity(tmem = (unsigned char *)fstMmap(NULL,
-                                                                tlen,
-                                                                PROT_READ | PROT_WRITE,
-                                                                MAP_SHARED,
-                                                                fileno(xc->geom_handle),
-                                                                0),
-                                __FILE__,
-                                __LINE__,
-                                "tmem");
-        }
-
-        if (tmem) {
-            unsigned long destlen = tlen;
-            unsigned char *dmem = (unsigned char *)malloc(compressBound(destlen));
-            int rc = compress2(dmem, &destlen, tmem, tlen, 9);
-
-            if ((rc != Z_OK) || (((fst_off_t)destlen) > tlen)) {
-                destlen = tlen;
-            }
-
-            fixup_offs = ftello(xc->handle);
-            fputc(FST_BL_SKIP, xc->handle); /* temporary tag */
-            fstWriterUint64(xc->handle, destlen + 24); /* section length */
-            fstWriterUint64(xc->handle, tlen); /* uncompressed */
-            /* compressed len is section length - 24 */
-            fstWriterUint64(xc->handle, xc->maxhandle); /* maxhandle */
-            fstFwrite((((fst_off_t)destlen) != tlen) ? dmem : tmem, destlen, 1, xc->handle);
-            fflush(xc->handle);
-
-            fstWriterFseeko(xc, xc->handle, fixup_offs, SEEK_SET);
-            fputc(FST_BL_GEOM, xc->handle); /* actual tag */
-
-            fstWriterFseeko(xc,
-                            xc->handle,
-                            0,
-                            SEEK_END); /* move file pointer to end for any section adds */
-            fflush(xc->handle);
-
-            free(dmem);
-            fstMunmap(tmem, tlen);
-        }
-
-        if (xc->num_blackouts) {
-            uint64_t cur_bl = 0;
-            fst_off_t bpos, eos;
-            uint32_t i;
-
-            fixup_offs = ftello(xc->handle);
-            fputc(FST_BL_SKIP, xc->handle); /* temporary tag */
-            bpos = fixup_offs + 1;
-            fstWriterUint64(xc->handle, 0); /* section length */
-            fstWriterVarint(xc->handle, xc->num_blackouts);
-
-            for (i = 0; i < xc->num_blackouts; i++) {
-                fputc(xc->blackout_head->active, xc->handle);
-                fstWriterVarint(xc->handle, xc->blackout_head->tim - cur_bl);
-                cur_bl = xc->blackout_head->tim;
-                xc->blackout_curr = xc->blackout_head->next;
-                free(xc->blackout_head);
-                xc->blackout_head = xc->blackout_curr;
-            }
-
-            eos = ftello(xc->handle);
-            fstWriterFseeko(xc, xc->handle, bpos, SEEK_SET);
-            fstWriterUint64(xc->handle, eos - bpos);
-            fflush(xc->handle);
-
-            fstWriterFseeko(xc, xc->handle, fixup_offs, SEEK_SET);
-            fputc(FST_BL_BLACKOUT, xc->handle); /* actual tag */
-
-            fstWriterFseeko(xc,
-                            xc->handle,
-                            0,
-                            SEEK_END); /* move file pointer to end for any section adds */
-            fflush(xc->handle);
-        }
-
-        if (xc->compress_hier) {
-            fst_off_t hl, eos;
-            gzFile zhandle;
-            int zfd;
-            int fourpack_duo = 0;
-#ifndef __MINGW32__
-            int fnam_len = strlen(xc->filename) + 5 + 1;
-            char *fnam = (char *)malloc(fnam_len);
-#endif
-
-            fixup_offs = ftello(xc->handle);
-            fputc(FST_BL_SKIP, xc->handle); /* temporary tag */
-            hlen = ftello(xc->handle);
-            fstWriterUint64(xc->handle, 0); /* section length */
-            fstWriterUint64(xc->handle, xc->hier_file_len); /* uncompressed length */
-
-            if (!xc->fourpack) {
-                unsigned char *mem = (unsigned char *)malloc(FST_GZIO_LEN);
-                zfd = dup(fileno(xc->handle));
-                fflush(xc->handle);
-                zhandle = gzdopen(zfd, "wb4");
-                if (zhandle) {
-                    fstWriterFseeko(xc, xc->hier_handle, 0, SEEK_SET);
-                    for (hl = 0; hl < xc->hier_file_len; hl += FST_GZIO_LEN) {
-                        unsigned len = ((xc->hier_file_len - hl) > FST_GZIO_LEN)
-                                           ? FST_GZIO_LEN
-                                           : (xc->hier_file_len - hl);
-                        fstFread(mem, len, 1, xc->hier_handle);
-                        gzwrite(zhandle, mem, len);
-                    }
-                    gzclose(zhandle);
-                } else {
-                    close(zfd);
-                }
-                free(mem);
-            } else {
-                int lz4_maxlen;
-                unsigned char *mem;
-                unsigned char *hmem = NULL;
-                int packed_len;
-
-                fflush(xc->handle);
-
-                lz4_maxlen = LZ4_compressBound(xc->hier_file_len);
-                mem = (unsigned char *)malloc(lz4_maxlen);
-                errno = 0;
-                if (xc->hier_file_len) {
-                    fstWriterMmapSanity(hmem = (unsigned char *)fstMmap(NULL,
-                                                                        xc->hier_file_len,
-                                                                        PROT_READ | PROT_WRITE,
-                                                                        MAP_SHARED,
-                                                                        fileno(xc->hier_handle),
-                                                                        0),
-                                        __FILE__,
-                                        __LINE__,
-                                        "hmem");
-                }
-                packed_len =
-                    LZ4_compress_default((char *)hmem, (char *)mem, xc->hier_file_len, lz4_maxlen);
-                fstMunmap(hmem, xc->hier_file_len);
-
-                fourpack_duo =
-                    (!xc->repack_on_close) &&
-                    (xc->hier_file_len >
-                     FST_HDR_FOURPACK_DUO_SIZE); /* double pack when hierarchy is large */
-
-                if (fourpack_duo) /* double packing with LZ4 is faster than gzip */
-                {
-                    unsigned char *mem_duo;
-                    int lz4_maxlen_duo;
-                    int packed_len_duo;
-
-                    lz4_maxlen_duo = LZ4_compressBound(packed_len);
-                    mem_duo = (unsigned char *)malloc(lz4_maxlen_duo);
-                    packed_len_duo = LZ4_compress_default((char *)mem,
-                                                          (char *)mem_duo,
-                                                          packed_len,
-                                                          lz4_maxlen_duo);
-
-                    fstWriterVarint(xc->handle, packed_len); /* 1st round compressed length */
-                    fstFwrite(mem_duo, packed_len_duo, 1, xc->handle);
-                    free(mem_duo);
-                } else {
-                    fstFwrite(mem, packed_len, 1, xc->handle);
-                }
-
-                free(mem);
-            }
-
-            fstWriterFseeko(xc, xc->handle, 0, SEEK_END);
-            eos = ftello(xc->handle);
-            fstWriterFseeko(xc, xc->handle, hlen, SEEK_SET);
-            fstWriterUint64(xc->handle, eos - hlen);
-            fflush(xc->handle);
-
-            fstWriterFseeko(xc, xc->handle, fixup_offs, SEEK_SET);
-            fputc(xc->fourpack ? (fourpack_duo ? FST_BL_HIER_LZ4DUO : FST_BL_HIER_LZ4)
-                               : FST_BL_HIER,
-                  xc->handle); /* actual tag now also == compression type */
-
-            fstWriterFseeko(xc,
-                            xc->handle,
-                            0,
-                            SEEK_END); /* move file pointer to end for any section adds */
-            fflush(xc->handle);
-
-#ifndef __MINGW32__
-            snprintf(fnam, fnam_len, "%s.hier", xc->filename);
-            unlink(fnam);
-            free(fnam);
-#endif
-        }
-
-        /* finalize out header */
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_START_TIME, SEEK_SET);
-        fstWriterUint64(xc->handle, xc->firsttime);
-        fstWriterUint64(xc->handle, xc->curtime);
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_NUM_SCOPES, SEEK_SET);
-        fstWriterUint64(xc->handle, xc->numscopes);
-        fstWriterUint64(xc->handle, xc->numsigs);
-        fstWriterUint64(xc->handle, xc->maxhandle);
-        fstWriterUint64(xc->handle, xc->secnum);
-        fflush(xc->handle);
-
-        tmpfile_close(&xc->tchn_handle, &xc->tchn_handle_nam);
-        free(xc->vchg_mem);
-        xc->vchg_mem = NULL;
-        tmpfile_close(&xc->curval_handle, &xc->curval_handle_nam);
-        tmpfile_close(&xc->valpos_handle, &xc->valpos_handle_nam);
-        tmpfile_close(&xc->geom_handle, &xc->geom_handle_nam);
-        if (xc->hier_handle) {
-            fclose(xc->hier_handle);
-            xc->hier_handle = NULL;
-        }
-        if (xc->handle) {
-            if (xc->repack_on_close) {
-                FILE *fp;
-                fst_off_t offpnt, uclen;
-                int flen = strlen(xc->filename);
-                char *hf = (char *)calloc(1, flen + 5);
-
-                strcpy(hf, xc->filename);
-                strcpy(hf + flen, ".pak");
-                fp = fopen(hf, "wb");
-
-                if (fp) {
-                    gzFile dsth;
-                    int zfd;
-                    char gz_membuf[FST_GZIO_LEN];
-
-                    fstWriterFseeko(xc, xc->handle, 0, SEEK_END);
-                    uclen = ftello(xc->handle);
-
-                    fputc(FST_BL_ZWRAPPER, fp);
-                    fstWriterUint64(fp, 0);
-                    fstWriterUint64(fp, uclen);
-                    fflush(fp);
-
-                    fstWriterFseeko(xc, xc->handle, 0, SEEK_SET);
-                    zfd = dup(fileno(fp));
-                    dsth = gzdopen(zfd, "wb4");
-                    if (dsth) {
-                        for (offpnt = 0; offpnt < uclen; offpnt += FST_GZIO_LEN) {
-                            size_t this_len =
-                                ((uclen - offpnt) > FST_GZIO_LEN) ? FST_GZIO_LEN : (uclen - offpnt);
-                            fstFread(gz_membuf, this_len, 1, xc->handle);
-                            gzwrite(dsth, gz_membuf, this_len);
-                        }
-                        gzclose(dsth);
-                    } else {
-                        close(zfd);
-                    }
-                    fstWriterFseeko(xc, fp, 0, SEEK_END);
-                    offpnt = ftello(fp);
-                    fstWriterFseeko(xc, fp, 1, SEEK_SET);
-                    fstWriterUint64(fp, offpnt - 1);
-                    fclose(fp);
-                    fclose(xc->handle);
-                    xc->handle = NULL;
-
-                    unlink(xc->filename);
-                    rename(hf, xc->filename);
-                } else {
-                    xc->repack_on_close = 0;
-                    fclose(xc->handle);
-                    xc->handle = NULL;
-                }
-
-                free(hf);
-            } else {
-                fclose(xc->handle);
-                xc->handle = NULL;
-            }
-        }
-
-#ifdef __MINGW32__
-        {
-            int flen = strlen(xc->filename);
-            char *hf = (char *)calloc(1, flen + 6);
-            strcpy(hf, xc->filename);
-
-            if (xc->compress_hier) {
-                strcpy(hf + flen, ".hier");
-                unlink(hf); /* no longer needed as a section now exists for this */
-            }
-
-            free(hf);
-        }
-#endif
-
-#ifdef FST_WRITER_PARALLEL
-        pthread_mutex_destroy(&xc->mutex);
-        pthread_attr_destroy(&xc->thread_attr);
-#endif
-
-        if (xc->path_array) {
-            const uint32_t hashmask = FST_PATH_HASHMASK;
-            JenkinsFree(&(xc->path_array), hashmask);
-        }
-
-        free(xc->filename);
-        xc->filename = NULL;
-        free(xc);
-    }
-}
-
-/*
- * functions to set miscellaneous header/block information
- */
-void fstWriterSetDate(fstWriterContext *xc, const char *dat)
-{
-    if (xc) {
-        char s[FST_HDR_DATE_SIZE];
-        fst_off_t fpos = ftello(xc->handle);
-        int len = strlen(dat);
-
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_DATE, SEEK_SET);
-        memset(s, 0, FST_HDR_DATE_SIZE);
-        memcpy(s, dat, (len < FST_HDR_DATE_SIZE) ? len : FST_HDR_DATE_SIZE);
-        fstFwrite(s, FST_HDR_DATE_SIZE, 1, xc->handle);
-        fflush(xc->handle);
-        fstWriterFseeko(xc, xc->handle, fpos, SEEK_SET);
-    }
-}
-
-void fstWriterSetVersion(fstWriterContext *xc, const char *vers)
-{
-    if (xc && vers) {
-        char s[FST_HDR_SIM_VERSION_SIZE];
-        fst_off_t fpos = ftello(xc->handle);
-        int len = strlen(vers);
-
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_SIM_VERSION, SEEK_SET);
-        memset(s, 0, FST_HDR_SIM_VERSION_SIZE);
-        memcpy(s, vers, (len < FST_HDR_SIM_VERSION_SIZE) ? len : FST_HDR_SIM_VERSION_SIZE);
-        fstFwrite(s, FST_HDR_SIM_VERSION_SIZE, 1, xc->handle);
-        fflush(xc->handle);
-        fstWriterFseeko(xc, xc->handle, fpos, SEEK_SET);
-    }
-}
-
-void fstWriterSetFileType(fstWriterContext *xc, enum fstFileType filetype)
-{
-    if (xc) {
-        if (/*(filetype >= FST_FT_MIN) &&*/ (filetype <= FST_FT_MAX)) {
-            fst_off_t fpos = ftello(xc->handle);
-
-            xc->filetype = filetype;
-
-            fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_FILETYPE, SEEK_SET);
-            fputc(xc->filetype, xc->handle);
-            fflush(xc->handle);
-            fstWriterFseeko(xc, xc->handle, fpos, SEEK_SET);
-        }
-    }
-}
-
-static void fstWriterSetAttrDoubleArgGeneric(fstWriterContext *xc,
-                                             int typ,
-                                             uint64_t arg1,
-                                             uint64_t arg2)
-{
-    if (xc) {
-        unsigned char buf[11]; /* ceil(64/7) = 10 + null term */
-        unsigned char *pnt = fstCopyVarint64ToRight(buf, arg1);
-        if (arg1) {
-            *pnt =
-                0; /* this converts any *nonzero* arg1 when made a varint into a null-term string */
-        }
-
-        fstWriterSetAttrBegin(xc, FST_AT_MISC, typ, (char *)buf, arg2);
-    }
-}
-
-static void fstWriterSetAttrGeneric(fstWriterContext *xc, const char *comm, int typ, uint64_t arg)
-{
-    if (xc && comm) {
-        char *s = strdup(comm);
-        char *sf = s;
-
-        while (*s) {
-            if ((*s == '\n') || (*s == '\r'))
-                *s = ' ';
-            s++;
-        }
-
-        fstWriterSetAttrBegin(xc, FST_AT_MISC, typ, sf, arg);
-        free(sf);
-    }
-}
-
-static void fstWriterSetSourceStem_2(fstWriterContext *xc,
-                                     const char *path,
-                                     unsigned int line,
-                                     unsigned int use_realpath,
-                                     int typ)
-{
-    if (xc && path && path[0]) {
-        uint64_t sidx = 0;
-        int slen = strlen(path);
-        const uint32_t hashmask = FST_PATH_HASHMASK;
-        const unsigned char *path2 = (const unsigned char *)path;
-        PPvoid_t pv;
-
-        pv = JenkinsIns(&(xc->path_array), path2, slen, hashmask);
-        if (*pv) {
-            sidx = (intptr_t)(*pv);
-        } else {
-            char *rp = NULL;
-
-            sidx = ++xc->path_array_count;
-            *pv = (void *)(intptr_t)(xc->path_array_count);
-
-            if (use_realpath) {
-                rp = fstRealpath((const char *)path2, NULL);
-            }
-
-            fstWriterSetAttrGeneric(xc, rp ? rp : (const char *)path2, FST_MT_PATHNAME, sidx);
-
-            if (rp) {
-                free(rp);
-            }
-        }
-
-        fstWriterSetAttrDoubleArgGeneric(xc, typ, sidx, line);
-    }
-}
-
-void fstWriterSetSourceStem(fstWriterContext *ctx,
-                            const char *path,
-                            unsigned int line,
-                            unsigned int use_realpath)
-{
-    fstWriterSetSourceStem_2(ctx, path, line, use_realpath, FST_MT_SOURCESTEM);
-}
-
-void fstWriterSetSourceInstantiationStem(fstWriterContext *ctx,
-                                         const char *path,
-                                         unsigned int line,
-                                         unsigned int use_realpath)
-{
-    fstWriterSetSourceStem_2(ctx, path, line, use_realpath, FST_MT_SOURCEISTEM);
-}
-
-void fstWriterSetComment(fstWriterContext *ctx, const char *comm)
-{
-    fstWriterSetAttrGeneric(ctx, comm, FST_MT_COMMENT, 0);
-}
-
-void fstWriterSetValueList(fstWriterContext *ctx, const char *vl)
-{
-    fstWriterSetAttrGeneric(ctx, vl, FST_MT_VALUELIST, 0);
-}
-
-void fstWriterSetEnvVar(fstWriterContext *ctx, const char *envvar)
-{
-    fstWriterSetAttrGeneric(ctx, envvar, FST_MT_ENVVAR, 0);
-}
-
-void fstWriterSetTimescale(fstWriterContext *xc, int ts)
-{
-    if (xc) {
-        fst_off_t fpos = ftello(xc->handle);
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_TIMESCALE, SEEK_SET);
-        fputc(ts & 255, xc->handle);
-        fflush(xc->handle);
-        fstWriterFseeko(xc, xc->handle, fpos, SEEK_SET);
-    }
-}
-
-void fstWriterSetTimescaleFromString(fstWriterContext *xc, const char *s)
-{
-    if (xc && s) {
-        int mat = 0;
-        int seconds_exp = -9;
-        int tv = atoi(s);
-        const char *pnt = s;
-
-        while (*pnt) {
-            switch (*pnt) {
-                case 'm':
-                    seconds_exp = -3;
-                    mat = 1;
-                    break;
-                case 'u':
-                    seconds_exp = -6;
-                    mat = 1;
-                    break;
-                case 'n':
-                    seconds_exp = -9;
-                    mat = 1;
-                    break;
-                case 'p':
-                    seconds_exp = -12;
-                    mat = 1;
-                    break;
-                case 'f':
-                    seconds_exp = -15;
-                    mat = 1;
-                    break;
-                case 'a':
-                    seconds_exp = -18;
-                    mat = 1;
-                    break;
-                case 'z':
-                    seconds_exp = -21;
-                    mat = 1;
-                    break;
-                case 's':
-                    seconds_exp = 0;
-                    mat = 1;
-                    break;
-                default:
-                    break;
-            }
-
-            if (mat)
-                break;
-            pnt++;
-        }
-
-        if (tv == 10) {
-            seconds_exp++;
-        } else if (tv == 100) {
-            seconds_exp += 2;
-        }
-
-        fstWriterSetTimescale(xc, seconds_exp);
-    }
-}
-
-void fstWriterSetTimezero(fstWriterContext *xc, int64_t tim)
-{
-    if (xc) {
-        fst_off_t fpos = ftello(xc->handle);
-        fstWriterFseeko(xc, xc->handle, FST_HDR_OFFS_TIMEZERO, SEEK_SET);
-        fstWriterUint64(xc->handle, (xc->timezero = tim));
-        fflush(xc->handle);
-        fstWriterFseeko(xc, xc->handle, fpos, SEEK_SET);
-    }
-}
-
-void fstWriterSetPackType(fstWriterContext *xc, enum fstWriterPackType typ)
-{
-    if (xc) {
-        xc->fastpack = (typ != FST_WR_PT_ZLIB);
-        xc->fourpack = (typ == FST_WR_PT_LZ4);
-    }
-}
-
-void fstWriterSetRepackOnClose(fstWriterContext *xc, int enable)
-{
-    if (xc) {
-        xc->repack_on_close = (enable != 0);
-    }
-}
-
-void fstWriterSetParallelMode(fstWriterContext *xc, int enable)
-{
-    if (xc) {
-        xc->parallel_was_enabled |= xc->parallel_enabled; /* make sticky */
-        xc->parallel_enabled = (enable != 0);
-#ifndef FST_WRITER_PARALLEL
-        if (xc->parallel_enabled) {
-            fprintf(stderr,
-                    FST_APIMESS "fstWriterSetParallelMode(), FST_WRITER_PARALLEL not enabled "
-                                "during compile, exiting.\n");
-            exit(255);
-        }
-#endif
-    }
-}
-
-void fstWriterSetDumpSizeLimit(fstWriterContext *xc, uint64_t numbytes)
-{
-    if (xc) {
-        xc->dump_size_limit = numbytes;
-    }
-}
-
-int fstWriterGetDumpSizeLimitReached(fstWriterContext *xc)
-{
-    if (xc) {
-        return (xc->size_limit_locked != 0);
-    }
-
-    return (0);
-}
-
-int fstWriterGetFseekFailed(fstWriterContext *xc)
-{
-    if (xc) {
-        return (xc->fseek_failed != 0);
-    }
-
-    return (0);
-}
-
-static int fstWriterGetFlushContextPendingInternal(fstWriterContext *xc)
-{
-    return (xc->vchg_siz >= xc->fst_break_size) || (xc->flush_context_pending);
-}
-
-int fstWriterGetFlushContextPending(fstWriterContext *xc)
-{
-    return xc && !xc->is_initial_time && fstWriterGetFlushContextPendingInternal(xc);
-}
-
-/*
- * writer attr/scope/var creation:
- * fstWriterCreateVar2() is used to dump VHDL or other languages, but the
- * underlying variable needs to map to Verilog/SV via the proper fstVarType vt
- */
-fstHandle fstWriterCreateVar2(fstWriterContext *ctx,
-                              enum fstVarType vt,
-                              enum fstVarDir vd,
-                              uint32_t len,
-                              const char *nam,
-                              fstHandle aliasHandle,
-                              const char *type,
-                              enum fstSupplementalVarType svt,
-                              enum fstSupplementalDataType sdt)
-{
-    fstWriterSetAttrGeneric(ctx,
-                            type ? type : "",
-                            FST_MT_SUPVAR,
-                            (svt << FST_SDT_SVT_SHIFT_COUNT) | (sdt & FST_SDT_ABS_MAX));
-    return (fstWriterCreateVar(ctx, vt, vd, len, nam, aliasHandle));
-}
-
-fstHandle fstWriterCreateVar(fstWriterContext *xc,
-                             enum fstVarType vt,
-                             enum fstVarDir vd,
-                             uint32_t len,
-                             const char *nam,
-                             fstHandle aliasHandle)
-{
-    unsigned int i;
-    int nlen, is_real;
-
-    if (xc && nam) {
-        if (xc->valpos_mem) {
-            fstDestroyMmaps(xc, 0);
-        }
-
-        fputc(vt, xc->hier_handle);
-        fputc(vd, xc->hier_handle);
-        nlen = strlen(nam);
-        fstFwrite(nam, nlen, 1, xc->hier_handle);
-        fputc(0, xc->hier_handle);
-        xc->hier_file_len += (nlen + 3);
-
-        if ((vt == FST_VT_VCD_REAL) || (vt == FST_VT_VCD_REAL_PARAMETER) ||
-            (vt == FST_VT_VCD_REALTIME) || (vt == FST_VT_SV_SHORTREAL)) {
-            is_real = 1;
-            len = 8; /* recast number of bytes to that of what a double is */
-        } else {
-            is_real = 0;
-            if (vt == FST_VT_GEN_STRING) {
-                len = 0;
-            }
-        }
-
-        xc->hier_file_len += fstWriterVarint(xc->hier_handle, len);
-
-        if (aliasHandle > xc->maxhandle)
-            aliasHandle = 0;
-        xc->hier_file_len += fstWriterVarint(xc->hier_handle, aliasHandle);
-        xc->numsigs++;
-        if (xc->numsigs == xc->next_huge_break) {
-            if (xc->fst_break_size < xc->fst_huge_break_size) {
-                xc->next_huge_break += FST_ACTIVATE_HUGE_INC;
-                xc->fst_break_size += xc->fst_orig_break_size;
-                xc->fst_break_add_size += xc->fst_orig_break_add_size;
-
-                xc->vchg_alloc_siz = xc->fst_break_size + xc->fst_break_add_size;
-                if (xc->vchg_mem) {
-                    xc->vchg_mem = (unsigned char *)realloc(xc->vchg_mem, xc->vchg_alloc_siz);
-                }
-            }
-        }
-
-        if (!aliasHandle) {
-            uint32_t zero = 0;
-
-            if (len) {
-                fstWriterVarint(xc->geom_handle,
-                                !is_real ? len : 0); /* geom section encodes reals as zero byte */
-            } else {
-                fstWriterVarint(xc->geom_handle,
-                                0xFFFFFFFF); /* geom section encodes zero len as 32b -1 */
-            }
-
-            fstFwrite(&xc->maxvalpos, sizeof(uint32_t), 1, xc->valpos_handle);
-            fstFwrite(&len, sizeof(uint32_t), 1, xc->valpos_handle);
-            fstFwrite(&zero, sizeof(uint32_t), 1, xc->valpos_handle);
-            fstFwrite(&zero, sizeof(uint32_t), 1, xc->valpos_handle);
-
-            if (!is_real) {
-                for (i = 0; i < len; i++) {
-                    fputc('x', xc->curval_handle);
-                }
-            } else {
-                fstFwrite(&xc->nan,
-                          8,
-                          1,
-                          xc->curval_handle); /* initialize doubles to NaN rather than x */
-            }
-
-            xc->maxvalpos += len;
-            xc->maxhandle++;
-            return (xc->maxhandle);
-        } else {
-            return (aliasHandle);
-        }
-    }
-
-    return (0);
-}
-
-void fstWriterSetScope(fstWriterContext *xc,
-                       enum fstScopeType scopetype,
-                       const char *scopename,
-                       const char *scopecomp)
-{
-    if (xc) {
-        fputc(FST_ST_VCD_SCOPE, xc->hier_handle);
-        if (/*(scopetype < FST_ST_VCD_MODULE) ||*/ (scopetype > FST_ST_MAX)) {
-            scopetype = FST_ST_VCD_MODULE;
-        }
-        fputc(scopetype, xc->hier_handle);
-        fprintf(xc->hier_handle,
-                "%s%c%s%c",
-                scopename ? scopename : "",
-                0,
-                scopecomp ? scopecomp : "",
-                0);
-
-        if (scopename) {
-            xc->hier_file_len += strlen(scopename);
-        }
-        if (scopecomp) {
-            xc->hier_file_len += strlen(scopecomp);
-        }
-
-        xc->hier_file_len += 4; /* FST_ST_VCD_SCOPE + scopetype + two string terminating zeros */
-        xc->numscopes++;
-    }
-}
-
-void fstWriterSetUpscope(fstWriterContext *xc)
-{
-    if (xc) {
-        fputc(FST_ST_VCD_UPSCOPE, xc->hier_handle);
-        xc->hier_file_len++;
-    }
-}
-
-void fstWriterSetAttrBegin(fstWriterContext *xc,
-                           enum fstAttrType attrtype,
-                           int subtype,
-                           const char *attrname,
-                           uint64_t arg)
-{
-    if (xc) {
-        fputc(FST_ST_GEN_ATTRBEGIN, xc->hier_handle);
-        if (/*(attrtype < FST_AT_MISC) ||*/ (attrtype > FST_AT_MAX)) {
-            attrtype = FST_AT_MISC;
-            subtype = FST_MT_UNKNOWN;
-        }
-        fputc(attrtype, xc->hier_handle);
-
-        switch (attrtype) {
-            case FST_AT_ARRAY:
-                if ((subtype < FST_AR_NONE) || (subtype > FST_AR_MAX))
-                    subtype = FST_AR_NONE;
-                break;
-            case FST_AT_ENUM:
-                if ((subtype < FST_EV_SV_INTEGER) || (subtype > FST_EV_MAX))
-                    subtype = FST_EV_SV_INTEGER;
-                break;
-            case FST_AT_PACK:
-                if ((subtype < FST_PT_NONE) || (subtype > FST_PT_MAX))
-                    subtype = FST_PT_NONE;
-                break;
-
-            case FST_AT_MISC:
-            default:
-                break;
-        }
-
-        fputc(subtype, xc->hier_handle);
-        fprintf(xc->hier_handle, "%s%c", attrname ? attrname : "", 0);
-
-        if (attrname) {
-            xc->hier_file_len += strlen(attrname);
-        }
-
-        xc->hier_file_len +=
-            4; /* FST_ST_GEN_ATTRBEGIN + type + subtype + string terminating zero */
-        xc->hier_file_len += fstWriterVarint(xc->hier_handle, arg);
-    }
-}
-
-void fstWriterSetAttrEnd(fstWriterContext *xc)
-{
-    if (xc) {
-        fputc(FST_ST_GEN_ATTREND, xc->hier_handle);
-        xc->hier_file_len++;
-    }
-}
-
-fstEnumHandle fstWriterCreateEnumTable(fstWriterContext *xc,
-                                       const char *name,
-                                       uint32_t elem_count,
-                                       unsigned int min_valbits,
-                                       const char **literal_arr,
-                                       const char **val_arr)
-{
-    fstEnumHandle handle = 0;
-    unsigned int *literal_lens = NULL;
-    unsigned int *val_lens = NULL;
-    int lit_len_tot = 0;
-    int val_len_tot = 0;
-    int name_len;
-    char elem_count_buf[16];
-    int elem_count_len;
-    int total_len;
-    int pos = 0;
-    char *attr_str = NULL;
-
-    if (xc && name && literal_arr && val_arr && (elem_count != 0)) {
-        uint32_t i;
-
-        name_len = strlen(name);
-        elem_count_len = snprintf(elem_count_buf, 16, "%" PRIu32, elem_count);
-
-        literal_lens = (unsigned int *)calloc(elem_count, sizeof(unsigned int));
-        val_lens = (unsigned int *)calloc(elem_count, sizeof(unsigned int));
-
-        for (i = 0; i < elem_count; i++) {
-            literal_lens[i] = strlen(literal_arr[i]);
-            lit_len_tot +=
-                fstUtilityBinToEscConvertedLen((unsigned char *)literal_arr[i], literal_lens[i]);
-
-            val_lens[i] = strlen(val_arr[i]);
-            val_len_tot += fstUtilityBinToEscConvertedLen((unsigned char *)val_arr[i], val_lens[i]);
-
-            if (min_valbits > 0) {
-                if (val_lens[i] < min_valbits) {
-                    val_len_tot +=
-                        (min_valbits -
-                         val_lens[i]); /* additional converted len is same for '0' character */
-                }
-            }
-        }
-
-        total_len =
-            name_len + 1 + elem_count_len + 1 + lit_len_tot + elem_count + val_len_tot + elem_count;
-
-        attr_str = (char *)malloc(total_len);
-        pos = 0;
-
-        memcpy(attr_str + pos, name, name_len);
-        pos += name_len;
-        attr_str[pos++] = ' ';
-
-        memcpy(attr_str + pos, elem_count_buf, elem_count_len);
-        pos += elem_count_len;
-        attr_str[pos++] = ' ';
-
-        for (i = 0; i < elem_count; i++) {
-            pos += fstUtilityBinToEsc((unsigned char *)attr_str + pos,
-                                      (unsigned char *)literal_arr[i],
-                                      literal_lens[i]);
-            attr_str[pos++] = ' ';
-        }
-
-        for (i = 0; i < elem_count; i++) {
-            if (min_valbits > 0) {
-                if (val_lens[i] < min_valbits) {
-                    memset(attr_str + pos, '0', min_valbits - val_lens[i]);
-                    pos += (min_valbits - val_lens[i]);
-                }
-            }
-
-            pos += fstUtilityBinToEsc((unsigned char *)attr_str + pos,
-                                      (unsigned char *)val_arr[i],
-                                      val_lens[i]);
-            attr_str[pos++] = ' ';
-        }
-
-        attr_str[pos - 1] = 0;
-
-#ifdef FST_DEBUG
-        fprintf(stderr,
-                FST_APIMESS "fstWriterCreateEnumTable() total_len: %d, pos: %d\n",
-                total_len,
-                pos);
-        fprintf(stderr, FST_APIMESS "*%s*\n", attr_str);
-#endif
-
-        fstWriterSetAttrBegin(xc,
-                              FST_AT_MISC,
-                              FST_MT_ENUMTABLE,
-                              attr_str,
-                              handle = ++xc->max_enumhandle);
-
-        free(attr_str);
-        free(val_lens);
-        free(literal_lens);
-    }
-
-    return (handle);
-}
-
-void fstWriterEmitEnumTableRef(fstWriterContext *xc, fstEnumHandle handle)
-{
-    if (xc && handle) {
-        fstWriterSetAttrBegin(xc, FST_AT_MISC, FST_MT_ENUMTABLE, NULL, handle);
-    }
-}
-
-/*
- * value and time change emission
- */
-void fstWriterEmitValueChange(fstWriterContext *xc, fstHandle handle, const void *val)
-{
-    const unsigned char *buf = (const unsigned char *)val;
-    uint32_t offs;
-    int len;
-
-    if (FST_LIKELY((xc) && (handle <= xc->maxhandle))) {
-        uint32_t fpos;
-        uint32_t *vm4ip;
-
-        if (FST_UNLIKELY(!xc->valpos_mem)) {
-            xc->vc_emitted = 1;
-            fstWriterCreateMmaps(xc);
-        }
-
-        handle--; /* move starting at 1 index to starting at 0 */
-        vm4ip = &(xc->valpos_mem[4 * handle]);
-
-        len = vm4ip[1];
-        if (FST_LIKELY(len)) /* len of zero = variable length, use
-                                fstWriterEmitVariableLengthValueChange */
-        {
-            if (FST_LIKELY(!xc->is_initial_time)) {
-                fpos = xc->vchg_siz;
-
-                if (FST_UNLIKELY((fpos + len + 10) > xc->vchg_alloc_siz)) {
-                    xc->vchg_alloc_siz +=
-                        (xc->fst_break_add_size + len); /* +len added in the case of extremely long
-                                                           vectors and small break add sizes */
-                    xc->vchg_mem = (unsigned char *)realloc(xc->vchg_mem, xc->vchg_alloc_siz);
-                    if (FST_UNLIKELY(!xc->vchg_mem)) {
-                        fprintf(stderr,
-                                FST_APIMESS
-                                "Could not realloc() in fstWriterEmitValueChange, exiting.\n");
-                        exit(255);
-                    }
-                }
-#ifdef FST_REMOVE_DUPLICATE_VC
-                offs = vm4ip[0];
-
-                if (len != 1) {
-                    if ((vm4ip[3] == xc->tchn_idx) && (vm4ip[2])) {
-                        unsigned char *old_value =
-                            xc->vchg_mem + vm4ip[2] + 4; /* the +4 skips old vm4ip[2] value */
-                        while (*(old_value++) &
-                               0x80) { /* skips over varint encoded "xc->tchn_idx - vm4ip[3]" */
-                        }
-                        memcpy(old_value, buf, len); /* overlay new value */
-
-                        memcpy(xc->curval_mem + offs, buf, len);
-                        return;
-                    } else {
-                        if (!memcmp(xc->curval_mem + offs, buf, len)) {
-                            if (!xc->curtime) {
-                                int i;
-                                for (i = 0; i < len; i++) {
-                                    if (buf[i] != 'x')
-                                        break;
-                                }
-
-                                if (i < len)
-                                    return;
-                            } else {
-                                return;
-                            }
-                        }
-                    }
-
-                    memcpy(xc->curval_mem + offs, buf, len);
-                } else {
-                    if ((vm4ip[3] == xc->tchn_idx) && (vm4ip[2])) {
-                        unsigned char *old_value =
-                            xc->vchg_mem + vm4ip[2] + 4; /* the +4 skips old vm4ip[2] value */
-                        while (*(old_value++) &
-                               0x80) { /* skips over varint encoded "xc->tchn_idx - vm4ip[3]" */
-                        }
-                        *old_value = *buf; /* overlay new value */
-
-                        *(xc->curval_mem + offs) = *buf;
-                        return;
-                    } else {
-                        if ((*(xc->curval_mem + offs)) == (*buf)) {
-                            if (!xc->curtime) {
-                                if (*buf != 'x')
-                                    return;
-                            } else {
-                                return;
-                            }
-                        }
-                    }
-
-                    *(xc->curval_mem + offs) = *buf;
-                }
-#endif
-                xc->vchg_siz += fstWriterUint32WithVarint32(xc,
-                                                            &vm4ip[2],
-                                                            xc->tchn_idx - vm4ip[3],
-                                                            buf,
-                                                            len); /* do one fwrite op only */
-                vm4ip[3] = xc->tchn_idx;
-                vm4ip[2] = fpos;
-            } else {
-                offs = vm4ip[0];
-                memcpy(xc->curval_mem + offs, buf, len);
-            }
-        }
-    }
-}
-
-void fstWriterEmitValueChange32(fstWriterContext *ctx,
-                                fstHandle handle,
-                                uint32_t bits,
-                                uint32_t val)
-{
-    char buf[32];
-    char *s = buf;
-    uint32_t i;
-    for (i = 0; i < bits; ++i) {
-        *s++ = '0' + ((val >> (bits - i - 1)) & 1);
-    }
-    fstWriterEmitValueChange(ctx, handle, buf);
-}
-
-void fstWriterEmitValueChange64(fstWriterContext *ctx,
-                                fstHandle handle,
-                                uint32_t bits,
-                                uint64_t val)
-{
-    char buf[64];
-    char *s = buf;
-    uint32_t i;
-    for (i = 0; i < bits; ++i) {
-        *s++ = '0' + ((val >> (bits - i - 1)) & 1);
-    }
-    fstWriterEmitValueChange(ctx, handle, buf);
-}
-
-void fstWriterEmitValueChangeVec32(fstWriterContext *xc,
-                                   fstHandle handle,
-                                   uint32_t bits,
-                                   const uint32_t *val)
-{
-    if (FST_UNLIKELY(bits <= 32)) {
-        fstWriterEmitValueChange32(xc, handle, bits, val[0]);
-    } else if (FST_LIKELY(xc)) {
-        int bq = bits / 32;
-        int br = bits & 31;
-        int i;
-        int w;
-        uint32_t v;
-        unsigned char *s;
-        if (FST_UNLIKELY(bits > xc->outval_alloc_siz)) {
-            xc->outval_alloc_siz = bits * 2 + 1;
-            xc->outval_mem = (unsigned char *)realloc(xc->outval_mem, xc->outval_alloc_siz);
-            if (FST_UNLIKELY(!xc->outval_mem)) {
-                fprintf(stderr,
-                        FST_APIMESS
-                        "Could not realloc() in fstWriterEmitValueChangeVec32, exiting.\n");
-                exit(255);
-            }
-        }
-        s = xc->outval_mem;
-        {
-            w = bq;
-            v = val[w];
-            for (i = 0; i < br; ++i) {
-                *s++ = '0' + ((v >> (br - i - 1)) & 1);
-            }
-        }
-        for (w = bq - 1; w >= 0; --w) {
-            v = val[w];
-            for (i = (32 - 4); i >= 0; i -= 4) {
-                s[0] = '0' + ((v >> (i + 3)) & 1);
-                s[1] = '0' + ((v >> (i + 2)) & 1);
-                s[2] = '0' + ((v >> (i + 1)) & 1);
-                s[3] = '0' + ((v >> (i + 0)) & 1);
-                s += 4;
-            }
-        }
-        fstWriterEmitValueChange(xc, handle, xc->outval_mem);
-    }
-}
-void fstWriterEmitValueChangeVec64(fstWriterContext *xc,
-                                   fstHandle handle,
-                                   uint32_t bits,
-                                   const uint64_t *val)
-{
-    if (FST_UNLIKELY(bits <= 64)) {
-        fstWriterEmitValueChange64(xc, handle, bits, val[0]);
-    } else if (FST_LIKELY(xc)) {
-        int bq = bits / 64;
-        int br = bits & 63;
-        int i;
-        int w;
-        uint32_t v;
-        unsigned char *s;
-        if (FST_UNLIKELY(bits > xc->outval_alloc_siz)) {
-            xc->outval_alloc_siz = bits * 2 + 1;
-            xc->outval_mem = (unsigned char *)realloc(xc->outval_mem, xc->outval_alloc_siz);
-            if (FST_UNLIKELY(!xc->outval_mem)) {
-                fprintf(stderr,
-                        FST_APIMESS
-                        "Could not realloc() in fstWriterEmitValueChangeVec64, exiting.\n");
-                exit(255);
-            }
-        }
-        s = xc->outval_mem;
-        {
-            w = bq;
-            v = val[w];
-            for (i = 0; i < br; ++i) {
-                *s++ = '0' + ((v >> (br - i - 1)) & 1);
-            }
-        }
-        for (w = bq - 1; w >= 0; --w) {
-            v = val[w];
-            for (i = (64 - 4); i >= 0; i -= 4) {
-                s[0] = '0' + ((v >> (i + 3)) & 1);
-                s[1] = '0' + ((v >> (i + 2)) & 1);
-                s[2] = '0' + ((v >> (i + 1)) & 1);
-                s[3] = '0' + ((v >> (i + 0)) & 1);
-                s += 4;
-            }
-        }
-        fstWriterEmitValueChange(xc, handle, xc->outval_mem);
-    }
-}
-
-void fstWriterEmitVariableLengthValueChange(fstWriterContext *xc,
-                                            fstHandle handle,
-                                            const void *val,
-                                            uint32_t len)
-{
-    const unsigned char *buf = (const unsigned char *)val;
-
-    if (FST_LIKELY((xc) && (handle <= xc->maxhandle))) {
-        uint32_t fpos;
-        uint32_t *vm4ip;
-
-        if (FST_UNLIKELY(!xc->valpos_mem)) {
-            xc->vc_emitted = 1;
-            fstWriterCreateMmaps(xc);
-        }
-
-        handle--; /* move starting at 1 index to starting at 0 */
-        vm4ip = &(xc->valpos_mem[4 * handle]);
-
-        /* there is no initial time dump for variable length value changes */
-        if (FST_LIKELY(!vm4ip[1])) /* len of zero = variable length */
-        {
-            fpos = xc->vchg_siz;
-
-            if (FST_UNLIKELY((fpos + len + 10 + 5) > xc->vchg_alloc_siz)) {
-                xc->vchg_alloc_siz +=
-                    (xc->fst_break_add_size + len + 5); /* +len added in the case of extremely long
-                                                           vectors and small break add sizes */
-                xc->vchg_mem = (unsigned char *)realloc(xc->vchg_mem, xc->vchg_alloc_siz);
-                if (FST_UNLIKELY(!xc->vchg_mem)) {
-                    fprintf(stderr,
-                            FST_APIMESS "Could not realloc() in "
-                                        "fstWriterEmitVariableLengthValueChange, exiting.\n");
-                    exit(255);
-                }
-            }
-
-            xc->vchg_siz += fstWriterUint32WithVarint32AndLength(xc,
-                                                                 &vm4ip[2],
-                                                                 xc->tchn_idx - vm4ip[3],
-                                                                 buf,
-                                                                 len); /* do one fwrite op only */
-            vm4ip[3] = xc->tchn_idx;
-            vm4ip[2] = fpos;
-        }
-    }
-}
-
-void fstWriterEmitTimeChange(fstWriterContext *xc, uint64_t tim)
-{
-    unsigned int i;
-    int skip = 0;
-    if (xc) {
-        if (FST_UNLIKELY(xc->is_initial_time)) {
-            if (xc->size_limit_locked) /* this resets xc->is_initial_time to one */
-            {
-                return;
-            }
-
-            if (!xc->valpos_mem) {
-                fstWriterCreateMmaps(xc);
-            }
-
-            skip = 1;
-
-            xc->firsttime = (xc->vc_emitted) ? 0 : tim;
-            xc->curtime = 0;
-            xc->vchg_mem[0] = '!';
-            xc->vchg_siz = 1;
-            fstWriterEmitSectionHeader(xc);
-            for (i = 0; i < xc->maxhandle; i++) {
-                xc->valpos_mem[4 * i + 2] = 0; /* zero out offset val */
-                xc->valpos_mem[4 * i + 3] = 0; /* zero out last time change val */
-            }
-            xc->is_initial_time = 0;
-        } else {
-            if (fstWriterGetFlushContextPendingInternal(xc)) {
-                xc->flush_context_pending = 0;
-                fstWriterFlushContextPrivate(xc);
-                xc->tchn_cnt++;
-                fstWriterVarint(xc->tchn_handle, xc->curtime);
-            }
-        }
-
-        if (!skip) {
-            xc->tchn_idx++;
-        }
-        fstWriterVarint(xc->tchn_handle, tim - xc->curtime);
-        xc->tchn_cnt++;
-        xc->curtime = tim;
-    }
-}
-
-void fstWriterEmitDumpActive(fstWriterContext *xc, int enable)
-{
-    if (xc) {
-        struct fstBlackoutChain *b =
-            (struct fstBlackoutChain *)calloc(1, sizeof(struct fstBlackoutChain));
-
-        b->tim = xc->curtime;
-        b->active = (enable != 0);
-
-        xc->num_blackouts++;
-        if (xc->blackout_curr) {
-            xc->blackout_curr->next = b;
-            xc->blackout_curr = b;
-        } else {
-            xc->blackout_head = b;
-            xc->blackout_curr = b;
-        }
-    }
-}
-
-/***********************/
-/***                 ***/
-/*** reader function ***/
-/***                 ***/
-/***********************/
-
-/*
- * private structs
- */
-static const char *vartypes[] = {
-    "event", "integer",  "parameter", "real",    "real_parameter", "reg",      "supply0", "supply1",
-    "time",  "tri",      "triand",    "trior",   "trireg",         "tri0",     "tri1",    "wand",
-    "wire",  "wor",      "port",      "sparray", "realtime",       "string",   "bit",     "logic",
-    "int",   "shortint", "longint",   "byte",    "enum",           "shortreal"};
-
-static const char *modtypes[] = {"module",
-                                 "task",
-                                 "function",
-                                 "begin",
-                                 "fork",
-                                 "generate",
-                                 "struct",
-                                 "union",
-                                 "class",
-                                 "interface",
-                                 "package",
-                                 "program",
-                                 "vhdl_architecture",
-                                 "vhdl_procedure",
-                                 "vhdl_function",
-                                 "vhdl_record",
-                                 "vhdl_process",
-                                 "vhdl_block",
-                                 "vhdl_for_generate",
-                                 "vhdl_if_generate",
-                                 "vhdl_generate",
-                                 "vhdl_package",
-                                 "sv_array"};
-
-static const char *attrtypes[] = {"misc", "array", "enum", "class"};
-
-static const char *arraytypes[] = {"none", "unpacked", "packed", "sparse"};
-
-static const char *enumvaluetypes[] = {"integer",
-                                       "bit",
-                                       "logic",
-                                       "int",
-                                       "shortint",
-                                       "longint",
-                                       "byte",
-                                       "unsigned_integer",
-                                       "unsigned_bit",
-                                       "unsigned_logic",
-                                       "unsigned_int",
-                                       "unsigned_shortint",
-                                       "unsigned_longint",
-                                       "unsigned_byte"};
-
-static const char *packtypes[] = {"none", "unpacked", "packed", "tagged_packed"};
-
-struct fstCurrHier
-{
-    struct fstCurrHier *prev;
-    void *user_info;
-    int len;
-};
-
-struct fstReaderContext
-{
-    /* common entries */
-
-    FILE *f, *fh;
-
-    uint64_t start_time, end_time;
-    uint64_t mem_used_by_writer;
-    uint64_t scope_count;
-    uint64_t var_count;
-    fstHandle maxhandle;
-    uint64_t num_alias;
-    uint64_t vc_section_count;
-
-    uint32_t *signal_lens; /* maxhandle sized */
-    unsigned char *signal_typs; /* maxhandle sized */
-    unsigned char *process_mask; /* maxhandle-based, bitwise sized */
-    uint32_t longest_signal_value_len; /* longest len value encountered */
-    unsigned char *temp_signal_value_buf; /* malloced for len in longest_signal_value_len */
-
-    signed char timescale;
-    unsigned char filetype;
-
-    unsigned use_vcd_extensions : 1;
-    unsigned double_endian_match : 1;
-    unsigned native_doubles_for_cb : 1;
-    unsigned contains_geom_section : 1;
-    unsigned contains_hier_section : 1; /* valid for hier_pos */
-    unsigned contains_hier_section_lz4duo : 1; /* valid for hier_pos (contains_hier_section_lz4
-                                                  always also set) */
-    unsigned contains_hier_section_lz4 : 1; /* valid for hier_pos */
-    unsigned limit_range_valid : 1; /* valid for limit_range_start, limit_range_end */
-
-    char version[FST_HDR_SIM_VERSION_SIZE + 1];
-    char date[FST_HDR_DATE_SIZE + 1];
-    int64_t timezero;
-
-    char *filename, *filename_unpacked;
-    fst_off_t hier_pos;
-
-    uint32_t num_blackouts;
-    uint64_t *blackout_times;
-    unsigned char *blackout_activity;
-
-    uint64_t limit_range_start, limit_range_end;
-
-    /* entries specific to read value at time functions */
-
-    unsigned rvat_data_valid : 1;
-    uint64_t *rvat_time_table;
-    uint64_t rvat_beg_tim, rvat_end_tim;
-    unsigned char *rvat_frame_data;
-    uint64_t rvat_frame_maxhandle;
-    fst_off_t *rvat_chain_table;
-    uint32_t *rvat_chain_table_lengths;
-    uint64_t rvat_vc_maxhandle;
-    fst_off_t rvat_vc_start;
-    uint32_t *rvat_sig_offs;
-    int rvat_packtype;
-
-    uint32_t rvat_chain_len;
-    unsigned char *rvat_chain_mem;
-    fstHandle rvat_chain_facidx;
-
-    uint32_t rvat_chain_pos_tidx;
-    uint32_t rvat_chain_pos_idx;
-    uint64_t rvat_chain_pos_time;
-    unsigned rvat_chain_pos_valid : 1;
-
-    /* entries specific to hierarchy traversal */
-
-    struct fstHier hier;
-    struct fstCurrHier *curr_hier;
-    fstHandle current_handle;
-    char *curr_flat_hier_nam;
-    int flat_hier_alloc_len;
-    unsigned do_rewind : 1;
-    char str_scope_nam[FST_ID_NAM_SIZ + 1];
-    char str_scope_comp[FST_ID_NAM_SIZ + 1];
-    char *str_scope_attr;
-
-    unsigned fseek_failed : 1;
-
-    /* self-buffered I/O for writes */
-
-#ifndef FST_WRITEX_DISABLE
-    int writex_pos;
-    int writex_fd;
-    unsigned char writex_buf[FST_WRITEX_MAX];
-#endif
-
-    char *f_nam;
-    char *fh_nam;
-};
-
-int fstReaderFseeko(struct fstReaderContext *xc, FILE *stream, fst_off_t offset, int whence)
-{
-    int rc = fseeko(stream, offset, whence);
-
-    if (rc < 0) {
-        xc->fseek_failed = 1;
-#ifdef FST_DEBUG
-        fprintf(stderr, FST_APIMESS "Seek to #%" PRId64 " (whence = %d) failed!\n", offset, whence);
-        perror("Why");
-#endif
-    }
-
-    return (rc);
-}
-
-#ifndef FST_WRITEX_DISABLE
-static void fstWritex(struct fstReaderContext *xc,
-                      void *v,
-                      uint32_t len) /* TALOS-2023-1793: change len to unsigned */
-{
-    unsigned char *s = (unsigned char *)v;
-
-    if (len) {
-        if (len < FST_WRITEX_MAX) {
-            if (xc->writex_pos + len >= FST_WRITEX_MAX) {
-                fstWritex(xc, NULL, 0);
-            }
-
-            memcpy(xc->writex_buf + xc->writex_pos, s, len);
-            xc->writex_pos += len;
-        } else {
-            fstWritex(xc, NULL, 0);
-            if (write(xc->writex_fd, s, len)) {
-            };
-        }
-    } else {
-        if (xc->writex_pos) {
-            if (write(xc->writex_fd, xc->writex_buf, xc->writex_pos)) {
-            };
-            xc->writex_pos = 0;
-        }
-    }
-}
-#endif
-
-/*
- * scope -> flat name handling
- */
-static void fstReaderDeallocateScopeData(fstReaderContext *xc)
-{
-    struct fstCurrHier *chp;
-
-    free(xc->curr_flat_hier_nam);
-    xc->curr_flat_hier_nam = NULL;
-    while (xc->curr_hier) {
-        chp = xc->curr_hier->prev;
-        free(xc->curr_hier);
-        xc->curr_hier = chp;
-    }
-}
-
-const char *fstReaderGetCurrentFlatScope(fstReaderContext *xc)
-{
-    if (xc) {
-        return (xc->curr_flat_hier_nam ? xc->curr_flat_hier_nam : "");
-    } else {
-        return (NULL);
-    }
-}
-
-void *fstReaderGetCurrentScopeUserInfo(fstReaderContext *xc)
-{
-    if (xc) {
-        return (xc->curr_hier ? xc->curr_hier->user_info : NULL);
-    } else {
-        return (NULL);
-    }
-}
-
-const char *fstReaderPopScope(fstReaderContext *xc)
-{
-    if (xc && xc->curr_hier) {
-        struct fstCurrHier *ch = xc->curr_hier;
-        if (xc->curr_hier->prev) {
-            xc->curr_flat_hier_nam[xc->curr_hier->prev->len] = 0;
-        } else {
-            *xc->curr_flat_hier_nam = 0;
-        }
-        xc->curr_hier = xc->curr_hier->prev;
-        free(ch);
-        return (xc->curr_flat_hier_nam ? xc->curr_flat_hier_nam : "");
-    }
-
-    return (NULL);
-}
-
-void fstReaderResetScope(fstReaderContext *xc)
-{
-    if (xc) {
-        while (fstReaderPopScope(xc))
-            ; /* remove any already-built scoping info */
-    }
-}
-
-const char *fstReaderPushScope(fstReaderContext *xc, const char *nam, void *user_info)
-{
-    if (xc) {
-        struct fstCurrHier *ch = (struct fstCurrHier *)malloc(sizeof(struct fstCurrHier));
-        int chl = xc->curr_hier ? xc->curr_hier->len : 0;
-        int len = chl + 1 + strlen(nam);
-        if (len >= xc->flat_hier_alloc_len) {
-            xc->curr_flat_hier_nam = xc->curr_flat_hier_nam
-                                         ? (char *)realloc(xc->curr_flat_hier_nam, len + 1)
-                                         : (char *)malloc(len + 1);
-        }
-
-        if (chl) {
-            xc->curr_flat_hier_nam[chl] = '.';
-            strcpy(xc->curr_flat_hier_nam + chl + 1, nam);
-        } else {
-            strcpy(xc->curr_flat_hier_nam, nam);
-            len--;
-        }
-
-        ch->len = len;
-        ch->prev = xc->curr_hier;
-        ch->user_info = user_info;
-        xc->curr_hier = ch;
-        return (xc->curr_flat_hier_nam);
-    }
-
-    return (NULL);
-}
-
-int fstReaderGetCurrentScopeLen(fstReaderContext *xc)
-{
-    if (xc && xc->curr_hier) {
-        return (xc->curr_hier->len);
-    }
-
-    return (0);
-}
-
-int fstReaderGetFseekFailed(fstReaderContext *xc)
-{
-    if (xc) {
-        return (xc->fseek_failed != 0);
-    }
-
-    return (0);
-}
-
-/*
- * iter mask manipulation util functions
- */
-int fstReaderGetFacProcessMask(fstReaderContext *xc, fstHandle facidx)
-{
-    if (xc) {
-        facidx--;
-        if (facidx < xc->maxhandle) {
-            int process_idx = facidx / 8;
-            int process_bit = facidx & 7;
-
-            return ((xc->process_mask[process_idx] & (1 << process_bit)) != 0);
-        }
-    }
-    return (0);
-}
-
-void fstReaderSetFacProcessMask(fstReaderContext *xc, fstHandle facidx)
-{
-    if (xc) {
-        facidx--;
-        if (facidx < xc->maxhandle) {
-            int idx = facidx / 8;
-            int bitpos = facidx & 7;
-
-            xc->process_mask[idx] |= (1 << bitpos);
-        }
-    }
-}
-
-void fstReaderClrFacProcessMask(fstReaderContext *xc, fstHandle facidx)
-{
-    if (xc) {
-        facidx--;
-        if (facidx < xc->maxhandle) {
-            int idx = facidx / 8;
-            int bitpos = facidx & 7;
-
-            xc->process_mask[idx] &= (~(1 << bitpos));
-        }
-    }
-}
-
-void fstReaderSetFacProcessMaskAll(fstReaderContext *xc)
-{
-    if (xc) {
-        memset(xc->process_mask, 0xff, (xc->maxhandle + 7) / 8);
-    }
-}
-
-void fstReaderClrFacProcessMaskAll(fstReaderContext *xc)
-{
-    if (xc) {
-        memset(xc->process_mask, 0x00, (xc->maxhandle + 7) / 8);
-    }
-}
-
-/*
- * various utility read/write functions
- */
-signed char fstReaderGetTimescale(fstReaderContext *xc)
-{
-    return (xc ? xc->timescale : 0);
-}
-
-uint64_t fstReaderGetStartTime(fstReaderContext *xc)
-{
-    return (xc ? xc->start_time : 0);
-}
-
-uint64_t fstReaderGetEndTime(fstReaderContext *xc)
-{
-    return (xc ? xc->end_time : 0);
-}
-
-uint64_t fstReaderGetMemoryUsedByWriter(fstReaderContext *xc)
-{
-    return (xc ? xc->mem_used_by_writer : 0);
-}
-
-uint64_t fstReaderGetScopeCount(fstReaderContext *xc)
-{
-    return (xc ? xc->scope_count : 0);
-}
-
-uint64_t fstReaderGetVarCount(fstReaderContext *xc)
-{
-    return (xc ? xc->var_count : 0);
-}
-
-fstHandle fstReaderGetMaxHandle(fstReaderContext *xc)
-{
-    return (xc ? xc->maxhandle : 0);
-}
-
-uint64_t fstReaderGetAliasCount(fstReaderContext *xc)
-{
-    return (xc ? xc->num_alias : 0);
-}
-
-uint64_t fstReaderGetValueChangeSectionCount(fstReaderContext *xc)
-{
-    return (xc ? xc->vc_section_count : 0);
-}
-
-int fstReaderGetDoubleEndianMatchState(fstReaderContext *xc)
-{
-    return (xc ? xc->double_endian_match : 0);
-}
-
-const char *fstReaderGetVersionString(fstReaderContext *xc)
-{
-    return (xc ? xc->version : NULL);
-}
-
-const char *fstReaderGetDateString(fstReaderContext *xc)
-{
-    return (xc ? xc->date : NULL);
-}
-
-int fstReaderGetFileType(fstReaderContext *xc)
-{
-    return (xc ? (int)xc->filetype : (int)FST_FT_VERILOG);
-}
-
-int64_t fstReaderGetTimezero(fstReaderContext *xc)
-{
-    return (xc ? xc->timezero : 0);
-}
-
-uint32_t fstReaderGetNumberDumpActivityChanges(fstReaderContext *xc)
-{
-    return (xc ? xc->num_blackouts : 0);
-}
-
-uint64_t fstReaderGetDumpActivityChangeTime(fstReaderContext *xc, uint32_t idx)
-{
-    if (xc && (idx < xc->num_blackouts) && (xc->blackout_times)) {
-        return (xc->blackout_times[idx]);
-    } else {
-        return (0);
-    }
-}
-
-unsigned char fstReaderGetDumpActivityChangeValue(fstReaderContext *xc, uint32_t idx)
-{
-    if (xc && (idx < xc->num_blackouts) && (xc->blackout_activity)) {
-        return (xc->blackout_activity[idx]);
-    } else {
-        return (0);
-    }
-}
-
-void fstReaderSetLimitTimeRange(fstReaderContext *xc, uint64_t start_time, uint64_t end_time)
-{
-    if (xc) {
-        xc->limit_range_valid = 1;
-        xc->limit_range_start = start_time;
-        xc->limit_range_end = end_time;
-    }
-}
-
-void fstReaderSetUnlimitedTimeRange(fstReaderContext *xc)
-{
-    if (xc) {
-        xc->limit_range_valid = 0;
-    }
-}
-
-void fstReaderSetVcdExtensions(fstReaderContext *xc, int enable)
-{
-    if (xc) {
-        xc->use_vcd_extensions = (enable != 0);
-    }
-}
-
-void fstReaderIterBlocksSetNativeDoublesOnCallback(fstReaderContext *xc, int enable)
-{
-    if (xc) {
-        xc->native_doubles_for_cb = (enable != 0);
-    }
-}
-
-/*
- * hierarchy processing
- */
-static void fstVcdID(char *buf, unsigned int value)
-{
-    char *pnt = buf;
-
-    /* zero is illegal for a value...it is assumed they start at one */
-    while (value) {
-        value--;
-        *(pnt++) = (char)('!' + value % 94);
-        value = value / 94;
-    }
-
-    *pnt = 0;
-}
-
-static int fstVcdIDForFwrite(char *buf, unsigned int value)
-{
-    char *pnt = buf;
-    int len = 0;
-
-    /* zero is illegal for a value...it is assumed they start at one */
-    while (value && len < 14) {
-        value--;
-        ++len;
-        *(pnt++) = (char)('!' + value % 94);
-        value = value / 94;
-    }
-
-    return len;
-}
-
-static int fstReaderRecreateHierFile(struct fstReaderContext *xc)
-{
-    int pass_status = 1;
-
-    if (!xc->fh) {
-        fst_off_t offs_cache = ftello(xc->f);
-        int fnam_len = strlen(xc->filename) + 6 + 16 + 32 + 1;
-        char *fnam = (char *)malloc(fnam_len);
-        unsigned char *mem = (unsigned char *)malloc(FST_GZIO_LEN);
-        fst_off_t hl, uclen;
-        fst_off_t clen = 0;
-        gzFile zhandle = NULL;
-        int zfd;
-        int htyp = FST_BL_SKIP;
-
-        /* can't handle both set at once should never happen in a real file */
-        if (!xc->contains_hier_section_lz4 && xc->contains_hier_section) {
-            htyp = FST_BL_HIER;
-        } else if (xc->contains_hier_section_lz4 && !xc->contains_hier_section) {
-            htyp = xc->contains_hier_section_lz4duo ? FST_BL_HIER_LZ4DUO : FST_BL_HIER_LZ4;
-        }
-
-        snprintf(fnam, fnam_len, "%s.hier_%d_%p", xc->filename, getpid(), (void *)xc);
-        fstReaderFseeko(xc, xc->f, xc->hier_pos, SEEK_SET);
-        uclen = fstReaderUint64(xc->f);
-#ifndef __MINGW32__
-        fflush(xc->f);
-#endif
-        if (htyp == FST_BL_HIER) {
-            fstReaderFseeko(xc, xc->f, xc->hier_pos, SEEK_SET);
-            uclen = fstReaderUint64(xc->f);
-#ifndef __MINGW32__
-            fflush(xc->f);
-#endif
-            zfd = dup(fileno(xc->f));
-            zhandle = gzdopen(zfd, "rb");
-            if (!zhandle) {
-                close(zfd);
-                free(mem);
-                free(fnam);
-                return (0);
-            }
-        } else if ((htyp == FST_BL_HIER_LZ4) || (htyp == FST_BL_HIER_LZ4DUO)) {
-            fstReaderFseeko(xc, xc->f, xc->hier_pos - 8, SEEK_SET); /* get section len */
-            clen = fstReaderUint64(xc->f) - 16;
-            uclen = fstReaderUint64(xc->f);
-#ifndef __MINGW32__
-            fflush(xc->f);
-#endif
-        }
-
-#ifndef __MINGW32__
-        xc->fh = fopen(fnam, "w+b");
-        if (!xc->fh)
-#endif
-        {
-            xc->fh = tmpfile_open(&xc->fh_nam);
-            free(fnam);
-            fnam = NULL;
-            if (!xc->fh) {
-                tmpfile_close(&xc->fh, &xc->fh_nam);
-                free(mem);
-                return (0);
-            }
-        }
-
-#ifndef __MINGW32__
-        if (fnam)
-            unlink(fnam);
-#endif
-
-        if (htyp == FST_BL_HIER) {
-            for (hl = 0; hl < uclen; hl += FST_GZIO_LEN) {
-                size_t len = ((uclen - hl) > FST_GZIO_LEN) ? FST_GZIO_LEN : (uclen - hl);
-                size_t gzreadlen = gzread(zhandle, mem, len); /* rc should equal len... */
-                size_t fwlen;
-
-                if (gzreadlen != len) {
-                    pass_status = 0;
-                    break;
-                }
-
-                fwlen = fstFwrite(mem, len, 1, xc->fh);
-                if (fwlen != 1) {
-                    pass_status = 0;
-                    break;
-                }
-            }
-            gzclose(zhandle);
-        } else if (htyp == FST_BL_HIER_LZ4DUO) {
-            unsigned char *lz4_cmem = (unsigned char *)malloc(clen);
-            unsigned char *lz4_ucmem = (unsigned char *)malloc(uclen);
-            unsigned char *lz4_ucmem2;
-            uint64_t uclen2;
-            int skiplen2 = 0;
-
-            fstFread(lz4_cmem, clen, 1, xc->f);
-
-            uclen2 = fstGetVarint64(lz4_cmem, &skiplen2);
-            lz4_ucmem2 = (unsigned char *)malloc(uclen2);
-            pass_status =
-                (uclen2 == (uint64_t)LZ4_decompress_safe_partial((char *)lz4_cmem + skiplen2,
-                                                                 (char *)lz4_ucmem2,
-                                                                 clen - skiplen2,
-                                                                 uclen2,
-                                                                 uclen2));
-            if (pass_status) {
-                pass_status = (uclen == LZ4_decompress_safe_partial((char *)lz4_ucmem2,
-                                                                    (char *)lz4_ucmem,
-                                                                    uclen2,
-                                                                    uclen,
-                                                                    uclen));
-
-                if (fstFwrite(lz4_ucmem, uclen, 1, xc->fh) != 1) {
-                    pass_status = 0;
-                }
-            }
-
-            free(lz4_ucmem2);
-            free(lz4_ucmem);
-            free(lz4_cmem);
-        } else if (htyp == FST_BL_HIER_LZ4) {
-            unsigned char *lz4_cmem = (unsigned char *)malloc(clen);
-            unsigned char *lz4_ucmem = (unsigned char *)malloc(uclen);
-
-            fstFread(lz4_cmem, clen, 1, xc->f);
-            pass_status = (uclen == LZ4_decompress_safe_partial((char *)lz4_cmem,
-                                                                (char *)lz4_ucmem,
-                                                                clen,
-                                                                uclen,
-                                                                uclen));
-
-            if (fstFwrite(lz4_ucmem, uclen, 1, xc->fh) != 1) {
-                pass_status = 0;
-            }
-
-            free(lz4_ucmem);
-            free(lz4_cmem);
-        } else /* FST_BL_SKIP */
-        {
-            pass_status = 0;
-            if (xc->fh) {
-                fclose(xc->fh);
-                xc->fh =
-                    NULL; /* needed in case .hier file is missing and there are no hier sections */
-            }
-        }
-
-        free(mem);
-        free(fnam);
-
-        fstReaderFseeko(xc, xc->f, offs_cache, SEEK_SET);
-    }
-
-    return (pass_status);
-}
-
-int fstReaderIterateHierRewind(fstReaderContext *xc)
-{
-    int pass_status = 0;
-
-    if (xc) {
-        pass_status = 1;
-        if (!xc->fh) {
-            pass_status = fstReaderRecreateHierFile(xc);
-        }
-
-        xc->do_rewind = 1;
-    }
-
-    return (pass_status);
-}
-
-struct fstHier *fstReaderIterateHier(fstReaderContext *xc)
-{
-    int isfeof;
-    fstHandle alias;
-    char *pnt;
-    int ch;
-    int unnamed_scope_idx = 0;
-
-    if (!xc)
-        return (NULL);
-
-    if (!xc->fh) {
-        if (!fstReaderRecreateHierFile(xc)) {
-            return (NULL);
-        }
-    }
-
-    if (xc->do_rewind) {
-        xc->do_rewind = 0;
-        xc->current_handle = 0;
-        fstReaderFseeko(xc, xc->fh, 0, SEEK_SET);
-        clearerr(xc->fh);
-    }
-
-    if (!(isfeof = feof(xc->fh))) {
-        int tag = fgetc(xc->fh);
-        int cl;
-        switch (tag) {
-            case FST_ST_VCD_SCOPE:
-                xc->hier.htyp = FST_HT_SCOPE;
-                xc->hier.u.scope.typ = fgetc(xc->fh);
-                xc->hier.u.scope.name = pnt = xc->str_scope_nam;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* scopename */
-                if (!cl) {
-                    cl = snprintf(pnt, FST_ID_NAM_SIZ, "$unnamed_scope_%d", unnamed_scope_idx++);
-                }
-                pnt[cl] = 0;
-                xc->hier.u.scope.name_length = cl;
-
-                xc->hier.u.scope.component = pnt = xc->str_scope_comp;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* scopecomp */
-                pnt[cl] = 0;
-                xc->hier.u.scope.component_length = cl;
-                break;
-
-            case FST_ST_VCD_UPSCOPE:
-                xc->hier.htyp = FST_HT_UPSCOPE;
-                break;
-
-            case FST_ST_GEN_ATTRBEGIN:
-                xc->hier.htyp = FST_HT_ATTRBEGIN;
-                xc->hier.u.attr.typ = fgetc(xc->fh);
-                xc->hier.u.attr.subtype = fgetc(xc->fh);
-                if (!xc->str_scope_attr) {
-                    xc->str_scope_attr = (char *)calloc(1, FST_ID_NAM_ATTR_SIZ + 1);
-                }
-                xc->hier.u.attr.name = pnt = xc->str_scope_attr;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_ATTR_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* attrname */
-                pnt[cl] = 0;
-                xc->hier.u.attr.name_length = cl;
-
-                xc->hier.u.attr.arg = fstReaderVarint64(xc->fh);
-
-                if (xc->hier.u.attr.typ == FST_AT_MISC) {
-                    if ((xc->hier.u.attr.subtype == FST_MT_SOURCESTEM) ||
-                        (xc->hier.u.attr.subtype == FST_MT_SOURCEISTEM)) {
-                        int sidx_skiplen_dummy = 0;
-                        xc->hier.u.attr.arg_from_name =
-                            fstGetVarint64((unsigned char *)xc->str_scope_attr,
-                                           &sidx_skiplen_dummy);
-                    }
-                }
-                break;
-
-            case FST_ST_GEN_ATTREND:
-                xc->hier.htyp = FST_HT_ATTREND;
-                break;
-
-            case FST_VT_VCD_EVENT:
-            case FST_VT_VCD_INTEGER:
-            case FST_VT_VCD_PARAMETER:
-            case FST_VT_VCD_REAL:
-            case FST_VT_VCD_REAL_PARAMETER:
-            case FST_VT_VCD_REG:
-            case FST_VT_VCD_SUPPLY0:
-            case FST_VT_VCD_SUPPLY1:
-            case FST_VT_VCD_TIME:
-            case FST_VT_VCD_TRI:
-            case FST_VT_VCD_TRIAND:
-            case FST_VT_VCD_TRIOR:
-            case FST_VT_VCD_TRIREG:
-            case FST_VT_VCD_TRI0:
-            case FST_VT_VCD_TRI1:
-            case FST_VT_VCD_WAND:
-            case FST_VT_VCD_WIRE:
-            case FST_VT_VCD_WOR:
-            case FST_VT_VCD_PORT:
-            case FST_VT_VCD_SPARRAY:
-            case FST_VT_VCD_REALTIME:
-            case FST_VT_GEN_STRING:
-            case FST_VT_SV_BIT:
-            case FST_VT_SV_LOGIC:
-            case FST_VT_SV_INT:
-            case FST_VT_SV_SHORTINT:
-            case FST_VT_SV_LONGINT:
-            case FST_VT_SV_BYTE:
-            case FST_VT_SV_ENUM:
-            case FST_VT_SV_SHORTREAL:
-                xc->hier.htyp = FST_HT_VAR;
-                xc->hier.u.var.svt_workspace = FST_SVT_NONE;
-                xc->hier.u.var.sdt_workspace = FST_SDT_NONE;
-                xc->hier.u.var.sxt_workspace = 0;
-                xc->hier.u.var.typ = tag;
-                xc->hier.u.var.direction = fgetc(xc->fh);
-                xc->hier.u.var.name = pnt = xc->str_scope_nam;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* varname */
-                pnt[cl] = 0;
-                xc->hier.u.var.name_length = cl;
-                xc->hier.u.var.length = fstReaderVarint32(xc->fh);
-                if (tag == FST_VT_VCD_PORT) {
-                    xc->hier.u.var.length -= 2; /* removal of delimiting spaces */
-                    xc->hier.u.var.length /= 3; /* port -> signal size adjust */
-                }
-
-                alias = fstReaderVarint32(xc->fh);
-
-                if (!alias) {
-                    xc->current_handle++;
-                    xc->hier.u.var.handle = xc->current_handle;
-                    xc->hier.u.var.is_alias = 0;
-                } else {
-                    xc->hier.u.var.handle = alias;
-                    xc->hier.u.var.is_alias = 1;
-                }
-
-                break;
-
-            default:
-                isfeof = 1;
-                break;
-        }
-    }
-
-    return (!isfeof ? &xc->hier : NULL);
-}
-
-int fstReaderProcessHier(fstReaderContext *xc, FILE *fv)
-{
-    char *str;
-    char *pnt;
-    int ch, scopetype;
-    int vartype;
-    uint32_t len, alias;
-    /* uint32_t maxvalpos=0; */
-    unsigned int num_signal_dyn = 65536;
-    int attrtype, subtype;
-    uint64_t attrarg;
-    fstHandle maxhandle_scanbuild;
-    int cl;
-    int unnamed_scope_idx = 0;
-
-    if (!xc)
-        return (0);
-
-    xc->longest_signal_value_len =
-        32; /* arbitrarily set at 32...this is much longer than an expanded double */
-
-    if (!xc->fh) {
-        if (!fstReaderRecreateHierFile(xc)) {
-            return (0);
-        }
-    }
-
-    str = (char *)malloc(FST_ID_NAM_ATTR_SIZ + 1);
-
-    if (fv) {
-        char time_dimension[2] = {0, 0};
-        int time_scale = 1;
-
-        fprintf(fv, "$date\n\t%s\n$end\n", xc->date);
-        fprintf(fv, "$version\n\t%s\n$end\n", xc->version);
-        if (xc->timezero)
-            fprintf(fv, "$timezero\n\t%" PRId64 "\n$end\n", xc->timezero);
-
-        switch (xc->timescale) {
-            case 2:
-                time_scale = 100;
-                time_dimension[0] = 0;
-                break;
-            case 1:
-                time_scale = 10; /* fallthrough */
-            case 0:
-                time_dimension[0] = 0;
-                break;
-
-            case -1:
-                time_scale = 100;
-                time_dimension[0] = 'm';
-                break;
-            case -2:
-                time_scale = 10; /* fallthrough */
-            case -3:
-                time_dimension[0] = 'm';
-                break;
-
-            case -4:
-                time_scale = 100;
-                time_dimension[0] = 'u';
-                break;
-            case -5:
-                time_scale = 10; /* fallthrough */
-            case -6:
-                time_dimension[0] = 'u';
-                break;
-
-            case -10:
-                time_scale = 100;
-                time_dimension[0] = 'p';
-                break;
-            case -11:
-                time_scale = 10; /* fallthrough */
-            case -12:
-                time_dimension[0] = 'p';
-                break;
-
-            case -13:
-                time_scale = 100;
-                time_dimension[0] = 'f';
-                break;
-            case -14:
-                time_scale = 10; /* fallthrough */
-            case -15:
-                time_dimension[0] = 'f';
-                break;
-
-            case -16:
-                time_scale = 100;
-                time_dimension[0] = 'a';
-                break;
-            case -17:
-                time_scale = 10; /* fallthrough */
-            case -18:
-                time_dimension[0] = 'a';
-                break;
-
-            case -19:
-                time_scale = 100;
-                time_dimension[0] = 'z';
-                break;
-            case -20:
-                time_scale = 10; /* fallthrough */
-            case -21:
-                time_dimension[0] = 'z';
-                break;
-
-            case -7:
-                time_scale = 100;
-                time_dimension[0] = 'n';
-                break;
-            case -8:
-                time_scale = 10; /* fallthrough */
-            case -9:
-            default:
-                time_dimension[0] = 'n';
-                break;
-        }
-
-        if (fv)
-            fprintf(fv, "$timescale\n\t%d%ss\n$end\n", time_scale, time_dimension);
-    }
-
-    xc->maxhandle = 0;
-    xc->num_alias = 0;
-
-    free(xc->signal_lens);
-    xc->signal_lens = (uint32_t *)malloc(num_signal_dyn * sizeof(uint32_t));
-
-    free(xc->signal_typs);
-    xc->signal_typs = (unsigned char *)malloc(num_signal_dyn * sizeof(unsigned char));
-
-    fstReaderFseeko(xc, xc->fh, 0, SEEK_SET);
-    while (!feof(xc->fh)) {
-        int tag = fgetc(xc->fh);
-        switch (tag) {
-            case FST_ST_VCD_SCOPE:
-                scopetype = fgetc(xc->fh);
-                if ((scopetype < FST_ST_MIN) || (scopetype > FST_ST_MAX))
-                    scopetype = FST_ST_VCD_MODULE;
-                pnt = str;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_ATTR_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* scopename */
-                if (!cl) {
-                    cl = snprintf(pnt, FST_ID_NAM_SIZ, "$unnamed_scope_%d", unnamed_scope_idx++);
-                }
-                pnt[cl] = 0;
-                while (fgetc(xc->fh)) {
-                }; /* scopecomp */
-
-                if (fv)
-                    fprintf(fv, "$scope %s %s $end\n", modtypes[scopetype], str);
-                break;
-
-            case FST_ST_VCD_UPSCOPE:
-                if (fv)
-                    fprintf(fv, "$upscope $end\n");
-                break;
-
-            case FST_ST_GEN_ATTRBEGIN:
-                attrtype = fgetc(xc->fh);
-                subtype = fgetc(xc->fh);
-                pnt = str;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_ATTR_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* attrname */
-                pnt[cl] = 0;
-
-                if (!str[0]) {
-                    strcpy(str, "\"\"");
-                }
-
-                attrarg = fstReaderVarint64(xc->fh);
-
-                if (fv && xc->use_vcd_extensions) {
-                    switch (attrtype) {
-                        case FST_AT_ARRAY:
-                            if ((subtype < FST_AR_NONE) || (subtype > FST_AR_MAX))
-                                subtype = FST_AR_NONE;
-                            fprintf(fv,
-                                    "$attrbegin %s %s %s %" PRId64 " $end\n",
-                                    attrtypes[attrtype],
-                                    arraytypes[subtype],
-                                    str,
-                                    attrarg);
-                            break;
-                        case FST_AT_ENUM:
-                            if ((subtype < FST_EV_SV_INTEGER) || (subtype > FST_EV_MAX))
-                                subtype = FST_EV_SV_INTEGER;
-                            fprintf(fv,
-                                    "$attrbegin %s %s %s %" PRId64 " $end\n",
-                                    attrtypes[attrtype],
-                                    enumvaluetypes[subtype],
-                                    str,
-                                    attrarg);
-                            break;
-                        case FST_AT_PACK:
-                            if ((subtype < FST_PT_NONE) || (subtype > FST_PT_MAX))
-                                subtype = FST_PT_NONE;
-                            fprintf(fv,
-                                    "$attrbegin %s %s %s %" PRId64 " $end\n",
-                                    attrtypes[attrtype],
-                                    packtypes[subtype],
-                                    str,
-                                    attrarg);
-                            break;
-                        case FST_AT_MISC:
-                        default:
-                            attrtype = FST_AT_MISC;
-                            if (subtype == FST_MT_COMMENT) {
-                                fprintf(fv, "$comment\n\t%s\n$end\n", str);
-                            } else {
-                                if ((subtype == FST_MT_SOURCESTEM) ||
-                                    (subtype == FST_MT_SOURCEISTEM)) {
-                                    int sidx_skiplen_dummy = 0;
-                                    uint64_t sidx =
-                                        fstGetVarint64((unsigned char *)str, &sidx_skiplen_dummy);
-
-                                    fprintf(fv,
-                                            "$attrbegin %s %02x %" PRId64 " %" PRId64 " $end\n",
-                                            attrtypes[attrtype],
-                                            subtype,
-                                            sidx,
-                                            attrarg);
-                                } else {
-                                    fprintf(fv,
-                                            "$attrbegin %s %02x %s %" PRId64 " $end\n",
-                                            attrtypes[attrtype],
-                                            subtype,
-                                            str,
-                                            attrarg);
-                                }
-                            }
-                            break;
-                    }
-                }
-                break;
-
-            case FST_ST_GEN_ATTREND:
-                if (fv && xc->use_vcd_extensions)
-                    fprintf(fv, "$attrend $end\n");
-                break;
-
-            case FST_VT_VCD_EVENT:
-            case FST_VT_VCD_INTEGER:
-            case FST_VT_VCD_PARAMETER:
-            case FST_VT_VCD_REAL:
-            case FST_VT_VCD_REAL_PARAMETER:
-            case FST_VT_VCD_REG:
-            case FST_VT_VCD_SUPPLY0:
-            case FST_VT_VCD_SUPPLY1:
-            case FST_VT_VCD_TIME:
-            case FST_VT_VCD_TRI:
-            case FST_VT_VCD_TRIAND:
-            case FST_VT_VCD_TRIOR:
-            case FST_VT_VCD_TRIREG:
-            case FST_VT_VCD_TRI0:
-            case FST_VT_VCD_TRI1:
-            case FST_VT_VCD_WAND:
-            case FST_VT_VCD_WIRE:
-            case FST_VT_VCD_WOR:
-            case FST_VT_VCD_PORT:
-            case FST_VT_VCD_SPARRAY:
-            case FST_VT_VCD_REALTIME:
-            case FST_VT_GEN_STRING:
-            case FST_VT_SV_BIT:
-            case FST_VT_SV_LOGIC:
-            case FST_VT_SV_INT:
-            case FST_VT_SV_SHORTINT:
-            case FST_VT_SV_LONGINT:
-            case FST_VT_SV_BYTE:
-            case FST_VT_SV_ENUM:
-            case FST_VT_SV_SHORTREAL:
-                vartype = tag;
-                /* vardir = */ fgetc(
-                    xc->fh); /* unused in VCD reader, but need to advance read pointer */
-                pnt = str;
-                cl = 0;
-                while ((ch = fgetc(xc->fh))) {
-                    if (cl < FST_ID_NAM_ATTR_SIZ) {
-                        pnt[cl++] = ch;
-                    }
-                }; /* varname */
-                pnt[cl] = 0;
-                len = fstReaderVarint32(xc->fh);
-                alias = fstReaderVarint32(xc->fh);
-
-                if (!alias) {
-                    if (xc->maxhandle == num_signal_dyn) {
-                        num_signal_dyn *= 2;
-                        xc->signal_lens =
-                            (uint32_t *)realloc(xc->signal_lens, num_signal_dyn * sizeof(uint32_t));
-                        xc->signal_typs =
-                            (unsigned char *)realloc(xc->signal_typs,
-                                                     num_signal_dyn * sizeof(unsigned char));
-                    }
-                    xc->signal_lens[xc->maxhandle] = len;
-                    xc->signal_typs[xc->maxhandle] = vartype;
-
-                    /* maxvalpos+=len; */
-                    if (len > xc->longest_signal_value_len) {
-                        xc->longest_signal_value_len = len;
-                    }
-
-                    if ((vartype == FST_VT_VCD_REAL) || (vartype == FST_VT_VCD_REAL_PARAMETER) ||
-                        (vartype == FST_VT_VCD_REALTIME) || (vartype == FST_VT_SV_SHORTREAL)) {
-                        len = (vartype != FST_VT_SV_SHORTREAL) ? 64 : 32;
-                        xc->signal_typs[xc->maxhandle] = FST_VT_VCD_REAL;
-                    }
-                    if (fv) {
-                        char vcdid_buf[16];
-                        uint32_t modlen = (vartype != FST_VT_VCD_PORT) ? len : ((len - 2) / 3);
-                        fstVcdID(vcdid_buf, xc->maxhandle + 1);
-                        fprintf(fv,
-                                "$var %s %" PRIu32 " %s %s $end\n",
-                                vartypes[vartype],
-                                modlen,
-                                vcdid_buf,
-                                str);
-                    }
-                    xc->maxhandle++;
-                } else {
-                    if ((vartype == FST_VT_VCD_REAL) || (vartype == FST_VT_VCD_REAL_PARAMETER) ||
-                        (vartype == FST_VT_VCD_REALTIME) || (vartype == FST_VT_SV_SHORTREAL)) {
-                        len = (vartype != FST_VT_SV_SHORTREAL) ? 64 : 32;
-                        xc->signal_typs[xc->maxhandle] = FST_VT_VCD_REAL;
-                    }
-                    if (fv) {
-                        char vcdid_buf[16];
-                        uint32_t modlen = (vartype != FST_VT_VCD_PORT) ? len : ((len - 2) / 3);
-                        fstVcdID(vcdid_buf, alias);
-                        fprintf(fv,
-                                "$var %s %" PRIu32 " %s %s $end\n",
-                                vartypes[vartype],
-                                modlen,
-                                vcdid_buf,
-                                str);
-                    }
-                    xc->num_alias++;
-                }
-
-                break;
-
-            default:
-                break;
-        }
-    }
-    if (fv)
-        fprintf(fv, "$enddefinitions $end\n");
-
-    maxhandle_scanbuild =
-        xc->maxhandle
-            ? xc->maxhandle
-            : 1; /*scan-build warning suppression, in reality we have at least one signal */
-
-    xc->signal_lens = (uint32_t *)realloc(xc->signal_lens, maxhandle_scanbuild * sizeof(uint32_t));
-    xc->signal_typs =
-        (unsigned char *)realloc(xc->signal_typs, maxhandle_scanbuild * sizeof(unsigned char));
-
-    free(xc->process_mask);
-    xc->process_mask = (unsigned char *)calloc(1, (maxhandle_scanbuild + 7) / 8);
-
-    free(xc->temp_signal_value_buf);
-    xc->temp_signal_value_buf = (unsigned char *)malloc(xc->longest_signal_value_len + 1);
-
-    xc->var_count = xc->maxhandle + xc->num_alias;
-
-    free(str);
-    return (1);
-}
-
-/*
- * reader file open/close functions
- */
-int fstReaderInit(struct fstReaderContext *xc)
-{
-    fst_off_t blkpos = 0;
-    fst_off_t endfile;
-    uint64_t seclen;
-    int sectype;
-    uint64_t vc_section_count_actual = 0;
-    int hdr_incomplete = 0;
-    int hdr_seen = 0;
-    int gzread_pass_status = 1;
-
-    sectype = fgetc(xc->f);
-    if (sectype == FST_BL_ZWRAPPER) {
-        FILE *fcomp;
-        fst_off_t offpnt, uclen;
-        char gz_membuf[FST_GZIO_LEN];
-        gzFile zhandle;
-        int zfd;
-        int flen = strlen(xc->filename);
-        char *hf;
-        int hf_len;
-
-        seclen = fstReaderUint64(xc->f);
-        uclen = fstReaderUint64(xc->f);
-
-        if (!seclen)
-            return (0); /* not finished compressing, this is a failed read */
-
-        hf_len = flen + 16 + 32 + 1;
-        hf = (char *)calloc(1, hf_len);
-
-        snprintf(hf, hf_len, "%s.upk_%d_%p", xc->filename, getpid(), (void *)xc);
-        fcomp = fopen(hf, "w+b");
-        if (!fcomp) {
-            fcomp = tmpfile_open(&xc->f_nam);
-            free(hf);
-            hf = NULL;
-            if (!fcomp) {
-                tmpfile_close(&fcomp, &xc->f_nam);
-                return (0);
-            }
-        }
-
-#if defined(FST_UNBUFFERED_IO)
-        setvbuf(fcomp,
-                (char *)NULL,
-                _IONBF,
-                0); /* keeps gzip from acting weird in tandem with fopen */
-#endif
-
-#ifdef __MINGW32__
-        xc->filename_unpacked = hf;
-#else
-        if (hf) {
-            unlink(hf);
-            free(hf);
-        }
-#endif
-
-        fstReaderFseeko(xc, xc->f, FST_ZWRAPPER_HDR_SIZE, SEEK_SET);
-#ifndef __MINGW32__
-        fflush(xc->f);
-#else
-        /* Windows UCRT runtime library reads one byte ahead in the file
-           even with buffering disabled and does not synchronise the
-           file position after fseek. */
-        _lseek(fileno(xc->f), FST_ZWRAPPER_HDR_SIZE, SEEK_SET);
-#endif
-
-        zfd = dup(fileno(xc->f));
-        zhandle = gzdopen(zfd, "rb");
-        if (zhandle) {
-            for (offpnt = 0; offpnt < uclen; offpnt += FST_GZIO_LEN) {
-                size_t this_len =
-                    ((uclen - offpnt) > FST_GZIO_LEN) ? FST_GZIO_LEN : (uclen - offpnt);
-                size_t gzreadlen = gzread(zhandle, gz_membuf, this_len);
-                size_t fwlen;
-
-                if (gzreadlen != this_len) {
-                    gzread_pass_status = 0;
-                    break;
-                }
-                fwlen = fstFwrite(gz_membuf, this_len, 1, fcomp);
-                if (fwlen != 1) {
-                    gzread_pass_status = 0;
-                    break;
-                }
-            }
-            gzclose(zhandle);
-        } else {
-            close(zfd);
-        }
-        fflush(fcomp);
-        fclose(xc->f);
-        xc->f = fcomp;
-    }
-
-    if (gzread_pass_status) {
-        fstReaderFseeko(xc, xc->f, 0, SEEK_END);
-        endfile = ftello(xc->f);
-
-        while (blkpos < endfile) {
-            fstReaderFseeko(xc, xc->f, blkpos, SEEK_SET);
-
-            sectype = fgetc(xc->f);
-            seclen = fstReaderUint64(xc->f);
-
-            if (sectype == EOF) {
-                break;
-            }
-
-            if ((hdr_incomplete) && (!seclen)) {
-                break;
-            }
-
-            if (!hdr_seen && (sectype != FST_BL_HDR)) {
-                break;
-            }
-
-            blkpos++;
-            if (sectype == FST_BL_HDR) {
-                if (!hdr_seen) {
-                    int ch;
-                    double dcheck;
-
-                    xc->start_time = fstReaderUint64(xc->f);
-                    xc->end_time = fstReaderUint64(xc->f);
-
-                    hdr_incomplete = (xc->start_time == 0) && (xc->end_time == 0);
-
-                    fstFread(&dcheck, 8, 1, xc->f);
-                    xc->double_endian_match = (dcheck == FST_DOUBLE_ENDTEST);
-                    if (!xc->double_endian_match) {
-                        union
-                        {
-                            unsigned char rvs_buf[8];
-                            double d;
-                        } vu;
-
-                        unsigned char *dcheck_alias = (unsigned char *)&dcheck;
-                        int rvs_idx;
-
-                        for (rvs_idx = 0; rvs_idx < 8; rvs_idx++) {
-                            vu.rvs_buf[rvs_idx] = dcheck_alias[7 - rvs_idx];
-                        }
-                        if (vu.d != FST_DOUBLE_ENDTEST) {
-                            break; /* either corrupt file or wrong architecture (offset +33 also
-                                      functions as matchword) */
-                        }
-                    }
-
-                    hdr_seen = 1;
-
-                    xc->mem_used_by_writer = fstReaderUint64(xc->f);
-                    xc->scope_count = fstReaderUint64(xc->f);
-                    xc->var_count = fstReaderUint64(xc->f);
-                    xc->maxhandle = fstReaderUint64(xc->f);
-                    xc->num_alias = xc->var_count - xc->maxhandle;
-                    xc->vc_section_count = fstReaderUint64(xc->f);
-                    ch = fgetc(xc->f);
-                    xc->timescale = (signed char)ch;
-                    fstFread(xc->version, FST_HDR_SIM_VERSION_SIZE, 1, xc->f);
-                    xc->version[FST_HDR_SIM_VERSION_SIZE] = 0;
-                    fstFread(xc->date, FST_HDR_DATE_SIZE, 1, xc->f);
-                    xc->date[FST_HDR_DATE_SIZE] = 0;
-                    ch = fgetc(xc->f);
-                    xc->filetype = (unsigned char)ch;
-                    xc->timezero = fstReaderUint64(xc->f);
-                }
-            } else if ((sectype == FST_BL_VCDATA) || (sectype == FST_BL_VCDATA_DYN_ALIAS) ||
-                       (sectype == FST_BL_VCDATA_DYN_ALIAS2)) {
-                if (hdr_incomplete) {
-                    uint64_t bt = fstReaderUint64(xc->f);
-                    xc->end_time = fstReaderUint64(xc->f);
-
-                    if (!vc_section_count_actual) {
-                        xc->start_time = bt;
-                    }
-                }
-
-                vc_section_count_actual++;
-            } else if (sectype == FST_BL_GEOM) {
-                if (!hdr_incomplete) {
-                    uint64_t clen = seclen - 24;
-                    uint64_t uclen = fstReaderUint64(xc->f);
-                    unsigned char *ucdata = (unsigned char *)malloc(uclen);
-                    unsigned char *pnt = ucdata;
-                    unsigned int i;
-
-                    xc->contains_geom_section = 1;
-                    xc->maxhandle = fstReaderUint64(xc->f);
-                    xc->longest_signal_value_len = 32; /* arbitrarily set at 32...this is much
-                                                          longer than an expanded double */
-
-                    free(xc->process_mask);
-                    xc->process_mask = (unsigned char *)calloc(1, (xc->maxhandle + 7) / 8);
-
-                    if (clen != uclen) {
-                        unsigned char *cdata = (unsigned char *)malloc(clen);
-                        unsigned long destlen = uclen;
-                        unsigned long sourcelen = clen;
-                        int rc;
-
-                        fstFread(cdata, clen, 1, xc->f);
-                        rc = uncompress(ucdata, &destlen, cdata, sourcelen);
-
-                        if (rc != Z_OK) {
-                            fprintf(stderr,
-                                    FST_APIMESS
-                                    "fstReaderInit(), geom uncompress rc = %d, exiting.\n",
-                                    rc);
-                            exit(255);
-                        }
-
-                        free(cdata);
-                    } else {
-                        fstFread(ucdata, uclen, 1, xc->f);
-                    }
-
-                    free(xc->signal_lens);
-                    xc->signal_lens = (uint32_t *)malloc(sizeof(uint32_t) * xc->maxhandle);
-                    free(xc->signal_typs);
-                    xc->signal_typs =
-                        (unsigned char *)malloc(sizeof(unsigned char) * xc->maxhandle);
-
-                    for (i = 0; i < xc->maxhandle; i++) {
-                        int skiplen;
-                        uint64_t val = fstGetVarint32(pnt, &skiplen);
-
-                        pnt += skiplen;
-
-                        if (val) {
-                            xc->signal_lens[i] = (val != 0xFFFFFFFF) ? val : 0;
-                            xc->signal_typs[i] = FST_VT_VCD_WIRE;
-                            if (xc->signal_lens[i] > xc->longest_signal_value_len) {
-                                xc->longest_signal_value_len = xc->signal_lens[i];
-                            }
-                        } else {
-                            xc->signal_lens[i] = 8; /* backpatch in real */
-                            xc->signal_typs[i] = FST_VT_VCD_REAL;
-                            /* xc->longest_signal_value_len handled above by overly large init size
-                             */
-                        }
-                    }
-
-                    free(xc->temp_signal_value_buf);
-                    xc->temp_signal_value_buf =
-                        (unsigned char *)malloc(xc->longest_signal_value_len + 1);
-
-                    free(ucdata);
-                }
-            } else if (sectype == FST_BL_HIER) {
-                xc->contains_hier_section = 1;
-                xc->hier_pos = ftello(xc->f);
-            } else if (sectype == FST_BL_HIER_LZ4DUO) {
-                xc->contains_hier_section_lz4 = 1;
-                xc->contains_hier_section_lz4duo = 1;
-                xc->hier_pos = ftello(xc->f);
-            } else if (sectype == FST_BL_HIER_LZ4) {
-                xc->contains_hier_section_lz4 = 1;
-                xc->hier_pos = ftello(xc->f);
-            } else if (sectype == FST_BL_BLACKOUT) {
-                uint32_t i;
-                uint64_t cur_bl = 0;
-                uint64_t delta;
-
-                xc->num_blackouts = fstReaderVarint32(xc->f);
-                free(xc->blackout_times);
-                xc->blackout_times = (uint64_t *)calloc(xc->num_blackouts, sizeof(uint64_t));
-                free(xc->blackout_activity);
-                xc->blackout_activity =
-                    (unsigned char *)calloc(xc->num_blackouts, sizeof(unsigned char));
-
-                for (i = 0; i < xc->num_blackouts; i++) {
-                    xc->blackout_activity[i] = fgetc(xc->f) != 0;
-                    delta = fstReaderVarint64(xc->f);
-                    cur_bl += delta;
-                    xc->blackout_times[i] = cur_bl;
-                }
-            }
-
-            blkpos += seclen;
-            if (!hdr_seen)
-                break;
-        }
-
-        if (hdr_seen) {
-            if (xc->vc_section_count != vc_section_count_actual) {
-                xc->vc_section_count = vc_section_count_actual;
-            }
-
-            if (!xc->contains_geom_section) {
-                fstReaderProcessHier(xc, NULL); /* recreate signal_lens/signal_typs info */
-            }
-        }
-    }
-
-    return (hdr_seen);
-}
-
-fstReaderContext *fstReaderOpenForUtilitiesOnly(void)
-{
-    fstReaderContext *xc = (fstReaderContext *)calloc(1, sizeof(fstReaderContext));
-
-    return (xc);
-}
-
-fstReaderContext *fstReaderOpen(const char *nam)
-{
-    fstReaderContext *xc = (fstReaderContext *)calloc(1, sizeof(fstReaderContext));
-
-    if ((!nam) || (!(xc->f = fopen(nam, "rb")))) {
-        free(xc);
-        xc = NULL;
-    } else {
-        int flen = strlen(nam);
-        char *hf = (char *)calloc(1, flen + 6);
-        int rc;
-
-#if defined(FST_UNBUFFERED_IO)
-        setvbuf(xc->f,
-                (char *)NULL,
-                _IONBF,
-                0); /* keeps gzip from acting weird in tandem with fopen */
-#endif
-
-        memcpy(hf, nam, flen);
-        strcpy(hf + flen, ".hier");
-        xc->fh = fopen(hf, "rb");
-
-        free(hf);
-        xc->filename = strdup(nam);
-        rc = fstReaderInit(xc);
-
-        if ((rc) && (xc->vc_section_count) && (xc->maxhandle) &&
-            ((xc->fh) || (xc->contains_hier_section || (xc->contains_hier_section_lz4)))) {
-            /* more init */
-            xc->do_rewind = 1;
-        } else if (!rc) {
-            fstReaderClose(xc);
-            xc = NULL;
-        }
-    }
-
-    return (xc);
-}
-
-static void fstReaderDeallocateRvatData(fstReaderContext *xc)
-{
-    if (xc) {
-        free(xc->rvat_chain_mem);
-        xc->rvat_chain_mem = NULL;
-        free(xc->rvat_frame_data);
-        xc->rvat_frame_data = NULL;
-        free(xc->rvat_time_table);
-        xc->rvat_time_table = NULL;
-        free(xc->rvat_chain_table);
-        xc->rvat_chain_table = NULL;
-        free(xc->rvat_chain_table_lengths);
-        xc->rvat_chain_table_lengths = NULL;
-
-        xc->rvat_data_valid = 0;
-    }
-}
-
-void fstReaderClose(fstReaderContext *xc)
-{
-    if (xc) {
-        fstReaderDeallocateScopeData(xc);
-        fstReaderDeallocateRvatData(xc);
-        free(xc->rvat_sig_offs);
-        xc->rvat_sig_offs = NULL;
-
-        free(xc->process_mask);
-        xc->process_mask = NULL;
-        free(xc->blackout_times);
-        xc->blackout_times = NULL;
-        free(xc->blackout_activity);
-        xc->blackout_activity = NULL;
-        free(xc->temp_signal_value_buf);
-        xc->temp_signal_value_buf = NULL;
-        free(xc->signal_typs);
-        xc->signal_typs = NULL;
-        free(xc->signal_lens);
-        xc->signal_lens = NULL;
-        free(xc->filename);
-        xc->filename = NULL;
-        free(xc->str_scope_attr);
-        xc->str_scope_attr = NULL;
-
-        if (xc->fh) {
-            tmpfile_close(&xc->fh, &xc->fh_nam);
-        }
-
-        if (xc->f) {
-            tmpfile_close(&xc->f, &xc->f_nam);
-            if (xc->filename_unpacked) {
-                unlink(xc->filename_unpacked);
-                free(xc->filename_unpacked);
-            }
-        }
-
-        free(xc);
-    }
-}
-
-/*
- * read processing
- */
-
-/* normal read which re-interleaves the value change data */
-int fstReaderIterBlocks(fstReaderContext *ctx,
-                        void (*value_change_callback)(void *user_callback_data_pointer,
-                                                      uint64_t time,
-                                                      fstHandle facidx,
-                                                      const unsigned char *value),
-                        void *user_callback_data_pointer,
-                        FILE *fv)
-{
-    return (fstReaderIterBlocks2(ctx, value_change_callback, NULL, user_callback_data_pointer, fv));
-}
-
-int fstReaderIterBlocks2(fstReaderContext *ctx,
-                         void (*value_change_callback)(void *user_callback_data_pointer,
-                                                       uint64_t time,
-                                                       fstHandle facidx,
-                                                       const unsigned char *value),
-                         void (*value_change_callback_varlen)(void *user_callback_data_pointer,
-                                                              uint64_t time,
-                                                              fstHandle facidx,
-                                                              const unsigned char *value,
-                                                              uint32_t len),
-                         void *user_callback_data_pointer,
-                         FILE *fv)
-{
-    struct fstReaderContext *xc = (struct fstReaderContext *)ctx;
-
-    uint64_t previous_time = UINT64_MAX;
-    uint64_t *time_table = NULL;
-    uint64_t tsec_nitems;
-    unsigned int secnum = 0;
-    int blocks_skipped = 0;
-    fst_off_t blkpos = 0;
-    uint64_t seclen, beg_tim;
-    uint64_t end_tim;
-    uint64_t frame_uclen, frame_clen, frame_maxhandle, vc_maxhandle;
-    fst_off_t vc_start;
-    fst_off_t indx_pntr, indx_pos;
-    fst_off_t *chain_table = NULL;
-    uint32_t *chain_table_lengths = NULL;
-    unsigned char *chain_cmem;
-    unsigned char *pnt;
-    long chain_clen;
-    fstHandle idx, pidx = 0, i;
-    uint64_t pval;
-    uint64_t vc_maxhandle_largest = 0;
-    uint64_t tsec_uclen = 0, tsec_clen = 0;
-    int sectype;
-    uint64_t mem_required_for_traversal;
-    unsigned char *mem_for_traversal = NULL;
-    uint32_t traversal_mem_offs;
-    uint32_t *scatterptr, *headptr, *length_remaining;
-    uint32_t cur_blackout = 0;
-    int packtype;
-    unsigned char *mc_mem = NULL;
-    uint32_t mc_mem_len; /* corresponds to largest value encountered in chain_table_lengths[i] */
-    int dumpvars_state = 0;
-
-    if (!xc)
-        return (0);
-
-    scatterptr = (uint32_t *)calloc(xc->maxhandle, sizeof(uint32_t));
-    headptr = (uint32_t *)calloc(xc->maxhandle, sizeof(uint32_t));
-    length_remaining = (uint32_t *)calloc(xc->maxhandle, sizeof(uint32_t));
-
-    if (fv) {
-#ifndef FST_WRITEX_DISABLE
-        fflush(fv);
-        setvbuf(fv, (char *)NULL, _IONBF, 0); /* even buffered IO is slow so disable it and use our
-                                                 own routines that don't need seeking */
-        xc->writex_fd = fileno(fv);
-#endif
-    }
-
-    for (;;) {
-        uint32_t *tc_head = NULL;
-        uint32_t tc_head_items = 0;
-        traversal_mem_offs = 0;
-
-        fstReaderFseeko(xc, xc->f, blkpos, SEEK_SET);
-
-        sectype = fgetc(xc->f);
-        seclen = fstReaderUint64(xc->f);
-
-        if ((sectype == EOF) || (sectype == FST_BL_SKIP)) {
-#ifdef FST_DEBUG
-            fprintf(stderr, FST_APIMESS "<< EOF >>\n");
-#endif
-            break;
-        }
-
-        blkpos++;
-        if ((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS) &&
-            (sectype != FST_BL_VCDATA_DYN_ALIAS2)) {
-            blkpos += seclen;
-            continue;
-        }
-
-        if (!seclen)
-            break;
-
-        beg_tim = fstReaderUint64(xc->f);
-        end_tim = fstReaderUint64(xc->f);
-
-        if (xc->limit_range_valid) {
-            if (end_tim < xc->limit_range_start) {
-                blocks_skipped++;
-                blkpos += seclen;
-                continue;
-            }
-
-            if (beg_tim > xc->limit_range_end) /* likely the compare in for(i=0;i<tsec_nitems;i++)
-                                                  below would do this earlier */
-            {
-                break;
-            }
-        }
-
-        mem_required_for_traversal =
-            fstReaderUint64(xc->f) + 66; /* add in potential fastlz overhead */
-        mem_for_traversal = (unsigned char *)malloc(mem_required_for_traversal);
-#ifdef FST_DEBUG
-        fprintf(stderr,
-                FST_APIMESS "sec: %u seclen: %d begtim: %d endtim: %d\n",
-                secnum,
-                (int)seclen,
-                (int)beg_tim,
-                (int)end_tim);
-        fprintf(stderr,
-                FST_APIMESS "mem_required_for_traversal: %d\n",
-                (int)mem_required_for_traversal - 66);
-#endif
-        /* process time block */
-        {
-            unsigned char *ucdata;
-            unsigned char *cdata;
-            unsigned long destlen /* = tsec_uclen */; /* scan-build */
-            unsigned long sourcelen /*= tsec_clen */; /* scan-build */
-            int rc;
-            unsigned char *tpnt;
-            uint64_t tpval;
-            unsigned int ti;
-
-            if (fstReaderFseeko(xc, xc->f, blkpos + seclen - 24, SEEK_SET) != 0)
-                break;
-            tsec_uclen = fstReaderUint64(xc->f);
-            tsec_clen = fstReaderUint64(xc->f);
-            tsec_nitems = fstReaderUint64(xc->f);
-#ifdef FST_DEBUG
-            fprintf(stderr,
-                    FST_APIMESS "time section unc: %d, com: %d (%d items)\n",
-                    (int)tsec_uclen,
-                    (int)tsec_clen,
-                    (int)tsec_nitems);
-#endif
-            if (tsec_clen > seclen)
-                break; /* corrupted tsec_clen: by definition it can't be larger than size of section
-                        */
-            ucdata = (unsigned char *)malloc(tsec_uclen);
-            if (!ucdata)
-                break; /* malloc fail as tsec_uclen out of range from corrupted file */
-            destlen = tsec_uclen;
-            sourcelen = tsec_clen;
-
-            fstReaderFseeko(xc, xc->f, -24 - ((fst_off_t)tsec_clen), SEEK_CUR);
-
-            if (tsec_uclen != tsec_clen) {
-                cdata = (unsigned char *)malloc(tsec_clen);
-                fstFread(cdata, tsec_clen, 1, xc->f);
-
-                rc = uncompress(ucdata, &destlen, cdata, sourcelen);
-
-                if (rc != Z_OK) {
-                    fprintf(stderr,
-                            FST_APIMESS
-                            "fstReaderIterBlocks2(), tsec uncompress rc = %d, exiting.\n",
-                            rc);
-                    exit(255);
-                }
-
-                free(cdata);
-            } else {
-                fstFread(ucdata, tsec_uclen, 1, xc->f);
-            }
-
-            free(time_table);
-
-            if (sizeof(size_t) < sizeof(uint64_t)) {
-                /* TALOS-2023-1792 for 32b overflow */
-                uint64_t chk_64 = tsec_nitems * sizeof(uint64_t);
-                size_t chk_32 = ((size_t)tsec_nitems) * sizeof(uint64_t);
-                if (chk_64 != chk_32)
-                    chk_report_abort("TALOS-2023-1792");
-            } else {
-                uint64_t chk_64 = tsec_nitems * sizeof(uint64_t);
-                if ((chk_64 / sizeof(uint64_t)) != tsec_nitems) {
-                    chk_report_abort("TALOS-2023-1792");
-                }
-            }
-            time_table = (uint64_t *)calloc(tsec_nitems, sizeof(uint64_t));
-            tpnt = ucdata;
-            tpval = 0;
-            for (ti = 0; ti < tsec_nitems; ti++) {
-                int skiplen;
-                uint64_t val = fstGetVarint64(tpnt, &skiplen);
-                tpval = time_table[ti] = tpval + val;
-                tpnt += skiplen;
-            }
-
-            tc_head_items = tsec_nitems /* scan-build */ ? tsec_nitems : 1;
-            if (sizeof(size_t) < sizeof(uint64_t)) {
-                /* TALOS-2023-1792 for 32b overflow */
-                uint64_t chk_64 = tc_head_items * sizeof(uint32_t);
-                size_t chk_32 = ((size_t)tc_head_items) * sizeof(uint32_t);
-                if (chk_64 != chk_32)
-                    chk_report_abort("TALOS-2023-1792");
-            } else {
-                uint64_t chk_64 = tc_head_items * sizeof(uint32_t);
-                if ((chk_64 / sizeof(uint32_t)) != tc_head_items) {
-                    chk_report_abort("TALOS-2023-1792");
-                }
-            }
-            tc_head = (uint32_t *)calloc(tc_head_items, sizeof(uint32_t));
-            free(ucdata);
-        }
-
-        fstReaderFseeko(xc, xc->f, blkpos + 32, SEEK_SET);
-
-        frame_uclen = fstReaderVarint64(xc->f);
-        frame_clen = fstReaderVarint64(xc->f);
-        frame_maxhandle = fstReaderVarint64(xc->f);
-
-        if (secnum == 0) {
-            if ((beg_tim != time_table[0]) || (blocks_skipped)) {
-                unsigned char *mu = (unsigned char *)malloc(frame_uclen);
-                uint32_t sig_offs = 0;
-
-                if (fv) {
-                    char wx_buf[32];
-                    int wx_len;
-
-                    if (beg_tim) {
-                        if (dumpvars_state == 1) {
-                            wx_len = snprintf(wx_buf, 32, "$end\n");
-                            fstWritex(xc, wx_buf, wx_len);
-                            dumpvars_state = 2;
-                        }
-                        wx_len = snprintf(wx_buf, 32, "#%" PRIu64 "\n", beg_tim);
-                        fstWritex(xc, wx_buf, wx_len);
-                        if (!dumpvars_state) {
-                            wx_len = snprintf(wx_buf, 32, "$dumpvars\n");
-                            fstWritex(xc, wx_buf, wx_len);
-                            dumpvars_state = 1;
-                        }
-                    }
-                    if ((xc->num_blackouts) && (cur_blackout != xc->num_blackouts)) {
-                        if (beg_tim == xc->blackout_times[cur_blackout]) {
-                            wx_len =
-                                snprintf(wx_buf,
-                                         32,
-                                         "$dump%s $end\n",
-                                         (xc->blackout_activity[cur_blackout++]) ? "on" : "off");
-                            fstWritex(xc, wx_buf, wx_len);
-                        }
-                    }
-                }
-
-                if (frame_uclen == frame_clen) {
-                    fstFread(mu, frame_uclen, 1, xc->f);
-                } else {
-                    unsigned char *mc = (unsigned char *)malloc(frame_clen);
-                    int rc;
-
-                    unsigned long destlen = frame_uclen;
-                    unsigned long sourcelen = frame_clen;
-
-                    fstFread(mc, sourcelen, 1, xc->f);
-                    rc = uncompress(mu, &destlen, mc, sourcelen);
-                    if (rc != Z_OK) {
-                        fprintf(stderr,
-                                FST_APIMESS
-                                "fstReaderIterBlocks2(), frame uncompress rc: %d, exiting.\n",
-                                rc);
-                        exit(255);
-                    }
-                    free(mc);
-                }
-
-                for (idx = 0; idx < frame_maxhandle; idx++) {
-                    int process_idx = idx / 8;
-                    int process_bit = idx & 7;
-
-                    if (xc->process_mask[process_idx] & (1 << process_bit)) {
-                        if (xc->signal_lens[idx] <= 1) {
-                            if (xc->signal_lens[idx] == 1) {
-                                unsigned char val = mu[sig_offs];
-                                if (value_change_callback) {
-                                    xc->temp_signal_value_buf[0] = val;
-                                    xc->temp_signal_value_buf[1] = 0;
-                                    value_change_callback(user_callback_data_pointer,
-                                                          beg_tim,
-                                                          idx + 1,
-                                                          xc->temp_signal_value_buf);
-                                } else {
-                                    if (fv) {
-                                        char vcd_id[16];
-
-                                        int vcdid_len = fstVcdIDForFwrite(vcd_id + 1, idx + 1);
-                                        vcd_id[0] = val; /* collapse 3 writes into one I/O call */
-                                        vcd_id[vcdid_len + 1] = '\n';
-                                        fstWritex(xc, vcd_id, vcdid_len + 2);
-                                    }
-                                }
-                            } else {
-                                /* variable-length ("0" length) records have no initial state */
-                            }
-                        } else {
-                            if (xc->signal_typs[idx] != FST_VT_VCD_REAL) {
-                                if (value_change_callback) {
-                                    if (xc->signal_lens[idx] > xc->longest_signal_value_len) {
-                                        chk_report_abort("TALOS-2023-1797");
-                                    }
-                                    memcpy(xc->temp_signal_value_buf,
-                                           mu + sig_offs,
-                                           xc->signal_lens[idx]);
-                                    xc->temp_signal_value_buf[xc->signal_lens[idx]] = 0;
-                                    value_change_callback(user_callback_data_pointer,
-                                                          beg_tim,
-                                                          idx + 1,
-                                                          xc->temp_signal_value_buf);
-                                } else {
-                                    if (fv) {
-                                        char vcd_id[16];
-                                        int vcdid_len = fstVcdIDForFwrite(vcd_id + 1, idx + 1);
-
-                                        vcd_id[0] =
-                                            (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p';
-                                        fstWritex(xc, vcd_id, 1);
-                                        if ((sig_offs + xc->signal_lens[idx]) > frame_uclen) {
-                                            chk_report_abort("TALOS-2023-1793");
-                                        }
-                                        fstWritex(xc, mu + sig_offs, xc->signal_lens[idx]);
-
-                                        vcd_id[0] = ' '; /* collapse 3 writes into one I/O call */
-                                        vcd_id[vcdid_len + 1] = '\n';
-                                        fstWritex(xc, vcd_id, vcdid_len + 2);
-                                    }
-                                }
-                            } else {
-                                double d;
-                                unsigned char *clone_d;
-                                unsigned char *srcdata = mu + sig_offs;
-
-                                if (value_change_callback) {
-                                    if (xc->native_doubles_for_cb) {
-                                        if (xc->double_endian_match) {
-                                            clone_d = srcdata;
-                                        } else {
-                                            int j;
-
-                                            clone_d = (unsigned char *)&d;
-                                            for (j = 0; j < 8; j++) {
-                                                clone_d[j] = srcdata[7 - j];
-                                            }
-                                        }
-                                        value_change_callback(user_callback_data_pointer,
-                                                              beg_tim,
-                                                              idx + 1,
-                                                              clone_d);
-                                    } else {
-                                        clone_d = (unsigned char *)&d;
-                                        if (xc->double_endian_match) {
-                                            memcpy(clone_d, srcdata, 8);
-                                        } else {
-                                            int j;
-
-                                            for (j = 0; j < 8; j++) {
-                                                clone_d[j] = srcdata[7 - j];
-                                            }
-                                        }
-                                        snprintf((char *)xc->temp_signal_value_buf,
-                                                 xc->longest_signal_value_len + 1,
-                                                 "%.16g",
-                                                 d);
-                                        value_change_callback(user_callback_data_pointer,
-                                                              beg_tim,
-                                                              idx + 1,
-                                                              xc->temp_signal_value_buf);
-                                    }
-                                } else {
-                                    if (fv) {
-                                        char vcdid_buf[16];
-                                        char wx_buf[64];
-                                        int wx_len;
-
-                                        clone_d = (unsigned char *)&d;
-                                        if (xc->double_endian_match) {
-                                            memcpy(clone_d, srcdata, 8);
-                                        } else {
-                                            int j;
-
-                                            for (j = 0; j < 8; j++) {
-                                                clone_d[j] = srcdata[7 - j];
-                                            }
-                                        }
-
-                                        fstVcdID(vcdid_buf, idx + 1);
-                                        wx_len = snprintf(wx_buf, 64, "r%.16g %s\n", d, vcdid_buf);
-                                        fstWritex(xc, wx_buf, wx_len);
-                                    }
-                                }
-                            }
-                        }
-                    }
-
-                    sig_offs += xc->signal_lens[idx];
-                }
-
-                free(mu);
-                fstReaderFseeko(xc, xc->f, -((fst_off_t)frame_clen), SEEK_CUR);
-            }
-        }
-
-        fstReaderFseeko(xc, xc->f, (fst_off_t)frame_clen, SEEK_CUR); /* skip past compressed data */
-
-        vc_maxhandle = fstReaderVarint64(xc->f);
-        vc_start = ftello(xc->f); /* points to '!' character */
-        packtype = fgetc(xc->f);
-
-#ifdef FST_DEBUG
-        fprintf(stderr,
-                FST_APIMESS "frame_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n",
-                (int)frame_uclen,
-                (int)frame_clen,
-                (int)frame_maxhandle);
-        fprintf(stderr,
-                FST_APIMESS "vc_maxhandle: %d, packtype: %c\n",
-                (int)vc_maxhandle,
-                packtype);
-#endif
-
-        indx_pntr = blkpos + seclen - 24 - tsec_clen - 8;
-        fstReaderFseeko(xc, xc->f, indx_pntr, SEEK_SET);
-        chain_clen = fstReaderUint64(xc->f);
-        indx_pos = indx_pntr - chain_clen;
-#ifdef FST_DEBUG
-        fprintf(stderr, FST_APIMESS "indx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen);
-#endif
-        chain_cmem = (unsigned char *)malloc(chain_clen);
-        if (!chain_cmem)
-            goto block_err;
-        fstReaderFseeko(xc, xc->f, indx_pos, SEEK_SET);
-        fstFread(chain_cmem, chain_clen, 1, xc->f);
-
-        if (vc_maxhandle > vc_maxhandle_largest) {
-            free(chain_table);
-            free(chain_table_lengths);
-
-            vc_maxhandle_largest = vc_maxhandle;
-
-            if (!(vc_maxhandle + 1)) {
-                chk_report_abort("TALOS-2023-1798");
-            }
-
-            if (sizeof(size_t) < sizeof(uint64_t)) {
-                /* TALOS-2023-1798 for 32b overflow */
-                uint64_t chk_64 = (vc_maxhandle + 1) * sizeof(fst_off_t);
-                size_t chk_32 = ((size_t)(vc_maxhandle + 1)) * sizeof(fst_off_t);
-                if (chk_64 != chk_32)
-                    chk_report_abort("TALOS-2023-1798");
-            } else {
-                uint64_t chk_64 = (vc_maxhandle + 1) * sizeof(fst_off_t);
-                if ((chk_64 / sizeof(fst_off_t)) != (vc_maxhandle + 1)) {
-                    chk_report_abort("TALOS-2023-1798");
-                }
-            }
-            chain_table = (fst_off_t *)calloc((vc_maxhandle + 1), sizeof(fst_off_t));
-
-            if (sizeof(size_t) < sizeof(uint64_t)) {
-                /* TALOS-2023-1798 for 32b overflow */
-                uint64_t chk_64 = (vc_maxhandle + 1) * sizeof(uint32_t);
-                size_t chk_32 = ((size_t)(vc_maxhandle + 1)) * sizeof(uint32_t);
-                if (chk_64 != chk_32)
-                    chk_report_abort("TALOS-2023-1798");
-            } else {
-                uint64_t chk_64 = (vc_maxhandle + 1) * sizeof(uint32_t);
-                if ((chk_64 / sizeof(uint32_t)) != (vc_maxhandle + 1)) {
-                    chk_report_abort("TALOS-2023-1798");
-                }
-            }
-            chain_table_lengths = (uint32_t *)calloc((vc_maxhandle + 1), sizeof(uint32_t));
-        }
-
-        if (!chain_table || !chain_table_lengths)
-            goto block_err;
-
-        pnt = chain_cmem;
-        idx = 0;
-        pval = 0;
-
-        if (sectype == FST_BL_VCDATA_DYN_ALIAS2) {
-            uint32_t prev_alias = 0;
-
-            do {
-                int skiplen;
-
-                if (*pnt & 0x01) {
-                    int64_t shval = fstGetSVarint64(pnt, &skiplen) >> 1;
-                    if (shval > 0) {
-                        pval = chain_table[idx] = pval + shval;
-                        if (idx) {
-                            chain_table_lengths[pidx] = pval - chain_table[pidx];
-                        }
-                        pidx = idx++;
-                    } else if (shval < 0) {
-                        chain_table[idx] =
-                            0; /* need to explicitly zero as calloc above might not run */
-                        chain_table_lengths[idx] = prev_alias =
-                            shval; /* because during this loop iter would give stale data! */
-                        idx++;
-                    } else {
-                        chain_table[idx] =
-                            0; /* need to explicitly zero as calloc above might not run */
-                        chain_table_lengths[idx] =
-                            prev_alias; /* because during this loop iter would give stale data! */
-                        idx++;
-                    }
-                } else {
-                    uint64_t val = fstGetVarint32(pnt, &skiplen);
-
-                    fstHandle loopcnt = val >> 1;
-                    if ((idx + loopcnt - 1) > vc_maxhandle) /* TALOS-2023-1789 */
-                    {
-                        chk_report_abort("TALOS-2023-1789");
-                    }
-
-                    for (i = 0; i < loopcnt; i++) {
-                        chain_table[idx++] = 0;
-                    }
-                }
-
-                pnt += skiplen;
-            } while (pnt != (chain_cmem + chain_clen));
-        } else {
-            do {
-                int skiplen;
-                uint64_t val = fstGetVarint32(pnt, &skiplen);
-
-                if (!val) {
-                    pnt += skiplen;
-                    val = fstGetVarint32(pnt, &skiplen);
-                    chain_table[idx] =
-                        0; /* need to explicitly zero as calloc above might not run */
-                    chain_table_lengths[idx] =
-                        -val; /* because during this loop iter would give stale data! */
-                    idx++;
-                } else if (val & 1) {
-                    pval = chain_table[idx] = pval + (val >> 1);
-                    if (idx) {
-                        chain_table_lengths[pidx] = pval - chain_table[pidx];
-                    }
-                    pidx = idx++;
-                } else {
-                    fstHandle loopcnt = val >> 1;
-
-                    if ((idx + loopcnt - 1) > vc_maxhandle) /* TALOS-2023-1789 */
-                    {
-                        chk_report_abort("TALOS-2023-1789");
-                    }
-
-                    for (i = 0; i < loopcnt; i++) {
-                        chain_table[idx++] = 0;
-                    }
-                }
-
-                pnt += skiplen;
-            } while (pnt != (chain_cmem + chain_clen));
-        }
-
-        chain_table[idx] = indx_pos - vc_start;
-        chain_table_lengths[pidx] = chain_table[idx] - chain_table[pidx];
-
-        for (i = 0; i < idx; i++) {
-            int32_t v32 = chain_table_lengths[i];
-            if ((v32 < 0) && (!chain_table[i])) {
-                v32 = -v32;
-                v32--;
-                if (((uint32_t)v32) < i) /* sanity check */
-                {
-                    chain_table[i] = chain_table[v32];
-                    chain_table_lengths[i] = chain_table_lengths[v32];
-                }
-            }
-        }
-
-#ifdef FST_DEBUG
-        fprintf(stderr, FST_APIMESS "decompressed chain idx len: %" PRIu32 "\n", idx);
-#endif
-
-        mc_mem_len = 16384;
-        mc_mem = (unsigned char *)malloc(mc_mem_len); /* buffer for compressed reads */
-
-        /* check compressed VC data */
-        if (idx > xc->maxhandle)
-            idx = xc->maxhandle;
-        for (i = 0; i < idx; i++) {
-            if (chain_table[i]) {
-                int process_idx = i / 8;
-                int process_bit = i & 7;
-
-                if (xc->process_mask[process_idx] & (1 << process_bit)) {
-                    int rc = Z_OK;
-                    uint32_t val;
-                    uint32_t skiplen;
-                    uint32_t tdelta;
-
-                    fstReaderFseeko(xc, xc->f, vc_start + chain_table[i], SEEK_SET);
-                    val = fstReaderVarint32WithSkip(xc->f, &skiplen);
-                    if (val) {
-                        unsigned char *mu =
-                            mem_for_traversal + traversal_mem_offs; /* uncomp: dst */
-                        unsigned char *mc; /* comp:   src */
-                        unsigned long destlen = val;
-                        unsigned long sourcelen = chain_table_lengths[i];
-
-                        if (traversal_mem_offs >= mem_required_for_traversal) {
-                            chk_report_abort("TALOS-2023-1785");
-                        }
-
-                        if (mc_mem_len < chain_table_lengths[i]) {
-                            free(mc_mem);
-                            mc_mem = (unsigned char *)malloc(mc_mem_len = chain_table_lengths[i]);
-                        }
-                        mc = mc_mem;
-
-                        fstFread(mc, chain_table_lengths[i], 1, xc->f);
-
-                        switch (packtype) {
-                            case '4':
-                                rc = (destlen ==
-                                      (unsigned long)LZ4_decompress_safe_partial((char *)mc,
-                                                                                 (char *)mu,
-                                                                                 sourcelen,
-                                                                                 destlen,
-                                                                                 destlen))
-                                         ? Z_OK
-                                         : Z_DATA_ERROR;
-                                break;
-                            case 'F':
-                                fastlz_decompress(mc,
-                                                  sourcelen,
-                                                  mu,
-                                                  destlen); /* rc appears unreliable */
-                                break;
-                            default:
-                                rc = uncompress(mu, &destlen, mc, sourcelen);
-                                break;
-                        }
-
-                        /* data to process is for(j=0;j<destlen;j++) in mu[j] */
-                        headptr[i] = traversal_mem_offs;
-                        length_remaining[i] = val;
-                        traversal_mem_offs += val;
-                    } else {
-                        int destlen = chain_table_lengths[i] - skiplen;
-                        unsigned char *mu = mem_for_traversal + traversal_mem_offs;
-
-                        if (traversal_mem_offs >= mem_required_for_traversal) {
-                            chk_report_abort("TALOS-2023-1785");
-                        }
-
-                        fstFread(mu, destlen, 1, xc->f);
-                        /* data to process is for(j=0;j<destlen;j++) in mu[j] */
-                        headptr[i] = traversal_mem_offs;
-                        length_remaining[i] = destlen;
-                        traversal_mem_offs += destlen;
-                    }
-
-                    if (rc != Z_OK) {
-                        fprintf(stderr,
-                                FST_APIMESS
-                                "fstReaderIterBlocks2(), fac: %d clen: %d (rc=%d), exiting.\n",
-                                (int)i,
-                                (int)val,
-                                rc);
-                        exit(255);
-                    }
-
-                    if (xc->signal_lens[i] == 1) {
-                        uint32_t vli = fstGetVarint32NoSkip(mem_for_traversal + headptr[i]);
-                        uint32_t shcnt = 2 << (vli & 1);
-                        tdelta = vli >> shcnt;
-                    } else {
-                        uint32_t vli = fstGetVarint32NoSkip(mem_for_traversal + headptr[i]);
-                        tdelta = vli >> 1;
-                    }
-
-                    if (tdelta >= tc_head_items) {
-                        chk_report_abort("TALOS-2023-1791");
-                    }
-
-                    scatterptr[i] = tc_head[tdelta];
-                    tc_head[tdelta] = i + 1;
-                }
-            }
-        }
-
-        free(mc_mem); /* there is no usage below for this, no real need to clear out mc_mem or
-                         mc_mem_len */
-
-        for (i = 0; i < tsec_nitems; i++) {
-            uint32_t tdelta;
-            int skiplen, skiplen2;
-            uint32_t vli;
-
-            if (fv) {
-                char wx_buf[32];
-                int wx_len;
-
-                if (time_table[i] != previous_time) {
-                    if (xc->limit_range_valid) {
-                        if (time_table[i] > xc->limit_range_end) {
-                            break;
-                        }
-                    }
-
-                    if (dumpvars_state == 1) {
-                        wx_len = snprintf(wx_buf, 32, "$end\n");
-                        fstWritex(xc, wx_buf, wx_len);
-                        dumpvars_state = 2;
-                    }
-                    wx_len = snprintf(wx_buf, 32, "#%" PRIu64 "\n", time_table[i]);
-                    fstWritex(xc, wx_buf, wx_len);
-                    if (!dumpvars_state) {
-                        wx_len = snprintf(wx_buf, 32, "$dumpvars\n");
-                        fstWritex(xc, wx_buf, wx_len);
-                        dumpvars_state = 1;
-                    }
-
-                    if ((xc->num_blackouts) && (cur_blackout != xc->num_blackouts)) {
-                        if (time_table[i] == xc->blackout_times[cur_blackout]) {
-                            wx_len =
-                                snprintf(wx_buf,
-                                         32,
-                                         "$dump%s $end\n",
-                                         (xc->blackout_activity[cur_blackout++]) ? "on" : "off");
-                            fstWritex(xc, wx_buf, wx_len);
-                        }
-                    }
-                    previous_time = time_table[i];
-                }
-            }
-
-            while (tc_head[i]) {
-                idx = tc_head[i] - 1;
-                vli = fstGetVarint32(mem_for_traversal + headptr[idx], &skiplen);
-
-                if (xc->signal_lens[idx] <= 1) {
-                    if (xc->signal_lens[idx] == 1) {
-                        unsigned char val;
-                        if (!(vli & 1)) {
-                            /* tdelta = vli >> 2; */ /* scan-build */
-                            val = ((vli >> 1) & 1) | '0';
-                        } else {
-                            /* tdelta = vli >> 4; */ /* scan-build */
-                            val = FST_RCV_STR[((vli >> 1) & 7)];
-                        }
-
-                        if (value_change_callback) {
-                            xc->temp_signal_value_buf[0] = val;
-                            xc->temp_signal_value_buf[1] = 0;
-                            value_change_callback(user_callback_data_pointer,
-                                                  time_table[i],
-                                                  idx + 1,
-                                                  xc->temp_signal_value_buf);
-                        } else {
-                            if (fv) {
-                                char vcd_id[16];
-                                int vcdid_len = fstVcdIDForFwrite(vcd_id + 1, idx + 1);
-
-                                vcd_id[0] = val;
-                                vcd_id[vcdid_len + 1] = '\n';
-                                fstWritex(xc, vcd_id, vcdid_len + 2);
-                            }
-                        }
-                        headptr[idx] += skiplen;
-                        length_remaining[idx] -= skiplen;
-
-                        tc_head[i] = scatterptr[idx];
-                        scatterptr[idx] = 0;
-
-                        if (length_remaining[idx]) {
-                            int shamt;
-                            vli = fstGetVarint32NoSkip(mem_for_traversal + headptr[idx]);
-                            shamt = 2 << (vli & 1);
-                            tdelta = vli >> shamt;
-
-                            if ((tdelta + i) >= tc_head_items) {
-                                chk_report_abort("TALOS-2023-1791");
-                            }
-
-                            scatterptr[idx] = tc_head[i + tdelta];
-                            tc_head[i + tdelta] = idx + 1;
-                        }
-                    } else {
-                        unsigned char *vdata;
-                        uint32_t len;
-
-                        vli = fstGetVarint32(mem_for_traversal + headptr[idx], &skiplen);
-                        len = fstGetVarint32(mem_for_traversal + headptr[idx] + skiplen, &skiplen2);
-                        /* tdelta = vli >> 1; */ /* scan-build */
-                        skiplen += skiplen2;
-                        vdata = mem_for_traversal + headptr[idx] + skiplen;
-
-                        if (!(vli & 1)) {
-                            if (value_change_callback_varlen) {
-                                value_change_callback_varlen(user_callback_data_pointer,
-                                                             time_table[i],
-                                                             idx + 1,
-                                                             vdata,
-                                                             len);
-                            } else {
-                                if (fv) {
-                                    char vcd_id[16];
-                                    int vcdid_len;
-
-                                    vcd_id[0] = 's';
-                                    fstWritex(xc, vcd_id, 1);
-
-                                    vcdid_len = fstVcdIDForFwrite(vcd_id + 1, idx + 1);
-                                    {
-                                        if (sizeof(size_t) < sizeof(uint64_t)) {
-                                            /* TALOS-2023-1790 for 32b overflow */
-                                            uint64_t chk_64 = len * 4 + 1;
-                                            size_t chk_32 = len * 4 + 1;
-                                            if (chk_64 != chk_32)
-                                                chk_report_abort("TALOS-2023-1790");
-                                        }
-
-                                        unsigned char *vesc = (unsigned char *)malloc(len * 4 + 1);
-                                        int vlen = fstUtilityBinToEsc(vesc, vdata, len);
-                                        fstWritex(xc, vesc, vlen);
-                                        free(vesc);
-                                    }
-
-                                    vcd_id[0] = ' ';
-                                    vcd_id[vcdid_len + 1] = '\n';
-                                    fstWritex(xc, vcd_id, vcdid_len + 2);
-                                }
-                            }
-                        }
-
-                        skiplen += len;
-                        headptr[idx] += skiplen;
-                        length_remaining[idx] -= skiplen;
-
-                        tc_head[i] = scatterptr[idx];
-                        scatterptr[idx] = 0;
-
-                        if (length_remaining[idx]) {
-                            vli = fstGetVarint32NoSkip(mem_for_traversal + headptr[idx]);
-                            tdelta = vli >> 1;
-
-                            if ((tdelta + i) >= tc_head_items) {
-                                chk_report_abort("TALOS-2023-1791");
-                            }
-
-                            scatterptr[idx] = tc_head[i + tdelta];
-                            tc_head[i + tdelta] = idx + 1;
-                        }
-                    }
-                } else {
-                    uint32_t len = xc->signal_lens[idx];
-                    unsigned char *vdata;
-
-                    vli = fstGetVarint32(mem_for_traversal + headptr[idx], &skiplen);
-                    /* tdelta = vli >> 1; */ /* scan-build */
-                    vdata = mem_for_traversal + headptr[idx] + skiplen;
-
-                    if (xc->signal_typs[idx] != FST_VT_VCD_REAL) {
-                        if (len > xc->longest_signal_value_len) {
-                            chk_report_abort("TALOS-2023-1797");
-                        }
-
-                        if (!(vli & 1)) {
-                            int byte = 0;
-                            int bit;
-                            unsigned int j;
-
-                            for (j = 0; j < len; j++) {
-                                unsigned char ch;
-                                byte = j / 8;
-                                bit = 7 - (j & 7);
-                                ch = ((vdata[byte] >> bit) & 1) | '0';
-                                xc->temp_signal_value_buf[j] = ch;
-                            }
-                            xc->temp_signal_value_buf[j] = 0;
-
-                            if (value_change_callback) {
-                                value_change_callback(user_callback_data_pointer,
-                                                      time_table[i],
-                                                      idx + 1,
-                                                      xc->temp_signal_value_buf);
-                            } else {
-                                if (fv) {
-                                    unsigned char ch_bp =
-                                        (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p';
-
-                                    fstWritex(xc, &ch_bp, 1);
-                                    fstWritex(xc, xc->temp_signal_value_buf, len);
-                                }
-                            }
-
-                            len = byte + 1;
-                        } else {
-                            if (value_change_callback) {
-                                memcpy(xc->temp_signal_value_buf, vdata, len);
-                                xc->temp_signal_value_buf[len] = 0;
-                                value_change_callback(user_callback_data_pointer,
-                                                      time_table[i],
-                                                      idx + 1,
-                                                      xc->temp_signal_value_buf);
-                            } else {
-                                if (fv) {
-                                    unsigned char ch_bp =
-                                        (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p';
-                                    uint64_t mem_required_for_traversal_chk =
-                                        vdata - mem_for_traversal + len;
-
-                                    fstWritex(xc, &ch_bp, 1);
-                                    if (mem_required_for_traversal_chk >
-                                        mem_required_for_traversal) {
-                                        chk_report_abort("TALOS-2023-1793");
-                                    }
-                                    fstWritex(xc, vdata, len);
-                                }
-                            }
-                        }
-                    } else {
-                        double d;
-                        unsigned char *clone_d /*= (unsigned char *)&d */; /* scan-build */
-                        unsigned char buf[8];
-                        unsigned char *srcdata;
-
-                        if (!(vli & 1)) /* very rare case, but possible */
-                        {
-                            int bit;
-                            int j;
-
-                            for (j = 0; j < 8; j++) {
-                                unsigned char ch;
-                                bit = 7 - (j & 7);
-                                ch = ((vdata[0] >> bit) & 1) | '0';
-                                buf[j] = ch;
-                            }
-
-                            len = 1;
-                            srcdata = buf;
-                        } else {
-                            srcdata = vdata;
-                        }
-
-                        if (value_change_callback) {
-                            if (xc->native_doubles_for_cb) {
-                                if (xc->double_endian_match) {
-                                    clone_d = srcdata;
-                                } else {
-                                    int j;
-
-                                    clone_d = (unsigned char *)&d;
-                                    for (j = 0; j < 8; j++) {
-                                        clone_d[j] = srcdata[7 - j];
-                                    }
-                                }
-                                value_change_callback(user_callback_data_pointer,
-                                                      time_table[i],
-                                                      idx + 1,
-                                                      clone_d);
-                            } else {
-                                clone_d = (unsigned char *)&d;
-                                if (xc->double_endian_match) {
-                                    memcpy(clone_d, srcdata, 8);
-                                } else {
-                                    int j;
-
-                                    for (j = 0; j < 8; j++) {
-                                        clone_d[j] = srcdata[7 - j];
-                                    }
-                                }
-                                snprintf((char *)xc->temp_signal_value_buf,
-                                         xc->longest_signal_value_len + 1,
-                                         "%.16g",
-                                         d);
-                                value_change_callback(user_callback_data_pointer,
-                                                      time_table[i],
-                                                      idx + 1,
-                                                      xc->temp_signal_value_buf);
-                            }
-                        } else {
-                            if (fv) {
-                                char wx_buf[32];
-                                int wx_len;
-
-                                clone_d = (unsigned char *)&d;
-                                if (xc->double_endian_match) {
-                                    memcpy(clone_d, srcdata, 8);
-                                } else {
-                                    int j;
-
-                                    for (j = 0; j < 8; j++) {
-                                        clone_d[j] = srcdata[7 - j];
-                                    }
-                                }
-
-                                wx_len = snprintf(wx_buf, 32, "r%.16g", d);
-                                fstWritex(xc, wx_buf, wx_len);
-                            }
-                        }
-                    }
-
-                    if (fv) {
-                        char vcd_id[16];
-                        int vcdid_len = fstVcdIDForFwrite(vcd_id + 1, idx + 1);
-                        vcd_id[0] = ' ';
-                        vcd_id[vcdid_len + 1] = '\n';
-                        fstWritex(xc, vcd_id, vcdid_len + 2);
-                    }
-
-                    skiplen += len;
-                    headptr[idx] += skiplen;
-                    length_remaining[idx] -= skiplen;
-
-                    tc_head[i] = scatterptr[idx];
-                    scatterptr[idx] = 0;
-
-                    if (length_remaining[idx]) {
-                        vli = fstGetVarint32NoSkip(mem_for_traversal + headptr[idx]);
-                        tdelta = vli >> 1;
-
-                        if ((tdelta + i) >= tc_head_items) {
-                            chk_report_abort("TALOS-2023-1791");
-                        }
-
-                        scatterptr[idx] = tc_head[i + tdelta];
-                        tc_head[i + tdelta] = idx + 1;
-                    }
-                }
-            }
-        }
-
-    block_err:
-        free(tc_head);
-        free(chain_cmem);
-        free(mem_for_traversal);
-        mem_for_traversal = NULL;
-
-        secnum++;
-        if (secnum == xc->vc_section_count)
-            break; /* in case file is growing, keep with original block count */
-        blkpos += seclen;
-    }
-
-    if (mem_for_traversal)
-        free(mem_for_traversal); /* scan-build */
-    free(length_remaining);
-    free(headptr);
-    free(scatterptr);
-
-    if (chain_table)
-        free(chain_table);
-    if (chain_table_lengths)
-        free(chain_table_lengths);
-
-    free(time_table);
-
-#ifndef FST_WRITEX_DISABLE
-    if (fv) {
-        fstWritex(xc, NULL, 0);
-    }
-#endif
-
-    return (1);
-}
-
-/* rvat functions */
-
-static char *fstExtractRvatDataFromFrame(fstReaderContext *xc, fstHandle facidx, char *buf)
-{
-    if (facidx >= xc->rvat_frame_maxhandle) {
-        return (NULL);
-    }
-
-    if (xc->signal_lens[facidx] == 1) {
-        buf[0] = (char)xc->rvat_frame_data[xc->rvat_sig_offs[facidx]];
-        buf[1] = 0;
-    } else {
-        if (xc->signal_typs[facidx] != FST_VT_VCD_REAL) {
-            memcpy(buf, xc->rvat_frame_data + xc->rvat_sig_offs[facidx], xc->signal_lens[facidx]);
-            buf[xc->signal_lens[facidx]] = 0;
-        } else {
-            double d;
-            unsigned char *clone_d = (unsigned char *)&d;
-            unsigned char *srcdata = xc->rvat_frame_data + xc->rvat_sig_offs[facidx];
-
-            if (xc->double_endian_match) {
-                memcpy(clone_d, srcdata, 8);
-            } else {
-                int j;
-
-                for (j = 0; j < 8; j++) {
-                    clone_d[j] = srcdata[7 - j];
-                }
-            }
-
-            snprintf((char *)buf, 32, "%.16g", d); /* this will write 18 bytes */
-        }
-    }
-
-    return (buf);
-}
-
-char *fstReaderGetValueFromHandleAtTime(fstReaderContext *xc,
-                                        uint64_t tim,
-                                        fstHandle facidx,
-                                        char *buf)
-{
-    fst_off_t blkpos = 0, prev_blkpos;
-    uint64_t beg_tim, end_tim, beg_tim2, end_tim2;
-    int sectype;
-#ifdef FST_DEBUG
-    unsigned int secnum = 0;
-#endif
-    uint64_t seclen;
-    uint64_t tsec_uclen = 0, tsec_clen = 0;
-    uint64_t tsec_nitems;
-    uint64_t frame_uclen, frame_clen;
-#ifdef FST_DEBUG
-    uint64_t mem_required_for_traversal;
-#endif
-    fst_off_t indx_pntr, indx_pos;
-    long chain_clen;
-    unsigned char *chain_cmem;
-    unsigned char *pnt;
-    fstHandle idx, pidx = 0, i;
-    uint64_t pval;
-
-    if ((!xc) || (!facidx) || (facidx > xc->maxhandle) || (!buf) ||
-        (!xc->signal_lens[facidx - 1])) {
-        return (NULL);
-    }
-
-    if (!xc->rvat_sig_offs) {
-        uint32_t cur_offs = 0;
-
-        xc->rvat_sig_offs = (uint32_t *)calloc(xc->maxhandle, sizeof(uint32_t));
-        for (i = 0; i < xc->maxhandle; i++) {
-            xc->rvat_sig_offs[i] = cur_offs;
-            cur_offs += xc->signal_lens[i];
-        }
-    }
-
-    if (xc->rvat_data_valid) {
-        if ((xc->rvat_beg_tim <= tim) && (tim <= xc->rvat_end_tim)) {
-            goto process_value;
-        }
-
-        fstReaderDeallocateRvatData(xc);
-    }
-
-    xc->rvat_chain_pos_valid = 0;
-
-    for (;;) {
-        fstReaderFseeko(xc, xc->f, (prev_blkpos = blkpos), SEEK_SET);
-
-        sectype = fgetc(xc->f);
-        seclen = fstReaderUint64(xc->f);
-
-        if ((sectype == EOF) || (sectype == FST_BL_SKIP) || (!seclen)) {
-            return (NULL); /* if this loop exits on break, it's successful */
-        }
-
-        blkpos++;
-        if ((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS) &&
-            (sectype != FST_BL_VCDATA_DYN_ALIAS2)) {
-            blkpos += seclen;
-            continue;
-        }
-
-        beg_tim = fstReaderUint64(xc->f);
-        end_tim = fstReaderUint64(xc->f);
-
-        if ((beg_tim <= tim) && (tim <= end_tim)) {
-            if ((tim == end_tim) && (tim != xc->end_time)) {
-                fst_off_t cached_pos = ftello(xc->f);
-                fstReaderFseeko(xc, xc->f, blkpos, SEEK_SET);
-
-                sectype = fgetc(xc->f);
-                seclen = fstReaderUint64(xc->f);
-
-                beg_tim2 = fstReaderUint64(xc->f);
-                end_tim2 = fstReaderUint64(xc->f);
-
-                if (((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS) &&
-                     (sectype != FST_BL_VCDATA_DYN_ALIAS2)) ||
-                    (!seclen) || (beg_tim2 != tim)) {
-                    blkpos = prev_blkpos;
-                    break;
-                }
-                beg_tim = beg_tim2;
-                end_tim = end_tim2;
-                fstReaderFseeko(xc, xc->f, cached_pos, SEEK_SET);
-            }
-            break;
-        }
-
-        blkpos += seclen;
-#ifdef FST_DEBUG
-        secnum++;
-#endif
-    }
-
-    xc->rvat_beg_tim = beg_tim;
-    xc->rvat_end_tim = end_tim;
-
-#ifdef FST_DEBUG
-    mem_required_for_traversal =
-#endif
-        fstReaderUint64(xc->f);
-
-#ifdef FST_DEBUG
-    fprintf(stderr,
-            FST_APIMESS "rvat sec: %u seclen: %d begtim: %d endtim: %d\n",
-            secnum,
-            (int)seclen,
-            (int)beg_tim,
-            (int)end_tim);
-    fprintf(stderr,
-            FST_APIMESS "mem_required_for_traversal: %d\n",
-            (int)mem_required_for_traversal);
-#endif
-
-    /* process time block */
-    {
-        unsigned char *ucdata;
-        unsigned char *cdata;
-        unsigned long destlen /* = tsec_uclen */; /* scan-build */
-        unsigned long sourcelen /* = tsec_clen */; /* scan-build */
-        int rc;
-        unsigned char *tpnt;
-        uint64_t tpval;
-        unsigned int ti;
-
-        fstReaderFseeko(xc, xc->f, blkpos + seclen - 24, SEEK_SET);
-        tsec_uclen = fstReaderUint64(xc->f);
-        tsec_clen = fstReaderUint64(xc->f);
-        tsec_nitems = fstReaderUint64(xc->f);
-#ifdef FST_DEBUG
-        fprintf(stderr,
-                FST_APIMESS "time section unc: %d, com: %d (%d items)\n",
-                (int)tsec_uclen,
-                (int)tsec_clen,
-                (int)tsec_nitems);
-#endif
-        ucdata = (unsigned char *)malloc(tsec_uclen);
-        destlen = tsec_uclen;
-        sourcelen = tsec_clen;
-
-        fstReaderFseeko(xc, xc->f, -24 - ((fst_off_t)tsec_clen), SEEK_CUR);
-        if (tsec_uclen != tsec_clen) {
-            cdata = (unsigned char *)malloc(tsec_clen);
-            fstFread(cdata, tsec_clen, 1, xc->f);
-
-            rc = uncompress(ucdata, &destlen, cdata, sourcelen);
-
-            if (rc != Z_OK) {
-                fprintf(stderr,
-                        FST_APIMESS
-                        "fstReaderGetValueFromHandleAtTime(), tsec uncompress rc = %d, exiting.\n",
-                        rc);
-                exit(255);
-            }
-
-            free(cdata);
-        } else {
-            fstFread(ucdata, tsec_uclen, 1, xc->f);
-        }
-
-        xc->rvat_time_table = (uint64_t *)calloc(tsec_nitems, sizeof(uint64_t));
-        tpnt = ucdata;
-        tpval = 0;
-        for (ti = 0; ti < tsec_nitems; ti++) {
-            int skiplen;
-            uint64_t val = fstGetVarint64(tpnt, &skiplen);
-            tpval = xc->rvat_time_table[ti] = tpval + val;
-            tpnt += skiplen;
-        }
-
-        free(ucdata);
-    }
-
-    fstReaderFseeko(xc, xc->f, blkpos + 32, SEEK_SET);
-
-    frame_uclen = fstReaderVarint64(xc->f);
-    frame_clen = fstReaderVarint64(xc->f);
-    xc->rvat_frame_maxhandle = fstReaderVarint64(xc->f);
-    xc->rvat_frame_data = (unsigned char *)malloc(frame_uclen);
-
-    if (frame_uclen == frame_clen) {
-        fstFread(xc->rvat_frame_data, frame_uclen, 1, xc->f);
-    } else {
-        unsigned char *mc = (unsigned char *)malloc(frame_clen);
-        int rc;
-
-        unsigned long destlen = frame_uclen;
-        unsigned long sourcelen = frame_clen;
-
-        fstFread(mc, sourcelen, 1, xc->f);
-        rc = uncompress(xc->rvat_frame_data, &destlen, mc, sourcelen);
-        if (rc != Z_OK) {
-            fprintf(stderr,
-                    FST_APIMESS
-                    "fstReaderGetValueFromHandleAtTime(), frame decompress rc: %d, exiting.\n",
-                    rc);
-            exit(255);
-        }
-        free(mc);
-    }
-
-    xc->rvat_vc_maxhandle = fstReaderVarint64(xc->f);
-    xc->rvat_vc_start = ftello(xc->f); /* points to '!' character */
-    xc->rvat_packtype = fgetc(xc->f);
-
-#ifdef FST_DEBUG
-    fprintf(stderr,
-            FST_APIMESS "frame_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n",
-            (int)frame_uclen,
-            (int)frame_clen,
-            (int)xc->rvat_frame_maxhandle);
-    fprintf(stderr, FST_APIMESS "vc_maxhandle: %d\n", (int)xc->rvat_vc_maxhandle);
-#endif
-
-    indx_pntr = blkpos + seclen - 24 - tsec_clen - 8;
-    fstReaderFseeko(xc, xc->f, indx_pntr, SEEK_SET);
-    chain_clen = fstReaderUint64(xc->f);
-    indx_pos = indx_pntr - chain_clen;
-#ifdef FST_DEBUG
-    fprintf(stderr, FST_APIMESS "indx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen);
-#endif
-    chain_cmem = (unsigned char *)malloc(chain_clen);
-    fstReaderFseeko(xc, xc->f, indx_pos, SEEK_SET);
-    fstFread(chain_cmem, chain_clen, 1, xc->f);
-
-    xc->rvat_chain_table = (fst_off_t *)calloc((xc->rvat_vc_maxhandle + 1), sizeof(fst_off_t));
-    xc->rvat_chain_table_lengths =
-        (uint32_t *)calloc((xc->rvat_vc_maxhandle + 1), sizeof(uint32_t));
-
-    pnt = chain_cmem;
-    idx = 0;
-    pval = 0;
-
-    if (sectype == FST_BL_VCDATA_DYN_ALIAS2) {
-        uint32_t prev_alias = 0;
-
-        do {
-            int skiplen;
-
-            if (*pnt & 0x01) {
-                int64_t shval = fstGetSVarint64(pnt, &skiplen) >> 1;
-                if (shval > 0) {
-                    pval = xc->rvat_chain_table[idx] = pval + shval;
-                    if (idx) {
-                        xc->rvat_chain_table_lengths[pidx] = pval - xc->rvat_chain_table[pidx];
-                    }
-                    pidx = idx++;
-                } else if (shval < 0) {
-                    xc->rvat_chain_table[idx] =
-                        0; /* need to explicitly zero as calloc above might not run */
-                    xc->rvat_chain_table_lengths[idx] = prev_alias =
-                        shval; /* because during this loop iter would give stale data! */
-                    idx++;
-                } else {
-                    xc->rvat_chain_table[idx] =
-                        0; /* need to explicitly zero as calloc above might not run */
-                    xc->rvat_chain_table_lengths[idx] =
-                        prev_alias; /* because during this loop iter would give stale data! */
-                    idx++;
-                }
-            } else {
-                uint64_t val = fstGetVarint32(pnt, &skiplen);
-
-                fstHandle loopcnt = val >> 1;
-                for (i = 0; i < loopcnt; i++) {
-                    xc->rvat_chain_table[idx++] = 0;
-                }
-            }
-
-            pnt += skiplen;
-        } while (pnt != (chain_cmem + chain_clen));
-    } else {
-        do {
-            int skiplen;
-            uint64_t val = fstGetVarint32(pnt, &skiplen);
-
-            if (!val) {
-                pnt += skiplen;
-                val = fstGetVarint32(pnt, &skiplen);
-                xc->rvat_chain_table[idx] = 0;
-                xc->rvat_chain_table_lengths[idx] = -val;
-                idx++;
-            } else if (val & 1) {
-                pval = xc->rvat_chain_table[idx] = pval + (val >> 1);
-                if (idx) {
-                    xc->rvat_chain_table_lengths[pidx] = pval - xc->rvat_chain_table[pidx];
-                }
-                pidx = idx++;
-            } else {
-                fstHandle loopcnt = val >> 1;
-                for (i = 0; i < loopcnt; i++) {
-                    xc->rvat_chain_table[idx++] = 0;
-                }
-            }
-
-            pnt += skiplen;
-        } while (pnt != (chain_cmem + chain_clen));
-    }
-
-    free(chain_cmem);
-    xc->rvat_chain_table[idx] = indx_pos - xc->rvat_vc_start;
-    xc->rvat_chain_table_lengths[pidx] = xc->rvat_chain_table[idx] - xc->rvat_chain_table[pidx];
-
-    for (i = 0; i < idx; i++) {
-        int32_t v32 = xc->rvat_chain_table_lengths[i];
-        if ((v32 < 0) && (!xc->rvat_chain_table[i])) {
-            v32 = -v32;
-            v32--;
-            if (((uint32_t)v32) < i) /* sanity check */
-            {
-                xc->rvat_chain_table[i] = xc->rvat_chain_table[v32];
-                xc->rvat_chain_table_lengths[i] = xc->rvat_chain_table_lengths[v32];
-            }
-        }
-    }
-
-#ifdef FST_DEBUG
-    fprintf(stderr, FST_APIMESS "decompressed chain idx len: %" PRIu32 "\n", idx);
-#endif
-
-    xc->rvat_data_valid = 1;
-
-/* all data at this point is loaded or resident in fst cache, process and return appropriate value
- */
-process_value:
-    if (facidx > xc->rvat_vc_maxhandle) {
-        return (NULL);
-    }
-
-    facidx--; /* scale down for array which starts at zero */
-
-    if (((tim == xc->rvat_beg_tim) && (!xc->rvat_chain_table[facidx])) ||
-        (!xc->rvat_chain_table[facidx])) {
-        return (fstExtractRvatDataFromFrame(xc, facidx, buf));
-    }
-
-    if (facidx != xc->rvat_chain_facidx) {
-        if (xc->rvat_chain_mem) {
-            free(xc->rvat_chain_mem);
-            xc->rvat_chain_mem = NULL;
-
-            xc->rvat_chain_pos_valid = 0;
-        }
-    }
-
-    if (!xc->rvat_chain_mem) {
-        uint32_t skiplen;
-        fstReaderFseeko(xc, xc->f, xc->rvat_vc_start + xc->rvat_chain_table[facidx], SEEK_SET);
-        xc->rvat_chain_len = fstReaderVarint32WithSkip(xc->f, &skiplen);
-        if (xc->rvat_chain_len) {
-            unsigned char *mu = (unsigned char *)malloc(xc->rvat_chain_len);
-            unsigned char *mc = (unsigned char *)malloc(xc->rvat_chain_table_lengths[facidx]);
-            unsigned long destlen = xc->rvat_chain_len;
-            unsigned long sourcelen = xc->rvat_chain_table_lengths[facidx];
-            int rc = Z_OK;
-
-            fstFread(mc, xc->rvat_chain_table_lengths[facidx], 1, xc->f);
-
-            switch (xc->rvat_packtype) {
-                case '4':
-                    rc = (destlen == (unsigned long)LZ4_decompress_safe_partial((char *)mc,
-                                                                                (char *)mu,
-                                                                                sourcelen,
-                                                                                destlen,
-                                                                                destlen))
-                             ? Z_OK
-                             : Z_DATA_ERROR;
-                    break;
-                case 'F':
-                    fastlz_decompress(mc, sourcelen, mu, destlen); /* rc appears unreliable */
-                    break;
-                default:
-                    rc = uncompress(mu, &destlen, mc, sourcelen);
-                    break;
-            }
-
-            free(mc);
-
-            if (rc != Z_OK) {
-                fprintf(stderr,
-                        FST_APIMESS "fstReaderGetValueFromHandleAtTime(), rvat decompress clen: %d "
-                                    "(rc=%d), exiting.\n",
-                        (int)xc->rvat_chain_len,
-                        rc);
-                exit(255);
-            }
-
-            /* data to process is for(j=0;j<destlen;j++) in mu[j] */
-            xc->rvat_chain_mem = mu;
-        } else {
-            int destlen = xc->rvat_chain_table_lengths[facidx] - skiplen;
-            unsigned char *mu = (unsigned char *)malloc(xc->rvat_chain_len = destlen);
-            fstFread(mu, destlen, 1, xc->f);
-            /* data to process is for(j=0;j<destlen;j++) in mu[j] */
-            xc->rvat_chain_mem = mu;
-        }
-
-        xc->rvat_chain_facidx = facidx;
-    }
-
-    /* process value chain here */
-
-    {
-        uint32_t tidx = 0, ptidx = 0;
-        uint32_t tdelta;
-        int skiplen;
-        unsigned int iprev = xc->rvat_chain_len;
-        uint32_t pvli = 0;
-        int pskip = 0;
-
-        if ((xc->rvat_chain_pos_valid) && (tim >= xc->rvat_chain_pos_time)) {
-            i = xc->rvat_chain_pos_idx;
-            tidx = xc->rvat_chain_pos_tidx;
-        } else {
-            i = 0;
-            tidx = 0;
-            xc->rvat_chain_pos_time = xc->rvat_beg_tim;
-        }
-
-        if (xc->signal_lens[facidx] == 1) {
-            while (i < xc->rvat_chain_len) {
-                uint32_t vli = fstGetVarint32(xc->rvat_chain_mem + i, &skiplen);
-                uint32_t shcnt = 2 << (vli & 1);
-                tdelta = vli >> shcnt;
-
-                if (xc->rvat_time_table[tidx + tdelta] <= tim) {
-                    iprev = i;
-                    pvli = vli;
-                    ptidx = tidx;
-                    /* pskip = skiplen; */ /* scan-build */
-
-                    tidx += tdelta;
-                    i += skiplen;
-                } else {
-                    break;
-                }
-            }
-            if (iprev != xc->rvat_chain_len) {
-                xc->rvat_chain_pos_tidx = ptidx;
-                xc->rvat_chain_pos_idx = iprev;
-                xc->rvat_chain_pos_time = tim;
-                xc->rvat_chain_pos_valid = 1;
-
-                if (!(pvli & 1)) {
-                    buf[0] = ((pvli >> 1) & 1) | '0';
-                } else {
-                    buf[0] = FST_RCV_STR[((pvli >> 1) & 7)];
-                }
-                buf[1] = 0;
-                return (buf);
-            } else {
-                return (fstExtractRvatDataFromFrame(xc, facidx, buf));
-            }
-        } else {
-            while (i < xc->rvat_chain_len) {
-                uint32_t vli = fstGetVarint32(xc->rvat_chain_mem + i, &skiplen);
-                tdelta = vli >> 1;
-
-                if (xc->rvat_time_table[tidx + tdelta] <= tim) {
-                    iprev = i;
-                    pvli = vli;
-                    ptidx = tidx;
-                    pskip = skiplen;
-
-                    tidx += tdelta;
-                    i += skiplen;
-
-                    if (!(pvli & 1)) {
-                        i += ((xc->signal_lens[facidx] + 7) / 8);
-                    } else {
-                        i += xc->signal_lens[facidx];
-                    }
-                } else {
-                    break;
-                }
-            }
-
-            if (iprev != xc->rvat_chain_len) {
-                unsigned char *vdata = xc->rvat_chain_mem + iprev + pskip;
-
-                xc->rvat_chain_pos_tidx = ptidx;
-                xc->rvat_chain_pos_idx = iprev;
-                xc->rvat_chain_pos_time = tim;
-                xc->rvat_chain_pos_valid = 1;
-
-                if (xc->signal_typs[facidx] != FST_VT_VCD_REAL) {
-                    if (!(pvli & 1)) {
-                        int byte = 0;
-                        int bit;
-                        unsigned int j;
-
-                        for (j = 0; j < xc->signal_lens[facidx]; j++) {
-                            unsigned char ch;
-                            byte = j / 8;
-                            bit = 7 - (j & 7);
-                            ch = ((vdata[byte] >> bit) & 1) | '0';
-                            buf[j] = ch;
-                        }
-                        buf[j] = 0;
-
-                        return (buf);
-                    } else {
-                        memcpy(buf, vdata, xc->signal_lens[facidx]);
-                        buf[xc->signal_lens[facidx]] = 0;
-                        return (buf);
-                    }
-                } else {
-                    double d;
-                    unsigned char *clone_d = (unsigned char *)&d;
-                    unsigned char bufd[8];
-                    unsigned char *srcdata;
-
-                    if (!(pvli & 1)) /* very rare case, but possible */
-                    {
-                        int bit;
-                        int j;
-
-                        for (j = 0; j < 8; j++) {
-                            unsigned char ch;
-                            bit = 7 - (j & 7);
-                            ch = ((vdata[0] >> bit) & 1) | '0';
-                            bufd[j] = ch;
-                        }
-
-                        srcdata = bufd;
-                    } else {
-                        srcdata = vdata;
-                    }
-
-                    if (xc->double_endian_match) {
-                        memcpy(clone_d, srcdata, 8);
-                    } else {
-                        int j;
-
-                        for (j = 0; j < 8; j++) {
-                            clone_d[j] = srcdata[7 - j];
-                        }
-                    }
-
-                    snprintf(buf, 32, "r%.16g", d); /* this will write 19 bytes */
-                    return (buf);
-                }
-            } else {
-                return (fstExtractRvatDataFromFrame(xc, facidx, buf));
-            }
-        }
-    }
-
-    /* return(NULL); */
-}
-
-/***********************/
-/***                 ***/
-/***  jenkins hash   ***/
-/***                 ***/
-/***********************/
-
-/*
---------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-For every delta with one or two bits set, and the deltas of all three
-  high bits or all three low bits, whether the original value of a,b,c
-  is almost all zero or is uniformly distributed,
-* If mix() is run forward or backward, at least 32 bits in a,b,c
-  have at least 1/4 probability of changing.
-* If mix() is run forward, every bit of c will change between 1/3 and
-  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
-mix() was built out of 36 single-cycle latency instructions in a
-  structure that could supported 2x parallelism, like so:
-      a -= b;
-      a -= c; x = (c>>13);
-      b -= c; a ^= x;
-      b -= a; x = (a<<8);
-      c -= a; b ^= x;
-      c -= b; x = (b>>13);
-      ...
-  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
-  of that parallelism.  They've also turned some of those single-cycle
-  latency instructions into multi-cycle latency instructions.  Still,
-  this is the fastest good hash I could find.  There were about 2^^68
-  to choose from.  I only looked at a billion or so.
---------------------------------------------------------------------
-*/
-#define mix(a, b, c) \
-    { \
-        a -= b; \
-        a -= c; \
-        a ^= (c >> 13); \
-        b -= c; \
-        b -= a; \
-        b ^= (a << 8); \
-        c -= a; \
-        c -= b; \
-        c ^= (b >> 13); \
-        a -= b; \
-        a -= c; \
-        a ^= (c >> 12); \
-        b -= c; \
-        b -= a; \
-        b ^= (a << 16); \
-        c -= a; \
-        c -= b; \
-        c ^= (b >> 5); \
-        a -= b; \
-        a -= c; \
-        a ^= (c >> 3); \
-        b -= c; \
-        b -= a; \
-        b ^= (a << 10); \
-        c -= a; \
-        c -= b; \
-        c ^= (b >> 15); \
-    }
-
-/*
---------------------------------------------------------------------
-j_hash() -- hash a variable-length key into a 32-bit value
-  k       : the key (the unaligned variable-length array of bytes)
-  len     : the length of the key, counting by bytes
-  initval : can be any 4-byte value
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Every 1-bit and 2-bit delta achieves avalanche.
-About 6*len+35 instructions.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (uint8_t **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
-
-By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use for hash table lookup, or anything where one collision in 2^^32 is
-acceptable.  Do NOT use for cryptographic purposes.
---------------------------------------------------------------------
-*/
-
-static uint32_t j_hash(const uint8_t *k, uint32_t length, uint32_t initval)
-{
-    uint32_t a, b, c, len;
-
-    /* Set up the internal state */
-    len = length;
-    a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
-    c = initval; /* the previous hash value */
-
-    /*---------------------------------------- handle most of the key */
-    while (len >= 12) {
-        a += (k[0] + ((uint32_t)k[1] << 8) + ((uint32_t)k[2] << 16) + ((uint32_t)k[3] << 24));
-        b += (k[4] + ((uint32_t)k[5] << 8) + ((uint32_t)k[6] << 16) + ((uint32_t)k[7] << 24));
-        c += (k[8] + ((uint32_t)k[9] << 8) + ((uint32_t)k[10] << 16) + ((uint32_t)k[11] << 24));
-        mix(a, b, c);
-        k += 12;
-        len -= 12;
-    }
-
-    /*------------------------------------- handle the last 11 bytes */
-    c += length;
-    switch (len) /* all the case statements fall through */
-    {
-        case 11:
-            c += ((uint32_t)k[10] << 24); /* fallthrough */
-        case 10:
-            c += ((uint32_t)k[9] << 16); /* fallthrough */
-        case 9:
-            c += ((uint32_t)k[8] << 8); /* fallthrough */
-            /* the first byte of c is reserved for the length */
-        case 8:
-            b += ((uint32_t)k[7] << 24); /* fallthrough */
-        case 7:
-            b += ((uint32_t)k[6] << 16); /* fallthrough */
-        case 6:
-            b += ((uint32_t)k[5] << 8); /* fallthrough */
-        case 5:
-            b += k[4]; /* fallthrough */
-        case 4:
-            a += ((uint32_t)k[3] << 24); /* fallthrough */
-        case 3:
-            a += ((uint32_t)k[2] << 16); /* fallthrough */
-        case 2:
-            a += ((uint32_t)k[1] << 8); /* fallthrough */
-        case 1:
-            a += k[0];
-            /* case 0: nothing left to add */
-    }
-    mix(a, b, c);
-    /*-------------------------------------------- report the result */
-    return (c);
-}
-
-/********************************************************************/
-
-/***************************/
-/***                     ***/
-/***  judy HS emulation  ***/
-/***                     ***/
-/***************************/
-
-struct collchain_t
-{
-    struct collchain_t *next;
-    void *payload;
-    uint32_t fullhash, length;
-    unsigned char mem[1];
-};
-
-void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint32_t hashmask)
-{
-    struct collchain_t ***base = (struct collchain_t ***)base_i;
-    uint32_t hf, h;
-    struct collchain_t **ar;
-    struct collchain_t *chain, *pchain;
-
-    if (!*base) {
-        *base = (struct collchain_t **)calloc(1, (hashmask + 1) * sizeof(void *));
-    }
-    ar = *base;
-
-    h = (hf = j_hash(mem, length, length)) & hashmask;
-    pchain = chain = ar[h];
-    while (chain) {
-        if ((chain->fullhash == hf) && (chain->length == length) &&
-            !memcmp(chain->mem, mem, length)) {
-            if (pchain != chain) /* move hit to front */
-            {
-                pchain->next = chain->next;
-                chain->next = ar[h];
-                ar[h] = chain;
-            }
-            return (&(chain->payload));
-        }
-
-        pchain = chain;
-        chain = chain->next;
-    }
-
-    chain = (struct collchain_t *)calloc(1, sizeof(struct collchain_t) + length - 1);
-    memcpy(chain->mem, mem, length);
-    chain->fullhash = hf;
-    chain->length = length;
-    chain->next = ar[h];
-    ar[h] = chain;
-    return (&(chain->payload));
-}
-
-void JenkinsFree(void *base_i, uint32_t hashmask)
-{
-    struct collchain_t ***base = (struct collchain_t ***)base_i;
-    uint32_t h;
-    struct collchain_t **ar;
-    struct collchain_t *chain, *chain_next;
-
-    if (base && *base) {
-        ar = *base;
-        for (h = 0; h <= hashmask; h++) {
-            chain = ar[h];
-            while (chain) {
-                chain_next = chain->next;
-                free(chain);
-                chain = chain_next;
-            }
-        }
-
-        free(*base);
-        *base = NULL;
-    }
-}
-
-/**********************************************************************/
-
-/************************/
-/***                  ***/
-/*** utility function ***/
-/***                  ***/
-/************************/
-
-int fstUtilityBinToEscConvertedLen(const unsigned char *s, int len)
-{
-    const unsigned char *src = s;
-    int dlen = 0;
-    int i;
-
-    for (i = 0; i < len; i++) {
-        switch (src[i]) {
-            case '\a': /* fallthrough */
-            case '\b': /* fallthrough */
-            case '\f': /* fallthrough */
-            case '\n': /* fallthrough */
-            case '\r': /* fallthrough */
-            case '\t': /* fallthrough */
-            case '\v': /* fallthrough */
-            case '\'': /* fallthrough */
-            case '\"': /* fallthrough */
-            case '\\': /* fallthrough */
-            case '\?':
-                dlen += 2;
-                break;
-            default:
-                if ((src[i] > ' ') && (src[i] <= '~')) /* no white spaces in output */
-                {
-                    dlen++;
-                } else {
-                    dlen += 4;
-                }
-                break;
-        }
-    }
-
-    return (dlen);
-}
-
-int fstUtilityBinToEsc(unsigned char *d, const unsigned char *s, int len)
-{
-    const unsigned char *src = s;
-    unsigned char *dst = d;
-    unsigned char val;
-    int i;
-
-    for (i = 0; i < len; i++) {
-        switch (src[i]) {
-            case '\a':
-                *(dst++) = '\\';
-                *(dst++) = 'a';
-                break;
-            case '\b':
-                *(dst++) = '\\';
-                *(dst++) = 'b';
-                break;
-            case '\f':
-                *(dst++) = '\\';
-                *(dst++) = 'f';
-                break;
-            case '\n':
-                *(dst++) = '\\';
-                *(dst++) = 'n';
-                break;
-            case '\r':
-                *(dst++) = '\\';
-                *(dst++) = 'r';
-                break;
-            case '\t':
-                *(dst++) = '\\';
-                *(dst++) = 't';
-                break;
-            case '\v':
-                *(dst++) = '\\';
-                *(dst++) = 'v';
-                break;
-            case '\'':
-                *(dst++) = '\\';
-                *(dst++) = '\'';
-                break;
-            case '\"':
-                *(dst++) = '\\';
-                *(dst++) = '\"';
-                break;
-            case '\\':
-                *(dst++) = '\\';
-                *(dst++) = '\\';
-                break;
-            case '\?':
-                *(dst++) = '\\';
-                *(dst++) = '\?';
-                break;
-            default:
-                if ((src[i] > ' ') && (src[i] <= '~')) /* no white spaces in output */
-                {
-                    *(dst++) = src[i];
-                } else {
-                    val = src[i];
-                    *(dst++) = '\\';
-                    *(dst++) = (val / 64) + '0';
-                    val = val & 63;
-                    *(dst++) = (val / 8) + '0';
-                    val = val & 7;
-                    *(dst++) = (val) + '0';
-                }
-                break;
-        }
-    }
-
-    return (dst - d);
-}
-
-/*
- * this overwrites the original string if the destination pointer is NULL
- */
-int fstUtilityEscToBin(unsigned char *d, unsigned char *s, int len)
-{
-    unsigned char *src = s;
-    unsigned char *dst = (!d) ? s : (s = d);
-    unsigned char val[3];
-    int i;
-
-    for (i = 0; i < len; i++) {
-        if (src[i] != '\\') {
-            *(dst++) = src[i];
-        } else {
-            switch (src[++i]) {
-                case 'a':
-                    *(dst++) = '\a';
-                    break;
-                case 'b':
-                    *(dst++) = '\b';
-                    break;
-                case 'f':
-                    *(dst++) = '\f';
-                    break;
-                case 'n':
-                    *(dst++) = '\n';
-                    break;
-                case 'r':
-                    *(dst++) = '\r';
-                    break;
-                case 't':
-                    *(dst++) = '\t';
-                    break;
-                case 'v':
-                    *(dst++) = '\v';
-                    break;
-                case '\'':
-                    *(dst++) = '\'';
-                    break;
-                case '\"':
-                    *(dst++) = '\"';
-                    break;
-                case '\\':
-                    *(dst++) = '\\';
-                    break;
-                case '\?':
-                    *(dst++) = '\?';
-                    break;
-
-                case 'x':
-                    val[0] = toupper(src[++i]);
-                    val[1] = toupper(src[++i]);
-                    val[0] =
-                        ((val[0] >= 'A') && (val[0] <= 'F')) ? (val[0] - 'A' + 10) : (val[0] - '0');
-                    val[1] =
-                        ((val[1] >= 'A') && (val[1] <= 'F')) ? (val[1] - 'A' + 10) : (val[1] - '0');
-                    *(dst++) = val[0] * 16 + val[1];
-                    break;
-
-                case '0':
-                case '1':
-                case '2':
-                case '3':
-                case '4':
-                case '5':
-                case '6':
-                case '7':
-                    val[0] = src[i] - '0';
-                    val[1] = src[++i] - '0';
-                    val[2] = src[++i] - '0';
-                    *(dst++) = val[0] * 64 + val[1] * 8 + val[2];
-                    break;
-
-                default:
-                    *(dst++) = src[i];
-                    break;
-            }
-        }
-    }
-
-    return (dst - s);
-}
-
-struct fstETab *fstUtilityExtractEnumTableFromString(const char *s)
-{
-    struct fstETab *et = NULL;
-    int num_spaces = 0;
-    int i;
-    int newlen;
-
-    if (s) {
-        const char *csp = strchr(s, ' ');
-        int cnt = atoi(csp + 1);
-
-        for (;;) {
-            csp = strchr(csp + 1, ' ');
-            if (csp) {
-                num_spaces++;
-            } else {
-                break;
-            }
-        }
-
-        if (num_spaces == (2 * cnt)) {
-            char *sp, *sp2;
-
-            et = (struct fstETab *)calloc(1, sizeof(struct fstETab));
-            et->elem_count = cnt;
-            et->name = strdup(s);
-            et->literal_arr = (char **)calloc(cnt, sizeof(char *));
-            et->val_arr = (char **)calloc(cnt, sizeof(char *));
-
-            sp = strchr(et->name, ' ');
-            *sp = 0;
-
-            sp = strchr(sp + 1, ' ');
-
-            for (i = 0; i < cnt; i++) {
-                sp2 = strchr(sp + 1, ' ');
-                *(char *)sp2 = 0;
-                et->literal_arr[i] = sp + 1;
-                sp = sp2;
-
-                newlen = fstUtilityEscToBin(NULL,
-                                            (unsigned char *)et->literal_arr[i],
-                                            strlen(et->literal_arr[i]));
-                et->literal_arr[i][newlen] = 0;
-            }
-
-            for (i = 0; i < cnt; i++) {
-                sp2 = strchr(sp + 1, ' ');
-                if (sp2) {
-                    *sp2 = 0;
-                }
-                et->val_arr[i] = sp + 1;
-                sp = sp2;
-
-                newlen = fstUtilityEscToBin(NULL,
-                                            (unsigned char *)et->val_arr[i],
-                                            strlen(et->val_arr[i]));
-                et->val_arr[i][newlen] = 0;
-            }
-        }
-    }
-
-    return (et);
-}
-
-void fstUtilityFreeEnumTable(struct fstETab *etab)
-{
-    if (etab) {
-        free(etab->literal_arr);
-        free(etab->val_arr);
-        free(etab->name);
-        free(etab);
-    }
-}
diff --git a/include/gtkwave/fstapi.h b/include/gtkwave/fstapi.h
deleted file mode 100644
index 00c63154c..000000000
--- a/include/gtkwave/fstapi.h
+++ /dev/null
@@ -1,548 +0,0 @@
-/*
- * Copyright (c) 2009-2026 Tony Bybell.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#ifndef FST_API_H
-#define FST_API_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <zlib.h>
-#include <inttypes.h>
-#if defined(_MSC_VER)
-#include "fst_win_unistd.h"
-#else
-#include <unistd.h>
-#endif
-#include <time.h>
-
-typedef uint32_t fstHandle;
-typedef uint32_t fstEnumHandle;
-
-enum fstWriterPackType
-{
-    FST_WR_PT_ZLIB = 0,
-    FST_WR_PT_FASTLZ = 1,
-    FST_WR_PT_LZ4 = 2
-};
-
-enum fstFileType
-{
-    FST_FT_MIN = 0,
-
-    FST_FT_VERILOG = 0,
-    FST_FT_VHDL = 1,
-    FST_FT_VERILOG_VHDL = 2,
-
-    FST_FT_MAX = 2
-};
-
-enum fstBlockType
-{
-    FST_BL_HDR = 0,
-    FST_BL_VCDATA = 1,
-    FST_BL_BLACKOUT = 2,
-    FST_BL_GEOM = 3,
-    FST_BL_HIER = 4,
-    FST_BL_VCDATA_DYN_ALIAS = 5,
-    FST_BL_HIER_LZ4 = 6,
-    FST_BL_HIER_LZ4DUO = 7,
-    FST_BL_VCDATA_DYN_ALIAS2 = 8,
-
-    FST_BL_ZWRAPPER = 254, /* indicates that whole trace is gz wrapped */
-    FST_BL_SKIP = 255 /* used while block is being written */
-};
-
-enum fstScopeType
-{
-    FST_ST_MIN = 0,
-
-    FST_ST_VCD_MODULE = 0,
-    FST_ST_VCD_TASK = 1,
-    FST_ST_VCD_FUNCTION = 2,
-    FST_ST_VCD_BEGIN = 3,
-    FST_ST_VCD_FORK = 4,
-    FST_ST_VCD_GENERATE = 5,
-    FST_ST_VCD_STRUCT = 6,
-    FST_ST_VCD_UNION = 7,
-    FST_ST_VCD_CLASS = 8,
-    FST_ST_VCD_INTERFACE = 9,
-    FST_ST_VCD_PACKAGE = 10,
-    FST_ST_VCD_PROGRAM = 11,
-
-    FST_ST_VHDL_ARCHITECTURE = 12,
-    FST_ST_VHDL_PROCEDURE = 13,
-    FST_ST_VHDL_FUNCTION = 14,
-    FST_ST_VHDL_RECORD = 15,
-    FST_ST_VHDL_PROCESS = 16,
-    FST_ST_VHDL_BLOCK = 17,
-    FST_ST_VHDL_FOR_GENERATE = 18,
-    FST_ST_VHDL_IF_GENERATE = 19,
-    FST_ST_VHDL_GENERATE = 20,
-    FST_ST_VHDL_PACKAGE = 21,
-
-    FST_ST_SV_ARRAY = 22,
-    FST_ST_MAX = 22,
-
-    FST_ST_GEN_ATTRBEGIN = 252,
-    FST_ST_GEN_ATTREND = 253,
-
-    FST_ST_VCD_SCOPE = 254,
-    FST_ST_VCD_UPSCOPE = 255
-};
-
-enum fstVarType
-{
-    FST_VT_MIN = 0, /* start of vartypes */
-
-    FST_VT_VCD_EVENT = 0,
-    FST_VT_VCD_INTEGER = 1,
-    FST_VT_VCD_PARAMETER = 2,
-    FST_VT_VCD_REAL = 3,
-    FST_VT_VCD_REAL_PARAMETER = 4,
-    FST_VT_VCD_REG = 5,
-    FST_VT_VCD_SUPPLY0 = 6,
-    FST_VT_VCD_SUPPLY1 = 7,
-    FST_VT_VCD_TIME = 8,
-    FST_VT_VCD_TRI = 9,
-    FST_VT_VCD_TRIAND = 10,
-    FST_VT_VCD_TRIOR = 11,
-    FST_VT_VCD_TRIREG = 12,
-    FST_VT_VCD_TRI0 = 13,
-    FST_VT_VCD_TRI1 = 14,
-    FST_VT_VCD_WAND = 15,
-    FST_VT_VCD_WIRE = 16,
-    FST_VT_VCD_WOR = 17,
-    FST_VT_VCD_PORT = 18,
-    FST_VT_VCD_SPARRAY = 19, /* used to define the rownum (index) port for a sparse array */
-    FST_VT_VCD_REALTIME = 20,
-
-    FST_VT_GEN_STRING = 21, /* generic string type   (max len is defined dynamically via
-                                fstWriterEmitVariableLengthValueChange) */
-
-    FST_VT_SV_BIT = 22,
-    FST_VT_SV_LOGIC = 23,
-    FST_VT_SV_INT = 24, /* declare as size = 32 */
-    FST_VT_SV_SHORTINT = 25, /* declare as size = 16 */
-    FST_VT_SV_LONGINT = 26, /* declare as size = 64 */
-    FST_VT_SV_BYTE = 27, /* declare as size = 8  */
-    FST_VT_SV_ENUM = 28, /* declare as appropriate type range */
-    FST_VT_SV_SHORTREAL = 29, /* declare and emit same as FST_VT_VCD_REAL (needs to be emitted
-                                    as double, not a float) */
-
-    FST_VT_MAX = 29 /* end of vartypes */
-};
-
-enum fstVarDir
-{
-    FST_VD_MIN = 0,
-
-    FST_VD_IMPLICIT = 0,
-    FST_VD_INPUT = 1,
-    FST_VD_OUTPUT = 2,
-    FST_VD_INOUT = 3,
-    FST_VD_BUFFER = 4,
-    FST_VD_LINKAGE = 5,
-
-    FST_VD_MAX = 5
-};
-
-enum fstHierType
-{
-    FST_HT_MIN = 0,
-
-    FST_HT_SCOPE = 0,
-    FST_HT_UPSCOPE = 1,
-    FST_HT_VAR = 2,
-    FST_HT_ATTRBEGIN = 3,
-    FST_HT_ATTREND = 4,
-
-    /* FST_HT_TREEBEGIN and FST_HT_TREEEND are not yet used by FST but are currently used when
-        fstHier bridges other formats */
-    FST_HT_TREEBEGIN = 5,
-    FST_HT_TREEEND = 6,
-
-    FST_HT_MAX = 6
-};
-
-enum fstAttrType
-{
-    FST_AT_MIN = 0,
-
-    FST_AT_MISC = 0, /* self-contained: does not need matching FST_HT_ATTREND */
-    FST_AT_ARRAY = 1,
-    FST_AT_ENUM = 2,
-    FST_AT_PACK = 3,
-
-    FST_AT_MAX = 3
-};
-
-enum fstMiscType
-{
-    FST_MT_MIN = 0,
-
-    FST_MT_COMMENT = 0, /* use fstWriterSetComment() to emit */
-    FST_MT_ENVVAR = 1, /* use fstWriterSetEnvVar() to emit */
-    FST_MT_SUPVAR = 2, /* use fstWriterCreateVar2() to emit */
-    FST_MT_PATHNAME = 3, /* reserved for fstWriterSetSourceStem() string -> number management */
-    FST_MT_SOURCESTEM = 4, /* use fstWriterSetSourceStem() to emit */
-    FST_MT_SOURCEISTEM = 5, /* use fstWriterSetSourceInstantiationStem() to emit */
-    FST_MT_VALUELIST =
-        6, /* use fstWriterSetValueList() to emit, followed by fstWriterCreateVar*() */
-    FST_MT_ENUMTABLE =
-        7, /* use fstWriterCreateEnumTable() and fstWriterEmitEnumTableRef() to emit */
-    FST_MT_UNKNOWN = 8,
-
-    FST_MT_MAX = 8
-};
-
-enum fstArrayType
-{
-    FST_AR_MIN = 0,
-
-    FST_AR_NONE = 0,
-    FST_AR_UNPACKED = 1,
-    FST_AR_PACKED = 2,
-    FST_AR_SPARSE = 3,
-
-    FST_AR_MAX = 3
-};
-
-enum fstEnumValueType
-{
-    FST_EV_SV_INTEGER = 0,
-    FST_EV_SV_BIT = 1,
-    FST_EV_SV_LOGIC = 2,
-    FST_EV_SV_INT = 3,
-    FST_EV_SV_SHORTINT = 4,
-    FST_EV_SV_LONGINT = 5,
-    FST_EV_SV_BYTE = 6,
-    FST_EV_SV_UNSIGNED_INTEGER = 7,
-    FST_EV_SV_UNSIGNED_BIT = 8,
-    FST_EV_SV_UNSIGNED_LOGIC = 9,
-    FST_EV_SV_UNSIGNED_INT = 10,
-    FST_EV_SV_UNSIGNED_SHORTINT = 11,
-    FST_EV_SV_UNSIGNED_LONGINT = 12,
-    FST_EV_SV_UNSIGNED_BYTE = 13,
-
-    FST_EV_REG = 14,
-    FST_EV_TIME = 15,
-
-    FST_EV_MAX = 15
-};
-
-enum fstPackType
-{
-    FST_PT_NONE = 0,
-    FST_PT_UNPACKED = 1,
-    FST_PT_PACKED = 2,
-    FST_PT_TAGGED_PACKED = 3,
-
-    FST_PT_MAX = 3
-};
-
-enum fstSupplementalVarType
-{
-    FST_SVT_MIN = 0,
-
-    FST_SVT_NONE = 0,
-
-    FST_SVT_VHDL_SIGNAL = 1,
-    FST_SVT_VHDL_VARIABLE = 2,
-    FST_SVT_VHDL_CONSTANT = 3,
-    FST_SVT_VHDL_FILE = 4,
-    FST_SVT_VHDL_MEMORY = 5,
-
-    FST_SVT_MAX = 5
-};
-
-enum fstSupplementalDataType
-{
-    FST_SDT_MIN = 0,
-
-    FST_SDT_NONE = 0,
-
-    FST_SDT_VHDL_BOOLEAN = 1,
-    FST_SDT_VHDL_BIT = 2,
-    FST_SDT_VHDL_BIT_VECTOR = 3,
-    FST_SDT_VHDL_STD_ULOGIC = 4,
-    FST_SDT_VHDL_STD_ULOGIC_VECTOR = 5,
-    FST_SDT_VHDL_STD_LOGIC = 6,
-    FST_SDT_VHDL_STD_LOGIC_VECTOR = 7,
-    FST_SDT_VHDL_UNSIGNED = 8,
-    FST_SDT_VHDL_SIGNED = 9,
-    FST_SDT_VHDL_INTEGER = 10,
-    FST_SDT_VHDL_REAL = 11,
-    FST_SDT_VHDL_NATURAL = 12,
-    FST_SDT_VHDL_POSITIVE = 13,
-    FST_SDT_VHDL_TIME = 14,
-    FST_SDT_VHDL_CHARACTER = 15,
-    FST_SDT_VHDL_STRING = 16,
-
-    FST_SDT_MAX = 16,
-
-    FST_SDT_SVT_SHIFT_COUNT = 10, /* FST_SVT_* is ORed in by fstWriterCreateVar2() to the left
-                                        after shifting FST_SDT_SVT_SHIFT_COUNT */
-    FST_SDT_ABS_MAX = ((1 << (FST_SDT_SVT_SHIFT_COUNT)) - 1)
-};
-
-struct fstHier
-{
-    unsigned char htyp;
-
-    union
-    {
-        /* if htyp == FST_HT_SCOPE */
-        struct fstHierScope
-        {
-            unsigned char typ; /* FST_ST_MIN ... FST_ST_MAX */
-            const char *name;
-            const char *component;
-            uint32_t name_length; /* strlen(u.scope.name) */
-            uint32_t component_length; /* strlen(u.scope.component) */
-        } scope;
-
-        /* if htyp == FST_HT_VAR */
-        struct fstHierVar
-        {
-            unsigned char typ; /* FST_VT_MIN ... FST_VT_MAX */
-            unsigned char direction; /* FST_VD_MIN ... FST_VD_MAX */
-            unsigned char svt_workspace; /* zeroed out by FST reader, for client code use */
-            unsigned char sdt_workspace; /* zeroed out by FST reader, for client code use */
-            unsigned int sxt_workspace; /* zeroed out by FST reader, for client code use */
-            const char *name;
-            uint32_t length;
-            fstHandle handle;
-            uint32_t name_length; /* strlen(u.var.name) */
-            unsigned is_alias : 1;
-        } var;
-
-        /* if htyp == FST_HT_ATTRBEGIN */
-        struct fstHierAttr
-        {
-            unsigned char typ; /* FST_AT_MIN ... FST_AT_MAX */
-            unsigned char
-                subtype; /* from fstMiscType, fstArrayType, fstEnumValueType, fstPackType */
-            const char *name;
-            uint64_t arg; /* number of array elements, struct members, or some other payload
-                                (possibly ignored) */
-            uint64_t arg_from_name; /* for when name is overloaded as a variable-length integer
-                                        (FST_AT_MISC + FST_MT_SOURCESTEM) */
-            uint32_t name_length; /* strlen(u.attr.name) */
-        } attr;
-    } u;
-};
-
-struct fstETab
-{
-    char *name;
-    uint32_t elem_count;
-    char **literal_arr;
-    char **val_arr;
-};
-
-/*
- * writer functions
- */
-
-typedef struct fstWriterContext fstWriterContext;
-
-void fstWriterClose(fstWriterContext *ctx);
-fstWriterContext *fstWriterCreate(const char *nam, int use_compressed_hier);
-fstEnumHandle fstWriterCreateEnumTable(fstWriterContext *ctx,
-                                        const char *name,
-                                        uint32_t elem_count,
-                                        unsigned int min_valbits,
-                                        const char **literal_arr,
-                                        const char **val_arr);
-/* used for Verilog/SV */
-fstHandle fstWriterCreateVar(fstWriterContext *ctx,
-                                enum fstVarType vt,
-                                enum fstVarDir vd,
-                                uint32_t len,
-                                const char *nam,
-                                fstHandle aliasHandle);
-/* future expansion for VHDL and other languages.  The variable type, data type, etc map onto
-    the current Verilog/SV one.  The "type" string is optional for a more verbose or custom
-    description */
-fstHandle fstWriterCreateVar2(fstWriterContext *ctx,
-                                enum fstVarType vt,
-                                enum fstVarDir vd,
-                                uint32_t len,
-                                const char *nam,
-                                fstHandle aliasHandle,
-                                const char *type,
-                                enum fstSupplementalVarType svt,
-                                enum fstSupplementalDataType sdt);
-void fstWriterEmitDumpActive(fstWriterContext *ctx, int enable);
-void fstWriterEmitEnumTableRef(fstWriterContext *ctx, fstEnumHandle handle);
-void fstWriterEmitValueChange(fstWriterContext *ctx, fstHandle handle, const void *val);
-void fstWriterEmitValueChange32(fstWriterContext *ctx,
-                                fstHandle handle,
-                                uint32_t bits,
-                                uint32_t val);
-void fstWriterEmitValueChange64(fstWriterContext *ctx,
-                                fstHandle handle,
-                                uint32_t bits,
-                                uint64_t val);
-void fstWriterEmitValueChangeVec32(fstWriterContext *ctx,
-                                    fstHandle handle,
-                                    uint32_t bits,
-                                    const uint32_t *val);
-void fstWriterEmitValueChangeVec64(fstWriterContext *ctx,
-                                    fstHandle handle,
-                                    uint32_t bits,
-                                    const uint64_t *val);
-void fstWriterEmitVariableLengthValueChange(fstWriterContext *ctx,
-                                            fstHandle handle,
-                                            const void *val,
-                                            uint32_t len);
-void fstWriterEmitTimeChange(fstWriterContext *ctx, uint64_t tim);
-void fstWriterFlushContext(fstWriterContext *ctx);
-int fstWriterGetDumpSizeLimitReached(fstWriterContext *ctx);
-int fstWriterGetFseekFailed(fstWriterContext *ctx);
-int fstWriterGetFlushContextPending(fstWriterContext *ctx);
-void fstWriterSetAttrBegin(fstWriterContext *ctx,
-                            enum fstAttrType attrtype,
-                            int subtype,
-                            const char *attrname,
-                            uint64_t arg);
-void fstWriterSetAttrEnd(fstWriterContext *ctx);
-void fstWriterSetComment(fstWriterContext *ctx, const char *comm);
-void fstWriterSetDate(fstWriterContext *ctx, const char *dat);
-void fstWriterSetDumpSizeLimit(fstWriterContext *ctx, uint64_t numbytes);
-void fstWriterSetEnvVar(fstWriterContext *ctx, const char *envvar);
-void fstWriterSetFileType(fstWriterContext *ctx, enum fstFileType filetype);
-void fstWriterSetPackType(fstWriterContext *ctx, enum fstWriterPackType typ);
-void fstWriterSetParallelMode(fstWriterContext *ctx, int enable);
-void fstWriterSetRepackOnClose(fstWriterContext *ctx,
-                                int enable); /* type = 0 (none), 1 (libz) */
-void fstWriterSetScope(fstWriterContext *ctx,
-                        enum fstScopeType scopetype,
-                        const char *scopename,
-                        const char *scopecomp);
-void fstWriterSetSourceInstantiationStem(fstWriterContext *ctx,
-                                            const char *path,
-                                            unsigned int line,
-                                            unsigned int use_realpath);
-void fstWriterSetSourceStem(fstWriterContext *ctx,
-                            const char *path,
-                            unsigned int line,
-                            unsigned int use_realpath);
-void fstWriterSetTimescale(fstWriterContext *ctx, int ts);
-void fstWriterSetTimescaleFromString(fstWriterContext *ctx, const char *s);
-void fstWriterSetTimezero(fstWriterContext *ctx, int64_t tim);
-void fstWriterSetUpscope(fstWriterContext *ctx);
-void fstWriterSetValueList(fstWriterContext *ctx, const char *vl);
-void fstWriterSetVersion(fstWriterContext *ctx, const char *vers);
-
-/*
- * reader functions
- */
-
-typedef struct fstReaderContext fstReaderContext;
-
-void fstReaderClose(fstReaderContext *ctx);
-void fstReaderClrFacProcessMask(fstReaderContext *ctx, fstHandle facidx);
-void fstReaderClrFacProcessMaskAll(fstReaderContext *ctx);
-uint64_t fstReaderGetAliasCount(fstReaderContext *ctx);
-const char *fstReaderGetCurrentFlatScope(fstReaderContext *ctx);
-void *fstReaderGetCurrentScopeUserInfo(fstReaderContext *ctx);
-int fstReaderGetCurrentScopeLen(fstReaderContext *ctx);
-const char *fstReaderGetDateString(fstReaderContext *ctx);
-int fstReaderGetDoubleEndianMatchState(fstReaderContext *ctx);
-uint64_t fstReaderGetDumpActivityChangeTime(fstReaderContext *ctx, uint32_t idx);
-unsigned char fstReaderGetDumpActivityChangeValue(fstReaderContext *ctx, uint32_t idx);
-uint64_t fstReaderGetEndTime(fstReaderContext *ctx);
-int fstReaderGetFacProcessMask(fstReaderContext *ctx, fstHandle facidx);
-int fstReaderGetFileType(fstReaderContext *ctx);
-int fstReaderGetFseekFailed(fstReaderContext *ctx);
-fstHandle fstReaderGetMaxHandle(fstReaderContext *ctx);
-uint64_t fstReaderGetMemoryUsedByWriter(fstReaderContext *ctx);
-uint32_t fstReaderGetNumberDumpActivityChanges(fstReaderContext *ctx);
-uint64_t fstReaderGetScopeCount(fstReaderContext *ctx);
-uint64_t fstReaderGetStartTime(fstReaderContext *ctx);
-signed char fstReaderGetTimescale(fstReaderContext *ctx);
-int64_t fstReaderGetTimezero(fstReaderContext *ctx);
-uint64_t fstReaderGetValueChangeSectionCount(fstReaderContext *ctx);
-char *fstReaderGetValueFromHandleAtTime(fstReaderContext *ctx,
-                                        uint64_t tim,
-                                        fstHandle facidx,
-                                        char *buf);
-uint64_t fstReaderGetVarCount(fstReaderContext *ctx);
-const char *fstReaderGetVersionString(fstReaderContext *ctx);
-struct fstHier *fstReaderIterateHier(fstReaderContext *ctx);
-int fstReaderIterateHierRewind(fstReaderContext *ctx);
-int fstReaderIterBlocks(fstReaderContext *ctx,
-                        void (*value_change_callback)(void *user_callback_data_pointer,
-                                                      uint64_t time,
-                                                      fstHandle facidx,
-                                                      const unsigned char *value),
-                        void *user_callback_data_pointer,
-                        FILE *vcdhandle);
-int fstReaderIterBlocks2(fstReaderContext *ctx,
-                         void (*value_change_callback)(void *user_callback_data_pointer,
-                                                       uint64_t time,
-                                                       fstHandle facidx,
-                                                       const unsigned char *value),
-                         void (*value_change_callback_varlen)(void *user_callback_data_pointer,
-                                                              uint64_t time,
-                                                              fstHandle facidx,
-                                                              const unsigned char *value,
-                                                              uint32_t len),
-                         void *user_callback_data_pointer,
-                         FILE *vcdhandle);
-void fstReaderIterBlocksSetNativeDoublesOnCallback(fstReaderContext *ctx, int enable);
-fstReaderContext *fstReaderOpen(const char *nam);
-fstReaderContext *fstReaderOpenForUtilitiesOnly(void);
-const char *fstReaderPopScope(fstReaderContext *ctx);
-int fstReaderProcessHier(fstReaderContext *ctx, FILE *vcdhandle);
-const char *fstReaderPushScope(fstReaderContext *ctx, const char *nam, void *user_info);
-void fstReaderResetScope(fstReaderContext *ctx);
-void fstReaderSetFacProcessMask(fstReaderContext *ctx, fstHandle facidx);
-void fstReaderSetFacProcessMaskAll(fstReaderContext *ctx);
-void fstReaderSetLimitTimeRange(fstReaderContext *ctx, uint64_t start_time, uint64_t end_time);
-void fstReaderSetUnlimitedTimeRange(fstReaderContext *ctx);
-void fstReaderSetVcdExtensions(fstReaderContext *ctx, int enable);
-
-/*
- * utility functions
- */
-int fstUtilityBinToEscConvertedLen(const unsigned char *s, int len); /* used for mallocs for fstUtilityBinToEsc() */
-int fstUtilityBinToEsc(unsigned char *d, const unsigned char *s, int len);
-int fstUtilityEscToBin(unsigned char *d, unsigned char *s, int len);
-struct fstETab *fstUtilityExtractEnumTableFromString(const char *s);
-void fstUtilityFreeEnumTable(struct fstETab *etab); /* must use to free fstETab properly */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/include/gtkwave/lz4.c b/include/gtkwave/lz4.c
deleted file mode 100644
index 0a727596b..000000000
--- a/include/gtkwave/lz4.c
+++ /dev/null
@@ -1,2789 +0,0 @@
-/*
-   LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2023, Yann Collet.
-
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-    - LZ4 homepage : http://www.lz4.org
-    - LZ4 source repository : https://github.com/lz4/lz4
-*/
-
-/*-************************************
-*  Tuning parameters
-**************************************/
-/*
- * LZ4_HEAPMODE :
- * Select how stateless compression functions like `LZ4_compress_default()`
- * allocate memory for their hash table,
- * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
- */
-#ifndef LZ4_HEAPMODE
-#  define LZ4_HEAPMODE 0
-#endif
-
-/*
- * LZ4_ACCELERATION_DEFAULT :
- * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
- */
-#define LZ4_ACCELERATION_DEFAULT 1
-/*
- * LZ4_ACCELERATION_MAX :
- * Any "acceleration" value higher than this threshold
- * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
- */
-#define LZ4_ACCELERATION_MAX 65537
-
-
-/*-************************************
-*  CPU Feature Detection
-**************************************/
-/* LZ4_FORCE_MEMORY_ACCESS
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
- *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- *            It can generate buggy code on targets which assembly generation depends on alignment.
- *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
- * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
-#  if defined(__GNUC__) && \
-  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
-  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define LZ4_FORCE_MEMORY_ACCESS 2
-#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER)
-#    define LZ4_FORCE_MEMORY_ACCESS 1
-#  endif
-#endif
-
-/*
- * LZ4_FORCE_SW_BITCOUNT
- * Define this parameter if your target system or compiler does not support hardware bit count
- */
-#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
-#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
-#  define LZ4_FORCE_SW_BITCOUNT
-#endif
-
-
-
-/*-************************************
-*  Dependency
-**************************************/
-/*
- * LZ4_SRC_INCLUDED:
- * Amalgamation flag, whether lz4.c is included
- */
-#ifndef LZ4_SRC_INCLUDED
-#  define LZ4_SRC_INCLUDED 1
-#endif
-
-#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
-#  define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
-#endif
-
-#ifndef LZ4_STATIC_LINKING_ONLY
-#  define LZ4_STATIC_LINKING_ONLY
-#endif
-#include "lz4.h"
-/* see also "memory routines" below */
-
-
-/*-************************************
-*  Compiler Options
-**************************************/
-#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
-#  include <intrin.h>               /* only present in VS2005+ */
-#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 6237)   /* disable: C6237: conditional expression is always 0 */
-#endif  /* _MSC_VER */
-
-#ifndef LZ4_FORCE_INLINE
-#  ifdef _MSC_VER    /* Visual Studio */
-#    define LZ4_FORCE_INLINE static __forceinline
-#  else
-#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#      ifdef __GNUC__
-#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
-#      else
-#        define LZ4_FORCE_INLINE static inline
-#      endif
-#    else
-#      define LZ4_FORCE_INLINE static
-#    endif /* __STDC_VERSION__ */
-#  endif  /* _MSC_VER */
-#endif /* LZ4_FORCE_INLINE */
-
-/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
- * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
- * together with a simple 8-byte copy loop as a fall-back path.
- * However, this optimization hurts the decompression speed by >30%,
- * because the execution does not go to the optimized loop
- * for typical compressible data, and all of the preamble checks
- * before going to the fall-back path become useless overhead.
- * This optimization happens only with the -O3 flag, and -O2 generates
- * a simple 8-byte copy loop.
- * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
- * functions are annotated with __attribute__((optimize("O2"))),
- * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
- * of LZ4_wildCopy8 does not affect the compression speed.
- */
-#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
-#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
-#  undef LZ4_FORCE_INLINE
-#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
-#else
-#  define LZ4_FORCE_O2
-#endif
-
-#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
-#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
-#else
-#  define expect(expr,value)    (expr)
-#endif
-
-#ifndef likely
-#define likely(expr)     expect((expr) != 0, 1)
-#endif
-#ifndef unlikely
-#define unlikely(expr)   expect((expr) != 0, 0)
-#endif
-
-/* Should the alignment test prove unreliable, for some reason,
- * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
-#ifndef LZ4_ALIGN_TEST  /* can be externally provided */
-# define LZ4_ALIGN_TEST 1
-#endif
-
-
-/*-************************************
-*  Memory routines
-**************************************/
-
-/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
- *  Disable relatively high-level LZ4/HC functions that use dynamic memory
- *  allocation functions (malloc(), calloc(), free()).
- *
- *  Note that this is a compile-time switch. And since it disables
- *  public/stable LZ4 v1 API functions, we don't recommend using this
- *  symbol to generate a library for distribution.
- *
- *  The following public functions are removed when this symbol is defined.
- *  - lz4   : LZ4_createStream, LZ4_freeStream,
- *            LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
- *  - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
- *            LZ4_createHC (deprecated), LZ4_freeHC  (deprecated)
- *  - lz4frame, lz4file : All LZ4F_* functions
- */
-#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-#  define ALLOC(s)          lz4_error_memory_allocation_is_disabled
-#  define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
-#  define FREEMEM(p)        lz4_error_memory_allocation_is_disabled
-#elif defined(LZ4_USER_MEMORY_FUNCTIONS)
-/* memory management functions can be customized by user project.
- * Below functions must exist somewhere in the Project
- * and be available at link time */
-void* LZ4_malloc(size_t s);
-void* LZ4_calloc(size_t n, size_t s);
-void  LZ4_free(void* p);
-# define ALLOC(s)          LZ4_malloc(s)
-# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
-# define FREEMEM(p)        LZ4_free(p)
-#else
-# include <stdlib.h>   /* malloc, calloc, free */
-# define ALLOC(s)          malloc(s)
-# define ALLOC_AND_ZERO(s) calloc(1,s)
-# define FREEMEM(p)        free(p)
-#endif
-
-#if ! LZ4_FREESTANDING
-#  include <string.h>   /* memset, memcpy */
-#endif
-#if !defined(LZ4_memset)
-#  define LZ4_memset(p,v,s) memset((p),(v),(s))
-#endif
-#define MEM_INIT(p,v,s)   LZ4_memset((p),(v),(s))
-
-
-/*-************************************
-*  Common Constants
-**************************************/
-#define MINMATCH 4
-
-#define WILDCOPYLENGTH 8
-#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
-#define FASTLOOP_SAFE_DISTANCE 64
-static const int LZ4_minLength = (MFLIMIT+1);
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
-#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
-#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
-#endif
-
-#define ML_BITS  4
-#define ML_MASK  ((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
-
-
-/*-************************************
-*  Error detection
-**************************************/
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
-#  include <assert.h>
-#else
-#  ifndef assert
-#    define assert(condition) ((void)0)
-#  endif
-#endif
-
-#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
-
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
-#  include <stdio.h>
-   static int g_debuglog_enable = 1;
-#  define DEBUGLOG(l, ...) {                          \
-        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
-            fprintf(stderr, __FILE__  " %i: ", __LINE__); \
-            fprintf(stderr, __VA_ARGS__);             \
-            fprintf(stderr, " \n");                   \
-    }   }
-#else
-#  define DEBUGLOG(l, ...) {}    /* disabled */
-#endif
-
-static int LZ4_isAligned(const void* ptr, size_t alignment)
-{
-    return ((size_t)ptr & (alignment -1)) == 0;
-}
-
-
-/*-************************************
-*  Types
-**************************************/
-#include <limits.h>
-#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
-  typedef  uint8_t BYTE;
-  typedef uint16_t U16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-  typedef uintptr_t uptrval;
-#else
-# if UINT_MAX != 4294967295UL
-#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
-# endif
-  typedef unsigned char       BYTE;
-  typedef unsigned short      U16;
-  typedef unsigned int        U32;
-  typedef   signed int        S32;
-  typedef unsigned long long  U64;
-  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
-#endif
-
-#if defined(__x86_64__)
-  typedef U64    reg_t;   /* 64-bits in x32 mode */
-#else
-  typedef size_t reg_t;   /* 32-bits in x32 mode */
-#endif
-
-typedef enum {
-    notLimited = 0,
-    limitedOutput = 1,
-    fillOutput = 2
-} limitedOutput_directive;
-
-
-/*-************************************
-*  Reading and writing into memory
-**************************************/
-
-/**
- * LZ4 relies on memcpy with a constant size being inlined. In freestanding
- * environments, the compiler can't assume the implementation of memcpy() is
- * standard compliant, so it can't apply its specialized memcpy() inlining
- * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
- * memcpy() as if it were standard compliant, so it can inline it in freestanding
- * environments. This is needed when decompressing the Linux Kernel, for example.
- */
-#if !defined(LZ4_memcpy)
-#  if defined(__GNUC__) && (__GNUC__ >= 4)
-#    define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
-#  else
-#    define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
-#  endif
-#endif
-
-#if !defined(LZ4_memmove)
-#  if defined(__GNUC__) && (__GNUC__ >= 4)
-#    define LZ4_memmove __builtin_memmove
-#  else
-#    define LZ4_memmove memmove
-#  endif
-#endif
-
-static unsigned LZ4_isLittleEndian(void)
-{
-    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
-    return one.c[0];
-}
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
-#elif defined(_MSC_VER)
-#define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
-#endif
-
-#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
-/* lie to the compiler about data alignment; use with caution */
-
-static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
-static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
-static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
-
-static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-
-#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
-LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
-LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;
-
-static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; }
-static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; }
-static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; }
-
-static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; }
-static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; }
-
-#else  /* safe and portable access using memcpy() */
-
-static U16 LZ4_read16(const void* memPtr)
-{
-    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-static U32 LZ4_read32(const void* memPtr)
-{
-    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-static reg_t LZ4_read_ARCH(const void* memPtr)
-{
-    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-static void LZ4_write16(void* memPtr, U16 value)
-{
-    LZ4_memcpy(memPtr, &value, sizeof(value));
-}
-
-static void LZ4_write32(void* memPtr, U32 value)
-{
-    LZ4_memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif /* LZ4_FORCE_MEMORY_ACCESS */
-
-
-static U16 LZ4_readLE16(const void* memPtr)
-{
-    if (LZ4_isLittleEndian()) {
-        return LZ4_read16(memPtr);
-    } else {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U16)((U16)p[0] + (p[1]<<8));
-    }
-}
-
-#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
-static U32 LZ4_readLE32(const void* memPtr)
-{
-    if (LZ4_isLittleEndian()) {
-        return LZ4_read32(memPtr);
-    } else {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U32)p[0] + (p[1]<<8) + (p[2]<<16) + (p[3]<<24);
-    }
-}
-#endif
-
-static void LZ4_writeLE16(void* memPtr, U16 value)
-{
-    if (LZ4_isLittleEndian()) {
-        LZ4_write16(memPtr, value);
-    } else {
-        BYTE* p = (BYTE*)memPtr;
-        p[0] = (BYTE) value;
-        p[1] = (BYTE)(value>>8);
-    }
-}
-
-/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-LZ4_FORCE_INLINE
-void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
-{
-    BYTE* d = (BYTE*)dstPtr;
-    const BYTE* s = (const BYTE*)srcPtr;
-    BYTE* const e = (BYTE*)dstEnd;
-
-    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
-}
-
-static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
-static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
-
-
-#ifndef LZ4_FAST_DEC_LOOP
-#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
-#    define LZ4_FAST_DEC_LOOP 1
-#  elif defined(__aarch64__) && defined(__APPLE__)
-#    define LZ4_FAST_DEC_LOOP 1
-#  elif defined(__aarch64__) && !defined(__clang__)
-     /* On non-Apple aarch64, we disable this optimization for clang because
-      * on certain mobile chipsets, performance is reduced with clang. For
-      * more information refer to https://github.com/lz4/lz4/pull/707 */
-#    define LZ4_FAST_DEC_LOOP 1
-#  else
-#    define LZ4_FAST_DEC_LOOP 0
-#  endif
-#endif
-
-#if LZ4_FAST_DEC_LOOP
-
-LZ4_FORCE_INLINE void
-LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
-{
-    assert(srcPtr + offset == dstPtr);
-    if (offset < 8) {
-        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
-        dstPtr[0] = srcPtr[0];
-        dstPtr[1] = srcPtr[1];
-        dstPtr[2] = srcPtr[2];
-        dstPtr[3] = srcPtr[3];
-        srcPtr += inc32table[offset];
-        LZ4_memcpy(dstPtr+4, srcPtr, 4);
-        srcPtr -= dec64table[offset];
-        dstPtr += 8;
-    } else {
-        LZ4_memcpy(dstPtr, srcPtr, 8);
-        dstPtr += 8;
-        srcPtr += 8;
-    }
-
-    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
-}
-
-/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
- * this version copies two times 16 bytes (instead of one time 32 bytes)
- * because it must be compatible with offsets >= 16. */
-LZ4_FORCE_INLINE void
-LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
-{
-    BYTE* d = (BYTE*)dstPtr;
-    const BYTE* s = (const BYTE*)srcPtr;
-    BYTE* const e = (BYTE*)dstEnd;
-
-    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
-}
-
-/* LZ4_memcpy_using_offset()  presumes :
- * - dstEnd >= dstPtr + MINMATCH
- * - there is at least 8 bytes available to write after dstEnd */
-LZ4_FORCE_INLINE void
-LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
-{
-    BYTE v[8];
-
-    assert(dstEnd >= dstPtr + MINMATCH);
-
-    switch(offset) {
-    case 1:
-        MEM_INIT(v, *srcPtr, 8);
-        break;
-    case 2:
-        LZ4_memcpy(v, srcPtr, 2);
-        LZ4_memcpy(&v[2], srcPtr, 2);
-#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
-#  pragma warning(push)
-#  pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
-#endif
-        LZ4_memcpy(&v[4], v, 4);
-#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
-#  pragma warning(pop)
-#endif
-        break;
-    case 4:
-        LZ4_memcpy(v, srcPtr, 4);
-        LZ4_memcpy(&v[4], srcPtr, 4);
-        break;
-    default:
-        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
-        return;
-    }
-
-    LZ4_memcpy(dstPtr, v, 8);
-    dstPtr += 8;
-    while (dstPtr < dstEnd) {
-        LZ4_memcpy(dstPtr, v, 8);
-        dstPtr += 8;
-    }
-}
-#endif
-
-
-/*-************************************
-*  Common functions
-**************************************/
-static unsigned LZ4_NbCommonBytes (reg_t val)
-{
-    assert(val != 0);
-    if (LZ4_isLittleEndian()) {
-        if (sizeof(val) == 8) {
-#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
-/*-*************************************************************************************************
-* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
-* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
-* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
-****************************************************************************************************/
-#         if defined(__clang__) && (__clang_major__ < 10)
-            /* Avoid undefined clang-cl intrinsics issue.
-             * See https://github.com/lz4/lz4/pull/1017 for details. */
-            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
-#         else
-            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
-            return (unsigned)_tzcnt_u64(val) >> 3;
-#         endif
-#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r = 0;
-            _BitScanForward64(&r, (U64)val);
-            return (unsigned)r >> 3;
-#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
-                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
-                                        !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (unsigned)__builtin_ctzll((U64)val) >> 3;
-#       else
-            const U64 m = 0x0101010101010101ULL;
-            val ^= val - 1;
-            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
-#       endif
-        } else /* 32 bits */ {
-#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r;
-            _BitScanForward(&r, (U32)val);
-            return (unsigned)r >> 3;
-#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
-                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
-                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (unsigned)__builtin_ctz((U32)val) >> 3;
-#       else
-            const U32 m = 0x01010101;
-            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
-#       endif
-        }
-    } else   /* Big Endian CPU */ {
-        if (sizeof(val)==8) {
-#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
-                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
-                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (unsigned)__builtin_clzll((U64)val) >> 3;
-#       else
-#if 1
-            /* this method is probably faster,
-             * but adds a 128 bytes lookup table */
-            static const unsigned char ctz7_tab[128] = {
-                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-            };
-            U64 const mask = 0x0101010101010101ULL;
-            U64 const t = (((val >> 8) - mask) | val) & mask;
-            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
-#else
-            /* this method doesn't consume memory space like the previous one,
-             * but it contains several branches,
-             * that may end up slowing execution */
-            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
-            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
-            Note that this code path is never triggered in 32-bits mode. */
-            unsigned r;
-            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
-            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-            r += (!val);
-            return r;
-#endif
-#       endif
-        } else /* 32 bits */ {
-#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
-                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
-                                        !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (unsigned)__builtin_clz((U32)val) >> 3;
-#       else
-            val >>= 8;
-            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
-              (val + 0x00FF0000)) >> 24;
-            return (unsigned)val ^ 3;
-#       endif
-        }
-    }
-}
-
-
-#define STEPSIZE sizeof(reg_t)
-LZ4_FORCE_INLINE
-unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
-{
-    const BYTE* const pStart = pIn;
-
-    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
-        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
-        if (!diff) {
-            pIn+=STEPSIZE; pMatch+=STEPSIZE;
-        } else {
-            return LZ4_NbCommonBytes(diff);
-    }   }
-
-    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
-        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
-        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
-        pIn += LZ4_NbCommonBytes(diff);
-        return (unsigned)(pIn - pStart);
-    }
-
-    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
-    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
-    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
-    return (unsigned)(pIn - pStart);
-}
-
-
-#ifndef LZ4_COMMONDEFS_ONLY
-/*-************************************
-*  Local Constants
-**************************************/
-static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
-static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
-
-
-/*-************************************
-*  Local Structures and types
-**************************************/
-typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
-
-/**
- * This enum distinguishes several different modes of accessing previous
- * content in the stream.
- *
- * - noDict        : There is no preceding content.
- * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
- *                   blob being compressed are valid and refer to the preceding
- *                   content (of length ctx->dictSize), which is available
- *                   contiguously preceding in memory the content currently
- *                   being compressed.
- * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
- *                   else in memory, starting at ctx->dictionary with length
- *                   ctx->dictSize.
- * - usingDictCtx  : Everything concerning the preceding content is
- *                   in a separate context, pointed to by ctx->dictCtx.
- *                   ctx->dictionary, ctx->dictSize, and table entries
- *                   in the current context that refer to positions
- *                   preceding the beginning of the current compression are
- *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
- *                   ->dictSize describe the location and size of the preceding
- *                   content, and matches are found by looking in the ctx
- *                   ->dictCtx->hashTable.
- */
-typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
-typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
-
-
-/*-************************************
-*  Local Utils
-**************************************/
-int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
-const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
-int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
-int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
-
-
-/*-****************************************
-*  Internal Definitions, used only in Tests
-*******************************************/
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
-
-int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
-                                     int compressedSize, int maxOutputSize,
-                                     const void* dictStart, size_t dictSize);
-int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
-                                     int compressedSize, int targetOutputSize, int dstCapacity,
-                                     const void* dictStart, size_t dictSize);
-#if defined (__cplusplus)
-}
-#endif
-
-/*-******************************
-*  Compression functions
-********************************/
-LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
-{
-    if (tableType == byU16)
-        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
-    else
-        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
-}
-
-LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
-{
-    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
-    if (LZ4_isLittleEndian()) {
-        const U64 prime5bytes = 889523592379ULL;
-        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
-    } else {
-        const U64 prime8bytes = 11400714785074694791ULL;
-        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
-    }
-}
-
-LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
-{
-    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
-
-#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
-    return LZ4_hash4(LZ4_readLE32(p), tableType);
-#else
-    return LZ4_hash4(LZ4_read32(p), tableType);
-#endif
-}
-
-LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
-{
-    switch (tableType)
-    {
-    default: /* fallthrough */
-    case clearedTable: { /* illegal! */ assert(0); return; }
-    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
-    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
-    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
-    }
-}
-
-LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
-{
-    switch (tableType)
-    {
-    default: /* fallthrough */
-    case clearedTable: /* fallthrough */
-    case byPtr: { /* illegal! */ assert(0); return; }
-    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
-    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
-    }
-}
-
-/* LZ4_putPosition*() : only used in byPtr mode */
-LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
-                                  void* tableBase, tableType_t const tableType)
-{
-    const BYTE** const hashTable = (const BYTE**)tableBase;
-    assert(tableType == byPtr); (void)tableType;
-    hashTable[h] = p;
-}
-
-LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType)
-{
-    U32 const h = LZ4_hashPosition(p, tableType);
-    LZ4_putPositionOnHash(p, h, tableBase, tableType);
-}
-
-/* LZ4_getIndexOnHash() :
- * Index of match position registered in hash table.
- * hash position must be calculated by using base+index, or dictBase+index.
- * Assumption 1 : only valid if tableType == byU32 or byU16.
- * Assumption 2 : h is presumed valid (within limits of hash table)
- */
-LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
-{
-    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
-    if (tableType == byU32) {
-        const U32* const hashTable = (const U32*) tableBase;
-        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
-        return hashTable[h];
-    }
-    if (tableType == byU16) {
-        const U16* const hashTable = (const U16*) tableBase;
-        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
-        return hashTable[h];
-    }
-    assert(0); return 0;  /* forbidden case */
-}
-
-static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType)
-{
-    assert(tableType == byPtr); (void)tableType;
-    { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
-}
-
-LZ4_FORCE_INLINE const BYTE*
-LZ4_getPosition(const BYTE* p,
-                const void* tableBase, tableType_t tableType)
-{
-    U32 const h = LZ4_hashPosition(p, tableType);
-    return LZ4_getPositionOnHash(h, tableBase, tableType);
-}
-
-LZ4_FORCE_INLINE void
-LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
-           const int inputSize,
-           const tableType_t tableType) {
-    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
-     * therefore safe to use no matter what mode we're in. Otherwise, we figure
-     * out if it's safe to leave as is or whether it needs to be reset.
-     */
-    if ((tableType_t)cctx->tableType != clearedTable) {
-        assert(inputSize >= 0);
-        if ((tableType_t)cctx->tableType != tableType
-          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
-          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
-          || tableType == byPtr
-          || inputSize >= 4 KB)
-        {
-            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
-            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
-            cctx->currentOffset = 0;
-            cctx->tableType = (U32)clearedTable;
-        } else {
-            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
-        }
-    }
-
-    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
-     * is faster than compressing without a gap.
-     * However, compressing with currentOffset == 0 is faster still,
-     * so we preserve that case.
-     */
-    if (cctx->currentOffset != 0 && tableType == byU32) {
-        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
-        cctx->currentOffset += 64 KB;
-    }
-
-    /* Finally, clear history */
-    cctx->dictCtx = NULL;
-    cctx->dictionary = NULL;
-    cctx->dictSize = 0;
-}
-
-/** LZ4_compress_generic_validated() :
- *  inlined, to ensure branches are decided at compilation time.
- *  The following conditions are presumed already validated:
- *  - source != NULL
- *  - inputSize > 0
- */
-LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
-                 LZ4_stream_t_internal* const cctx,
-                 const char* const source,
-                 char* const dest,
-                 const int inputSize,
-                 int*  inputConsumed, /* only written when outputDirective == fillOutput */
-                 const int maxOutputSize,
-                 const limitedOutput_directive outputDirective,
-                 const tableType_t tableType,
-                 const dict_directive dictDirective,
-                 const dictIssue_directive dictIssue,
-                 const int acceleration)
-{
-    int result;
-    const BYTE* ip = (const BYTE*)source;
-
-    U32 const startIndex = cctx->currentOffset;
-    const BYTE* base = (const BYTE*)source - startIndex;
-    const BYTE* lowLimit;
-
-    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
-    const BYTE* const dictionary =
-        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
-    const U32 dictSize =
-        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
-    const U32 dictDelta =
-        (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with indexes in current context */
-
-    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
-    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
-    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
-    const BYTE* anchor = (const BYTE*) source;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
-    const BYTE* const matchlimit = iend - LASTLITERALS;
-
-    /* the dictCtx currentOffset is indexed on the start of the dictionary,
-     * while a dictionary in the current context precedes the currentOffset */
-    const BYTE* dictBase = (dictionary == NULL) ? NULL :
-                           (dictDirective == usingDictCtx) ?
-                            dictionary + dictSize - dictCtx->currentOffset :
-                            dictionary + dictSize - startIndex;
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const olimit = op + maxOutputSize;
-
-    U32 offset = 0;
-    U32 forwardH;
-
-    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
-    assert(ip != NULL);
-    if (tableType == byU16) assert(inputSize<LZ4_64Klimit);  /* Size too large (not within 64K limit) */
-    if (tableType == byPtr) assert(dictDirective==noDict);   /* only supported use case with byPtr */
-    /* If init conditions are not met, we don't have to mark stream
-     * as having dirty context, since no action was taken yet */
-    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
-    assert(acceleration >= 1);
-
-    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
-
-    /* Update context state */
-    if (dictDirective == usingDictCtx) {
-        /* Subsequent linked blocks can't use the dictionary. */
-        /* Instead, they use the block we just compressed. */
-        cctx->dictCtx = NULL;
-        cctx->dictSize = (U32)inputSize;
-    } else {
-        cctx->dictSize += (U32)inputSize;
-    }
-    cctx->currentOffset += (U32)inputSize;
-    cctx->tableType = (U32)tableType;
-
-    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
-
-    /* First Byte */
-    {   U32 const h = LZ4_hashPosition(ip, tableType);
-        if (tableType == byPtr) {
-            LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
-        } else {
-            LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
-    }   }
-    ip++; forwardH = LZ4_hashPosition(ip, tableType);
-
-    /* Main Loop */
-    for ( ; ; ) {
-        const BYTE* match;
-        BYTE* token;
-        const BYTE* filledIp;
-
-        /* Find a match */
-        if (tableType == byPtr) {
-            const BYTE* forwardIp = ip;
-            int step = 1;
-            int searchMatchNb = acceleration << LZ4_skipTrigger;
-            do {
-                U32 const h = forwardH;
-                ip = forwardIp;
-                forwardIp += step;
-                step = (searchMatchNb++ >> LZ4_skipTrigger);
-
-                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
-                assert(ip < mflimitPlusOne);
-
-                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
-                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);
-
-            } while ( (match+LZ4_DISTANCE_MAX < ip)
-                   || (LZ4_read32(match) != LZ4_read32(ip)) );
-
-        } else {   /* byU32, byU16 */
-
-            const BYTE* forwardIp = ip;
-            int step = 1;
-            int searchMatchNb = acceleration << LZ4_skipTrigger;
-            do {
-                U32 const h = forwardH;
-                U32 const current = (U32)(forwardIp - base);
-                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
-                assert(matchIndex <= current);
-                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
-                ip = forwardIp;
-                forwardIp += step;
-                step = (searchMatchNb++ >> LZ4_skipTrigger);
-
-                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
-                assert(ip < mflimitPlusOne);
-
-                if (dictDirective == usingDictCtx) {
-                    if (matchIndex < startIndex) {
-                        /* there was no match, try the dictionary */
-                        assert(tableType == byU32);
-                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
-                        match = dictBase + matchIndex;
-                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
-                        lowLimit = dictionary;
-                    } else {
-                        match = base + matchIndex;
-                        lowLimit = (const BYTE*)source;
-                    }
-                } else if (dictDirective == usingExtDict) {
-                    if (matchIndex < startIndex) {
-                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
-                        assert(startIndex - matchIndex >= MINMATCH);
-                        assert(dictBase);
-                        match = dictBase + matchIndex;
-                        lowLimit = dictionary;
-                    } else {
-                        match = base + matchIndex;
-                        lowLimit = (const BYTE*)source;
-                    }
-                } else {   /* single continuous memory segment */
-                    match = base + matchIndex;
-                }
-                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
-
-                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
-                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
-                assert(matchIndex < current);
-                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
-                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
-                    continue;
-                } /* too far */
-                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
-
-                if (LZ4_read32(match) == LZ4_read32(ip)) {
-                    if (maybe_extMem) offset = current - matchIndex;
-                    break;   /* match found */
-                }
-
-            } while(1);
-        }
-
-        /* Catch up */
-        filledIp = ip;
-        assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */
-        if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
-            do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1])));
-        }
-
-        /* Encode Literals */
-        {   unsigned const litLength = (unsigned)(ip - anchor);
-            token = op++;
-            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
-                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
-                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
-            }
-            if ((outputDirective == fillOutput) &&
-                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
-                op--;
-                goto _last_literals;
-            }
-            if (litLength >= RUN_MASK) {
-                int len = (int)(litLength - RUN_MASK);
-                *token = (RUN_MASK<<ML_BITS);
-                for(; len >= 255 ; len-=255) *op++ = 255;
-                *op++ = (BYTE)len;
-            }
-            else *token = (BYTE)(litLength<<ML_BITS);
-
-            /* Copy Literals */
-            LZ4_wildCopy8(op, anchor, op+litLength);
-            op+=litLength;
-            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
-                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
-        }
-
-_next_match:
-        /* at this stage, the following variables must be correctly set :
-         * - ip : at start of LZ operation
-         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
-         * - offset : if maybe_ext_memSegment==1 (constant)
-         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
-         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
-         */
-
-        if ((outputDirective == fillOutput) &&
-            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
-            /* the match was too close to the end, rewind and go to last literals */
-            op = token;
-            goto _last_literals;
-        }
-
-        /* Encode Offset */
-        if (maybe_extMem) {   /* static test */
-            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
-            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
-            LZ4_writeLE16(op, (U16)offset); op+=2;
-        } else  {
-            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
-            assert(ip-match <= LZ4_DISTANCE_MAX);
-            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
-        }
-
-        /* Encode MatchLength */
-        {   unsigned matchCode;
-
-            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
-              && (lowLimit==dictionary) /* match within extDict */ ) {
-                const BYTE* limit = ip + (dictEnd-match);
-                assert(dictEnd > match);
-                if (limit > matchlimit) limit = matchlimit;
-                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
-                ip += (size_t)matchCode + MINMATCH;
-                if (ip==limit) {
-                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
-                    matchCode += more;
-                    ip += more;
-                }
-                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
-            } else {
-                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-                ip += (size_t)matchCode + MINMATCH;
-                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
-            }
-
-            if ((outputDirective) &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
-                if (outputDirective == fillOutput) {
-                    /* Match description too long : reduce it */
-                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
-                    ip -= matchCode - newMatchCode;
-                    assert(newMatchCode < matchCode);
-                    matchCode = newMatchCode;
-                    if (unlikely(ip <= filledIp)) {
-                        /* We have already filled up to filledIp so if ip ends up less than filledIp
-                         * we have positions in the hash table beyond the current position. This is
-                         * a problem if we reuse the hash table. So we have to remove these positions
-                         * from the hash table.
-                         */
-                        const BYTE* ptr;
-                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
-                        for (ptr = ip; ptr <= filledIp; ++ptr) {
-                            U32 const h = LZ4_hashPosition(ptr, tableType);
-                            LZ4_clearHash(h, cctx->hashTable, tableType);
-                        }
-                    }
-                } else {
-                    assert(outputDirective == limitedOutput);
-                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
-                }
-            }
-            if (matchCode >= ML_MASK) {
-                *token += ML_MASK;
-                matchCode -= ML_MASK;
-                LZ4_write32(op, 0xFFFFFFFF);
-                while (matchCode >= 4*255) {
-                    op+=4;
-                    LZ4_write32(op, 0xFFFFFFFF);
-                    matchCode -= 4*255;
-                }
-                op += matchCode / 255;
-                *op++ = (BYTE)(matchCode % 255);
-            } else
-                *token += (BYTE)(matchCode);
-        }
-        /* Ensure we have enough space for the last literals. */
-        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
-
-        anchor = ip;
-
-        /* Test end of chunk */
-        if (ip >= mflimitPlusOne) break;
-
-        /* Fill table */
-        {   U32 const h = LZ4_hashPosition(ip-2, tableType);
-            if (tableType == byPtr) {
-                LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr);
-            } else {
-                U32 const idx = (U32)((ip-2) - base);
-                LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
-        }   }
-
-        /* Test next position */
-        if (tableType == byPtr) {
-
-            match = LZ4_getPosition(ip, cctx->hashTable, tableType);
-            LZ4_putPosition(ip, cctx->hashTable, tableType);
-            if ( (match+LZ4_DISTANCE_MAX >= ip)
-              && (LZ4_read32(match) == LZ4_read32(ip)) )
-            { token=op++; *token=0; goto _next_match; }
-
-        } else {   /* byU32, byU16 */
-
-            U32 const h = LZ4_hashPosition(ip, tableType);
-            U32 const current = (U32)(ip-base);
-            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
-            assert(matchIndex < current);
-            if (dictDirective == usingDictCtx) {
-                if (matchIndex < startIndex) {
-                    /* there was no match, try the dictionary */
-                    assert(tableType == byU32);
-                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
-                    match = dictBase + matchIndex;
-                    lowLimit = dictionary;   /* required for match length counter */
-                    matchIndex += dictDelta;
-                } else {
-                    match = base + matchIndex;
-                    lowLimit = (const BYTE*)source;  /* required for match length counter */
-                }
-            } else if (dictDirective==usingExtDict) {
-                if (matchIndex < startIndex) {
-                    assert(dictBase);
-                    match = dictBase + matchIndex;
-                    lowLimit = dictionary;   /* required for match length counter */
-                } else {
-                    match = base + matchIndex;
-                    lowLimit = (const BYTE*)source;   /* required for match length counter */
-                }
-            } else {   /* single memory segment */
-                match = base + matchIndex;
-            }
-            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
-            assert(matchIndex < current);
-            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
-              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
-              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
-                token=op++;
-                *token=0;
-                if (maybe_extMem) offset = current - matchIndex;
-                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
-                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
-                goto _next_match;
-            }
-        }
-
-        /* Prepare next loop */
-        forwardH = LZ4_hashPosition(++ip, tableType);
-
-    }
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t lastRun = (size_t)(iend - anchor);
-        if ( (outputDirective) &&  /* Check output buffer overflow */
-            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
-            if (outputDirective == fillOutput) {
-                /* adapt lastRun to fill 'dst' */
-                assert(olimit >= op);
-                lastRun  = (size_t)(olimit-op) - 1/*token*/;
-                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
-            } else {
-                assert(outputDirective == limitedOutput);
-                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
-            }
-        }
-        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
-        if (lastRun >= RUN_MASK) {
-            size_t accumulator = lastRun - RUN_MASK;
-            *op++ = RUN_MASK << ML_BITS;
-            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
-            *op++ = (BYTE) accumulator;
-        } else {
-            *op++ = (BYTE)(lastRun<<ML_BITS);
-        }
-        LZ4_memcpy(op, anchor, lastRun);
-        ip = anchor + lastRun;
-        op += lastRun;
-    }
-
-    if (outputDirective == fillOutput) {
-        *inputConsumed = (int) (((const char*)ip)-source);
-    }
-    result = (int)(((char*)op) - dest);
-    assert(result > 0);
-    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
-    return result;
-}
-
-/** LZ4_compress_generic() :
- *  inlined, to ensure branches are decided at compilation time;
- *  takes care of src == (NULL, 0)
- *  and forward the rest to LZ4_compress_generic_validated */
-LZ4_FORCE_INLINE int LZ4_compress_generic(
-                 LZ4_stream_t_internal* const cctx,
-                 const char* const src,
-                 char* const dst,
-                 const int srcSize,
-                 int *inputConsumed, /* only written when outputDirective == fillOutput */
-                 const int dstCapacity,
-                 const limitedOutput_directive outputDirective,
-                 const tableType_t tableType,
-                 const dict_directive dictDirective,
-                 const dictIssue_directive dictIssue,
-                 const int acceleration)
-{
-    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
-                srcSize, dstCapacity);
-
-    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
-    if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
-        if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
-        DEBUGLOG(5, "Generating an empty block");
-        assert(outputDirective == notLimited || dstCapacity >= 1);
-        assert(dst != NULL);
-        dst[0] = 0;
-        if (outputDirective == fillOutput) {
-            assert (inputConsumed != NULL);
-            *inputConsumed = 0;
-        }
-        return 1;
-    }
-    assert(src != NULL);
-
-    return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
-                inputConsumed, /* only written into if outputDirective == fillOutput */
-                dstCapacity, outputDirective,
-                tableType, dictDirective, dictIssue, acceleration);
-}
-
-
-int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
-{
-    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
-    assert(ctx != NULL);
-    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
-    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
-    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
-        if (inputSize < LZ4_64Klimit) {
-            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
-        } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
-            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
-        }
-    } else {
-        if (inputSize < LZ4_64Klimit) {
-            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
-        } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
-            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
-        }
-    }
-}
-
-/**
- * LZ4_compress_fast_extState_fastReset() :
- * A variant of LZ4_compress_fast_extState().
- *
- * Using this variant avoids an expensive initialization step. It is only safe
- * to call if the state buffer is known to be correctly initialized already
- * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
- * "correctly initialized").
- */
-int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
-{
-    LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse;
-    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
-    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
-    assert(ctx != NULL);
-
-    if (dstCapacity >= LZ4_compressBound(srcSize)) {
-        if (srcSize < LZ4_64Klimit) {
-            const tableType_t tableType = byU16;
-            LZ4_prepareTable(ctx, srcSize, tableType);
-            if (ctx->currentOffset) {
-                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
-            } else {
-                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
-            }
-        } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
-            LZ4_prepareTable(ctx, srcSize, tableType);
-            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
-        }
-    } else {
-        if (srcSize < LZ4_64Klimit) {
-            const tableType_t tableType = byU16;
-            LZ4_prepareTable(ctx, srcSize, tableType);
-            if (ctx->currentOffset) {
-                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
-            } else {
-                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
-            }
-        } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
-            LZ4_prepareTable(ctx, srcSize, tableType);
-            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
-        }
-    }
-}
-
-
-int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration)
-{
-    int result;
-#if (LZ4_HEAPMODE)
-    LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
-    if (ctxPtr == NULL) return 0;
-#else
-    LZ4_stream_t ctx;
-    LZ4_stream_t* const ctxPtr = &ctx;
-#endif
-    result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration);
-
-#if (LZ4_HEAPMODE)
-    FREEMEM(ctxPtr);
-#endif
-    return result;
-}
-
-
-int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity)
-{
-    return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1);
-}
-
-
-/* Note!: This function leaves the stream in an unclean/broken state!
- * It is not safe to subsequently use the same state with a _fastReset() or
- * _continue() call without resetting it. */
-static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
-{
-    void* const s = LZ4_initStream(state, sizeof (*state));
-    assert(s != NULL); (void)s;
-
-    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
-        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration);
-    } else {
-        if (*srcSizePtr < LZ4_64Klimit) {
-            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration);
-        } else {
-            tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
-            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration);
-    }   }
-}
-
-int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
-{
-    int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration);
-    /* clean the state on exit */
-    LZ4_initStream(state, sizeof (LZ4_stream_t));
-    return r;
-}
-
-
-int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
-{
-#if (LZ4_HEAPMODE)
-    LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
-    if (ctx == NULL) return 0;
-#else
-    LZ4_stream_t ctxBody;
-    LZ4_stream_t* const ctx = &ctxBody;
-#endif
-
-    int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1);
-
-#if (LZ4_HEAPMODE)
-    FREEMEM(ctx);
-#endif
-    return result;
-}
-
-
-
-/*-******************************
-*  Streaming functions
-********************************/
-
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-LZ4_stream_t* LZ4_createStream(void)
-{
-    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
-    LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
-    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
-    if (lz4s == NULL) return NULL;
-    LZ4_initStream(lz4s, sizeof(*lz4s));
-    return lz4s;
-}
-#endif
-
-static size_t LZ4_stream_t_alignment(void)
-{
-#if LZ4_ALIGN_TEST
-    typedef struct { char c; LZ4_stream_t t; } t_a;
-    return sizeof(t_a) - sizeof(LZ4_stream_t);
-#else
-    return 1;  /* effectively disabled */
-#endif
-}
-
-LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
-{
-    DEBUGLOG(5, "LZ4_initStream");
-    if (buffer == NULL) { return NULL; }
-    if (size < sizeof(LZ4_stream_t)) { return NULL; }
-    if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
-    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
-    return (LZ4_stream_t*)buffer;
-}
-
-/* resetStream is now deprecated,
- * prefer initStream() which is more general */
-void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
-{
-    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
-    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
-}
-
-void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
-    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
-}
-
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
-{
-    if (!LZ4_stream) return 0;   /* support free on NULL */
-    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
-    FREEMEM(LZ4_stream);
-    return (0);
-}
-#endif
-
-
-#define HASH_UNIT sizeof(reg_t)
-int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
-{
-    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
-    const tableType_t tableType = byU32;
-    const BYTE* p = (const BYTE*)dictionary;
-    const BYTE* const dictEnd = p + dictSize;
-    U32 idx32;
-
-    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
-
-    /* It's necessary to reset the context,
-     * and not just continue it with prepareTable()
-     * to avoid any risk of generating overflowing matchIndex
-     * when compressing using this dictionary */
-    LZ4_resetStream(LZ4_dict);
-
-    /* We always increment the offset by 64 KB, since, if the dict is longer,
-     * we truncate it to the last 64k, and if it's shorter, we still want to
-     * advance by a whole window length so we can provide the guarantee that
-     * there are only valid offsets in the window, which allows an optimization
-     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
-     * dictionary isn't a full 64k. */
-    dict->currentOffset += 64 KB;
-
-    if (dictSize < (int)HASH_UNIT) {
-        return 0;
-    }
-
-    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
-    dict->dictionary = p;
-    dict->dictSize = (U32)(dictEnd - p);
-    dict->tableType = (U32)tableType;
-    idx32 = dict->currentOffset - dict->dictSize;
-
-    while (p <= dictEnd-HASH_UNIT) {
-        U32 const h = LZ4_hashPosition(p, tableType);
-        LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
-        p+=3; idx32+=3;
-    }
-
-    return (int)dict->dictSize;
-}
-
-void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
-{
-    const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
-        &(dictionaryStream->internal_donotuse);
-
-    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
-             workingStream, dictionaryStream,
-             dictCtx != NULL ? dictCtx->dictSize : 0);
-
-    if (dictCtx != NULL) {
-        /* If the current offset is zero, we will never look in the
-         * external dictionary context, since there is no value a table
-         * entry can take that indicate a miss. In that case, we need
-         * to bump the offset to something non-zero.
-         */
-        if (workingStream->internal_donotuse.currentOffset == 0) {
-            workingStream->internal_donotuse.currentOffset = 64 KB;
-        }
-
-        /* Don't actually attach an empty dictionary.
-         */
-        if (dictCtx->dictSize == 0) {
-            dictCtx = NULL;
-        }
-    }
-    workingStream->internal_donotuse.dictCtx = dictCtx;
-}
-
-
-static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
-{
-    assert(nextSize >= 0);
-    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
-        /* rescale hash table */
-        U32 const delta = LZ4_dict->currentOffset - 64 KB;
-        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
-        int i;
-        DEBUGLOG(4, "LZ4_renormDictT");
-        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
-            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
-            else LZ4_dict->hashTable[i] -= delta;
-        }
-        LZ4_dict->currentOffset = 64 KB;
-        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
-        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
-    }
-}
-
-
-int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
-                                const char* source, char* dest,
-                                int inputSize, int maxOutputSize,
-                                int acceleration)
-{
-    const tableType_t tableType = byU32;
-    LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
-    const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
-
-    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
-
-    LZ4_renormDictT(streamPtr, inputSize);   /* fix index overflow */
-    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
-    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
-
-    /* invalidate tiny dictionaries */
-    if ( (streamPtr->dictSize < 4)     /* tiny dictionary : not enough for a hash */
-      && (dictEnd != source)           /* prefix mode */
-      && (inputSize > 0)               /* tolerance : don't lose history, in case next invocation would use prefix mode */
-      && (streamPtr->dictCtx == NULL)  /* usingDictCtx */
-      ) {
-        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
-        /* remove dictionary existence from history, to employ faster prefix mode */
-        streamPtr->dictSize = 0;
-        streamPtr->dictionary = (const BYTE*)source;
-        dictEnd = source;
-    }
-
-    /* Check overlapping input/dictionary space */
-    {   const char* const sourceEnd = source + inputSize;
-        if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
-            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
-            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
-            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
-            streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
-        }
-    }
-
-    /* prefix mode : source data follows dictionary */
-    if (dictEnd == source) {
-        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
-        else
-            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
-    }
-
-    /* external dictionary mode */
-    {   int result;
-        if (streamPtr->dictCtx) {
-            /* We depend here on the fact that dictCtx'es (produced by
-             * LZ4_loadDict) guarantee that their tables contain no references
-             * to offsets between dictCtx->currentOffset - 64 KB and
-             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
-             * to use noDictIssue even when the dict isn't a full 64 KB.
-             */
-            if (inputSize > 4 KB) {
-                /* For compressing large blobs, it is faster to pay the setup
-                 * cost to copy the dictionary's tables into the active context,
-                 * so that the compression loop is only looking into one table.
-                 */
-                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
-                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
-            } else {
-                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
-            }
-        } else {  /* small data <= 4 KB */
-            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
-                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
-            } else {
-                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
-            }
-        }
-        streamPtr->dictionary = (const BYTE*)source;
-        streamPtr->dictSize = (U32)inputSize;
-        return result;
-    }
-}
-
-
-/* Hidden debug function, to force-test external dictionary mode */
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
-{
-    LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse;
-    int result;
-
-    LZ4_renormDictT(streamPtr, srcSize);
-
-    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
-        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
-    } else {
-        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
-    }
-
-    streamPtr->dictionary = (const BYTE*)source;
-    streamPtr->dictSize = (U32)srcSize;
-
-    return result;
-}
-
-
-/*! LZ4_saveDict() :
- *  If previously compressed data block is not guaranteed to remain available at its memory location,
- *  save it into a safer place (char* safeBuffer).
- *  Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable,
- *         one can therefore call LZ4_compress_fast_continue() right after.
- * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
- */
-int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
-{
-    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
-
-    DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
-
-    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
-    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
-
-    if (safeBuffer == NULL) assert(dictSize == 0);
-    if (dictSize > 0) {
-        const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
-        assert(dict->dictionary);
-        LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
-    }
-
-    dict->dictionary = (const BYTE*)safeBuffer;
-    dict->dictSize = (U32)dictSize;
-
-    return dictSize;
-}
-
-
-
-/*-*******************************
- *  Decompression functions
- ********************************/
-
-typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
-
-#undef MIN
-#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
-
-
-/* variant for decompress_unsafe()
- * does not know end of input
- * presumes input is well formed
- * note : will consume at least one byte */
-static size_t read_long_length_no_check(const BYTE** pp)
-{
-    size_t b, l = 0;
-    do { b = **pp; (*pp)++; l += b; } while (b==255);
-    DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
-    return l;
-}
-
-/* core decoder variant for LZ4_decompress_fast*()
- * for legacy support only : these entry points are deprecated.
- * - Presumes input is correctly formed (no defense vs malformed inputs)
- * - Does not know input size (presume input buffer is "large enough")
- * - Decompress a full block (only)
- * @return : nb of bytes read from input.
- * Note : this variant is not optimized for speed, just for maintenance.
- *        the goal is to remove support of decompress_fast*() variants by v2.0
-**/
-LZ4_FORCE_INLINE int
-LZ4_decompress_unsafe_generic(
-                 const BYTE* const istart,
-                 BYTE* const ostart,
-                 int decompressedSize,
-
-                 size_t prefixSize,
-                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
-                 const size_t dictSize         /* note: =0 if dictStart==NULL */
-                 )
-{
-    const BYTE* ip = istart;
-    BYTE* op = (BYTE*)ostart;
-    BYTE* const oend = ostart + decompressedSize;
-    const BYTE* const prefixStart = ostart - prefixSize;
-
-    DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
-    if (dictStart == NULL) assert(dictSize == 0);
-
-    while (1) {
-        /* start new sequence */
-        unsigned token = *ip++;
-
-        /* literals */
-        {   size_t ll = token >> ML_BITS;
-            if (ll==15) {
-                /* long literal length */
-                ll += read_long_length_no_check(&ip);
-            }
-            if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
-            LZ4_memmove(op, ip, ll); /* support in-place decompression */
-            op += ll;
-            ip += ll;
-            if ((size_t)(oend-op) < MFLIMIT) {
-                if (op==oend) break;  /* end of block */
-                DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
-                /* incorrect end of block :
-                 * last match must start at least MFLIMIT==12 bytes before end of output block */
-                return -1;
-        }   }
-
-        /* match */
-        {   size_t ml = token & 15;
-            size_t const offset = LZ4_readLE16(ip);
-            ip+=2;
-
-            if (ml==15) {
-                /* long literal length */
-                ml += read_long_length_no_check(&ip);
-            }
-            ml += MINMATCH;
-
-            if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
-
-            {   const BYTE* match = op - offset;
-
-                /* out of range */
-                if (offset > (size_t)(op - prefixStart) + dictSize) {
-                    DEBUGLOG(6, "offset out of range");
-                    return -1;
-                }
-
-                /* check special case : extDict */
-                if (offset > (size_t)(op - prefixStart)) {
-                    /* extDict scenario */
-                    const BYTE* const dictEnd = dictStart + dictSize;
-                    const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
-                    size_t const extml = (size_t)(dictEnd - extMatch);
-                    if (extml > ml) {
-                        /* match entirely within extDict */
-                        LZ4_memmove(op, extMatch, ml);
-                        op += ml;
-                        ml = 0;
-                    } else {
-                        /* match split between extDict & prefix */
-                        LZ4_memmove(op, extMatch, extml);
-                        op += extml;
-                        ml -= extml;
-                    }
-                    match = prefixStart;
-                }
-
-                /* match copy - slow variant, supporting overlap copy */
-                {   size_t u;
-                    for (u=0; u<ml; u++) {
-                        op[u] = match[u];
-            }   }   }
-            op += ml;
-            if ((size_t)(oend-op) < LASTLITERALS) {
-                DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
-                /* incorrect end of block :
-                 * last match must stop at least LASTLITERALS==5 bytes before end of output block */
-                return -1;
-            }
-        } /* match */
-    } /* main loop */
-    return (int)(ip - istart);
-}
-
-
-/* Read the variable-length literal or match length.
- *
- * @ip : input pointer
- * @ilimit : position after which if length is not decoded, the input is necessarily corrupted.
- * @initial_check - check ip >= ipmax before start of loop.  Returns initial_error if so.
- * @error (output) - error code.  Must be set to 0 before call.
-**/
-typedef size_t Rvl_t;
-static const Rvl_t rvl_error = (Rvl_t)(-1);
-LZ4_FORCE_INLINE Rvl_t
-read_variable_length(const BYTE** ip, const BYTE* ilimit,
-                     int initial_check)
-{
-    Rvl_t s, length = 0;
-    assert(ip != NULL);
-    assert(*ip !=  NULL);
-    assert(ilimit != NULL);
-    if (initial_check && unlikely((*ip) >= ilimit)) {    /* read limit reached */
-        return rvl_error;
-    }
-    s = **ip;
-    (*ip)++;
-    length += s;
-    if (unlikely((*ip) > ilimit)) {    /* read limit reached */
-        return rvl_error;
-    }
-    /* accumulator overflow detection (32-bit mode only) */
-    if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
-        return rvl_error;
-    }
-    if (likely(s != 255)) return length;
-    do {
-        s = **ip;
-        (*ip)++;
-        length += s;
-        if (unlikely((*ip) > ilimit)) {    /* read limit reached */
-            return rvl_error;
-        }
-        /* accumulator overflow detection (32-bit mode only) */
-        if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
-            return rvl_error;
-        }
-    } while (s == 255);
-
-    return length;
-}
-
-/*! LZ4_decompress_generic() :
- *  This generic decompression function covers all use cases.
- *  It shall be instantiated several times, using different sets of directives.
- *  Note that it is important for performance that this function really get inlined,
- *  in order to remove useless branches during compilation optimization.
- */
-LZ4_FORCE_INLINE int
-LZ4_decompress_generic(
-                 const char* const src,
-                 char* const dst,
-                 int srcSize,
-                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
-
-                 earlyEnd_directive partialDecoding,  /* full, partial */
-                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
-                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
-                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
-                 const size_t dictSize         /* note : = 0 if noDict */
-                 )
-{
-    if ((src == NULL) || (outputSize < 0)) { return -1; }
-
-    {   const BYTE* ip = (const BYTE*) src;
-        const BYTE* const iend = ip + srcSize;
-
-        BYTE* op = (BYTE*) dst;
-        BYTE* const oend = op + outputSize;
-        BYTE* cpy;
-
-        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
-
-        const int checkOffset = (dictSize < (int)(64 KB));
-
-
-        /* Set up the "end" pointers for the shortcut. */
-        const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
-        const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
-
-        const BYTE* match;
-        size_t offset;
-        unsigned token;
-        size_t length;
-
-
-        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
-
-        /* Special cases */
-        assert(lowPrefix <= op);
-        if (unlikely(outputSize==0)) {
-            /* Empty output buffer */
-            if (partialDecoding) return 0;
-            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
-        }
-        if (unlikely(srcSize==0)) { return -1; }
-
-    /* LZ4_FAST_DEC_LOOP:
-     * designed for modern OoO performance cpus,
-     * where copying reliably 32-bytes is preferable to an unpredictable branch.
-     * note : fast loop may show a regression for some client arm chips. */
-#if LZ4_FAST_DEC_LOOP
-        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
-            DEBUGLOG(6, "skip fast decode loop");
-            goto safe_decode;
-        }
-
-        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
-        DEBUGLOG(6, "using fast decode loop");
-        while (1) {
-            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
-            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
-            assert(ip < iend);
-            token = *ip++;
-            length = token >> ML_BITS;  /* literal length */
-
-            /* decode literal length */
-            if (length == RUN_MASK) {
-                size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
-                if (addl == rvl_error) {
-                    DEBUGLOG(6, "error reading long literal length");
-                    goto _output_error;
-                }
-                length += addl;
-                if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
-                if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
-
-                /* copy literals */
-                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
-                if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
-                LZ4_wildCopy32(op, ip, op+length);
-                ip += length; op += length;
-            } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) {
-                /* We don't need to check oend, since we check it once for each loop below */
-                DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
-                /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
-                LZ4_memcpy(op, ip, 16);
-                ip += length; op += length;
-            } else {
-                goto safe_literal_copy;
-            }
-
-            /* get offset */
-            offset = LZ4_readLE16(ip); ip+=2;
-            DEBUGLOG(6, " offset = %zu", offset);
-            match = op - offset;
-            assert(match <= op);  /* overflow check */
-
-            /* get matchlength */
-            length = token & ML_MASK;
-
-            if (length == ML_MASK) {
-                size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
-                if (addl == rvl_error) {
-                    DEBUGLOG(6, "error reading long match length");
-                    goto _output_error;
-                }
-                length += addl;
-                length += MINMATCH;
-                if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
-                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
-                    goto safe_match_copy;
-                }
-            } else {
-                length += MINMATCH;
-                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
-                    goto safe_match_copy;
-                }
-
-                /* Fastpath check: skip LZ4_wildCopy32 when true */
-                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
-                    if (offset >= 8) {
-                        assert(match >= lowPrefix);
-                        assert(match <= op);
-                        assert(op + 18 <= oend);
-
-                        LZ4_memcpy(op, match, 8);
-                        LZ4_memcpy(op+8, match+8, 8);
-                        LZ4_memcpy(op+16, match+16, 2);
-                        op += length;
-                        continue;
-            }   }   }
-
-            if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
-                DEBUGLOG(6, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
-                goto _output_error;
-            }
-            /* match starting within external dictionary */
-            if ((dict==usingExtDict) && (match < lowPrefix)) {
-                assert(dictEnd != NULL);
-                if (unlikely(op+length > oend-LASTLITERALS)) {
-                    if (partialDecoding) {
-                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
-                        length = MIN(length, (size_t)(oend-op));
-                    } else {
-                        DEBUGLOG(6, "end-of-block condition violated")
-                        goto _output_error;
-                }   }
-
-                if (length <= (size_t)(lowPrefix-match)) {
-                    /* match fits entirely within external dictionary : just copy */
-                    LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
-                    op += length;
-                } else {
-                    /* match stretches into both external dictionary and current block */
-                    size_t const copySize = (size_t)(lowPrefix - match);
-                    size_t const restSize = length - copySize;
-                    LZ4_memcpy(op, dictEnd - copySize, copySize);
-                    op += copySize;
-                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
-                        BYTE* const endOfMatch = op + restSize;
-                        const BYTE* copyFrom = lowPrefix;
-                        while (op < endOfMatch) { *op++ = *copyFrom++; }
-                    } else {
-                        LZ4_memcpy(op, lowPrefix, restSize);
-                        op += restSize;
-                }   }
-                continue;
-            }
-
-            /* copy match within block */
-            cpy = op + length;
-
-            assert((op <= oend) && (oend-op >= 32));
-            if (unlikely(offset<16)) {
-                LZ4_memcpy_using_offset(op, match, cpy, offset);
-            } else {
-                LZ4_wildCopy32(op, match, cpy);
-            }
-
-            op = cpy;   /* wildcopy correction */
-        }
-    safe_decode:
-#endif
-
-        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
-        DEBUGLOG(6, "using safe decode loop");
-        while (1) {
-            assert(ip < iend);
-            token = *ip++;
-            length = token >> ML_BITS;  /* literal length */
-
-            /* A two-stage shortcut for the most common case:
-             * 1) If the literal length is 0..14, and there is enough space,
-             * enter the shortcut and copy 16 bytes on behalf of the literals
-             * (in the fast mode, only 8 bytes can be safely copied this way).
-             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
-             * manner; but we ensure that there's enough space in the output for
-             * those 18 bytes earlier, upon entering the shortcut (in other words,
-             * there is a combined check for both stages).
-             */
-            if ( (length != RUN_MASK)
-                /* strictly "less than" on input, to re-enter the loop with at least one byte */
-              && likely((ip < shortiend) & (op <= shortoend)) ) {
-                /* Copy the literals */
-                LZ4_memcpy(op, ip, 16);
-                op += length; ip += length;
-
-                /* The second stage: prepare for match copying, decode full info.
-                 * If it doesn't work out, the info won't be wasted. */
-                length = token & ML_MASK; /* match length */
-                offset = LZ4_readLE16(ip); ip += 2;
-                match = op - offset;
-                assert(match <= op); /* check overflow */
-
-                /* Do not deal with overlapping matches. */
-                if ( (length != ML_MASK)
-                  && (offset >= 8)
-                  && (dict==withPrefix64k || match >= lowPrefix) ) {
-                    /* Copy the match. */
-                    LZ4_memcpy(op + 0, match + 0, 8);
-                    LZ4_memcpy(op + 8, match + 8, 8);
-                    LZ4_memcpy(op +16, match +16, 2);
-                    op += length + MINMATCH;
-                    /* Both stages worked, load the next token. */
-                    continue;
-                }
-
-                /* The second stage didn't work out, but the info is ready.
-                 * Propel it right to the point of match copying. */
-                goto _copy_match;
-            }
-
-            /* decode literal length */
-            if (length == RUN_MASK) {
-                size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
-                if (addl == rvl_error) { goto _output_error; }
-                length += addl;
-                if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
-                if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
-            }
-
-#if LZ4_FAST_DEC_LOOP
-        safe_literal_copy:
-#endif
-            /* copy literals */
-            cpy = op+length;
-
-            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
-            if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
-                /* We've either hit the input parsing restriction or the output parsing restriction.
-                 * In the normal scenario, decoding a full block, it must be the last sequence,
-                 * otherwise it's an error (invalid input or dimensions).
-                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
-                 */
-                if (partialDecoding) {
-                    /* Since we are partial decoding we may be in this block because of the output parsing
-                     * restriction, which is not valid since the output buffer is allowed to be undersized.
-                     */
-                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
-                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
-                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
-                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
-                    /* Finishing in the middle of a literals segment,
-                     * due to lack of input.
-                     */
-                    if (ip+length > iend) {
-                        length = (size_t)(iend-ip);
-                        cpy = op + length;
-                    }
-                    /* Finishing in the middle of a literals segment,
-                     * due to lack of output space.
-                     */
-                    if (cpy > oend) {
-                        cpy = oend;
-                        assert(op<=oend);
-                        length = (size_t)(oend-op);
-                    }
-                } else {
-                     /* We must be on the last sequence (or invalid) because of the parsing limitations
-                      * so check that we exactly consume the input and don't overrun the output buffer.
-                      */
-                    if ((ip+length != iend) || (cpy > oend)) {
-                        DEBUGLOG(6, "should have been last run of literals")
-                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
-                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
-                        goto _output_error;
-                    }
-                }
-                LZ4_memmove(op, ip, length);  /* supports overlapping memory regions, for in-place decompression scenarios */
-                ip += length;
-                op += length;
-                /* Necessarily EOF when !partialDecoding.
-                 * When partialDecoding, it is EOF if we've either
-                 * filled the output buffer or
-                 * can't proceed with reading an offset for following match.
-                 */
-                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
-                    break;
-                }
-            } else {
-                LZ4_wildCopy8(op, ip, cpy);   /* can overwrite up to 8 bytes beyond cpy */
-                ip += length; op = cpy;
-            }
-
-            /* get offset */
-            offset = LZ4_readLE16(ip); ip+=2;
-            match = op - offset;
-
-            /* get matchlength */
-            length = token & ML_MASK;
-
-    _copy_match:
-            if (length == ML_MASK) {
-                size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
-                if (addl == rvl_error) { goto _output_error; }
-                length += addl;
-                if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
-            }
-            length += MINMATCH;
-
-#if LZ4_FAST_DEC_LOOP
-        safe_match_copy:
-#endif
-            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-            /* match starting within external dictionary */
-            if ((dict==usingExtDict) && (match < lowPrefix)) {
-                assert(dictEnd != NULL);
-                if (unlikely(op+length > oend-LASTLITERALS)) {
-                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
-                    else goto _output_error;   /* doesn't respect parsing restriction */
-                }
-
-                if (length <= (size_t)(lowPrefix-match)) {
-                    /* match fits entirely within external dictionary : just copy */
-                    LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
-                    op += length;
-                } else {
-                    /* match stretches into both external dictionary and current block */
-                    size_t const copySize = (size_t)(lowPrefix - match);
-                    size_t const restSize = length - copySize;
-                    LZ4_memcpy(op, dictEnd - copySize, copySize);
-                    op += copySize;
-                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
-                        BYTE* const endOfMatch = op + restSize;
-                        const BYTE* copyFrom = lowPrefix;
-                        while (op < endOfMatch) *op++ = *copyFrom++;
-                    } else {
-                        LZ4_memcpy(op, lowPrefix, restSize);
-                        op += restSize;
-                }   }
-                continue;
-            }
-            assert(match >= lowPrefix);
-
-            /* copy match within block */
-            cpy = op + length;
-
-            /* partialDecoding : may end anywhere within the block */
-            assert(op<=oend);
-            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
-                size_t const mlen = MIN(length, (size_t)(oend-op));
-                const BYTE* const matchEnd = match + mlen;
-                BYTE* const copyEnd = op + mlen;
-                if (matchEnd > op) {   /* overlap copy */
-                    while (op < copyEnd) { *op++ = *match++; }
-                } else {
-                    LZ4_memcpy(op, match, mlen);
-                }
-                op = copyEnd;
-                if (op == oend) { break; }
-                continue;
-            }
-
-            if (unlikely(offset<8)) {
-                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
-                op[0] = match[0];
-                op[1] = match[1];
-                op[2] = match[2];
-                op[3] = match[3];
-                match += inc32table[offset];
-                LZ4_memcpy(op+4, match, 4);
-                match -= dec64table[offset];
-            } else {
-                LZ4_memcpy(op, match, 8);
-                match += 8;
-            }
-            op += 8;
-
-            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
-                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
-                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
-                if (op < oCopyLimit) {
-                    LZ4_wildCopy8(op, match, oCopyLimit);
-                    match += oCopyLimit - op;
-                    op = oCopyLimit;
-                }
-                while (op < cpy) { *op++ = *match++; }
-            } else {
-                LZ4_memcpy(op, match, 8);
-                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
-            }
-            op = cpy;   /* wildcopy correction */
-        }
-
-        /* end of decoding */
-        DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
-        return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
-
-        /* Overflow error detected */
-    _output_error:
-        return (int) (-(((const char*)ip)-src))-1;
-    }
-}
-
-
-/*===== Instantiate the API decoding functions. =====*/
-
-LZ4_FORCE_O2
-int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
-                                  decode_full_block, noDict,
-                                  (BYTE*)dest, NULL, 0);
-}
-
-LZ4_FORCE_O2
-int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
-{
-    dstCapacity = MIN(targetOutputSize, dstCapacity);
-    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
-                                  partial_decode,
-                                  noDict, (BYTE*)dst, NULL, 0);
-}
-
-LZ4_FORCE_O2
-int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
-{
-    DEBUGLOG(5, "LZ4_decompress_fast");
-    return LZ4_decompress_unsafe_generic(
-                (const BYTE*)source, (BYTE*)dest, originalSize,
-                0, NULL, 0);
-}
-
-/*===== Instantiate a few more decoding cases, used more than once. =====*/
-
-LZ4_FORCE_O2 /* Exported, an obsolete API function. */
-int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  decode_full_block, withPrefix64k,
-                                  (BYTE*)dest - 64 KB, NULL, 0);
-}
-
-LZ4_FORCE_O2
-static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
-{
-    dstCapacity = MIN(targetOutputSize, dstCapacity);
-    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
-                                  partial_decode, withPrefix64k,
-                                  (BYTE*)dest - 64 KB, NULL, 0);
-}
-
-/* Another obsolete API function, paired with the previous one. */
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
-{
-    return LZ4_decompress_unsafe_generic(
-                (const BYTE*)source, (BYTE*)dest, originalSize,
-                64 KB, NULL, 0);
-}
-
-LZ4_FORCE_O2
-static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
-                                               size_t prefixSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  decode_full_block, noDict,
-                                  (BYTE*)dest-prefixSize, NULL, 0);
-}
-
-LZ4_FORCE_O2
-static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
-                                               size_t prefixSize)
-{
-    dstCapacity = MIN(targetOutputSize, dstCapacity);
-    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
-                                  partial_decode, noDict,
-                                  (BYTE*)dest-prefixSize, NULL, 0);
-}
-
-LZ4_FORCE_O2
-int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
-                                     int compressedSize, int maxOutputSize,
-                                     const void* dictStart, size_t dictSize)
-{
-    DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  decode_full_block, usingExtDict,
-                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
-}
-
-LZ4_FORCE_O2
-int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
-                                     int compressedSize, int targetOutputSize, int dstCapacity,
-                                     const void* dictStart, size_t dictSize)
-{
-    dstCapacity = MIN(targetOutputSize, dstCapacity);
-    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
-                                  partial_decode, usingExtDict,
-                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
-}
-
-LZ4_FORCE_O2
-static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
-                                       const void* dictStart, size_t dictSize)
-{
-    return LZ4_decompress_unsafe_generic(
-                (const BYTE*)source, (BYTE*)dest, originalSize,
-                0, (const BYTE*)dictStart, dictSize);
-}
-
-/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
- * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
- * These routines are used only once, in LZ4_decompress_*_continue().
- */
-LZ4_FORCE_INLINE
-int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
-                                   size_t prefixSize, const void* dictStart, size_t dictSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  decode_full_block, usingExtDict,
-                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
-}
-
-/*===== streaming decompression functions =====*/
-
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-LZ4_streamDecode_t* LZ4_createStreamDecode(void)
-{
-    LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
-    return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
-}
-
-int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
-{
-    if (LZ4_stream == NULL) { return 0; }  /* support free on NULL */
-    FREEMEM(LZ4_stream);
-    return 0;
-}
-#endif
-
-/*! LZ4_setStreamDecode() :
- *  Use this function to instruct where to find the dictionary.
- *  This function is not necessary if previous data is still available where it was decoded.
- *  Loading a size of 0 is allowed (same effect as no dictionary).
- * @return : 1 if OK, 0 if error
- */
-int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
-{
-    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
-    lz4sd->prefixSize = (size_t)dictSize;
-    if (dictSize) {
-        assert(dictionary != NULL);
-        lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
-    } else {
-        lz4sd->prefixEnd = (const BYTE*) dictionary;
-    }
-    lz4sd->externalDict = NULL;
-    lz4sd->extDictSize  = 0;
-    return 1;
-}
-
-/*! LZ4_decoderRingBufferSize() :
- *  when setting a ring buffer for streaming decompression (optional scenario),
- *  provides the minimum size of this ring buffer
- *  to be compatible with any source respecting maxBlockSize condition.
- *  Note : in a ring buffer scenario,
- *  blocks are presumed decompressed next to each other.
- *  When not enough space remains for next block (remainingSize < maxBlockSize),
- *  decoding resumes from beginning of ring buffer.
- * @return : minimum ring buffer size,
- *           or 0 if there is an error (invalid maxBlockSize).
- */
-int LZ4_decoderRingBufferSize(int maxBlockSize)
-{
-    if (maxBlockSize < 0) return 0;
-    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
-    if (maxBlockSize < 16) maxBlockSize = 16;
-    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
-}
-
-/*
-*_continue() :
-    These decoding functions allow decompression of multiple blocks in "streaming" mode.
-    Previously decoded blocks must still be available at the memory position where they were decoded.
-    If it's not possible, save the relevant part of decoded data into a safe buffer,
-    and indicate where it stands using LZ4_setStreamDecode()
-*/
-LZ4_FORCE_O2
-int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
-{
-    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
-    int result;
-
-    if (lz4sd->prefixSize == 0) {
-        /* The first call, no dictionary yet. */
-        assert(lz4sd->extDictSize == 0);
-        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize = (size_t)result;
-        lz4sd->prefixEnd = (BYTE*)dest + result;
-    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
-        /* They're rolling the current segment. */
-        if (lz4sd->prefixSize >= 64 KB - 1)
-            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
-        else if (lz4sd->extDictSize == 0)
-            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
-                                                         lz4sd->prefixSize);
-        else
-            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
-                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize += (size_t)result;
-        lz4sd->prefixEnd  += result;
-    } else {
-        /* The buffer wraps around, or they're switching to another buffer. */
-        lz4sd->extDictSize = lz4sd->prefixSize;
-        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
-                                                  lz4sd->externalDict, lz4sd->extDictSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize = (size_t)result;
-        lz4sd->prefixEnd  = (BYTE*)dest + result;
-    }
-
-    return result;
-}
-
-LZ4_FORCE_O2 int
-LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
-                        const char* source, char* dest, int originalSize)
-{
-    LZ4_streamDecode_t_internal* const lz4sd =
-        (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
-    int result;
-
-    DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
-    assert(originalSize >= 0);
-
-    if (lz4sd->prefixSize == 0) {
-        DEBUGLOG(5, "first invocation : no prefix nor extDict");
-        assert(lz4sd->extDictSize == 0);
-        result = LZ4_decompress_fast(source, dest, originalSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize = (size_t)originalSize;
-        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
-    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
-        DEBUGLOG(5, "continue using existing prefix");
-        result = LZ4_decompress_unsafe_generic(
-                        (const BYTE*)source, (BYTE*)dest, originalSize,
-                        lz4sd->prefixSize,
-                        lz4sd->externalDict, lz4sd->extDictSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize += (size_t)originalSize;
-        lz4sd->prefixEnd  += originalSize;
-    } else {
-        DEBUGLOG(5, "prefix becomes extDict");
-        lz4sd->extDictSize = lz4sd->prefixSize;
-        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
-                                             lz4sd->externalDict, lz4sd->extDictSize);
-        if (result <= 0) return result;
-        lz4sd->prefixSize = (size_t)originalSize;
-        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
-    }
-
-    return result;
-}
-
-
-/*
-Advanced decoding functions :
-*_usingDict() :
-    These decoding functions work the same as "_continue" ones,
-    the dictionary must be explicitly provided within parameters
-*/
-
-int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
-{
-    if (dictSize==0)
-        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
-    if (dictStart+dictSize == dest) {
-        if (dictSize >= 64 KB - 1) {
-            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
-        }
-        assert(dictSize >= 0);
-        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
-    }
-    assert(dictSize >= 0);
-    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
-}
-
-int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
-{
-    if (dictSize==0)
-        return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
-    if (dictStart+dictSize == dest) {
-        if (dictSize >= 64 KB - 1) {
-            return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
-        }
-        assert(dictSize >= 0);
-        return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
-    }
-    assert(dictSize >= 0);
-    return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
-}
-
-int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
-{
-    if (dictSize==0 || dictStart+dictSize == dest)
-        return LZ4_decompress_unsafe_generic(
-                        (const BYTE*)source, (BYTE*)dest, originalSize,
-                        (size_t)dictSize, NULL, 0);
-    assert(dictSize >= 0);
-    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
-}
-
-
-/*=*************************************************
-*  Obsolete Functions
-***************************************************/
-/* obsolete compression functions */
-int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
-{
-    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
-}
-int LZ4_compress(const char* src, char* dest, int srcSize)
-{
-    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
-}
-int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
-{
-    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
-}
-int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
-{
-    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
-}
-int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
-{
-    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
-}
-int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
-{
-    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
-}
-
-/*
-These decompression functions are deprecated and should no longer be used.
-They are only provided here for compatibility with older user programs.
-- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
-- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
-*/
-int LZ4_uncompress (const char* source, char* dest, int outputSize)
-{
-    return LZ4_decompress_fast(source, dest, outputSize);
-}
-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
-{
-    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
-}
-
-/* Obsolete Streaming functions */
-
-int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
-
-int LZ4_resetStreamState(void* state, char* inputBuffer)
-{
-    (void)inputBuffer;
-    LZ4_resetStream((LZ4_stream_t*)state);
-    return 0;
-}
-
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-void* LZ4_create (char* inputBuffer)
-{
-    (void)inputBuffer;
-    return LZ4_createStream();
-}
-#endif
-
-char* LZ4_slideInputBuffer (void* state)
-{
-    /* avoid const char * -> char * conversion warning */
-    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
-}
-
-#endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/include/gtkwave/lz4.h b/include/gtkwave/lz4.h
deleted file mode 100644
index 7a2dbfd4b..000000000
--- a/include/gtkwave/lz4.h
+++ /dev/null
@@ -1,868 +0,0 @@
-/*
- *  LZ4 - Fast LZ compression algorithm
- *  Header File
- *  Copyright (C) 2011-2023, Yann Collet.
-
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-    - LZ4 homepage : http://www.lz4.org
-    - LZ4 source repository : https://github.com/lz4/lz4
-*/
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#ifndef LZ4_H_2983827168210
-#define LZ4_H_2983827168210
-
-/* --- Dependency --- */
-#include <stddef.h>   /* size_t */
-
-
-/**
-  Introduction
-
-  LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
-  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
-  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
-
-  The LZ4 compression library provides in-memory compression and decompression functions.
-  It gives full buffer control to user.
-  Compression can be done in:
-    - a single step (described as Simple Functions)
-    - a single step, reusing a context (described in Advanced Functions)
-    - unbounded multiple steps (described as Streaming compression)
-
-  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
-  Decompressing such a compressed block requires additional metadata.
-  Exact metadata depends on exact decompression function.
-  For the typical case of LZ4_decompress_safe(),
-  metadata includes block's compressed size, and maximum bound of decompressed size.
-  Each application is free to encode and pass such metadata in whichever way it wants.
-
-  lz4.h only handle blocks, it can not generate Frames.
-
-  Blocks are different from Frames (doc/lz4_Frame_format.md).
-  Frames bundle both blocks and metadata in a specified manner.
-  Embedding metadata is required for compressed data to be self-contained and portable.
-  Frame format is delivered through a companion API, declared in lz4frame.h.
-  The `lz4` CLI can only manage frames.
-*/
-
-/*^***************************************************************
-*  Export parameters
-*****************************************************************/
-/*
-*  LZ4_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*  LZ4LIB_VISIBILITY :
-*  Control library symbols visibility.
-*/
-#ifndef LZ4LIB_VISIBILITY
-#  if defined(__GNUC__) && (__GNUC__ >= 4)
-#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
-#  else
-#    define LZ4LIB_VISIBILITY
-#  endif
-#endif
-#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
-#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
-#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
-#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-#  define LZ4LIB_API LZ4LIB_VISIBILITY
-#endif
-
-/*! LZ4_FREESTANDING :
- *  When this macro is set to 1, it enables "freestanding mode" that is
- *  suitable for typical freestanding environment which doesn't support
- *  standard C library.
- *
- *  - LZ4_FREESTANDING is a compile-time switch.
- *  - It requires the following macros to be defined:
- *    LZ4_memcpy, LZ4_memmove, LZ4_memset.
- *  - It only enables LZ4/HC functions which don't use heap.
- *    All LZ4F_* functions are not supported.
- *  - See tests/freestanding.c to check its basic setup.
- */
-#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1)
-#  define LZ4_HEAPMODE 0
-#  define LZ4HC_HEAPMODE 0
-#  define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1
-#  if !defined(LZ4_memcpy)
-#    error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'."
-#  endif
-#  if !defined(LZ4_memset)
-#    error "LZ4_FREESTANDING requires macro 'LZ4_memset'."
-#  endif
-#  if !defined(LZ4_memmove)
-#    error "LZ4_FREESTANDING requires macro 'LZ4_memmove'."
-#  endif
-#elif ! defined(LZ4_FREESTANDING)
-#  define LZ4_FREESTANDING 0
-#endif
-
-
-/*------   Version   ------*/
-#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  5    /* for tweaks, bug-fixes, or development */
-
-#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
-
-#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
-#define LZ4_QUOTE(str) #str
-#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
-#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)  /* requires v1.7.3+ */
-
-LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version; requires v1.3.0+ */
-LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version; requires v1.7.5+ */
-
-
-/*-************************************
-*  Tuning memory usage
-**************************************/
-/*!
- * LZ4_MEMORY_USAGE :
- * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB)
- * Increasing memory usage improves compression ratio, generally at the cost of speed.
- * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality.
- * Default value is 14, for 16KB, which nicely fits into most L1 caches.
- */
-#ifndef LZ4_MEMORY_USAGE
-# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
-#endif
-
-#define LZ4_MEMORY_USAGE_MIN 10
-#define LZ4_MEMORY_USAGE_DEFAULT 14
-#define LZ4_MEMORY_USAGE_MAX 20
-
-#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
-#  error "LZ4_MEMORY_USAGE is too small !"
-#endif
-
-#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
-#  error "LZ4_MEMORY_USAGE is too large !"
-#endif
-
-/*-************************************
-*  Simple Functions
-**************************************/
-/*! LZ4_compress_default() :
- *  Compresses 'srcSize' bytes from buffer 'src'
- *  into already allocated 'dst' buffer of size 'dstCapacity'.
- *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
- *  It also runs faster, so it's a recommended setting.
- *  If the function cannot compress 'src' into a more limited 'dst' budget,
- *  compression stops *immediately*, and the function result is zero.
- *  In which case, 'dst' content is undefined (invalid).
- *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
- *      dstCapacity : size of buffer 'dst' (which must be already allocated)
- *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
- *                or 0 if compression fails
- * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
- */
-LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
-
-/*! LZ4_decompress_safe() :
- * @compressedSize : is the exact complete size of the compressed block.
- * @dstCapacity : is the size of destination buffer (which must be already allocated),
- *                presumed an upper bound of decompressed size.
- * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
- *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
- *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
- * Note 1 : This function is protected against malicious data packets :
- *          it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
- *          even if the compressed block is maliciously modified to order the decoder to do these actions.
- *          In such case, the decoder stops immediately, and considers the compressed block malformed.
- * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
- *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
- *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
- */
-LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
-
-
-/*-************************************
-*  Advanced Functions
-**************************************/
-#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
-#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
-
-/*! LZ4_compressBound() :
-    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
-    This function is primarily useful for memory allocation purposes (destination buffer size).
-    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
-        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
-        return : maximum output size in a "worst case" scenario
-              or 0, if input size is incorrect (too large or negative)
-*/
-LZ4LIB_API int LZ4_compressBound(int inputSize);
-
-/*! LZ4_compress_fast() :
-    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
-    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
-    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
-    An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
-    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
-*/
-LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-
-
-/*! LZ4_compress_fast_extState() :
- *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
- *  Use LZ4_sizeofState() to know how much memory must be allocated,
- *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
- *  Then, provide this buffer as `void* state` to compression function.
- */
-LZ4LIB_API int LZ4_sizeofState(void);
-LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-
-/*! LZ4_compress_destSize() :
- *  Reverse the logic : compresses as much data as possible from 'src' buffer
- *  into already allocated buffer 'dst', of size >= 'dstCapacity'.
- *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
- *  or fill 'dst' buffer completely with as much data as possible from 'src'.
- *  note: acceleration parameter is fixed to "default".
- *
- * *srcSizePtr : in+out parameter. Initially contains size of input.
- *               Will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
- *               New value is necessarily <= input value.
- * @return : Nb bytes written into 'dst' (necessarily <= dstCapacity)
- *           or 0 if compression fails.
- *
- * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
- *        the produced compressed content could, in specific circumstances,
- *        require to be decompressed into a destination buffer larger
- *        by at least 1 byte than the content to decompress.
- *        If an application uses `LZ4_compress_destSize()`,
- *        it's highly recommended to update liblz4 to v1.9.2 or better.
- *        If this can't be done or ensured,
- *        the receiving decompression function should provide
- *        a dstCapacity which is > decompressedSize, by at least 1 byte.
- *        See https://github.com/lz4/lz4/issues/859 for details
- */
-LZ4LIB_API int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize);
-
-/*! LZ4_decompress_safe_partial() :
- *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
- *  into destination buffer 'dst' of size 'dstCapacity'.
- *  Up to 'targetOutputSize' bytes will be decoded.
- *  The function stops decoding on reaching this objective.
- *  This can be useful to boost performance
- *  whenever only the beginning of a block is required.
- *
- * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
- *           If source stream is detected malformed, function returns a negative result.
- *
- *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
- *
- *  Note 2 : targetOutputSize must be <= dstCapacity
- *
- *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
- *           so dstCapacity is kind of redundant.
- *           This is because in older versions of this function,
- *           decoding operation would still write complete sequences.
- *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
- *           it could write more bytes, though only up to dstCapacity.
- *           Some "margin" used to be required for this operation to work properly.
- *           Thankfully, this is no longer necessary.
- *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
- *
- *  Note 4 : If srcSize is the exact size of the block,
- *           then targetOutputSize can be any value,
- *           including larger than the block's decompressed size.
- *           The function will, at most, generate block's decompressed size.
- *
- *  Note 5 : If srcSize is _larger_ than block's compressed size,
- *           then targetOutputSize **MUST** be <= block's decompressed size.
- *           Otherwise, *silent corruption will occur*.
- */
-LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
-
-
-/*-*********************************************
-*  Streaming Compression Functions
-***********************************************/
-typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
-
-/*!
- Note about RC_INVOKED
-
- - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio).
-   https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros
-
- - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars)
-   and reports warning "RC4011: identifier truncated".
-
- - To eliminate the warning, we surround long preprocessor symbol with
-   "#if !defined(RC_INVOKED) ... #endif" block that means
-   "skip this block when rc.exe is trying to read it".
-*/
-#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
-LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
-#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
-#endif
-
-/*! LZ4_resetStream_fast() : v1.9.0+
- *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
- *  (e.g., LZ4_compress_fast_continue()).
- *
- *  An LZ4_stream_t must be initialized once before usage.
- *  This is automatically done when created by LZ4_createStream().
- *  However, should the LZ4_stream_t be simply declared on stack (for example),
- *  it's necessary to initialize it first, using LZ4_initStream().
- *
- *  After init, start any new stream with LZ4_resetStream_fast().
- *  A same LZ4_stream_t can be re-used multiple times consecutively
- *  and compress multiple streams,
- *  provided that it starts each new stream with LZ4_resetStream_fast().
- *
- *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
- *  but is not compatible with memory regions containing garbage data.
- *
- *  Note: it's only useful to call LZ4_resetStream_fast()
- *        in the context of streaming compression.
- *        The *extState* functions perform their own resets.
- *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
- */
-LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
-
-/*! LZ4_loadDict() :
- *  Use this function to reference a static dictionary into LZ4_stream_t.
- *  The dictionary must remain available during compression.
- *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
- *  The same dictionary will have to be loaded on decompression side for successful decoding.
- *  Dictionary are useful for better compression of small data (KB range).
- *  While LZ4 itself accepts any input as dictionary, dictionary efficiency is also a topic.
- *  When in doubt, employ the Zstandard's Dictionary Builder.
- *  Loading a size of 0 is allowed, and is the same as reset.
- * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded)
- */
-LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
-
-/*! LZ4_compress_fast_continue() :
- *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
- * 'dst' buffer must be already allocated.
- *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
- *
- * @return : size of compressed block
- *           or 0 if there is an error (typically, cannot fit into 'dst').
- *
- *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
- *           Each block has precise boundaries.
- *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
- *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
- *
- *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
- *
- *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
- *           Make sure that buffers are separated, by at least one byte.
- *           This construction ensures that each block only depends on previous block.
- *
- *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
- *
- *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
- */
-LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-
-/*! LZ4_saveDict() :
- *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
- *  save it into a safer place (char* safeBuffer).
- *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
- *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
- * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
- */
-LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
-
-
-/*-**********************************************
-*  Streaming Decompression Functions
-*  Bufferless synchronous API
-************************************************/
-typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
-
-/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
- *  creation / destruction of streaming decompression tracking context.
- *  A tracking context can be re-used multiple times.
- */
-#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
-LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
-#endif
-
-/*! LZ4_setStreamDecode() :
- *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
- *  Use this function to start decompression of a new stream of blocks.
- *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
- *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
- * @return : 1 if OK, 0 if error
- */
-LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
-
-/*! LZ4_decoderRingBufferSize() : v1.8.2+
- *  Note : in a ring buffer scenario (optional),
- *  blocks are presumed decompressed next to each other
- *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
- *  at which stage it resumes from beginning of ring buffer.
- *  When setting such a ring buffer for streaming decompression,
- *  provides the minimum size of this ring buffer
- *  to be compatible with any source respecting maxBlockSize condition.
- * @return : minimum ring buffer size,
- *           or 0 if there is an error (invalid maxBlockSize).
- */
-LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
-#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
-
-/*! LZ4_decompress_safe_continue() :
- *  This decoding function allows decompression of consecutive blocks in "streaming" mode.
- *  The difference with the usual independent blocks is that
- *  new blocks are allowed to find references into former blocks.
- *  A block is an unsplittable entity, and must be presented entirely to the decompression function.
- *  LZ4_decompress_safe_continue() only accepts one block at a time.
- *  It's modeled after `LZ4_decompress_safe()` and behaves similarly.
- *
- * @LZ4_streamDecode : decompression state, tracking the position in memory of past data
- * @compressedSize : exact complete size of one compressed block.
- * @dstCapacity : size of destination buffer (which must be already allocated),
- *                must be an upper bound of decompressed size.
- * @return : number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
- *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
- *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
- *
- *  The last 64KB of previously decoded data *must* remain available and unmodified
- *  at the memory position where they were previously decoded.
- *  If less than 64KB of data has been decoded, all the data must be present.
- *
- *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
- *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
- *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
- *    In which case, encoding and decoding buffers do not need to be synchronized.
- *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
- *  - Synchronized mode :
- *    Decompression buffer size is _exactly_ the same as compression buffer size,
- *    and follows exactly same update rule (block boundaries at same positions),
- *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
- *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
- *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- *    In which case, encoding and decoding buffers do not need to be synchronized,
- *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
- *
- *  Whenever these conditions are not possible,
- *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
- *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
-*/
-LZ4LIB_API int
-LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode,
-                        const char* src, char* dst,
-                        int srcSize, int dstCapacity);
-
-
-/*! LZ4_decompress_safe_usingDict() :
- *  Works the same as
- *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_safe_continue()
- *  However, it's stateless: it doesn't need any LZ4_streamDecode_t state.
- *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
- *  Performance tip : Decompression speed can be substantially increased
- *                    when dst == dictStart + dictSize.
- */
-LZ4LIB_API int
-LZ4_decompress_safe_usingDict(const char* src, char* dst,
-                              int srcSize, int dstCapacity,
-                              const char* dictStart, int dictSize);
-
-/*! LZ4_decompress_safe_partial_usingDict() :
- *  Behaves the same as LZ4_decompress_safe_partial()
- *  with the added ability to specify a memory segment for past data.
- *  Performance tip : Decompression speed can be substantially increased
- *                    when dst == dictStart + dictSize.
- */
-LZ4LIB_API int
-LZ4_decompress_safe_partial_usingDict(const char* src, char* dst,
-                                      int compressedSize,
-                                      int targetOutputSize, int maxOutputSize,
-                                      const char* dictStart, int dictSize);
-
-#endif /* LZ4_H_2983827168210 */
-
-
-/*^*************************************
- * !!!!!!   STATIC LINKING ONLY   !!!!!!
- ***************************************/
-
-/*-****************************************************************************
- * Experimental section
- *
- * Symbols declared in this section must be considered unstable. Their
- * signatures or semantics may change, or they may be removed altogether in the
- * future. They are therefore only safe to depend on when the caller is
- * statically linked against the library.
- *
- * To protect against unsafe usage, not only are the declarations guarded,
- * the definitions are hidden by default
- * when building LZ4 as a shared/dynamic library.
- *
- * In order to access these declarations,
- * define LZ4_STATIC_LINKING_ONLY in your application
- * before including LZ4's headers.
- *
- * In order to make their implementations accessible dynamically, you must
- * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
- ******************************************************************************/
-
-#ifdef LZ4_STATIC_LINKING_ONLY
-
-#ifndef LZ4_STATIC_3504398509
-#define LZ4_STATIC_3504398509
-
-#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
-# define LZ4LIB_STATIC_API LZ4LIB_API
-#else
-# define LZ4LIB_STATIC_API
-#endif
-
-
-/*! LZ4_compress_fast_extState_fastReset() :
- *  A variant of LZ4_compress_fast_extState().
- *
- *  Using this variant avoids an expensive initialization step.
- *  It is only safe to call if the state buffer is known to be correctly initialized already
- *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
- *  From a high level, the difference is that
- *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
- *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
- */
-LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-
-/*! LZ4_compress_destSize_extState() :
- *  Same as LZ4_compress_destSize(), but using an externally allocated state.
- *  Also: exposes @acceleration
- */
-int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration);
-
-/*! LZ4_attach_dictionary() :
- *  This is an experimental API that allows
- *  efficient use of a static dictionary many times.
- *
- *  Rather than re-loading the dictionary buffer into a working context before
- *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
- *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
- *  in which the working stream references the dictionary stream in-place.
- *
- *  Several assumptions are made about the state of the dictionary stream.
- *  Currently, only streams which have been prepared by LZ4_loadDict() should
- *  be expected to work.
- *
- *  Alternatively, the provided dictionaryStream may be NULL,
- *  in which case any existing dictionary stream is unset.
- *
- *  If a dictionary is provided, it replaces any pre-existing stream history.
- *  The dictionary contents are the only history that can be referenced and
- *  logically immediately precede the data compressed in the first subsequent
- *  compression call.
- *
- *  The dictionary will only remain attached to the working stream through the
- *  first compression call, at the end of which it is cleared. The dictionary
- *  stream (and source buffer) must remain in-place / accessible / unchanged
- *  through the completion of the first compression call on the stream.
- */
-LZ4LIB_STATIC_API void
-LZ4_attach_dictionary(LZ4_stream_t* workingStream,
-                const LZ4_stream_t* dictionaryStream);
-
-
-/*! In-place compression and decompression
- *
- * It's possible to have input and output sharing the same buffer,
- * for highly constrained memory environments.
- * In both cases, it requires input to lay at the end of the buffer,
- * and decompression to start at beginning of the buffer.
- * Buffer size must feature some margin, hence be larger than final size.
- *
- * |<------------------------buffer--------------------------------->|
- *                             |<-----------compressed data--------->|
- * |<-----------decompressed size------------------>|
- *                                                  |<----margin---->|
- *
- * This technique is more useful for decompression,
- * since decompressed size is typically larger,
- * and margin is short.
- *
- * In-place decompression will work inside any buffer
- * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
- * This presumes that decompressedSize > compressedSize.
- * Otherwise, it means compression actually expanded data,
- * and it would be more efficient to store such data with a flag indicating it's not compressed.
- * This can happen when data is not compressible (already compressed, or encrypted).
- *
- * For in-place compression, margin is larger, as it must be able to cope with both
- * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
- * and data expansion, which can happen when input is not compressible.
- * As a consequence, buffer size requirements are much higher,
- * and memory savings offered by in-place compression are more limited.
- *
- * There are ways to limit this cost for compression :
- * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
- *   Note that it is a compile-time constant, so all compressions will apply this limit.
- *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
- *   so it's a reasonable trick when inputs are known to be small.
- * - Require the compressor to deliver a "maximum compressed size".
- *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
- *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
- *   in which case, the return code will be 0 (zero).
- *   The caller must be ready for these cases to happen,
- *   and typically design a backup scheme to send data uncompressed.
- * The combination of both techniques can significantly reduce
- * the amount of margin required for in-place compression.
- *
- * In-place compression can work in any buffer
- * which size is >= (maxCompressedSize)
- * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
- * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
- * so it's possible to reduce memory requirements by playing with them.
- */
-
-#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
-#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
-
-#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
-#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
-#endif
-
-#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
-#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
-
-#endif   /* LZ4_STATIC_3504398509 */
-#endif   /* LZ4_STATIC_LINKING_ONLY */
-
-
-
-#ifndef LZ4_H_98237428734687
-#define LZ4_H_98237428734687
-
-/*-************************************************************
- *  Private Definitions
- **************************************************************
- * Do not use these definitions directly.
- * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- * Accessing members will expose user code to API and/or ABI break in future versions of the library.
- **************************************************************/
-#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
-#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
-#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
-
-#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
-  typedef  int8_t  LZ4_i8;
-  typedef uint8_t  LZ4_byte;
-  typedef uint16_t LZ4_u16;
-  typedef uint32_t LZ4_u32;
-#else
-  typedef   signed char  LZ4_i8;
-  typedef unsigned char  LZ4_byte;
-  typedef unsigned short LZ4_u16;
-  typedef unsigned int   LZ4_u32;
-#endif
-
-/*! LZ4_stream_t :
- *  Never ever use below internal definitions directly !
- *  These definitions are not API/ABI safe, and may change in future versions.
- *  If you need static allocation, declare or allocate an LZ4_stream_t object.
-**/
-
-typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
-struct LZ4_stream_t_internal {
-    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
-    const LZ4_byte* dictionary;
-    const LZ4_stream_t_internal* dictCtx;
-    LZ4_u32 currentOffset;
-    LZ4_u32 tableType;
-    LZ4_u32 dictSize;
-    /* Implicit padding to ensure structure is aligned */
-};
-
-#define LZ4_STREAM_MINSIZE  ((1UL << (LZ4_MEMORY_USAGE)) + 32)  /* static size, for inter-version compatibility */
-union LZ4_stream_u {
-    char minStateSize[LZ4_STREAM_MINSIZE];
-    LZ4_stream_t_internal internal_donotuse;
-}; /* previously typedef'd to LZ4_stream_t */
-
-
-/*! LZ4_initStream() : v1.9.0+
- *  An LZ4_stream_t structure must be initialized at least once.
- *  This is automatically done when invoking LZ4_createStream(),
- *  but it's not when the structure is simply declared on stack (for example).
- *
- *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
- *  It can also initialize any arbitrary buffer of sufficient size,
- *  and will @return a pointer of proper type upon initialization.
- *
- *  Note : initialization fails if size and alignment conditions are not respected.
- *         In which case, the function will @return NULL.
- *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
- *  Note3: Before v1.9.0, use LZ4_resetStream() instead
-**/
-LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* stateBuffer, size_t size);
-
-
-/*! LZ4_streamDecode_t :
- *  Never ever use below internal definitions directly !
- *  These definitions are not API/ABI safe, and may change in future versions.
- *  If you need static allocation, declare or allocate an LZ4_streamDecode_t object.
-**/
-typedef struct {
-    const LZ4_byte* externalDict;
-    const LZ4_byte* prefixEnd;
-    size_t extDictSize;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-
-#define LZ4_STREAMDECODE_MINSIZE 32
-union LZ4_streamDecode_u {
-    char minStateSize[LZ4_STREAMDECODE_MINSIZE];
-    LZ4_streamDecode_t_internal internal_donotuse;
-} ;   /* previously typedef'd to LZ4_streamDecode_t */
-
-
-
-/*-************************************
-*  Obsolete Functions
-**************************************/
-
-/*! Deprecation warnings
- *
- *  Deprecated functions make the compiler generate a warning when invoked.
- *  This is meant to invite users to update their source code.
- *  Should deprecation warnings be a problem, it is generally possible to disable them,
- *  typically with -Wno-deprecated-declarations for gcc
- *  or _CRT_SECURE_NO_WARNINGS in Visual.
- *
- *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
- *  before including the header file.
- */
-#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
-#  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
-#else
-#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
-#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif defined(_MSC_VER)
-#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
-#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
-#  else
-#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
-#    define LZ4_DEPRECATED(message)   /* disabled */
-#  endif
-#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
-
-/*! Obsolete compression functions (since v1.7.3) */
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
-LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
-
-/*! Obsolete decompression functions (since v1.8.0) */
-LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
-LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
-
-/* Obsolete streaming functions (since v1.7.0)
- * degraded functionality; do not use!
- *
- * In order to perform streaming compression, these functions depended on data
- * that is no longer tracked in the state. They have been preserved as well as
- * possible: using them will still produce a correct output. However, they don't
- * actually retain any history between compression calls. The compression ratio
- * achieved will therefore be no better than compressing each chunk
- * independently.
- */
-LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
-LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
-LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
-
-/*! Obsolete streaming decoding functions (since v1.7.0) */
-LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
-
-/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
- *  These functions used to be faster than LZ4_decompress_safe(),
- *  but this is no longer the case. They are now slower.
- *  This is because LZ4_decompress_fast() doesn't know the input size,
- *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
- *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
- *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
- *
- *  The last remaining LZ4_decompress_fast() specificity is that
- *  it can decompress a block without knowing its compressed size.
- *  Such functionality can be achieved in a more secure manner
- *  by employing LZ4_decompress_safe_partial().
- *
- *  Parameters:
- *  originalSize : is the uncompressed size to regenerate.
- *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
- * @return : number of bytes read from source buffer (== compressed size).
- *           The function expects to finish at block's end exactly.
- *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
- *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
- *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
- *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
- *         These issues never happen if input (compressed) data is correct.
- *         But they may happen if input data is invalid (error or intentional tampering).
- *         As a consequence, use these functions in trusted environments with trusted data **only**.
- */
-LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead")
-LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
-LZ4_DEPRECATED("This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. "
-               "Note that the contract will change (requires block's compressed size, instead of decompressed size)")
-LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
-LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial_usingDict() instead")
-LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
-
-/*! LZ4_resetStream() :
- *  An LZ4_stream_t structure must be initialized at least once.
- *  This is done with LZ4_initStream(), or LZ4_resetStream().
- *  Consider switching to LZ4_initStream(),
- *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
- */
-LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
-
-
-#endif /* LZ4_H_98237428734687 */
-
-
-#if defined (__cplusplus)
-}
-#endif
diff --git a/include/verilated.mk.in b/include/verilated.mk.in
index 4e71a5ac1..b936d8c45 100644
--- a/include/verilated.mk.in
+++ b/include/verilated.mk.in
@@ -104,6 +104,7 @@ CPPFLAGS += -I. $(VK_CPPFLAGS_WALL) $(VK_CPPFLAGS_ALWAYS)
 VPATH += ..
 VPATH += $(VERILATOR_ROOT)/include
 VPATH += $(VERILATOR_ROOT)/include/vltstd
+VPATH += $(VERILATOR_ROOT)/include/fstcpp
 
 LDFLAGS += $(CFG_LDFLAGS_VERILATED)
 
@@ -205,6 +206,13 @@ VK_OBJS_SLOW = $(addsuffix .o, $(VM_SLOW))
 
 VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
 
+ifneq ($(VM_TRACE_FST),0)
+ifneq ($(VM_TRACE_FST),)
+    VM_GLOBAL_FAST += fstcpp_writer fstcpp_variable_info
+    LDLIBS += -llz4 -lz
+endif
+endif
+
 # Note VM_GLOBAL_FAST and VM_GLOBAL_SLOW holds the files required from the
 # run-time library. In practice everything is actually in VM_GLOBAL_FAST,
 # but keeping the distinction for compatibility for now.
diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index a0f636dc4..2ac855952 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -26,21 +26,14 @@
 #include "verilated.h"
 #include "verilated_fst_c.h"
 
-// GTKWave configuration
-#define HAVE_LIBPTHREAD
-#define FST_WRITER_PARALLEL
-#define LZ4_DISABLE_DEPRECATE_WARNINGS
-
-// Include the GTKWave implementation directly
-#define FST_CONFIG_INCLUDE "fst_config.h"
-#include "gtkwave/fastlz.c"
-#include "gtkwave/fstapi.c"
-#include "gtkwave/lz4.c"
+// Include fstcpp library
+#include "fstcpp/fstcpp_writer.h"
 
 #include <algorithm>
 #include <iterator>
 #include <sstream>
 #include <type_traits>
+#include <vector>
 
 #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
 # include <io.h>
@@ -53,8 +46,8 @@
 //=============================================================================
 // Check that forward declared types matches the FST API types
 
-static_assert(std::is_same<vlFstHandle, fstHandle>::value, "vlFstHandle mismatch");
-static_assert(std::is_same<vlFstEnumHandle, fstEnumHandle>::value, "vlFstHandle mismatch");
+static_assert(std::is_same<vlFstHandle, fst::Handle>::value, "vlFstHandle mismatch");
+static_assert(std::is_same<vlFstEnumHandle, fst::EnumHandle>::value, "vlFstEnumHandle mismatch");
 
 //=============================================================================
 // Specialization of the generics for this trace format
@@ -71,18 +64,18 @@ static_assert(std::is_same<vlFstEnumHandle, fstEnumHandle>::value, "vlFstHandle
 VerilatedFst::VerilatedFst(void* /*fst*/) {}
 
 VerilatedFst::~VerilatedFst() {
-    if (m_fst) fstWriterClose(m_fst);
+    if (m_fst) VL_DO_CLEAR(delete m_fst, m_fst = nullptr);
     if (m_symbolp) VL_DO_CLEAR(delete[] m_symbolp, m_symbolp = nullptr);
     if (m_strbufp) VL_DO_CLEAR(delete[] m_strbufp, m_strbufp = nullptr);
 }
 
 void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) {
     const VerilatedLockGuard lock{m_mutex};
-    m_fst = fstWriterCreate(filename, 1);
-    fstWriterSetPackType(m_fst, FST_WR_PT_LZ4);
-    fstWriterSetTimescaleFromString(m_fst, timeResStr().c_str());  // lintok-begin-on-ref
-    if (m_useFstWriterThread) fstWriterSetParallelMode(m_fst, 1);
-    fstWriterSetVersion(m_fst, "Generated by VerilatedFst");
+    m_fst = new fst::Writer{filename};
+    m_fst->setWriterPackType(fst::WriterPackType::LZ4);
+    m_fst->setTimecale(int8_t(round(log10(timeRes()))));
+    // if (m_useFstWriterThread) fstWriterSetParallelMode(m_fst, 1);
+    m_fst->setWriter("Generated by VerilatedFst");
     constDump(true);  // First dump must contain the const signals
     fullDump(true);  // First dump must be full for fst
 
@@ -90,7 +83,7 @@ void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) {
 
     // convert m_code2symbol into an array for fast lookup
     if (!m_symbolp) {
-        m_symbolp = new fstHandle[nextCode()]{0};
+        m_symbolp = new fst::Handle[nextCode()]{0};
         for (const auto& i : m_code2symbol) m_symbolp[i.first] = i.second;
     }
     m_code2symbol.clear();
@@ -103,7 +96,7 @@ void VerilatedFst::close() VL_MT_SAFE_EXCLUDES(m_mutex) {
     const VerilatedLockGuard lock{m_mutex};
     Super::closeBase();
     emitTimeChangeMaybe();
-    fstWriterClose(m_fst);
+    if (m_fst) { m_fst->close(); delete m_fst; }  // close() alone leaks: pointer is nulled below
     m_fst = nullptr;
 }
 
@@ -111,18 +104,18 @@ void VerilatedFst::flush() VL_MT_SAFE_EXCLUDES(m_mutex) {
     const VerilatedLockGuard lock{m_mutex};
     Super::flushBase();
     emitTimeChangeMaybe();
-    fstWriterFlushContext(m_fst);
+    if (m_fst) m_fst->flushValueChangeData();
 }
 
 void VerilatedFst::emitTimeChange(uint64_t timeui) {
-    if (!timeui) fstWriterEmitTimeChange(m_fst, timeui);
+    if (!timeui) m_fst->emitTimeChange(timeui);
     m_timeui = timeui;
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFst::emitTimeChangeMaybe() {
     if (VL_UNLIKELY(m_timeui)) {
-        fstWriterEmitTimeChange(m_fst, m_timeui);
+        m_fst->emitTimeChange(m_timeui);
         m_timeui = 0;
     }
 }
@@ -133,8 +126,12 @@ void VerilatedFst::emitTimeChangeMaybe() {
 void VerilatedFst::declDTypeEnum(int dtypenum, const char* name, uint32_t elements,
                                  unsigned int minValbits, const char** itemNamesp,
                                  const char** itemValuesp) {
-    const fstEnumHandle enumNum
-        = fstWriterCreateEnumTable(m_fst, name, elements, minValbits, itemNamesp, itemValuesp);
+    std::vector<std::pair<const char*, const char*>> itemNameValuesp{elements};
+    for (uint32_t i = 0; i < elements; ++i) {
+        itemNameValuesp[i].first = itemNamesp[i];
+        itemNameValuesp[i].second = itemValuesp[i];
+    }
+    const fst::EnumHandle enumNum = m_fst->createEnumTable(name, minValbits, itemNameValuesp);
     const bool newEntry = m_local2fstdtype[initUserp()].emplace(dtypenum, enumNum).second;
     assert(newEntry);
 }
@@ -175,30 +172,30 @@ void VerilatedFst::pushPrefix(const char* namep, VerilatedTracePrefixType type,
 
     switch (type) {
     case VerilatedTracePrefixType::SCOPE_MODULE:
-        fstWriterSetScope(m_fst, FST_ST_VCD_MODULE, namep, nullptr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_MODULE, name, std::string{});
         break;
     case VerilatedTracePrefixType::SCOPE_INTERFACE:
-        fstWriterSetScope(m_fst, FST_ST_VCD_INTERFACE, namep, nullptr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_INTERFACE, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_PACKED:
-        fstWriterSetAttrBegin(m_fst, FST_AT_PACK, FST_PT_PACKED, "members", l);
-        fstWriterSetScope(m_fst, FST_ST_VCD_STRUCT, namep, nullptr);
+        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::PACKED, "members", l);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_UNPACKED:
-        fstWriterSetAttrBegin(m_fst, FST_AT_PACK, FST_PT_UNPACKED, "members", l);
-        fstWriterSetScope(m_fst, FST_ST_VCD_STRUCT, namep, nullptr);
+        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::UNPACKED, "members", l);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::UNION_PACKED:
-        fstWriterSetAttrBegin(m_fst, FST_AT_PACK, FST_PT_PACKED, "members", l);
-        fstWriterSetScope(m_fst, FST_ST_VCD_UNION, namep, nullptr);
+        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::PACKED, "members", l);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_UNION, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_PACKED:
-        fstWriterSetAttrBegin(m_fst, FST_AT_ARRAY, FST_AR_PACKED, "bounds", lr);
-        fstWriterSetScope(m_fst, FST_ST_SV_ARRAY, namep, nullptr);
+        m_fst->setAttrBegin(fst::AttributeType::ARRAY, fst::PackType::PACKED, "bounds", lr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_ARRAY, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_UNPACKED:
-        fstWriterSetAttrBegin(m_fst, FST_AT_ARRAY, FST_AR_UNPACKED, "bounds", lr);
-        fstWriterSetScope(m_fst, FST_ST_SV_ARRAY, namep, nullptr);
+        m_fst->setAttrBegin(fst::AttributeType::ARRAY, fst::PackType::UNPACKED, "bounds", lr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_ARRAY, name, std::string{});
         break;
     default: break;
     }
@@ -207,7 +204,7 @@ void VerilatedFst::pushPrefix(const char* namep, VerilatedTracePrefixType type,
 void VerilatedFst::popPrefix() {
     assert(!m_prefixStack.empty());
     if (m_prefixStack.back().second != VerilatedTracePrefixType::ROOTIO_WRAPPER) {
-        fstWriterSetUpscope(m_fst);
+        m_fst->upscope();
     }
     m_prefixStack.pop_back();
     assert(!m_prefixStack.empty());  // Always one left, the constructor's initial one
@@ -232,56 +229,55 @@ void VerilatedFst::declare(uint32_t code, const char* name, int dtypenum,
     const std::string name_str = name_ss.str();
 
     if (dtypenum > 0) {
-        fstWriterEmitEnumTableRef(m_fst, m_local2fstdtype.at(initUserp()).at(dtypenum));
+        m_fst->emitEnumTableRef(m_local2fstdtype.at(initUserp()).at(dtypenum));
     }
 
-    fstVarDir varDir = FST_VD_IMPLICIT;
+    fst::Hierarchy::VarDirection varDir = fst::Hierarchy::VarDirection::IMPLICIT;
     switch (direction) {
-    case VerilatedTraceSigDirection::INOUT: varDir = FST_VD_INOUT; break;
-    case VerilatedTraceSigDirection::OUTPUT: varDir = FST_VD_OUTPUT; break;
-    case VerilatedTraceSigDirection::INPUT: varDir = FST_VD_INPUT; break;
-    case VerilatedTraceSigDirection::NONE: varDir = FST_VD_IMPLICIT; break;
+    case VerilatedTraceSigDirection::INOUT: varDir = fst::Hierarchy::VarDirection::INOUT; break;
+    case VerilatedTraceSigDirection::OUTPUT: varDir = fst::Hierarchy::VarDirection::OUTPUT; break;
+    case VerilatedTraceSigDirection::INPUT: varDir = fst::Hierarchy::VarDirection::INPUT; break;
+    case VerilatedTraceSigDirection::NONE: varDir = fst::Hierarchy::VarDirection::IMPLICIT; break;
     }
 
-    fstVarType varType;
+    fst::Hierarchy::VarType varType;
     // Doubles have special decoding properties, so must indicate if a double
     if (type == VerilatedTraceSigType::DOUBLE) {
         if (kind == VerilatedTraceSigKind::PARAMETER) {
-            varType = FST_VT_VCD_REAL_PARAMETER;
+            varType = fst::Hierarchy::VarType::VCD_REAL_PARAMETER;
         } else {
-            varType = FST_VT_VCD_REAL;
+            varType = fst::Hierarchy::VarType::VCD_REAL;
         }
     }
     // clang-format off
-    else if (kind == VerilatedTraceSigKind::PARAMETER) varType = FST_VT_VCD_PARAMETER;
-    else if (kind == VerilatedTraceSigKind::SUPPLY0) varType = FST_VT_VCD_SUPPLY0;
-    else if (kind == VerilatedTraceSigKind::SUPPLY1) varType = FST_VT_VCD_SUPPLY1;
-    else if (kind == VerilatedTraceSigKind::TRI) varType = FST_VT_VCD_TRI;
-    else if (kind == VerilatedTraceSigKind::TRI0) varType = FST_VT_VCD_TRI0;
-    else if (kind == VerilatedTraceSigKind::TRI1) varType = FST_VT_VCD_TRI1;
-    else if (kind == VerilatedTraceSigKind::TRIAND) varType = FST_VT_VCD_TRIAND;
-    else if (kind == VerilatedTraceSigKind::TRIOR) varType = FST_VT_VCD_TRIOR;
-    else if (kind == VerilatedTraceSigKind::TRIREG) varType = FST_VT_VCD_TRIREG;
-    else if (kind == VerilatedTraceSigKind::WIRE) varType = FST_VT_VCD_WIRE;
+    else if (kind == VerilatedTraceSigKind::PARAMETER) varType = fst::Hierarchy::VarType::VCD_PARAMETER;
+    else if (kind == VerilatedTraceSigKind::SUPPLY0) varType = fst::Hierarchy::VarType::VCD_SUPPLY0;
+    else if (kind == VerilatedTraceSigKind::SUPPLY1) varType = fst::Hierarchy::VarType::VCD_SUPPLY1;
+    else if (kind == VerilatedTraceSigKind::TRI) varType = fst::Hierarchy::VarType::VCD_TRI;
+    else if (kind == VerilatedTraceSigKind::TRI0) varType = fst::Hierarchy::VarType::VCD_TRI0;
+    else if (kind == VerilatedTraceSigKind::TRI1) varType = fst::Hierarchy::VarType::VCD_TRI1;
+    else if (kind == VerilatedTraceSigKind::TRIAND) varType = fst::Hierarchy::VarType::VCD_TRIAND;
+    else if (kind == VerilatedTraceSigKind::TRIOR) varType = fst::Hierarchy::VarType::VCD_TRIOR;
+    else if (kind == VerilatedTraceSigKind::TRIREG) varType = fst::Hierarchy::VarType::VCD_TRIREG;
+    else if (kind == VerilatedTraceSigKind::WIRE) varType = fst::Hierarchy::VarType::VCD_WIRE;
     //
-    else if (type == VerilatedTraceSigType::INTEGER) varType = FST_VT_VCD_INTEGER;
-    else if (type == VerilatedTraceSigType::BIT) varType = FST_VT_SV_BIT;
-    else if (type == VerilatedTraceSigType::LOGIC) varType = FST_VT_SV_LOGIC;
-    else if (type == VerilatedTraceSigType::INT) varType = FST_VT_SV_INT;
-    else if (type == VerilatedTraceSigType::SHORTINT) varType = FST_VT_SV_SHORTINT;
-    else if (type == VerilatedTraceSigType::LONGINT) varType = FST_VT_SV_LONGINT;
-    else if (type == VerilatedTraceSigType::BYTE) varType = FST_VT_SV_BYTE;
-    else if (type == VerilatedTraceSigType::EVENT) varType = FST_VT_VCD_EVENT;
-    else if (type == VerilatedTraceSigType::TIME) varType = FST_VT_VCD_TIME;
+    else if (type == VerilatedTraceSigType::INTEGER) varType = fst::Hierarchy::VarType::VCD_INTEGER;
+    else if (type == VerilatedTraceSigType::BIT) varType = fst::Hierarchy::VarType::SV_BIT;
+    else if (type == VerilatedTraceSigType::LOGIC) varType = fst::Hierarchy::VarType::SV_LOGIC;
+    else if (type == VerilatedTraceSigType::INT) varType = fst::Hierarchy::VarType::SV_INT;
+    else if (type == VerilatedTraceSigType::SHORTINT) varType = fst::Hierarchy::VarType::SV_SHORTINT;
+    else if (type == VerilatedTraceSigType::LONGINT) varType = fst::Hierarchy::VarType::SV_LONGINT;
+    else if (type == VerilatedTraceSigType::BYTE) varType = fst::Hierarchy::VarType::SV_BYTE;
+    else if (type == VerilatedTraceSigType::EVENT) varType = fst::Hierarchy::VarType::VCD_EVENT;
+    else if (type == VerilatedTraceSigType::TIME) varType = fst::Hierarchy::VarType::VCD_TIME;
     else { assert(0); /* Unreachable */ }
     // clang-format on
 
     const auto it = vlstd::as_const(m_code2symbol).find(code);
     if (it == m_code2symbol.end()) {  // New
-        m_code2symbol[code]
-            = fstWriterCreateVar(m_fst, varType, varDir, bits, name_str.c_str(), 0);
+        m_code2symbol[code] = m_fst->createVar(varType, varDir, bits, name_str.c_str(), 0);
     } else {  // Alias
-        fstWriterCreateVar(m_fst, varType, varDir, bits, name_str.c_str(), it->second);
+        m_fst->createVar(varType, varDir, bits, name_str.c_str(), it->second);
     }
 }
 
@@ -390,14 +386,14 @@ VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitEvent(uint32_t code) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], "1");
+    m_fst->emitValueChange(m_symbolp[code], "1");
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitBit(uint32_t code, CData newval) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], newval ? "1" : "0");
+    m_fst->emitValueChange(m_symbolp[code], newval ? "1" : "0");
 }
 
 VL_ATTR_ALWINLINE
@@ -406,7 +402,7 @@ void VerilatedFstBuffer::emitCData(uint32_t code, CData newval, int bits) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     cvtCDataToStr(buf, newval << (VL_BYTESIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], buf);
 }
 
 VL_ATTR_ALWINLINE
@@ -415,7 +411,7 @@ void VerilatedFstBuffer::emitSData(uint32_t code, SData newval, int bits) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     cvtSDataToStr(buf, newval << (VL_SHORTSIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], buf);
 }
 
 VL_ATTR_ALWINLINE
@@ -424,7 +420,7 @@ void VerilatedFstBuffer::emitIData(uint32_t code, IData newval, int bits) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     cvtIDataToStr(buf, newval << (VL_IDATASIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], buf);
 }
 
 VL_ATTR_ALWINLINE
@@ -433,7 +429,7 @@ void VerilatedFstBuffer::emitQData(uint32_t code, QData newval, int bits) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     cvtQDataToStr(buf, newval << (VL_QUADSIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], buf);
 }
 
 VL_ATTR_ALWINLINE
@@ -450,11 +446,11 @@ void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int bits
         wp += VL_EDATASIZE;
     }
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], m_strbufp);
+    m_fst->emitValueChange(m_symbolp[code], m_strbufp);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitDouble(uint32_t code, double newval) {
     m_owner.emitTimeChangeMaybe();
-    fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
+    m_fst->emitValueChange(m_symbolp[code], *reinterpret_cast<const uint64_t*>(&newval));
 }
diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h
index beab35565..387244d37 100644
--- a/include/verilated_fst_c.h
+++ b/include/verilated_fst_c.h
@@ -34,7 +34,9 @@ typedef uint32_t vlFstEnumHandle;
 
 class VerilatedFstBuffer;
 
-struct fstWriterContext;
+namespace fst {
+class Writer;
+}
 
 //=============================================================================
 // VerilatedFst
@@ -51,7 +53,7 @@ private:
     //=========================================================================
     // FST-specific internals
 
-    fstWriterContext* m_fst = nullptr;
+    fst::Writer* m_fst = nullptr;
     std::map<uint32_t, vlFstHandle> m_code2symbol;
     std::map<void*, std::map<int, vlFstEnumHandle>> m_local2fstdtype;
     vlFstHandle* m_symbolp = nullptr;  // same as m_code2symbol, but as an array
@@ -211,7 +213,7 @@ class VerilatedFstBuffer VL_NOT_FINAL {
     VerilatedFst& m_owner;  // Trace file owning this buffer. Required by subclasses.
 
     // The FST file handle
-    fstWriterContext* const m_fst = m_owner.m_fst;
+    fst::Writer* const m_fst = m_owner.m_fst;
     // code to fstHande map, as an array
     const vlFstHandle* const m_symbolp = m_owner.m_symbolp;
     // String buffer long enough to hold maxBits() chars
diff --git a/test_regress/t/t_dist_copyright.py b/test_regress/t/t_dist_copyright.py
index 53ee776c6..0eff91f11 100755
--- a/test_regress/t/t_dist_copyright.py
+++ b/test_regress/t/t_dist_copyright.py
@@ -32,7 +32,7 @@ EXEMPT_FILES_LIST = """
     docs/gen
     docs/spelling.txt
     docs/verilated.dox
-    include/gtkwave
+    include/fstcpp
     include/vltstd
     install-sh
     src/mkinstalldirs
diff --git a/test_regress/t/t_dist_cppstyle.py b/test_regress/t/t_dist_cppstyle.py
index 8c79df855..67ed3fe55 100755
--- a/test_regress/t/t_dist_cppstyle.py
+++ b/test_regress/t/t_dist_cppstyle.py
@@ -57,7 +57,7 @@ for filename in sorted(files.keys()):
         continue
     if not re.search(r'\.(h|c|cpp)(\.in)?$', filename):
         continue
-    if '/gtkwave/' in filename:
+    if '/fstcpp/' in filename:
         continue
 
     contents = test.file_contents(filename) + "\n\n"

From e97243f173c824c630acf0798317144c0c26e05c Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Mon, 16 Mar 2026 02:20:32 +0800
Subject: [PATCH 2/8] Fix regression error and review comments

---
 include/fstcpp/fstcpp.h                     |  42 +-
 include/fstcpp/fstcpp_assertion.h           |  16 +-
 include/fstcpp/fstcpp_stream_write_helper.h | 181 ++++----
 include/fstcpp/fstcpp_variable_info.cpp     |  21 +-
 include/fstcpp/fstcpp_variable_info.h       | 125 ++---
 include/fstcpp/fstcpp_writer.cpp            | 490 ++++++++++----------
 include/fstcpp/fstcpp_writer.h              | 359 ++++----------
 include/verilated_fst_c.cpp                 |  15 +-
 8 files changed, 560 insertions(+), 689 deletions(-)

diff --git a/include/fstcpp/fstcpp.h b/include/fstcpp/fstcpp.h
index 6808b5139..5eb0c7414 100644
--- a/include/fstcpp/fstcpp.h
+++ b/include/fstcpp/fstcpp.h
@@ -10,7 +10,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <utility>
+#include <string>
 // Other libraries' .h files.
 // Your project's .h files.
 
@@ -23,11 +23,23 @@ namespace fst {
 
 typedef uint32_t Handle;
 typedef uint32_t EnumHandle;
-using string_view_pair = std::pair<const char *, std::size_t>;
+struct string_view_pair {
+	const char *m_data = nullptr;
+	size_t m_size = 0;
+
+	// implicit conversion from const char*, std::string, std::string_view
+	string_view_pair(const char *data)
+		: m_data{data}, m_size{data == nullptr ? 0 : std::strlen(data)} {}
+	string_view_pair(const char *data, size_t size) : m_data{data}, m_size{size} {}
+	string_view_pair(const std::string &s) : m_data{s.c_str()}, m_size{s.size()} {}
+#if __cplusplus >= 201703L
+	string_view_pair(std::string_view s) : m_data{s.data()}, m_size{s.size()} {}
+#endif
+};
 
 [[maybe_unused]]
 static inline string_view_pair make_string_view_pair(const char *data) {
-	if (not data) {
+	if (!data) {
 		return {nullptr, 0};
 	}
 	return {data, std::strlen(data)};
@@ -217,19 +229,19 @@ struct Hierarchy {
 };
 
 struct Header {
-	uint64_t start_time = uint64_t(-1);
-	uint64_t end_time = 0;
-	int64_t timezero = 0;
+	uint64_t m_start_time{uint64_t(-1)};
+	uint64_t m_end_time{0};
+	int64_t m_timezero{0};
 	// Match the original fstapi.c. Just for information, not used in FST.
-	uint64_t writer_memory_use = 1ull << 27;
-	uint64_t num_scopes = 0;
-	uint64_t num_vars = 0;     // #CreateVar calls, including aliases
-	uint64_t num_handles = 0;  // #unique handles, excluding aliases, shall be <= num_vars
-	uint64_t num_value_change_data_blocks = 0;
-	char writer[128]{};
-	char date[26]{};
-	FileType filetype = FileType::VERILOG;
-	int8_t timescale = -9;
+	uint64_t m_writer_memory_use{1ull << 27};
+	uint64_t m_num_scopes{0};
+	uint64_t m_num_vars{0};     // #CreateVar calls, including aliases
+	uint64_t m_num_handles{0};  // #unique handles, excluding aliases, shall be <= m_num_vars
+	uint64_t m_num_value_change_data_blocks{0};
+	char m_writer[128]{};
+	char m_date[26]{};
+	FileType m_filetype{FileType::VERILOG};
+	int8_t m_timescale{-9};
 };
 
 static constexpr uint64_t kInvalidTime = uint64_t(-1);
diff --git a/include/fstcpp/fstcpp_assertion.h b/include/fstcpp/fstcpp_assertion.h
index b8567d116..1f7265b13 100644
--- a/include/fstcpp/fstcpp_assertion.h
+++ b/include/fstcpp/fstcpp_assertion.h
@@ -6,9 +6,9 @@
 // direct include
 // C system headers
 // C++ standard library headers
+#include <cstdlib>
 #include <iostream>
 #include <sstream>
-#include <stdexcept>
 // Other libraries' .h files.
 // Your project's .h files.
 
@@ -18,7 +18,7 @@
 		oss << "FST_CHECK failed: " #a; \
 		const auto e = oss.str();       \
 		std::cerr << e << std::endl;    \
-		throw std::runtime_error(e);    \
+		std::abort();                   \
 	}
 
 #define FST_CHECK_EQ(a, b)                           \
@@ -28,7 +28,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 #define FST_CHECK_NE(a, b)                           \
@@ -38,7 +38,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 #define FST_CHECK_GT(a, b)                           \
@@ -48,7 +48,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 #define FST_CHECK_GE(a, b)                           \
@@ -58,7 +58,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 #define FST_CHECK_LT(a, b)                           \
@@ -68,7 +68,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 #define FST_CHECK_LE(a, b)                           \
@@ -78,7 +78,7 @@
 		oss << " (" << (a) << " vs. " << (b) << ")"; \
 		const auto e = oss.str();                    \
 		std::cerr << e << std::endl;                 \
-		throw std::runtime_error(e);                 \
+		std::abort();                                \
 	}
 
 // We turn on all DCHECKs to CHECKs temporarily for better safety.
diff --git a/include/fstcpp/fstcpp_stream_write_helper.h b/include/fstcpp/fstcpp_stream_write_helper.h
index 70621e0a1..e3b9158ba 100644
--- a/include/fstcpp/fstcpp_stream_write_helper.h
+++ b/include/fstcpp/fstcpp_stream_write_helper.h
@@ -6,10 +6,12 @@
 #pragma once
 // direct include
 // C system headers
+#ifdef _MSC_VER
+#	include <intrin.h>
+#endif
 // C++ standard library headers
 #include <cstdint>
 #include <cstring>
-#include <iostream>
 #include <vector>
 // Other libraries' .h files.
 // Your project's .h files.
@@ -26,10 +28,27 @@ namespace platform {
 // clang-format off
 template <typename U> U to_big_endian(U u) { return u; }
 #else
+#if defined(__GNUC__) || defined(__clang__)
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 1>) { return u; }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 2>) { return __builtin_bswap16(u); }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 4>) { return __builtin_bswap32(u); }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 8>) { return __builtin_bswap64(u); }
+#elif defined(_MSC_VER) // MSVC
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 1>) { return u; }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 2>) { return _byteswap_ushort(u); }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 4>) { return _byteswap_ulong(u); }
+template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 8>) { return _byteswap_uint64(u); }
+#else
+template<typename U, size_t S> U to_big_endian(U u, std::integral_constant<size_t, S>) {  // portable byte swap, used when no compiler intrinsic is available
+	U ret{ 0 };  // collects the bytes of u in reversed order
+	for (size_t i = 0; i < S; ++i) {
+		ret <<= 8;  // make room BEFORE inserting, otherwise the last pass shifts a byte out
+		ret |= u & 0xff;
+		u >>= 8;
+	}
+	return ret;
+}
+#endif
 // clang-format on
 template <typename U>
 U to_big_endian(U u) {
@@ -40,17 +59,17 @@ U to_big_endian(U u) {
 }  // namespace platform
 
 struct StreamWriteHelper {
-	std::ostream *os;
+	std::ostream *m_os{nullptr};
 
-	StreamWriteHelper(std::ostream &os_) : os(&os_) {}
-	StreamWriteHelper(std::ostream *os_) : os(os_) {}
+	StreamWriteHelper(std::ostream &os_) : m_os{&os_} {}
+	StreamWriteHelper(std::ostream *os_) : m_os{os_} {}
 
 	// Write the entire uint, big-endian
 	// We do not provide little-endian version since FST only uses big-endian
 	template <typename U>
 	StreamWriteHelper &writeUInt(U u) {
 		u = platform::to_big_endian(u);
-		os->write(reinterpret_cast<const char *>(&u), sizeof(u));
+		m_os->write(reinterpret_cast<const char *>(&u), sizeof(u));
 		return *this;
 	}
 
@@ -64,35 +83,35 @@ struct StreamWriteHelper {
 		u <<= sizeof(u) * 8 - bitwidth;
 		// Write the first (bitwidth+7)/8 bytes
 		u = platform::to_big_endian(u);
-		os->write(reinterpret_cast<const char *>(&u), (bitwidth + 7) / 8);
+		m_os->write(reinterpret_cast<const char *>(&u), (bitwidth + 7) / 8);
 		return *this;
 	}
 
 	StreamWriteHelper &writeLEB128(uint64_t v) {
 		// Just reuse the logic from fstapi.c, is there a better way?
-		uint64_t nxt;
-		unsigned char buf[10]; /* ceil(64/7) = 10 */
-		unsigned char *pnt = buf;
-		int len;
+		uint64_t nxt{0};
+		unsigned char buf[10]{}; /* ceil(64/7) = 10 */
+		unsigned char *pnt{buf};
+		int len{0};
 		while ((nxt = v >> 7)) {
 			*(pnt++) = ((unsigned char)v) | 0x80;
 			v = nxt;
 		}
 		*(pnt++) = (unsigned char)v;
-		len = pnt - buf;
-		os->write(reinterpret_cast<const char *>(buf), len);
+		len = static_cast<int>(pnt - buf);
+		m_os->write(reinterpret_cast<const char *>(buf), len);
 		return *this;
 	}
 
 	StreamWriteHelper &writeLEB128Signed(int64_t v) {
 		// Just reuse the logic from fstapi.c, is there a better way?
-		unsigned char buf[15]; /* ceil(64/7) = 10 + sign byte padded way up */
-		unsigned char byt;
-		unsigned char *pnt = buf;
-		int more = 1;
-		int len;
+		unsigned char buf[15]{}; /* ceil(64/7) = 10 + sign byte padded way up */
+		unsigned char byt{0};
+		unsigned char *pnt{buf};
+		int more{1};
+		int len{0};
 		do {
-			byt = v | 0x80;
+			byt = static_cast<unsigned char>(v | 0x80);
 			v >>= 7;
 
 			if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) {
@@ -102,15 +121,15 @@ struct StreamWriteHelper {
 
 			*(pnt++) = byt;
 		} while (more);
-		len = pnt - buf;
-		os->write(reinterpret_cast<const char *>(buf), len);
+		len = static_cast<int>(pnt - buf);
+		m_os->write(reinterpret_cast<const char *>(buf), len);
 		return *this;
 	}
 
 	template <typename F>
 	StreamWriteHelper &writeFloat(F f) {
 		// Always write in native endianness
-		os->write(reinterpret_cast<const char *>(&f), sizeof(f));
+		m_os->write(reinterpret_cast<const char *>(&f), sizeof(f));
 		return *this;
 	}
 
@@ -126,13 +145,13 @@ struct StreamWriteHelper {
 
 	// Write the string, non-null-terminated
 	StreamWriteHelper &writeString(const fst::string_view_pair str) {
-		os->write(str.first, str.second);
+		m_os->write(str.m_data, str.m_size);
 		return *this;
 	}
 
 	// Write the string, null-terminated
 	StreamWriteHelper &writeString0(const fst::string_view_pair str) {
-		os->write(str.first, str.second).put('\0');
+		m_os->write(str.m_data, str.m_size).put('\0');
 		return *this;
 	}
 	StreamWriteHelper &writeString(const std::string &str) {
@@ -143,33 +162,33 @@ struct StreamWriteHelper {
 	}
 
 	StreamWriteHelper &write(const char *ptr, size_t size) {
-		os->write(ptr, size);
+		m_os->write(ptr, size);
 		return *this;
 	}
 
 	StreamWriteHelper &write(const uint8_t *ptr, size_t size) {
-		os->write(reinterpret_cast<const char *>(ptr), size);
+		m_os->write(reinterpret_cast<const char *>(ptr), size);
 		return *this;
 	}
 
 	StreamWriteHelper &seek(std::streamoff pos, std::ios_base::seekdir dir) {
-		os->seekp(pos, dir);
+		m_os->seekp(pos, dir);
 		return *this;
 	}
 
 	StreamWriteHelper &fill(char fill_char, size_t size) {
 		if (size > 32) {
 			// optimize large fills
-			constexpr unsigned kChunkSize = 16;
-			char buf[kChunkSize];
-			std::memset(buf, fill_char, kChunkSize);
-			for (size_t i = 0; i < size / kChunkSize; ++i) {
-				os->write(buf, kChunkSize);
+			constexpr unsigned s_kChunkSize = 16;
+			char buf[s_kChunkSize]{};
+			std::memset(buf, fill_char, s_kChunkSize);
+			for (size_t i{0}; i < size / s_kChunkSize; ++i) {
+				m_os->write(buf, s_kChunkSize);
 			}
-			size %= kChunkSize;
+			size %= s_kChunkSize;
 		}
-		for (size_t i = 0; i < size; ++i) {
-			os->put(fill_char);
+		for (size_t i{0}; i < size; ++i) {
+			m_os->put(fill_char);
 		}
 		return *this;
 	}
@@ -197,41 +216,41 @@ struct StreamWriteHelper {
 	// to endOffset(), which is a common mistake.
 
 	StreamWriteHelper &beginOffset(std::streamoff &pos) {
-		pos = os->tellp();
+		pos = m_os->tellp();
 		return *this;
 	}
 
 	StreamWriteHelper &endOffset(std::streamoff *diff) {
 		// diff shall store previous position before calling this function
-		*diff = os->tellp() - *diff;
+		*diff = m_os->tellp() - *diff;
 		return *this;
 	}
 
 	StreamWriteHelper &endOffset(std::streamoff *diff, std::streamoff pos) {
-		*diff = os->tellp() - pos;
+		*diff = m_os->tellp() - pos;
 		return *this;
 	}
 };
 
 struct StreamVectorWriteHelper {
-	std::vector<uint8_t> &vec;
+	std::vector<uint8_t> &m_vec;
 
-	StreamVectorWriteHelper(std::vector<uint8_t> &vec_) : vec(vec_) {}
+	StreamVectorWriteHelper(std::vector<uint8_t> &vec_) : m_vec{vec_} {}
 
 	template <typename T>
 	StreamVectorWriteHelper &write(T u) {
 		const size_t s = sizeof(u);
-		vec.resize(vec.size() + s);
-		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		m_vec.resize(m_vec.size() + s);
+		std::memcpy(m_vec.data() + m_vec.size() - s, &u, s);
 		return *this;
 	}
 
 	template <typename T>
 	StreamVectorWriteHelper &fill(T u, size_t count) {
 		const size_t s = sizeof(u) * count;
-		vec.resize(vec.size() + s);
-		for (size_t i = 0; i < count; ++i) {
-			std::memcpy(vec.data() + vec.size() - s + i * sizeof(u), &u, sizeof(u));
+		m_vec.resize(m_vec.size() + s);
+		for (size_t i{0}; i < count; ++i) {
+			std::memcpy(m_vec.data() + m_vec.size() - s + i * sizeof(u), &u, sizeof(u));
 		}
 		return *this;
 	}
@@ -239,14 +258,14 @@ struct StreamVectorWriteHelper {
 	template <typename T>
 	StreamVectorWriteHelper &write(T *u, size_t size) {
-		const size_t s = sizeof(u) * size;
-		vec.resize(vec.size() + s);
-		std::memcpy(vec.data() + vec.size() - s, u, s);
+		const size_t s = sizeof(T) * size;  // bytes for `size` elements; sizeof(u) was the pointer size
+		m_vec.resize(m_vec.size() + s);
+		std::memcpy(m_vec.data() + m_vec.size() - s, u, s);
 		return *this;
 	}
 
 	template <typename E>
 	StreamVectorWriteHelper &writeU8Enum(E e) {
-		vec.push_back(static_cast<uint8_t>(e));
+		m_vec.push_back(static_cast<uint8_t>(e));
 		return *this;
 	}
 
@@ -256,8 +275,8 @@ struct StreamVectorWriteHelper {
 	StreamVectorWriteHelper &writeUIntBE(U u) {
 		u = platform::to_big_endian(u);
 		const size_t s = sizeof(u);
-		vec.resize(vec.size() + s);
-		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		m_vec.resize(m_vec.size() + s);
+		std::memcpy(m_vec.data() + m_vec.size() - s, &u, s);
 		return *this;
 	}
 
@@ -272,39 +291,39 @@ struct StreamVectorWriteHelper {
 		// Write the first (bitwidth+7)/8 bytes
 		u = platform::to_big_endian(u);
 		const size_t s = (bitwidth + 7) / 8;
-		vec.resize(vec.size() + s);
-		std::memcpy(vec.data() + vec.size() - s, &u, s);
+		m_vec.resize(m_vec.size() + s);
+		std::memcpy(m_vec.data() + m_vec.size() - s, &u, s);
 		return *this;
 	}
 
 	StreamVectorWriteHelper &writeLEB128(uint64_t v) {
 		// Just reuse the logic from fstapi.c, is there a better way?
-		uint64_t nxt;
-		unsigned char buf[10]; /* ceil(64/7) = 10 */
-		unsigned char *pnt = buf;
-		int len;
+		uint64_t nxt{0};
+		unsigned char buf[10]{}; /* ceil(64/7) = 10 */
+		unsigned char *pnt{buf};
+		int len{0};
 		while ((nxt = v >> 7)) {
 			*(pnt++) = ((unsigned char)v) | 0x80;
 			v = nxt;
 		}
 		*(pnt++) = (unsigned char)v;
-		len = pnt - buf;
+		len = static_cast<int>(pnt - buf);
 
-		const size_t cur = vec.size();
-		vec.resize(cur + len);
-		std::memcpy(vec.data() + cur, buf, len);
+		const size_t cur = m_vec.size();
+		m_vec.resize(cur + len);
+		std::memcpy(m_vec.data() + cur, buf, len);
 		return *this;
 	}
 
 	StreamVectorWriteHelper &writeLEB128Signed(int64_t v) {
 		// Just reuse the logic from fstapi.c, is there a better way?
-		unsigned char buf[15]; /* ceil(64/7) = 10 + sign byte padded way up */
-		unsigned char byt;
-		unsigned char *pnt = buf;
-		int more = 1;
-		int len;
+		unsigned char buf[15]{}; /* ceil(64/7) = 10 + sign byte padded way up */
+		unsigned char byt{0};
+		unsigned char *pnt{buf};
+		int more{1};
+		int len{0};
 		do {
-			byt = v | 0x80;
+			byt = static_cast<unsigned char>(v | 0x80);
 			v >>= 7;
 
 			if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) {
@@ -314,11 +333,11 @@ struct StreamVectorWriteHelper {
 
 			*(pnt++) = byt;
 		} while (more);
-		len = pnt - buf;
+		len = static_cast<int>(pnt - buf);
 
-		const size_t cur = vec.size();
-		vec.resize(cur + len);
-		std::memcpy(vec.data() + cur, buf, len);
+		const size_t cur = m_vec.size();
+		m_vec.resize(cur + len);
+		std::memcpy(m_vec.data() + cur, buf, len);
 		return *this;
 	}
 
@@ -334,25 +353,25 @@ struct StreamVectorWriteHelper {
 
 	// Write the string, non-null-terminated
 	StreamVectorWriteHelper &writeString(const fst::string_view_pair str) {
-		if (str.second != 0) {
-			const size_t len = str.second;
-			const size_t cur = vec.size();
-			vec.resize(cur + len);
-			std::memcpy(vec.data() + cur, str.first, len);
+		if (str.m_size != 0) {
+			const size_t len = str.m_size;
+			const size_t cur = m_vec.size();
+			m_vec.resize(cur + len);
+			std::memcpy(m_vec.data() + cur, str.m_data, len);
 		}
 		return *this;
 	}
 
 	// Write the string, null-terminated
 	StreamVectorWriteHelper &writeString0(const fst::string_view_pair str) {
-		if (str.second != 0) {
-			const size_t len = str.second;
-			const size_t cur = vec.size();
-			vec.resize(cur + len + 1);
-			std::memcpy(vec.data() + cur, str.first, len);
-			vec[cur + len] = '\0';
+		if (str.m_size != 0) {
+			const size_t len = str.m_size;
+			const size_t cur = m_vec.size();
+			m_vec.resize(cur + len + 1);
+			std::memcpy(m_vec.data() + cur, str.m_data, len);
+			m_vec[cur + len] = '\0';
 		} else {
-			vec.push_back('\0');
+			m_vec.push_back('\0');
 		}
 		return *this;
 	}
diff --git a/include/fstcpp/fstcpp_variable_info.cpp b/include/fstcpp/fstcpp_variable_info.cpp
index d748c2e29..c085d8719 100644
--- a/include/fstcpp/fstcpp_variable_info.cpp
+++ b/include/fstcpp/fstcpp_variable_info.cpp
@@ -18,16 +18,21 @@ constexpr uint64_t VariableInfo::kCapacityBase;
 
 void VariableInfo::reallocate(uint64_t new_size) {
 	// Allocate new memory
-	const uint32_t new_capacity_log2 =
-		std::max(platform::clog2(new_size), kCapacityBaseShift) - kCapacityBaseShift;
-	uint8_t *new_data = new uint8_t[kCapacityBase << new_capacity_log2];
+	const uint32_t new_capacity_log2{
+		std::max(
+			static_cast<uint32_t>(platform::clog2(new_size)),
+			static_cast<uint32_t>(kCapacityBaseShift)
+		) -
+		static_cast<uint32_t>(kCapacityBaseShift)
+	};
+	uint8_t *new_data{new uint8_t[kCapacityBase << new_capacity_log2]};
 	// Copy old data to new memory
-	if (data != nullptr) {
-		const uint64_t old_size = size();
-		std::copy_n(data, old_size, new_data);
-		delete[] data;
+	if (m_data != nullptr) {
+		const uint64_t old_size{size()};
+		std::copy_n(m_data, old_size, new_data);
+		delete[] m_data;
 	}
-	data = new_data;
+	m_data = new_data;
 	capacity_log2(new_capacity_log2);
 }
 
diff --git a/include/fstcpp/fstcpp_variable_info.h b/include/fstcpp/fstcpp_variable_info.h
index a6d64fc8e..b91ab5103 100644
--- a/include/fstcpp/fstcpp_variable_info.h
+++ b/include/fstcpp/fstcpp_variable_info.h
@@ -7,6 +7,9 @@
 // direct include
 #include "fstcpp/fstcpp.h"
 // C system headers
+#ifdef _MSC_VER
+#	include <intrin.h>
+#endif
 // C++ standard library headers
 #include <algorithm>
 #include <cstdint>
@@ -23,7 +26,21 @@ namespace platform {
 
 // Can be replaced with std::bit_width when C++20 is available
 inline uint64_t clog2(uint64_t x) {
-	return 64 - __builtin_clzll(x - 1);
+#if defined(__GNUC__) || defined(__clang__)
+	return x <= 1 ? 0 : 64 - __builtin_clzll(x - 1);  // __builtin_clzll(0) is undefined
+#elif defined(_MSC_VER)  // MSVC
+	if (x <= 1) return 0;
+	unsigned long index;
+	_BitScanReverse64(&index, x - 1);
+	return static_cast<uint64_t>(index + 1);
+#else
+	uint64_t r = 0;
+	// Count the bits of (x - 1) so the result is ceil(log2(x)), matching the branches above
+	for (uint64_t v = (x <= 1 ? 0 : x - 1); v != 0; v >>= 1) {
+		r++;
+	}
+	return r;
+#endif
 }
 
 inline constexpr uint32_t gen_mask_safe(unsigned width) {
@@ -44,6 +61,10 @@ inline void write_field(uint32_t &dst, const uint32_t src, unsigned width, unsig
 }  // namespace platform
 
 class VariableInfo final {
+public:
+	static constexpr uint32_t kMaxSupportedBitwidth = 0x7fffff;
+
+private:
 	static constexpr uint64_t kCapacityBaseShift = 5;
 	static constexpr uint64_t kCapacityBase = 1 << kCapacityBaseShift;
 
@@ -54,17 +75,19 @@ class VariableInfo final {
 
 	// begin of data members
 	// 1. 8B pointer (assume 64-bit architecture), its size can be:
-	//   - 0 if data is nullptr
-	//   - `kCapacityBase * pow(2, capacity_log2)` if data is not nullptr
+	//   - 0 if m_data is nullptr
+	//   - `kCapacityBase * pow(2, m_capacity_log2)` if m_data is not nullptr
 	//   - If we want more bits, we can use the `kCapacityBaseShift` LSB for other purposes.
-	uint8_t *data = nullptr;
+	uint8_t *m_data{nullptr};
 	// 2. 4B size. The same as vector.size(), but we only need 32b.
-	uint32_t size_ = 0;
+	uint32_t m_size{0};
 	// 3. 4B misc. Highly compacted information for max cache efficiency.
 	//    - 6b capacity_log2
 	//    - 2b last_encoding_type
 	//    - 23b bitwidth
 	//    - 1b is_real
+	uint32_t m_misc{0};
+	// end of data members
 
 	// Note: optimization possibility (not implemented)
 	//    - real is always 64-bit double, so we can use 24 bits to encode
@@ -82,42 +105,42 @@ class VariableInfo final {
 	static constexpr uint32_t kLastEncodingTypeOffset = kBitwidthOffset + kBitwidthWidth;
 	static constexpr uint32_t kCapacityLog2Offset =
 		kLastEncodingTypeOffset + kLastEncodingTypeWidth;
-	uint32_t misc = 0;
-	// end of data members
 
 	void capacity_log2(uint32_t capacity_log2_) {
-		platform::write_field(misc, capacity_log2_, kCapacityLog2Width, kCapacityLog2Offset);
+		platform::write_field(m_misc, capacity_log2_, kCapacityLog2Width, kCapacityLog2Offset);
 	}
 	uint32_t capacity() const {
-		if (data == nullptr) {
+		if (m_data == nullptr) {
 			return 0;
 		}
-		return kCapacityBase << platform::read_field(misc, kCapacityLog2Width, kCapacityLog2Offset);
+		return kCapacityBase << platform::read_field(
+				   m_misc, kCapacityLog2Width, kCapacityLog2Offset
+			   );
 	}
 
-	inline bool need_reallocate(uint64_t new_size) const { return capacity() < new_size; }
+	bool need_reallocate(uint64_t new_size) const { return capacity() < new_size; }
 	// This function is cold, so we don't inline it
 	void reallocate(uint64_t new_size);
 
-	inline void size(uint64_t s) { size_ = s; }
+	void size(uint64_t s) { m_size = static_cast<uint32_t>(s); }
 
 public:
-	static constexpr uint32_t kMaxSupportedBitwidth = 0x7fffff;
-	inline uint64_t size() const { return size_; }
-	inline uint32_t bitwidth() const {
-		return platform::read_field(misc, kBitwidthWidth, kBitwidthOffset);
+	uint64_t size() const { return m_size; }
+	uint32_t bitwidth() const {
+		return platform::read_field(m_misc, kBitwidthWidth, kBitwidthOffset);
 	}
-	inline bool is_real() const {
-		return bool(platform::read_field(misc, kIsRealWidth, kIsRealOffset));
-	}
-	inline void last_written_encode_type(EncodingType encoding_) {
+	bool is_real() const { return bool(platform::read_field(m_misc, kIsRealWidth, kIsRealOffset)); }
+	void last_written_encode_type(EncodingType encoding_) {
 		platform::write_field(
-			misc, static_cast<uint32_t>(encoding_), kLastEncodingTypeWidth, kLastEncodingTypeOffset
+			m_misc,
+			static_cast<uint32_t>(encoding_),
+			kLastEncodingTypeWidth,
+			kLastEncodingTypeOffset
 		);
 	}
-	inline EncodingType last_written_encode_type() const {
+	EncodingType last_written_encode_type() const {
 		return static_cast<EncodingType>(
-			platform::read_field(misc, kLastEncodingTypeWidth, kLastEncodingTypeOffset)
+			platform::read_field(m_misc, kLastEncodingTypeWidth, kLastEncodingTypeOffset)
 		);
 	}
 	uint64_t last_written_bytes() const;
@@ -135,11 +158,10 @@ public:
 		}
 	}
 	VariableInfo(VariableInfo &&rhs) {
-		data = rhs.data;
-		rhs.data = nullptr;
-		misc = rhs.misc;
-		size_ = rhs.size_;
-		// rhs.misc = 0;
+		m_data = rhs.m_data;
+		rhs.m_data = nullptr;
+		m_misc = rhs.m_misc;
+		m_size = rhs.m_size;
 	}
 
 	uint32_t emitValueChange(uint64_t current_time_index, const uint64_t val);
@@ -151,8 +173,8 @@ public:
 	);
 
 	void keepOnlyTheLatestValue() {
-		const auto last_written_bytes_ = last_written_bytes();
-		const auto data_ptr_ = data_ptr();
+		const uint64_t last_written_bytes_ = last_written_bytes();
+		uint8_t *data_ptr_ = data_ptr();
 		std::copy_n(data_ptr_ + size() - last_written_bytes_, last_written_bytes_, data_ptr_);
 		size(last_written_bytes_);
 	}
@@ -172,7 +194,7 @@ public:
 		size(new_size);
 	}
 	void add_size(size_t added_size) { resize(size() + added_size); }
-	uint8_t *data_ptr() { return data; }
+	uint8_t *data_ptr() { return m_data; }
 };
 static_assert(
 	sizeof(VariableInfo) != 12,
@@ -294,7 +316,6 @@ public:
 
 	void emitValueChange(uint64_t current_time_index, const uint64_t val) {
 		auto wh = emitValueChangeCommonPart(current_time_index, EncodingType::BINARY);
-		std::cout << current_time_index << ": " << std::hex << val << std::endl;
 		// Note, do not use write<double> here since the uint64_t is
 		// already bit_cast'ed from double
 		wh.write<uint64_t>(val);
@@ -358,14 +379,12 @@ public:
 	VariableInfoScalarInt(VariableInfo &info_) : info(info_) {}
 
 public:
-	inline size_t computeBytesNeeded(EncodingType encoding) const {
+	size_t computeBytesNeeded(EncodingType encoding) const {
 		return kEmitTimeIndexAndEncodingSize + sizeof(T) * bitPerEncodedBit(encoding);
 	}
 
 	// The returning address points to the first byte of the value
-	inline EmitWriterHelper emitValueChangeCommonPart(
-		uint64_t current_time_index, EncodingType encoding
-	) {
+	EmitWriterHelper emitValueChangeCommonPart(uint64_t current_time_index, EncodingType encoding) {
 		if (current_time_index + 1 == 0) {
 			// This is the first value change, we need to remove everything
 			// and then add the new value
@@ -548,16 +567,14 @@ public:
 	VariableInfoLongInt(VariableInfo &info_) : info(info_) {}
 
 public:
-	inline size_t computeBytesNeeded(EncodingType encoding) const {
+	size_t computeBytesNeeded(EncodingType encoding) const {
 		return (
 			kEmitTimeIndexAndEncodingSize +
 			num_words() * sizeof(uint64_t) * bitPerEncodedBit(encoding)
 		);
 	}
 
-	inline EmitWriterHelper emitValueChangeCommonPart(
-		uint64_t current_time_index, EncodingType encoding
-	) {
+	EmitWriterHelper emitValueChangeCommonPart(uint64_t current_time_index, EncodingType encoding) {
 		if (current_time_index + 1 == 0) {
 			info.resize(0);
 		}
@@ -727,9 +744,9 @@ public:
 
 template <typename Callable, typename... Args>
 auto VariableInfo::dispatchHelper(Callable &&callable, Args &&...args) const {
-	const auto bitwidth = this->bitwidth();
-	const auto is_real = this->is_real();
-	if (not is_real) {
+	const uint32_t bitwidth = this->bitwidth();
+	const bool is_real = this->is_real();
+	if (!is_real) {
 		// Decision: the branch miss is too expensive for large design, so we only use 3 types of
 		// int
 		if (bitwidth <= 8) {
@@ -737,12 +754,6 @@ auto VariableInfo::dispatchHelper(Callable &&callable, Args &&...args) const {
 				detail::VariableInfoScalarInt<uint8_t>(const_cast<VariableInfo &>(*this)),
 				std::forward<Args>(args)...
 			);
-			// } else if (bitwidth <= 16) {
-			// 	return
-			// callable(detail::VariableInfoScalarInt<uint16_t>(const_cast<VariableInfo&>(*this)),
-			// std::forward<Args>(args)...); } else if (bitwidth <= 32) { 	return
-			// callable(detail::VariableInfoScalarInt<uint32_t>(const_cast<VariableInfo&>(*this)),
-			// std::forward<Args>(args)...);
 		} else if (bitwidth <= 64) {
 			return callable(
 				detail::VariableInfoScalarInt<uint64_t>(const_cast<VariableInfo &>(*this)),
@@ -761,35 +772,35 @@ auto VariableInfo::dispatchHelper(Callable &&callable, Args &&...args) const {
 }
 
 inline VariableInfo::VariableInfo(uint32_t bitwidth_, bool is_real_) {
-	platform::write_field(misc, bitwidth_, kBitwidthWidth, kBitwidthOffset);
-	platform::write_field(misc, is_real_, kIsRealWidth, kIsRealOffset);
+	platform::write_field(m_misc, bitwidth_, kBitwidthWidth, kBitwidthOffset);
+	platform::write_field(m_misc, is_real_, kIsRealWidth, kIsRealOffset);
 	dispatchHelper([](auto obj) { obj.construct(); });
 	last_written_encode_type(EncodingType::BINARY);
 }
 
 inline uint32_t VariableInfo::emitValueChange(uint64_t current_time_index, const uint64_t val) {
-	const auto old_size = size();
+	const uint64_t old_size = size();
 	dispatchHelper([=](auto obj) { obj.emitValueChange(current_time_index, val); });
 	last_written_encode_type(EncodingType::BINARY);
-	return size() - old_size;
+	return static_cast<uint32_t>(size() - old_size);
 }
 
 inline uint32_t VariableInfo::emitValueChange(
 	uint64_t current_time_index, const uint32_t *val, EncodingType encoding
 ) {
-	const auto old_size = size();
+	const uint64_t old_size = size();
 	dispatchHelper([=](auto obj) { obj.emitValueChange(current_time_index, val, encoding); });
 	last_written_encode_type(encoding);
-	return size() - old_size;
+	return static_cast<uint32_t>(size() - old_size);
 }
 
 inline uint32_t VariableInfo::emitValueChange(
 	uint64_t current_time_index, const uint64_t *val, EncodingType encoding
 ) {
-	const auto old_size = size();
+	const uint64_t old_size = size();
 	dispatchHelper([=](auto obj) { obj.emitValueChange(current_time_index, val, encoding); });
 	last_written_encode_type(encoding);
-	return size() - old_size;
+	return static_cast<uint32_t>(size() - old_size);
 }
 
 inline void VariableInfo::dumpInitialBits(std::vector<uint8_t> &buf) const {
@@ -801,7 +812,7 @@ inline void VariableInfo::dumpValueChanges(std::vector<uint8_t> &buf) const {
 }
 
 inline uint64_t VariableInfo::last_written_bytes() const {
-	const auto encoding = last_written_encode_type();
+	const EncodingType encoding = last_written_encode_type();
 	return dispatchHelper([encoding](auto obj) { return obj.computeBytesNeeded(encoding); });
 }
 
diff --git a/include/fstcpp/fstcpp_writer.cpp b/include/fstcpp/fstcpp_writer.cpp
index 6df611869..b965e80b7 100644
--- a/include/fstcpp/fstcpp_writer.cpp
+++ b/include/fstcpp/fstcpp_writer.cpp
@@ -9,9 +9,7 @@
 // C++ standard library headers
 #include <cstdio>
 #include <cstring>
-#include <iostream>
 #include <numeric>
-#include <stdexcept>
 #include <string>
 #include <unordered_map>
 #include <utility>
@@ -25,8 +23,6 @@
 #include "fstcpp/fstcpp_stream_write_helper.h"
 #include "fstcpp/fstcpp_variable_info.h"
 
-using namespace std;
-
 // AT(x) is used to access vector at index x, and it will throw exception if out of bound
 // in debug mode, but in release mode, it will not throw exception
 // Usually you should only need AT(x) only at very hot code path.
@@ -41,54 +37,54 @@ namespace fst {
 namespace detail {
 
 void BlackoutData::emitDumpActive(uint64_t current_timestamp, bool enable) {
-	StreamVectorWriteHelper h(buffer);
-	h.writeUIntBE<uint8_t>(enable).writeLEB128(current_timestamp - previous_timestamp);
-	++count;
+	StreamVectorWriteHelper h(m_buffer);  // the record below is written into m_buffer
+	h.writeUIntBE<uint8_t>(enable).writeLEB128(current_timestamp - m_previous_timestamp);  // 1-byte flag, LEB128 delta
+	++m_count;  // NOTE(review): m_previous_timestamp is not advanced here -- confirm it is updated elsewhere
 }
 
 ValueChangeData::ValueChangeData() {
-	variable_infos.reserve(1024);
+	m_variable_infos.reserve(1024);  // reserve capacity for 1024 variables up front
 }
 
 ValueChangeData::~ValueChangeData() = default;
 
 void ValueChangeData::keepOnlyTheLatestValue() {
-	for (auto &v : variable_infos) {
+	for (VariableInfo &v : m_variable_infos) {  // each variable trims its own history
 		v.keepOnlyTheLatestValue();
 	}
-	FST_CHECK(not timestamps.empty());
-	timestamps.front() = timestamps.back();
-	timestamps.resize(1);
+	FST_CHECK(!m_timestamps.empty());  // front()/back() below need at least one timestamp
+	m_timestamps.front() = m_timestamps.back();  // newest timestamp moves to slot 0
+	m_timestamps.resize(1);  // every other timestamp is dropped
 }
 
 }  // namespace detail
 
 void Writer::open(const string_view_pair name) {
-	FST_CHECK(not main_fst_file_.is_open());
-	main_fst_file_.open(string(name.first, name.second), ios::binary);
+	FST_CHECK(!m_main_fst_file_.is_open());  // a second open() without close() is rejected
+	m_main_fst_file_.open(std::string(name.m_data, name.m_size), std::ios::binary);  // NOTE(review): result not checked
 	// reserve space for header, we will write it at Close(), append geometry and hierarchy at the
 	// end wave data will be flushed in between
-	main_fst_file_.seekp(kSharedBlockHeaderSize + HeaderInfo::total_size, ios_base::beg);
+	m_main_fst_file_.seekp(kSharedBlockHeaderSize + HeaderInfo::total_size, std::ios_base::beg);  // skip header block
 }
 
 void Writer::close() {
-	if (not main_fst_file_.is_open()) return;
+	if (!m_main_fst_file_.is_open()) return;  // nothing to finalize when no file is open
 	// Finalize header fields
-	if (header_.date[0] == '\0') {
+	if (m_header_.m_date[0] == '\0') {
 		// date is not set yet, set to the current date
 		setDate();
 	}
-	if (header_.start_time == kInvalidTime) {
-		header_.start_time = 0;
+	if (m_header_.m_start_time == kInvalidTime) {  // start time still holds the sentinel
+		m_header_.m_start_time = 0;
 	}
-	flushValueChangeData_(value_change_data_, main_fst_file_);
-	appendGeometry_(main_fst_file_);
-	appendHierarchy_(main_fst_file_);
-	appendBlackout_(main_fst_file_);
+	flushValueChangeData_(m_value_change_data_, m_main_fst_file_);  // write out what is still buffered
+	appendGeometry_(m_main_fst_file_);
+	appendHierarchy_(m_main_fst_file_);
+	appendBlackout_(m_main_fst_file_);  // returns early when no blackout record exists
 	// Note: write header seek to 0, so we need to do
 	// this after all append operations
-	writeHeader_(header_, main_fst_file_);
-	main_fst_file_.close();
+	writeHeader_(m_header_, m_main_fst_file_);
+	m_main_fst_file_.close();
 }
 
 /////////////////////////////////////////
@@ -99,20 +95,20 @@ void Writer::setScope(
 	const string_view_pair scopename,
 	const string_view_pair scopecomp
 ) {
-	FST_CHECK(not hierarchy_finalized_);
-	StreamVectorWriteHelper h(hierarchy_buffer_);
+	FST_CHECK(!m_hierarchy_finalized_);
+	StreamVectorWriteHelper h(m_hierarchy_buffer_);
 	h  //
 		.writeU8Enum(Hierarchy::ScopeControlType::VCD_SCOPE)
 		.writeU8Enum(scopetype)
 		.writeString0(scopename)
 		.writeString0(scopecomp);
-	++header_.num_scopes;
+	++m_header_.m_num_scopes;
 }
 
 void Writer::upscope() {
-	FST_CHECK(not hierarchy_finalized_);
+	FST_CHECK(!m_hierarchy_finalized_);  // hierarchy entries are only accepted before finalization
 	// TODO: shall we inline it?
-	StreamVectorWriteHelper h(hierarchy_buffer_);
+	StreamVectorWriteHelper h(m_hierarchy_buffer_);  // the upscope marker below goes into the hierarchy buffer
 	h.writeU8Enum(Hierarchy::ScopeControlType::VCD_UPSCOPE);
 }
 
@@ -123,13 +119,13 @@ Handle Writer::createVar(
 	const string_view_pair name,
 	Handle alias_handle
 ) {
-	FST_CHECK(not hierarchy_finalized_);
+	FST_CHECK(!m_hierarchy_finalized_);
 	FST_CHECK_LE(bitwidth, VariableInfo::kMaxSupportedBitwidth);
 	// write hierarchy entry: type, direction, name, length, alias
-	StreamVectorWriteHelper h(hierarchy_buffer_);
+	StreamVectorWriteHelper h(m_hierarchy_buffer_);
 
 	// determine real/string handling like original C implementation
-	bool is_real = false;
+	bool is_real{false};
 	switch (vartype) {
 	case Hierarchy::VarType::VCD_REAL:
 	case Hierarchy::VarType::VCD_REAL_PARAMETER:
@@ -144,17 +140,17 @@ Handle Writer::createVar(
 	default:
 		break;
 	}
-	if (alias_handle > header_.num_handles) {
+	if (alias_handle > m_header_.m_num_handles) {
 		// sanitize
 		alias_handle = 0;
 	}
-	const bool is_alias = alias_handle != 0;
-	// This counter is incremented whether alias or non-alias
-	++header_.num_vars;
-	if (not is_alias) {
+	const bool is_alias{alias_handle != 0};
+	// This counter is incremented whether alias or non-alias
+	++m_header_.m_num_vars;
+	if (!is_alias) {
 		// This counter is incremented only for non-alias variables
-		++header_.num_handles;
-		alias_handle = header_.num_handles;
+		++m_header_.m_num_handles;
+		alias_handle = static_cast<uint32_t>(m_header_.m_num_handles);
 	}
 
 	h  //
@@ -165,40 +161,41 @@ Handle Writer::createVar(
 		.writeLEB128(is_alias ? alias_handle : 0);
 
 	// If alias_handle == 0, we must allocate geom/valpos/curval entries and create a new handle
-	if (not is_alias) {
-		StreamVectorWriteHelper g(geometry_buffer_);
+	if (!is_alias) {
+		StreamVectorWriteHelper g(m_geometry_buffer_);
 		// I don't know why the original C implementation encode bitwidth again
-		const uint32_t geom_len = (bitwidth == 0 ? uint32_t(-1) : is_real ? uint32_t(0) : bitwidth);
+		const uint32_t geom_len{(bitwidth == 0 ? uint32_t(-1) : is_real ? uint32_t(0) : bitwidth)};
 		g.writeLEB128(geom_len);
-		value_change_data_.variable_infos.emplace_back(bitwidth, is_real);
+		m_value_change_data_.m_variable_infos.emplace_back(bitwidth, is_real);
 	}
 
 	return alias_handle;
 }
 
+// TODO: implement createVar2(); the throwing stub below stays commented out until then
 // LCOV_EXCL_START
-Handle Writer::createVar2(
-	Hierarchy::VarType vartype,
-	Hierarchy::VarDirection vardir,
-	uint32_t bitwidth,
-	const string_view_pair name,
-	Handle alias_handle,
-	const string_view_pair type,
-	Hierarchy::SupplementalVarType svt,
-	Hierarchy::SupplementalDataType sdt
-) {
-	FST_CHECK(not hierarchy_finalized_);
-	(void)vartype;
-	(void)vardir;
-	(void)bitwidth;
-	(void)name;
-	(void)alias_handle;
-	(void)type;
-	(void)svt;
-	(void)sdt;
-	throw runtime_error("TODO");
-	return 0;
-}
+// Handle Writer::createVar2(
+// 	Hierarchy::VarType vartype,
+// 	Hierarchy::VarDirection vardir,
+// 	uint32_t bitwidth,
+// 	const string_view_pair name,
+// 	Handle alias_handle,
+// 	const string_view_pair type,
+// 	Hierarchy::SupplementalVarType svt,
+// 	Hierarchy::SupplementalDataType sdt
+// ) {
+// 	FST_CHECK(!m_hierarchy_finalized_);
+// 	(void)vartype;
+// 	(void)vardir;
+// 	(void)bitwidth;
+// 	(void)name;
+// 	(void)alias_handle;
+// 	(void)type;
+// 	(void)svt;
+// 	(void)sdt;
+// 	throw std::runtime_error("TODO");
+// 	return 0;
+// }
 // LCOV_EXCL_STOP
 
 /////////////////////////////////////////
@@ -207,44 +204,42 @@ Handle Writer::createVar2(
 void Writer::emitTimeChange(uint64_t tim) {
 	finalizeHierarchy_();
 
-	if (value_change_data_usage_ > value_change_data_flush_threshold_ or flush_pending_) {
-		flushValueChangeData_(value_change_data_, main_fst_file_);
+	if (m_value_change_data_usage_ > m_value_change_data_flush_threshold_ || m_flush_pending_) {  // over threshold or requested
+		flushValueChangeData_(m_value_change_data_, m_main_fst_file_);
 	}
 
 	// Update header
-	header_.start_time = min(header_.start_time, tim);
-	header_.end_time = tim;
+	m_header_.m_start_time = std::min(m_header_.m_start_time, tim);  // smallest time seen so far
+	m_header_.m_end_time = tim;  // most recently emitted time
 
-	if (value_change_data_.timestamps.empty() or value_change_data_.timestamps.back() != tim) {
-		value_change_data_.timestamps.push_back(tim);
+	if (m_value_change_data_.m_timestamps.empty() ||
+		m_value_change_data_.m_timestamps.back() != tim) {  // do not store the same time twice in a row
+		m_value_change_data_.m_timestamps.push_back(tim);
 	}
 }
 
-void Writer::emitDumpActive(bool enable) {
-	// TODO: this API is not fully understood, need to check
-	FST_CHECK(not value_change_data_.timestamps.empty());
-	blackout_data_.emitDumpActive(value_change_data_.timestamps.back(), enable);
-}
-
-template <typename T, typename... U>
-uint64_t emitValueHelperStaticDispatch_(
-	VariableInfo *var_info, const uint64_t time_index, U &&...val
-) {
-	return static_cast<T *>(var_info)->emitValueChange(time_index, std::forward<U>(val)...);
-}
+// TODO: re-enable emitDumpActive() once the blackout API is understood (see the note inside)
+// void Writer::emitDumpActive(bool enable) {
+// 	// TODO: this API is not fully understood, need to check
+// 	FST_CHECK(!m_value_change_data_.m_timestamps.empty());
+// 	m_blackout_data_.emitDumpActive(m_value_change_data_.m_timestamps.back(), enable);
+// }
 
 template <typename... T>
 void Writer::emitValueChangeHelper_(Handle handle, T &&...val) {
 	// Let data prefetch go first
-	auto &var_info = value_change_data_.variable_infos AT(handle - 1);
+	VariableInfo &var_info = m_value_change_data_.m_variable_infos AT(handle - 1);  // handles are 1-based
+#if defined(__GNUC__) || defined(__clang__)  // __builtin_prefetch is only used on GCC/Clang
 	__builtin_prefetch(var_info.data_ptr() + var_info.size() - 1, 1, 0);
+#endif
 
 	finalizeHierarchy_();
 
 	// Original implementation: virtual, but vtable is too costly, we switch to if-else static
 	// dispatch
-	value_change_data_usage_ +=
-		var_info.emitValueChange(value_change_data_.timestamps.size() - 1, std::forward<T>(val)...);
+	m_value_change_data_usage_ += var_info.emitValueChange(  // add the growth reported by the variable
+		m_value_change_data_.m_timestamps.size() - 1, std::forward<T>(val)...  // NOTE(review): underflows if no timestamp yet
+	);
 }
 
 void Writer::emitValueChange(Handle handle, const uint32_t *val, EncodingType encoding) {
@@ -261,7 +256,7 @@ void Writer::emitValueChange(Handle handle, uint64_t val) {
 
 void Writer::emitValueChange(Handle handle, const char *val) {
 	finalizeHierarchy_();
-	auto &var_info = value_change_data_.variable_infos AT(handle - 1);
+	VariableInfo &var_info = m_value_change_data_.m_variable_infos AT(handle - 1);
 
 	// For double handles, const char* is interpreted as a double* (8B)
 	// This double shall be written out as raw IEEE 754 double
@@ -272,65 +267,67 @@ void Writer::emitValueChange(Handle handle, const char *val) {
 	}
 
 	// For normal integer handles, const char* is "01xz..." (1B per bit)
-	const uint32_t bitwidth = var_info.bitwidth();
+	const uint32_t bitwidth{var_info.bitwidth()};
 	FST_DCHECK_NE(bitwidth, 0);
 
 	val += bitwidth;
-	thread_local static vector<uint64_t> packed_value_buffer;
-	const unsigned num_words = (bitwidth + 63) / 64;
-	packed_value_buffer.assign(num_words, 0);
+	thread_local static std::vector<uint64_t> t_packed_value_buffer;
+	const unsigned num_words{(bitwidth + 63) / 64};
+	t_packed_value_buffer.assign(num_words, 0);
 	for (unsigned i = 0; i < num_words; ++i) {
-		const char *start = val - std::min((i + 1) * 64, bitwidth);
-		const char *end = val - 64 * i;
-		packed_value_buffer[i] = 0;
+		const char *start{val - std::min((i + 1) * 64, bitwidth)};
+		const char *end{val - 64 * i};
+		t_packed_value_buffer[i] = 0;
 		for (const char *p = start; p < end; ++p) {
 			// No checking for invalid characters, follow original C implementation
-			packed_value_buffer[i] <<= 1;
-			packed_value_buffer[i] |= (*p - '0');
+			t_packed_value_buffer[i] <<= 1;
+			t_packed_value_buffer[i] |= static_cast<uint64_t>(*p - '0');
 		}
 	}
 
 	if (bitwidth <= 64) {
-		emitValueChange(handle, packed_value_buffer.front());
+		emitValueChange(handle, t_packed_value_buffer.front());
 	} else {
-		emitValueChange(handle, packed_value_buffer.data(), EncodingType::BINARY);
+		emitValueChange(handle, t_packed_value_buffer.data(), EncodingType::BINARY);
 	}
 }
 
 /////////////////////////////////////////
 // File flushing functions
 /////////////////////////////////////////
-void Writer::writeHeader_(const Header &header, ostream &os) {
+void Writer::writeHeader_(const Header &header, std::ostream &os) {
 	StreamWriteHelper h(os);
-	static char kDefaultWriterName[sizeof(header.writer)] = "fstcppWriter";
-	const char *writer_name = header.writer[0] == '\0' ? kDefaultWriterName : header.writer;
+	static const char kDefaultWriterName[sizeof(header.m_writer)] = "fstcppWriter";  // read-only, zero-padded to field width
+	const char *writer_name = header.m_writer[0] == '\0' ? kDefaultWriterName : header.m_writer;  // default when unset
 
 	// Actual write
 	h  //
-		.seek(streamoff(0), ios_base::beg)
+		.seek(std::streamoff(0), std::ios_base::beg)  // the header block lives at the very start of the stream
 		.writeBlockHeader(BlockType::HEADER, HeaderInfo::total_size)
-		.writeUInt(header.start_time)
-		.writeUInt(header.end_time)
+		.writeUInt(header.m_start_time)
+		.writeUInt(header.m_end_time)
 		.writeFloat(HeaderInfo::kEndianessMagicIdentifier)
-		.writeUInt(header.writer_memory_use)
-		.writeUInt(header.num_scopes)
-		.writeUInt(header.num_vars)
-		.writeUInt(header.num_handles)
-		.writeUInt(header.num_value_change_data_blocks)
-		.writeUInt(header.timescale)
-		.write(writer_name, sizeof(header.writer))
-		.write(header.date, sizeof(header.date))
+		.writeUInt(header.m_writer_memory_use)
+		.writeUInt(header.m_num_scopes)
+		.writeUInt(header.m_num_vars)
+		.writeUInt(header.m_num_handles)
+		.writeUInt(header.m_num_value_change_data_blocks)
+		.writeUInt(header.m_timescale)
+		.write(writer_name, sizeof(header.m_writer))  // always the full fixed-width field
+		.write(header.m_date, sizeof(header.m_date))  // always the full fixed-width field
 		.fill('\0', HeaderInfo::Size::reserved)
-		.writeUInt(static_cast<uint8_t>(header.filetype))
-		.writeUInt(header.timezero);
+		.writeUInt(static_cast<uint8_t>(header.m_filetype))  // stored as a single byte
+		.writeUInt(header.m_timezero);
 
 	FST_DCHECK_EQ(os.tellp(), HeaderInfo::total_size + kSharedBlockHeaderSize);
-};
+}
 
 namespace {  // compression helpers
 
 // These API pass compressed_data to avoid frequent reallocations
-void compressUsingLz4(const vector<uint8_t> &uncompressed_data, vector<uint8_t> &compressed_data) {
+void compressUsingLz4(
+	const std::vector<uint8_t> &uncompressed_data, std::vector<uint8_t> &compressed_data
+) {
 	const int uncompressed_size = uncompressed_data.size();
 	const int compressed_bound = LZ4_compressBound(uncompressed_size);
 	compressed_data.resize(compressed_bound);
@@ -344,7 +341,7 @@ void compressUsingLz4(const vector<uint8_t> &uncompressed_data, vector<uint8_t>
 }
 
 void compressUsingZlib(
-	const vector<uint8_t> &uncompressed_data, vector<uint8_t> &compressed_data, int level
+	const std::vector<uint8_t> &uncompressed_data, std::vector<uint8_t> &compressed_data, int level
 ) {
 	// compress using zlib
 	const uLong uncompressed_size = uncompressed_data.size();
@@ -358,17 +355,17 @@ void compressUsingZlib(
 		level
 	);
 	if (z_status != Z_OK) {
-		throw runtime_error(
-			"Failed to compress data with zlib, error code: " + to_string(z_status)
+		throw std::runtime_error(
+			"Failed to compress data with zlib, error code: " + std::to_string(z_status)
 		);
 	}
 	compressed_data.resize(compressed_bound);
 }
 
-pair<const uint8_t *, size_t> selectSmaller(
-	const vector<uint8_t> &compressed_data, const vector<uint8_t> &uncompressed_data
+std::pair<const uint8_t *, size_t> selectSmaller(
+	const std::vector<uint8_t> &compressed_data, const std::vector<uint8_t> &uncompressed_data
 ) {
-	pair<const uint8_t *, size_t> ret;
+	std::pair<const uint8_t *, size_t> ret;
 	if (compressed_data.size() < uncompressed_data.size()) {
 		ret.first = compressed_data.data();
 		ret.second = compressed_data.size();
@@ -383,103 +380,109 @@ pair<const uint8_t *, size_t> selectSmaller(
 
 // AppendHierarchy_ and AppendGeometry_ shares a very similar structure
 // But they are slightly different in the original C implementation...
-void Writer::appendGeometry_(ostream &os) {
-	if (geometry_buffer_.empty()) {
+void Writer::appendGeometry_(std::ostream &os) {
+	if (m_geometry_buffer_.empty()) {
 		// skip the geometry block if there is no data
 		return;
 	}
-	vector<uint8_t> geometry_buffer_compressed_;
-	compressUsingZlib(geometry_buffer_, geometry_buffer_compressed_, 9);
+	std::vector<uint8_t> geometry_buffer_compressed_{};  // local scratch buffer (not a member despite the suffix)
+	compressUsingZlib(m_geometry_buffer_, geometry_buffer_compressed_, 9);  // zlib level 9
 	// TODO: Replace with structured binding in C++17
-	const auto selected_pair = selectSmaller(geometry_buffer_compressed_, geometry_buffer_);
-	const auto selected_data = selected_pair.first;
-	const auto selected_size = selected_pair.second;
+	const std::pair<const uint8_t *, size_t> selected_pair =
+		selectSmaller(geometry_buffer_compressed_, m_geometry_buffer_);  // compressed only if strictly smaller
+	const uint8_t *selected_data = selected_pair.first;
+	const size_t selected_size = selected_pair.second;
 
 	StreamWriteHelper h(os);
 	h  //
-		.seek(0, ios_base::end)
+		.seek(0, std::ios_base::end)  // blocks are appended at the end of the stream
 		// 16 is for the uncompressed_size and header_.num_handles
 		.writeBlockHeader(BlockType::GEOMETRY, selected_size + 16)
-		.writeUInt<uint64_t>(geometry_buffer_.size())
+		.writeUInt<uint64_t>(m_geometry_buffer_.size())  // uncompressed size
 		// I don't know why the original C implementation write num_handles again here
 		// but we have to follow it
-		.writeUInt(header_.num_handles)
+		.writeUInt(m_header_.m_num_handles)
 		.write(selected_data, selected_size);
 }
 
-void Writer::appendHierarchy_(ostream &os) {
-	if (hierarchy_buffer_.empty()) {
+void Writer::appendHierarchy_(std::ostream &os) {
+	if (m_hierarchy_buffer_.empty()) {
 		// skip the hierarchy block if there is no data
 		return;
 	}
 
 	// compress hierarchy_buffer_ using LZ4.
-	const int compressed_bound = LZ4_compressBound(hierarchy_buffer_.size());
-	vector<uint8_t> hierarchy_buffer_compressed_(compressed_bound);
-	const int compressed_size = LZ4_compress_default(
-		reinterpret_cast<const char *>(hierarchy_buffer_.data()),
+	const int compressed_bound{LZ4_compressBound(static_cast<int>(m_hierarchy_buffer_.size()))};  // LZ4 takes int sizes
+	std::vector<uint8_t> hierarchy_buffer_compressed_(compressed_bound);  // worst-case sized destination
+	const int compressed_size{LZ4_compress_default(  // NOTE(review): 0 means failure and is not checked here
+		reinterpret_cast<const char *>(m_hierarchy_buffer_.data()),
 		reinterpret_cast<char *>(hierarchy_buffer_compressed_.data()),
-		hierarchy_buffer_.size(),
+		static_cast<int>(m_hierarchy_buffer_.size()),
 		compressed_bound
-	);
+	)};
 
 	StreamWriteHelper h(os);
 	h  //
-		.seek(0, ios_base::end)
+		.seek(0, std::ios_base::end)  // blocks are appended at the end of the stream
 		// +16 is for the uncompressed_size
 		.writeBlockHeader(BlockType::HIERARCHY_LZ4_COMPRESSED, compressed_size + 8)
-		.writeUInt<uint64_t>(hierarchy_buffer_.size())
+		.writeUInt<uint64_t>(m_hierarchy_buffer_.size())  // these 8 bytes are the "+ 8" in the block length above
 		.write(hierarchy_buffer_compressed_.data(), compressed_size);
 }
 
-void Writer::appendBlackout_(ostream &os) {
-	if (blackout_data_.count == 0) {
+void Writer::appendBlackout_(std::ostream &os) {
+	if (m_blackout_data_.m_count == 0) {
 		// skip the blackout block if there is no data
 		return;
 	}
-	const vector<uint8_t> &blackout_data = blackout_data_.buffer;
-	const auto begin_of_blackout_block = os.tellp();
+	const std::vector<uint8_t> &blackout_data = m_blackout_data_.m_buffer;  // NOTE(review): LEB128 prefix below is byte size, not m_count -- confirm
+	const std::streampos begin_of_blackout_block = os.tellp();  // remembered so the block header can be patched in later
 	StreamWriteHelper h(os);
 	h  //
 	   // skip the block header
-		.seek(kSharedBlockHeaderSize, ios_base::cur)
+		.seek(kSharedBlockHeaderSize, std::ios_base::cur)
 		// Note: we cannot know the size beforehand since this length is LEB128 encoded
 		.writeLEB128(blackout_data.size())
 		.write(blackout_data.data(), blackout_data.size());
 
-	const auto size_of_blackout_block = os.tellp() - begin_of_blackout_block;
+	const std::streamoff size_of_blackout_block = os.tellp() - begin_of_blackout_block;  // includes the skipped header
 	h  //
 	   // go back to the beginning of the block
-		.seek(begin_of_blackout_block, ios_base::beg)
+		.seek(begin_of_blackout_block, std::ios_base::beg)
 		// and write the block header
-		.writeBlockHeader(BlockType::BLACKOUT, size_of_blackout_block - kSharedBlockHeaderSize);
+		.writeBlockHeader(
+			BlockType::BLACKOUT,
+			static_cast<uint64_t>(size_of_blackout_block - kSharedBlockHeaderSize)  // payload size only
+		);
 }
 
-void detail::ValueChangeData::writeInitialBits(vector<uint8_t> &os) const {
+void detail::ValueChangeData::writeInitialBits(std::vector<uint8_t> &os) const {
 	// Build vc_bits_data by concatenating each variable's initial bits as documented.
 	// We will not compress for now; just generate the raw bytes and print summary to stdout.
-	for (size_t i = 0; i < variable_infos.size(); ++i) {
-		auto &vref = variable_infos[i];
+	for (size_t i{0}; i < m_variable_infos.size(); ++i) {  // variables are visited in index order
+		const VariableInfo &vref = m_variable_infos[i];  // read-only: only os is written to
 		vref.dumpInitialBits(os);
 	}
 }
 
-vector<vector<uint8_t>> detail::ValueChangeData::computeWaveData() const {
-	const size_t N = variable_infos.size();
-	vector<vector<uint8_t>> data(N);
-	for (size_t i = 0; i < N; ++i) {
-		variable_infos[i].dumpValueChanges(data[i]);
+std::vector<std::vector<uint8_t>> detail::ValueChangeData::computeWaveData() const {
+	const size_t N{m_variable_infos.size()};
+	std::vector<std::vector<uint8_t>> data(N);  // one (initially empty) byte buffer per variable
+	for (size_t i{0}; i < N; ++i) {
+		m_variable_infos[i].dumpValueChanges(data[i]);  // data[i] receives variable i's value changes
 	}
 	return data;
 }
 
-vector<int64_t> detail::ValueChangeData::uniquifyWaveData(vector<vector<uint8_t>> &data) {
+std::vector<int64_t> detail::ValueChangeData::uniquifyWaveData(
+	std::vector<std::vector<uint8_t>> &data
+) {
 	// After this function, positions[i] is:
 	//  - = 0: If data[i] is unique (first occurrence)
 	//  - < 0: If data[i] is a duplicate, encoded as -(original_index + 1)
-	vector<int64_t> positions(data.size(), 0);
+	std::vector<int64_t> positions(data.size(), 0);
 	struct MyHash {
-		size_t operator()(const vector<uint8_t> *vec) const {
+		size_t operator()(const std::vector<uint8_t> *vec) const {
 			size_t seed = 0;
 			for (auto v : *vec) {
 				seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
@@ -488,11 +491,11 @@ vector<int64_t> detail::ValueChangeData::uniquifyWaveData(vector<vector<uint8_t>
 		}
 	};
 	struct MyEqual {
-		bool operator()(const vector<uint8_t> *a, const vector<uint8_t> *b) const {
+		bool operator()(const std::vector<uint8_t> *a, const std::vector<uint8_t> *b) const {
 			return *a == *b;
 		}
 	};
-	unordered_map<const vector<uint8_t> *, int64_t, MyHash, MyEqual> data_map;
+	std::unordered_map<const std::vector<uint8_t> *, int64_t, MyHash, MyEqual> data_map;
 	for (size_t i = 0; i < data.size(); ++i) {
 		if (data[i].empty()) {
 			continue;
@@ -500,9 +503,9 @@ vector<int64_t> detail::ValueChangeData::uniquifyWaveData(vector<vector<uint8_t>
 		// insert vec->i to data_map if not exists
 		auto p = data_map.emplace(&data[i], static_cast<int64_t>(i));
 		auto it = p.first;
-		auto inserted = p.second;
+		const bool inserted{p.second};
 
-		if (not inserted) {
+		if (!inserted) {
 			// duplicated wave data found
 			positions[i] = -(it->second + 1);
 			// clear data to save memory
@@ -513,9 +516,9 @@ vector<int64_t> detail::ValueChangeData::uniquifyWaveData(vector<vector<uint8_t>
 }
 
 uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
-	ostream &os,
-	const vector<vector<uint8_t>> &data,
-	vector<int64_t> &positions,
+	std::ostream &os,
+	const std::vector<std::vector<uint8_t>> &data,
+	std::vector<int64_t> &positions,
 	WriterPackType pack_type
 ) {
 	// After this function, positions[i] is:
@@ -527,7 +530,7 @@ uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
 	StreamWriteHelper h(os);
 	int64_t previous_size = 1;
 	uint64_t written_count = 0;
-	vector<uint8_t> compressed_data;
+	std::vector<uint8_t> compressed_data;
 	for (size_t i = 0; i < positions.size(); ++i) {
 		if (positions[i] < 0) {
 			// duplicate (negative index), do nothing
@@ -537,12 +540,13 @@ uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
 			// try to compress
 			const uint8_t *selected_data;
 			size_t selected_size;
-			if (pack_type == WriterPackType::NO_COMPRESSION or data[i].size() <= 32) {
+			if (pack_type == WriterPackType::NO_COMPRESSION || data[i].size() <= 32) {
 				selected_data = data[i].data();
 				selected_size = data[i].size();
 			} else {
 				compressUsingLz4(data[i], compressed_data);
-				const auto selected_pair = selectSmaller(compressed_data, data[i]);
+				const std::pair<const uint8_t *, size_t> selected_pair =
+					selectSmaller(compressed_data, data[i]);
 				selected_data = selected_pair.first;
 				selected_size = selected_pair.second;
 			}
@@ -550,7 +554,7 @@ uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
 
 			// non-empty unique data, write it
 			written_count++;
-			streamoff bytes_written;
+			std::streamoff bytes_written;
 			h  //
 				.beginOffset(bytes_written)
 				// FST spec: 0 means no compression, >0 for the size of the original data
@@ -565,7 +569,7 @@ uint64_t detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
 }
 
 void detail::ValueChangeData::writeEncodedPositions(
-	const vector<int64_t> &encoded_positions, ostream &os
+	const std::vector<int64_t> &encoded_positions, std::ostream &os
 ) {
 	// Encode positions with the specified run/varint rules into a varint buffer.
 	StreamWriteHelper h(os);
@@ -613,20 +617,20 @@ void detail::ValueChangeData::writeEncodedPositions(
 	}
 }
 
-void detail::ValueChangeData::writeTimestamps(vector<uint8_t> &os) const {
+void detail::ValueChangeData::writeTimestamps(std::vector<uint8_t> &os) const {
 	// Build LEB128-encoded delta stream (first delta is timestamp[0] - 0)
 	StreamVectorWriteHelper h(os);
-	uint64_t prev = 0;
-	for (size_t i = 0; i < timestamps.size(); ++i) {
-		const uint64_t cur = timestamps[i];
-		const uint64_t delta = cur - prev;
+	uint64_t prev{0};  // previous timestamp; 0 before the first entry
+	for (size_t i{0}; i < m_timestamps.size(); ++i) {
+		const uint64_t cur{m_timestamps[i]};
+		const uint64_t delta{cur - prev};  // unsigned difference to the previous timestamp
 		h.writeLEB128(delta);
 		prev = cur;
 	}
 }
 
 void Writer::flushValueChangeDataConstPart_(
-	const detail::ValueChangeData &vcd, ostream &os, WriterPackType pack_type
+	const detail::ValueChangeData &vcd, std::ostream &os, WriterPackType pack_type
 ) {
 	// 0. setup
 	StreamWriteHelper h(os);
@@ -635,80 +639,83 @@ void Writer::flushValueChangeDataConstPart_(
 	// FST_BL_VCDATA_DYN_ALIAS2 (8) maps to WaveDataVersion3 in fst_file.h
 	// The positions we cannot fill in yet
 	const auto p_tmp1 = [&]() {
-		streamoff start_pos, memory_usage_pos;
+		std::streamoff start_pos, memory_usage_pos;
 		h                            //
 			.beginOffset(start_pos)  // record start position
 			.writeBlockHeader(BlockType::WAVE_DATA_VERSION3, 0 /* Length placeholder 0 */)
-			.writeUInt(vcd.timestamps.front())
-			.writeUInt(vcd.timestamps.back())
+			.writeUInt(vcd.m_timestamps.front())
+			.writeUInt(vcd.m_timestamps.back())
 			.beginOffset(memory_usage_pos)  // record memory usage position
 			.writeUInt<uint64_t>(0);        // placeholder for memory usage
-		return make_pair(start_pos, memory_usage_pos);
+		return std::make_pair(start_pos, memory_usage_pos);
 	}();
-	const auto start_pos = p_tmp1.first;
-	const auto memory_usage_pos = p_tmp1.second;
+	const std::streamoff start_pos{p_tmp1.first};
+	const std::streamoff memory_usage_pos{p_tmp1.second};
 
 	// 2. Bits Section
 	{
-		vector<uint8_t> bits_data;
+		std::vector<uint8_t> bits_data;
 		vcd.writeInitialBits(bits_data);
-		vector<uint8_t> bits_data_compressed;
+		std::vector<uint8_t> bits_data_compressed;
 		const uint8_t *selected_data;
 		size_t selected_size;
-		if (pack_type == WriterPackType::NO_COMPRESSION or bits_data.size() < 32) {
+		if (pack_type == WriterPackType::NO_COMPRESSION || bits_data.size() < 32) {
 			selected_data = bits_data.data();
 			selected_size = bits_data.size();
 		} else {
 			compressUsingZlib(bits_data, bits_data_compressed, 4);
-			const auto selected_pair = selectSmaller(bits_data_compressed, bits_data);
+			const std::pair<const uint8_t *, size_t> selected_pair =
+				selectSmaller(bits_data_compressed, bits_data);
 			selected_data = selected_pair.first;
 			selected_size = selected_pair.second;
 		}
 
-		h                                            //
-			.writeLEB128(bits_data.size())           // uncompressed length
-			.writeLEB128(selected_size)              // compressed length
-			.writeLEB128(vcd.variable_infos.size())  // bits count
+		h                                              //
+			.writeLEB128(bits_data.size())             // uncompressed length
+			.writeLEB128(selected_size)                // compressed length
+			.writeLEB128(vcd.m_variable_infos.size())  // bits count
 			.write(selected_data, selected_size);
 	}
 
 	// 3. Waves Section
 	// Note: We need positions for the next section
 	const auto p_tmp2 = [&, pack_type]() {
-		auto wave_data = vcd.computeWaveData();
-		const size_t memory_usage =
-			accumulate(wave_data.begin(), wave_data.end(), size_t(0), [](size_t a, const auto &b) {
-				return a + b.size();
-			});
-		auto positions = vcd.uniquifyWaveData(wave_data);
+		std::vector<std::vector<uint8_t>> wave_data{vcd.computeWaveData()};  // one byte buffer per variable
+		const size_t memory_usage{std::accumulate(  // total bytes over all buffers, before de-duplication
+			wave_data.begin(),
+			wave_data.end(),
+			size_t(0),
+			[](size_t a, const std::vector<uint8_t> &b) { return a + b.size(); }
+		)};
+		std::vector<int64_t> positions{vcd.uniquifyWaveData(wave_data)};  // negative entries mark duplicates
 		h
 			// Note: this is not a typo, I expect we shall write count here.
 			// but the spec indeed write vcd.variable_infos.size(),
 			// which is repeated 1 times in header block, 2 times in valuechange block
-			.writeLEB128(vcd.variable_infos.size())
+			.writeLEB128(vcd.m_variable_infos.size())
 			.writeUInt(uint8_t('4'));
-		const uint64_t count = detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
+		const uint64_t count{detail::ValueChangeData::encodePositionsAndwriteUniqueWaveData(
 			os, wave_data, positions, pack_type
-		);
+		)};
 		(void)count;
-		return make_pair(positions, memory_usage);
+		return std::make_pair(std::move(positions), memory_usage);  // move: no per-variable copy into the pair
 	}();
-	const auto positions = p_tmp2.first;
-	const auto memory_usage = p_tmp2.second;
+	const std::vector<int64_t> &positions = p_tmp2.first;  // bind by reference; p_tmp2 lives to the end of the function
+	const size_t memory_usage{p_tmp2.second};
 
 	// 4. Position Section
 	{
-		const auto pos_begin = os.tellp();
+		const std::streampos pos_begin{os.tellp()};
 		vcd.writeEncodedPositions(positions, os);
-		const uint64_t pos_size = os.tellp() - pos_begin;
+		const uint64_t pos_size{static_cast<uint64_t>(os.tellp() - pos_begin)};
 		h.writeUInt(pos_size);  // Length comes AFTER data for positions
 	}
 
 	// 5. Time Section
 	{
-		vector<uint8_t> time_data;
+		std::vector<uint8_t> time_data;
 		vcd.writeTimestamps(time_data);
-		vector<uint8_t> time_data_compressed;
+		std::vector<uint8_t> time_data_compressed;
 		const uint8_t *selected_data;
 		size_t selected_size;
 		if (pack_type == WriterPackType::NO_COMPRESSION) {
@@ -716,36 +723,37 @@ void Writer::flushValueChangeDataConstPart_(
 			selected_size = time_data.size();
 		} else {
 			compressUsingZlib(time_data, time_data_compressed, 9);
-			const auto selected_pair = selectSmaller(time_data_compressed, time_data);
+			const std::pair<const uint8_t *, size_t> selected_pair =
+				selectSmaller(time_data_compressed, time_data);
 			selected_data = selected_pair.first;
 			selected_size = selected_pair.second;
 		}
-		h                                                 //
-			.write(selected_data, selected_size)          // time data
-			.writeUInt(time_data.size())                  // uncompressed len
-			.writeUInt(selected_size)                     // compressed len
-			.writeUInt(uint64_t(vcd.timestamps.size()));  // count
+		h                                                   //
+			.write(selected_data, selected_size)            // time data
+			.writeUInt(time_data.size())                    // uncompressed len
+			.writeUInt(selected_size)                       // compressed len
+			.writeUInt(uint64_t(vcd.m_timestamps.size()));  // count
 	}
 
 	// 6. Patch Block Length and Memory Required
-	streamoff end_pos;
+	std::streamoff end_pos{0};
 	h  //
 		.beginOffset(end_pos)
 		// Patch Block Length (after 1 byte Type)
-		.seek(start_pos + streamoff(1), ios_base::beg)
-		.writeUInt<uint64_t>(end_pos - start_pos - 1)
+		.seek(start_pos + std::streamoff(1), std::ios_base::beg)
+		.writeUInt<uint64_t>(static_cast<uint64_t>(end_pos - start_pos - 1))
 		// Patch Memory Required
-		.seek(memory_usage_pos, ios_base::beg)
-		.writeUInt<uint64_t>(memory_usage)
+		.seek(memory_usage_pos, std::ios_base::beg)
+		.writeUInt<uint64_t>(static_cast<uint64_t>(memory_usage))
 		// Restore position to end
-		.seek(end_pos, ios_base::beg);
+		.seek(end_pos, std::ios_base::beg);
 }
 
 namespace {  // Helper functions for createEnumTable
 
-void appendEscToString(const string_view_pair in, string &out) {
-	for (size_t i = 0; i < in.second; ++i) {
-		const char c = in.first[i];
+void appendEscToString(const string_view_pair in, std::string &out) {
+	for (size_t i{0}; i < in.m_size; ++i) {
+		const char c{in.m_data[i]};
 		switch (c) {
 			// clang-format off
 		case '\a': { out += "\\a"; break; }
@@ -786,9 +794,9 @@ void Writer::setAttrBegin(
 	const string_view_pair attrname,
 	uint64_t arg
 ) {
-	FST_CHECK(not hierarchy_finalized_);
+	FST_CHECK(!m_hierarchy_finalized_);
 
-	StreamVectorWriteHelper h(hierarchy_buffer_);
+	StreamVectorWriteHelper h(m_hierarchy_buffer_);
 
 	if (attrtype > Hierarchy::AttrType::MAX) {
 		attrtype = Hierarchy::AttrType::MISC;
@@ -835,48 +843,42 @@ void Writer::setAttrBegin(
 		.writeLEB128(arg);
 }
 
-namespace {
-
-// overload for string += string_view_
-// Remove this once C++17 is required
-}  // namespace
-
 EnumHandle Writer::createEnumTable(
 	const string_view_pair name,
 	uint32_t min_valbits,
-	const vector<pair<string_view_pair, string_view_pair>> &literal_val_arr
+	const std::vector<std::pair<string_view_pair, string_view_pair>> &literal_val_arr
 ) {
-	EnumHandle handle = 0;
+	EnumHandle handle{0};
 
-	if (name.second == 0 or literal_val_arr.empty()) {
+	if (name.m_size == 0 || literal_val_arr.empty()) {
 		return handle;
 	}
 
-	string attr_str;
+	std::string attr_str;
 	attr_str.reserve(256);
-	attr_str.append(name.first, name.second);
+	attr_str.append(name.m_data, name.m_size);
 	attr_str += ' ';
-	attr_str += to_string(literal_val_arr.size());
+	attr_str += std::to_string(literal_val_arr.size());
 	attr_str += ' ';
 
 	for (const auto &p : literal_val_arr) {
-		const auto &literal = p.first;
+		const string_view_pair literal{p.first};
 		// literal
 		appendEscToString(literal, attr_str);
 		attr_str += ' ';
 	}
 	for (const auto &p : literal_val_arr) {
-		const auto &val = p.second;
+		const string_view_pair val{p.second};
 		// val (with padding)
-		if (min_valbits > 0 and val.second < min_valbits) {
-			attr_str.insert(attr_str.end(), min_valbits - val.second, '0');
+		if (min_valbits > 0 && val.m_size < min_valbits) {
+			attr_str.insert(attr_str.end(), min_valbits - val.m_size, '0');
 		}
 		appendEscToString(val, attr_str);
 		attr_str += ' ';
 	}
 	attr_str.pop_back();  // remove last space
 
-	handle = ++enum_count_;
+	handle = ++m_enum_count_;
 	setAttrBegin(
 		Hierarchy::AttrType::MISC,
 		Hierarchy::AttrSubType::MISC_ENUMTABLE,
diff --git a/include/fstcpp/fstcpp_writer.h b/include/fstcpp/fstcpp_writer.h
index 8ec161e67..e50a74214 100644
--- a/include/fstcpp/fstcpp_writer.h
+++ b/include/fstcpp/fstcpp_writer.h
@@ -12,7 +12,6 @@
 #include <cstdint>
 #include <ctime>
 #include <fstream>
-#include <string>
 #include <vector>
 #if __cplusplus >= 201703L
 #	include <string_view>
@@ -28,19 +27,20 @@ class Writer;
 
 namespace detail {
 
-// We define WriterWaveData here for better code inlining, no forward declaration
+// We define BlackoutData here for better code inlining, no forward declaration
+// Blackout is not implemented yet
 struct BlackoutData {
-	std::vector<uint8_t> buffer;
-	uint64_t previous_timestamp = 0;
-	uint64_t count = 0;
+	std::vector<uint8_t> m_buffer{};
+	uint64_t m_previous_timestamp{0};
+	uint64_t m_count{0};
 
 	void emitDumpActive(uint64_t current_timestamp, bool enable);
 };
 
 // We define ValueChangeData here for better code inlining, no forward declaration
 struct ValueChangeData {
-	std::vector<VariableInfo> variable_infos;
-	std::vector<uint64_t> timestamps;
+	std::vector<VariableInfo> m_variable_infos{};
+	std::vector<uint64_t> m_timestamps{};
 
 	ValueChangeData();
 	~ValueChangeData();
@@ -66,10 +66,31 @@ struct ValueChangeData {
 class Writer {
 	friend class WriterTest;
 
+private:
+	// File/memory buffers
+	// 1. For hierarchy and geometry, we do not keep the data structure, instead we just
+	//    serialize them into buffers, and compress+write them at the end of file.
+	// 2. For header, we keep the data structure in memory since it is quite small
+	// 3. For wave data, we keep a complicated data structure in memory,
+	//    and flush them to file when necessary
+	// 4. For blackout data, it is not implemented yet
+	std::ofstream m_main_fst_file_{};
+	std::vector<uint8_t> m_hierarchy_buffer_{};
+	std::vector<uint8_t> m_geometry_buffer_{};
+	Header m_header_{};
+	detail::BlackoutData m_blackout_data_{};  // Not implemented yet
+	detail::ValueChangeData m_value_change_data_{};
+	bool m_hierarchy_finalized_{false};
+	WriterPackType m_pack_type_{WriterPackType::LZ4};
+	uint64_t m_value_change_data_usage_{0};  // Note: this value is just an estimation
+	uint64_t m_value_change_data_flush_threshold_{128 << 20};  // 128MB
+	uint32_t m_enum_count_{0};
+	bool m_flush_pending_{false};
+
 public:
 	Writer() {}
 	Writer(const string_view_pair name) {
-		if (name.second != 0) open(name);
+		if (name.m_size != 0) open(name);
 	}
 	~Writer() { close(); }
 
@@ -85,22 +106,28 @@ public:
 	//////////////////////////////
 	// Header manipulation API
 	//////////////////////////////
-	const Header &getHeader() const;
-	void setTimecale(int8_t timescale) { header_.timescale = timescale; }
-	void setWriter(const string_view_pair Writer) {
-		const auto len = std::min(Writer.second, sizeof(header_.writer));
-		std::copy_n(Writer.first, len, header_.writer);
-		if (len != sizeof(header_.writer)) {
-			header_.writer[len] = '\0';
+	const Header &getHeader() const { return m_header_; }
+	void setTimecale(int8_t timescale) { m_header_.m_timescale = timescale; }
+	void setWriter(const string_view_pair writer) {
+		const size_t len = std::min(writer.m_size, sizeof(m_header_.m_writer));
+		std::copy_n(writer.m_data, len, m_header_.m_writer);
+		if (len != sizeof(m_header_.m_writer)) {
+			m_header_.m_writer[len] = '\0';
 		}
 	}
 	void setDate(const string_view_pair date_str) {
-		const auto len = date_str.second;
-		FST_CHECK_EQ(len, sizeof(header_.date) - 1);
-		std::copy_n(date_str.first, len, header_.date);
-		header_.date[len] = '\0';
+		const size_t len = date_str.m_size;
+		FST_CHECK_EQ(len, sizeof(m_header_.m_date) - 1);
+		std::copy_n(date_str.m_data, len, m_header_.m_date);
+		m_header_.m_date[len] = '\0';
 	}
-	void setTimezero(int64_t timezero) { header_.timezero = timezero; }
+	void setDate(const std::tm *d) { setDate(make_string_view_pair(std::asctime(d))); }
+	void setDate() {
+		// set date to now
+		std::time_t t{std::time(nullptr)};
+		setDate(std::localtime(&t));
+	}
+	void setTimezero(int64_t timezero) { m_header_.m_timezero = timezero; }
 
 	//////////////////////////////
 	// Change scope API
@@ -121,8 +148,8 @@ public:
 		const string_view_pair attrname,
 		uint64_t arg
 	);
-	inline void setAttrEnd() {
-		hierarchy_buffer_.push_back(
+	void setAttrEnd() {
+		m_hierarchy_buffer_.push_back(
 			static_cast<uint8_t>(Hierarchy::ScopeControlType::GEN_ATTR_END)
 		);
 	}
@@ -131,7 +158,20 @@ public:
 		uint32_t min_valbits,
 		const std::vector<std::pair<string_view_pair, string_view_pair>> &literal_val_arr
 	);
-	inline void emitEnumTableRef(EnumHandle handle) {
+	template <typename T1, typename T2>
+	EnumHandle createEnumTable(
+		const char *name,
+		uint32_t min_valbits,
+		const std::vector<std::pair<T1, T2>> &literal_val_arr
+	) {
+		std::vector<std::pair<string_view_pair, string_view_pair>> arr{};
+		arr.reserve(literal_val_arr.size());
+		for (const auto &p : literal_val_arr) {
+			arr.emplace_back(make_string_view_pair(p.first), make_string_view_pair(p.second));
+		}
+		return createEnumTable(make_string_view_pair(name), min_valbits, arr);
+	}
+	void emitEnumTableRef(EnumHandle handle) {
 		setAttrBegin(
 			Hierarchy::AttrType::MISC,
 			Hierarchy::AttrSubType::MISC_ENUMTABLE,
@@ -139,9 +179,9 @@ public:
 			handle
 		);
 	}
-	inline void setWriterPackType(WriterPackType pack_type) {
-		FST_CHECK(pack_type != WriterPackType::ZLIB and pack_type != WriterPackType::FASTLZ);
-		pack_type_ = pack_type;
+	void setWriterPackType(WriterPackType pack_type) {
+		FST_CHECK(pack_type != WriterPackType::ZLIB && pack_type != WriterPackType::FASTLZ);
+		m_pack_type_ = pack_type;
 	}
 
 	//////////////////////////////
@@ -154,22 +194,24 @@ public:
 		const string_view_pair name,
 		uint32_t alias_handle
 	);
-	Handle createVar2(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		const string_view_pair name,
-		uint32_t alias_handle,
-		const string_view_pair type,
-		Hierarchy::SupplementalVarType svt,
-		Hierarchy::SupplementalDataType sdt
-	);
+	// TODO
+	// Handle createVar2(
+	// 	Hierarchy::VarType vartype,
+	// 	Hierarchy::VarDirection vardir,
+	// 	uint32_t bitwidth,
+	// 	const string_view_pair name,
+	// 	uint32_t alias_handle,
+	// 	const string_view_pair type,
+	// 	Hierarchy::SupplementalVarType svt,
+	// 	Hierarchy::SupplementalDataType sdt
+	// );
 
 	//////////////////////////////
 	// Waveform API
 	//////////////////////////////
 	void emitTimeChange(uint64_t tim);
-	void emitDumpActive(bool enable);
+	// TODO
+	// void emitDumpActive(bool enable);
 	void emitValueChange(
 		Handle handle, const uint32_t *val, EncodingType encoding = EncodingType::BINARY
 	);
@@ -186,257 +228,36 @@ public:
 	// We only ensure that this function works where Verilator use it.
 	void emitValueChange(Handle handle, const char *val);
 
-	//////////////////////////////
-	// Alias version
-	//////////////////////////////
-	// Constructor
-	Writer(const char *name) : Writer(make_string_view_pair(name)) {}
-	Writer(const std::string &name) : Writer(make_string_view_pair(name.c_str(), name.size())) {}
-	// Open
-	inline void open(const char *name) { open(make_string_view_pair(name)); }
-	inline void open(const std::string &name) {
-		open(make_string_view_pair(name.c_str(), name.size()));
-	}
-	// setWriter
-	inline void setWriter(const char *Writer) {
-		if (Writer) setWriter(make_string_view_pair(Writer));
-	}
-	inline void setWriter(const std::string &Writer) {
-		setWriter(make_string_view_pair(Writer.c_str(), Writer.size()));
-	}
-	// setDate
-	inline void setDate(const char *date_str) {
-		if (date_str) setDate(make_string_view_pair(date_str));
-	}
-	inline void setDate(const std::string &date_str) {
-		setDate(make_string_view_pair(date_str.c_str(), date_str.size()));
-	}
-	inline void setDate(const std::tm *d) { setDate(make_string_view_pair(std::asctime(d))); }
-	inline void setDate() {
-		// set date to now
-		std::time_t t = std::time(nullptr);
-		setDate(std::localtime(&t));
-	}
-	// CreateVar(2)
-	inline Handle createVar(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		const char *name,
-		uint32_t alias_handle
-	) {
-		FST_CHECK_NE(name, static_cast<void *>(nullptr));
-		return createVar(vartype, vardir, bitwidth, make_string_view_pair(name), alias_handle);
-	}
-	inline Handle createVar(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		const std::string &name,
-		uint32_t alias_handle
-	) {
-		return createVar(
-			vartype,
-			vardir,
-			bitwidth,
-			make_string_view_pair(name.c_str(), name.size()),
-			alias_handle
-		);
-	}
-	// setScope
-	inline void setScope(
-		Hierarchy::ScopeType scopetype, const std::string &scopename, const std::string &scopecomp
-	) {
-		setScope(
-			scopetype,
-			make_string_view_pair(scopename.c_str(), scopename.size()),
-			make_string_view_pair(scopecomp.c_str(), scopecomp.size())
-		);
-	}
-	inline void setScope(
-		Hierarchy::ScopeType scopetype, const char *scopename, const char *scopecomp
-	) {
-		setScope(scopetype, make_string_view_pair(scopename), make_string_view_pair(scopecomp));
-	}
-	// setAttrBegin
-	inline void setAttrBegin(
-		Hierarchy::AttrType attrtype,
-		Hierarchy::AttrSubType subtype,
-		const char *attrname,
-		uint64_t arg
-	) {
-		setAttrBegin(attrtype, subtype, make_string_view_pair(attrname), arg);
-	}
-	// CreateEnumTable
-	EnumHandle createEnumTable(
-		const char *name,
-		uint32_t min_valbits,
-		const std::vector<std::pair<const char *, const char *>> &literal_val_arr
-	) {
-		std::vector<std::pair<string_view_pair, string_view_pair>> arr;
-		arr.reserve(literal_val_arr.size());
-		for (const auto &p : literal_val_arr) {
-			arr.emplace_back(make_string_view_pair(p.first), make_string_view_pair(p.second));
-		}
-		return createEnumTable(make_string_view_pair(name), min_valbits, arr);
-	}
-	// CreateVar2
-	inline Handle createVar2(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		const char *name,
-		uint32_t alias_handle,
-		const char *type,
-		Hierarchy::SupplementalVarType svt,
-		Hierarchy::SupplementalDataType sdt
-	) {
-		return createVar2(
-			vartype,
-			vardir,
-			bitwidth,
-			make_string_view_pair(name),
-			alias_handle,
-			make_string_view_pair(type),
-			svt,
-			sdt
-		);
-	}
 	// Flush value change data
-	inline void flushValueChangeData() { flush_pending_ = true; }
+	void flushValueChangeData() { m_flush_pending_ = true; }
 
-#if __cplusplus >= 201703L
-	// All APIs with string_view_pair --> define a
-	// string_view version and forward to the string_view_pair version
-	inline Writer(std::string_view name)
-		: Writer(make_string_view_pair(name.data(), name.size())) {}
-	inline void open(std::string_view name) {
-		open(make_string_view_pair(name.data(), name.size()));
-	}
-	inline void setWriter(std::string_view Writer) {
-		setWriter(make_string_view_pair(Writer.data(), Writer.size()));
-	}
-	inline void setDate(std::string_view date_str) {
-		setDate(make_string_view_pair(date_str.data(), date_str.size()));
-	}
-
-	inline void setScope(
-		Hierarchy::ScopeType scopetype, std::string_view scopename, std::string_view scopecomp
-	) {
-		setScope(
-			scopetype,
-			make_string_view_pair(scopename.data(), scopename.size()),
-			make_string_view_pair(scopecomp.data(), scopecomp.size())
-		);
-	}
-
-	inline void setAttrBegin(
-		Hierarchy::AttrType attrtype,
-		Hierarchy::AttrSubType subtype,
-		std::string_view attrname,
-		uint64_t arg
-	) {
-		setAttrBegin(
-			attrtype, subtype, make_string_view_pair(attrname.data(), attrname.size()), arg
-		);
-	}
-
-	EnumHandle createEnumTable(
-		std::string_view name,
-		uint32_t min_valbits,
-		const std::vector<std::pair<std::string_view, std::string_view>> &literal_val_arr
-	) {
-		std::vector<std::pair<string_view_pair, string_view_pair>> arr;
-		arr.reserve(literal_val_arr.size());
-		for (const auto &p : literal_val_arr) {
-			arr.emplace_back(
-				make_string_view_pair(p.first.data(), p.first.size()),
-				make_string_view_pair(p.second.data(), p.second.size())
-			);
-		}
-		return createEnumTable(make_string_view_pair(name.data(), name.size()), min_valbits, arr);
-	}
-
-	inline Handle createVar(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		std::string_view name,
-		uint32_t alias_handle
-	) {
-		return createVar(
-			vartype, vardir, bitwidth, make_string_view_pair(name.data(), name.size()), alias_handle
-		);
-	}
-
-	inline Handle createVar2(
-		Hierarchy::VarType vartype,
-		Hierarchy::VarDirection vardir,
-		uint32_t bitwidth,
-		std::string_view name,
-		uint32_t alias_handle,
-		std::string_view type,
-		Hierarchy::SupplementalVarType svt,
-		Hierarchy::SupplementalDataType sdt
-	) {
-		return createVar2(
-			vartype,
-			vardir,
-			bitwidth,
-			make_string_view_pair(name.data(), name.size()),
-			alias_handle,
-			make_string_view_pair(type.data(), type.size()),
-			svt,
-			sdt
-		);
-	}
-#endif
 private:
-	// File/memory buffers
-	// 1. For hierarchy and geometry, we do not keep the data structure, instead we just
-	//    serialize them into buffers, and compress+write them at the end of file.
-	// 2. For header, we keep the data structure in memory since it is quite small
-	// 3. For wave data, we keep a complicated data structure in memory,
-	//    and flush them to file when necessary
-	std::ofstream main_fst_file_;
-	std::vector<uint8_t> hierarchy_buffer_;
-	std::vector<uint8_t> geometry_buffer_;
-	Header header_{};
-	detail::BlackoutData blackout_data_;
-	detail::ValueChangeData value_change_data_;
-	bool hierarchy_finalized_ = false;
-	WriterPackType pack_type_ = WriterPackType::LZ4;
-	uint64_t value_change_data_usage_ = 0;  // Note: this value is just an estimation
-	uint64_t value_change_data_flush_threshold_ = 128 << 20;  // 128MB
-	uint32_t enum_count_ = 0;
-	bool flush_pending_ = false;
-
 	// internal helpers
 	static void writeHeader_(const Header &header, std::ostream &os);
 	void appendGeometry_(std::ostream &os);
 	void appendHierarchy_(std::ostream &os);
-	void appendBlackout_(std::ostream &os);
+	void appendBlackout_(std::ostream &os);  // Not implemented yet
 	// This function is used to flush value change data to file, and keep only the latest value in
 	// memory Just want to separate the const part from the non-const part for code clarity
 	static void flushValueChangeDataConstPart_(
 		const detail::ValueChangeData &vcd, std::ostream &os, WriterPackType pack_type
 	);
-	inline void flushValueChangeData_(detail::ValueChangeData &vcd, std::ostream &os) {
-		if (vcd.timestamps.empty()) {
+	void flushValueChangeData_(detail::ValueChangeData &vcd, std::ostream &os) {
+		if (vcd.m_timestamps.empty()) {
 			return;
 		}
-		flushValueChangeDataConstPart_(vcd, os, pack_type_);
+		flushValueChangeDataConstPart_(vcd, os, m_pack_type_);
 		vcd.keepOnlyTheLatestValue();
-		++header_.num_value_change_data_blocks;
-		value_change_data_usage_ = 0;
-		flush_pending_ = false;
+		++m_header_.m_num_value_change_data_blocks;
+		m_value_change_data_usage_ = 0;
+		m_flush_pending_ = false;
 	}
 	void finalizeHierarchy_() {
-		if (hierarchy_finalized_) return;
-		hierarchy_finalized_ = true;
+		if (m_hierarchy_finalized_) return;
+		m_hierarchy_finalized_ = true;
 		// Original FST code comments: as a default, use 128MB and increment when
 		// every 1M signals are defined.
-		value_change_data_flush_threshold_ = (((header_.num_handles - 1) >> 20) + 1) << 27;
+		m_value_change_data_flush_threshold_ = (((m_header_.m_num_handles - 1) >> 20) + 1) << 27;
 	}
 	template <typename... T>
 	void emitValueChangeHelper_(Handle handle, T &&...val);
diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index 2ac855952..8d72b2b08 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -27,6 +27,7 @@
 #include "verilated_fst_c.h"
 
 // Include fstcpp library
+#include "fstcpp/fstcpp.h"
 #include "fstcpp/fstcpp_writer.h"
 
 #include <algorithm>
@@ -178,24 +179,24 @@ void VerilatedFst::pushPrefix(const char* namep, VerilatedTracePrefixType type,
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_INTERFACE, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_PACKED:
-        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::PACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_UNPACKED:
-        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::UNPACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_UNPACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::UNION_PACKED:
-        m_fst->setAttrBegin(fst::AttributeType::PACK, fst::PackType::PACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_UNION, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_PACKED:
-        m_fst->setAttrBegin(fst::AttributeType::ARRAY, fst::PackType::PACKED, "bounds", lr);
-        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_ARRAY, name, std::string{});
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY, fst::Hierarchy::AttrSubType::ARRAY_PACKED, "bounds", lr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::SV_ARRAY, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_UNPACKED:
-        m_fst->setAttrBegin(fst::AttributeType::ARRAY, fst::PackType::UNPACKED, "bounds", lr);
-        m_fst->setScope(fst::Hierarchy::ScopeType::VCD_ARRAY, name, std::string{});
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY, fst::Hierarchy::AttrSubType::ARRAY_UNPACKED, "bounds", lr);
+        m_fst->setScope(fst::Hierarchy::ScopeType::SV_ARRAY, name, std::string{});
         break;
     default: break;
     }

From 5bf639e67331daf8c3dc1f28e5df81910a971976 Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Mon, 16 Mar 2026 20:52:28 +0800
Subject: [PATCH 3/8] For passing Verilator regression

- Eliminate strict-aliasing violations
- Include .cpp in verilated_fst_c.cpp directly as WS suggested
---
 include/fstcpp/fstcpp_variable_info.h |  3 ++-
 include/fstcpp/fstcpp_writer.cpp      |  2 +-
 include/verilated.mk.in               |  1 -
 include/verilated_fst_c.cpp           | 10 ++++++----
 test_regress/t/t_dist_whitespace.py   |  2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/fstcpp/fstcpp_variable_info.h b/include/fstcpp/fstcpp_variable_info.h
index b91ab5103..f707aaaa8 100644
--- a/include/fstcpp/fstcpp_variable_info.h
+++ b/include/fstcpp/fstcpp_variable_info.h
@@ -310,7 +310,8 @@ public:
 		info.resize(needed);
 		EmitWriterHelper wh(info.data_ptr());
 		const double nan_val = std::numeric_limits<double>::quiet_NaN();
-		const uint64_t nan_val_u64 = *reinterpret_cast<const uint64_t *>(&nan_val);
+		uint64_t nan_val_u64;
+		std::memcpy(&nan_val_u64, &nan_val, sizeof(nan_val_u64));
 		wh.writeTimeIndexAndEncoding(0, EncodingType::BINARY).write<uint64_t>(nan_val_u64);
 	}
 
diff --git a/include/fstcpp/fstcpp_writer.cpp b/include/fstcpp/fstcpp_writer.cpp
index b965e80b7..7f7b46458 100644
--- a/include/fstcpp/fstcpp_writer.cpp
+++ b/include/fstcpp/fstcpp_writer.cpp
@@ -271,7 +271,7 @@ void Writer::emitValueChange(Handle handle, const char *val) {
 	FST_DCHECK_NE(bitwidth, 0);
 
 	val += bitwidth;
-	thread_local static std::vector<uint64_t> t_packed_value_buffer;
+	static std::vector<uint64_t> t_packed_value_buffer;
 	const unsigned num_words{(bitwidth + 63) / 64};
 	t_packed_value_buffer.assign(num_words, 0);
 	for (unsigned i = 0; i < num_words; ++i) {
diff --git a/include/verilated.mk.in b/include/verilated.mk.in
index b936d8c45..9afb5757b 100644
--- a/include/verilated.mk.in
+++ b/include/verilated.mk.in
@@ -208,7 +208,6 @@ VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
 
 ifneq ($(VM_TRACE_FST),0)
 ifneq ($(VM_TRACE_FST),)
-    VM_GLOBAL_FAST += fstcpp_writer fstcpp_variable_info
     LDLIBS = -llz4 -lz
 endif
 endif
diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index 8d72b2b08..3abfb40b6 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -26,9 +26,9 @@
 #include "verilated.h"
 #include "verilated_fst_c.h"
 
-// Include fstcpp library
-#include "fstcpp/fstcpp.h"
-#include "fstcpp/fstcpp_writer.h"
+// Include fstcpp cpp file directly
+#include "fstcpp/fstcpp_variable_info.cpp"
+#include "fstcpp/fstcpp_writer.cpp"
 
 #include <algorithm>
 #include <iterator>
@@ -453,5 +453,7 @@ void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int bits
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitDouble(uint32_t code, double newval) {
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], *reinterpret_cast<const uint64_t*>(&newval));
+    uint64_t newval_u64;
+    std::memcpy(&newval_u64, &newval, sizeof(newval_u64));
+    m_fst->emitValueChange(m_symbolp[code], newval_u64);
 }
diff --git a/test_regress/t/t_dist_whitespace.py b/test_regress/t/t_dist_whitespace.py
index 2b4fa35f3..c6678f5b6 100755
--- a/test_regress/t/t_dist_whitespace.py
+++ b/test_regress/t/t_dist_whitespace.py
@@ -11,7 +11,7 @@ import vltest_bootstrap
 
 test.scenarios('dist')
 
-Tabs_Exempt_Re = r'(\.out$)|(/gtkwave)|(Makefile)|(\.mk$)|(\.mk\.in$)|test_regress/t/t_preproc\.v|install-sh'
+Tabs_Exempt_Re = r'(\.out$)|(/fstcpp)|(Makefile)|(\.mk$)|(\.mk\.in$)|test_regress/t/t_preproc\.v|install-sh'
 
 Unicode_Exempt_Re = r'(Changes$|CONTRIBUTORS$|LICENSES?|contributors.rst$|spelling.txt$)'
 

From a3b74d2b21f357a426c07fe24fbb4d8960789e05 Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Tue, 17 Mar 2026 22:51:50 +0800
Subject: [PATCH 4/8] Fix linkage errors

---
 include/verilated.mk.in   | 2 +-
 verilator-config.cmake.in | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/verilated.mk.in b/include/verilated.mk.in
index 9afb5757b..e517e9161 100644
--- a/include/verilated.mk.in
+++ b/include/verilated.mk.in
@@ -208,7 +208,7 @@ VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
 
 ifneq ($(VM_TRACE_FST),0)
 ifneq ($(VM_TRACE_FST),)
-    LDLIBS = -llz4 -lz
+    LDLIBS += -llz4 -lz
 endif
 endif
 
diff --git a/verilator-config.cmake.in b/verilator-config.cmake.in
index c62c8f129..76d1b6a5a 100644
--- a/verilator-config.cmake.in
+++ b/verilator-config.cmake.in
@@ -743,6 +743,10 @@ function(verilate TARGET)
 
     target_link_libraries(${TARGET} PUBLIC ${VERILATOR_MT_CFLAGS})
 
+    if(${VERILATE_PREFIX}_TRACE_FST)
+        target_link_libraries(${TARGET} PUBLIC -llz4 -lz)
+    endif()
+
     target_compile_features(${TARGET} PRIVATE cxx_std_11)
 
     if(${VERILATE_PREFIX}_TIMING)

From 76f241c226e9610da005499f09aca2b3fdbae517 Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Thu, 19 Mar 2026 01:57:16 +0800
Subject: [PATCH 5/8] Fix calling wrong API in verilated_fst_c.cpp

---
 include/verilated_fst_c.cpp | 46 ++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index 3abfb40b6..f3ac9d3e1 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -31,6 +31,7 @@
 #include "fstcpp/fstcpp_writer.cpp"
 
 #include <algorithm>
+#include <cstdint>
 #include <iterator>
 #include <sstream>
 #include <type_traits>
@@ -387,67 +388,64 @@ VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitEvent(uint32_t code) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], "1");
+    m_fst->emitValueChange(m_symbolp[code], 1);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitBit(uint32_t code, CData newval) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], newval ? "1" : "0");
+    m_fst->emitValueChange(m_symbolp[code], newval ? 1 : 0);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitCData(uint32_t code, CData newval, int bits) {
-    char buf[VL_BYTESIZE];
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
-    cvtCDataToStr(buf, newval << (VL_BYTESIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitSData(uint32_t code, SData newval, int bits) {
-    char buf[VL_SHORTSIZE];
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
-    cvtSDataToStr(buf, newval << (VL_SHORTSIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitIData(uint32_t code, IData newval, int bits) {
-    char buf[VL_IDATASIZE];
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
-    cvtIDataToStr(buf, newval << (VL_IDATASIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitQData(uint32_t code, QData newval, int bits) {
-    char buf[VL_QUADSIZE];
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
-    cvtQDataToStr(buf, newval << (VL_QUADSIZE - bits));
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], buf);
+    m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int bits) {
+    // emitValueChange also has a uint32_t* version, but it
+    // does the same conversion, allocating a pointer and copying the data,
+    // so we use the Verilator buffer directly instead.
+    // The buffer was designed to hold maxBits() chars,
+    // so it is safe to use it as a uint64_t*.
     int words = VL_WORDS_I(bits);
-    char* wp = m_strbufp;
-    // Convert the most significant word
-    const int bitsInMSW = VL_BITBIT_E(bits) ? VL_BITBIT_E(bits) : VL_EDATASIZE;
-    cvtEDataToStr(wp, newvalp[--words] << (VL_EDATASIZE - bitsInMSW));
-    wp += bitsInMSW;
-    // Convert the remaining words
-    while (words > 0) {
-        cvtEDataToStr(wp, newvalp[--words]);
-        wp += VL_EDATASIZE;
+    uint64_t* wp = reinterpret_cast<uint64_t*>(m_strbufp);
+    // cast newvalp (uint32_t[words]) to wp (uint64_t[ceil(words/2)])
+    for (int i = 0; i < words/2; ++i) {
+        wp[i] = newvalp[i*2+1];
+        wp[i] <<= 32;
+        wp[i] |= newvalp[i*2];
+    }
+    if (words % 2 == 1) {
+        wp[words/2] = newvalp[words-1];
     }
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], m_strbufp);
+    m_fst->emitValueChange(m_symbolp[code], wp);
 }
 
 VL_ATTR_ALWINLINE

From 04e4f7b63d483bdc0d8fe4b9cd236a8c5766e4d6 Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Mon, 23 Mar 2026 02:02:24 +0800
Subject: [PATCH 6/8] Update according to review comments

---
 docs/guide/install.rst           |  4 ++--
 include/fstcpp/fstcpp_writer.cpp | 25 ++++++++++++------------
 include/fstcpp/fstcpp_writer.h   |  3 +++
 include/verilated.mk.in          |  1 -
 include/verilated_fst_c.cpp      | 33 ++++++++++----------------------
 include/verilated_fst_c.h        | 10 +++++-----
 6 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/docs/guide/install.rst b/docs/guide/install.rst
index 8ebb45110..f25ac2dec 100644
--- a/docs/guide/install.rst
+++ b/docs/guide/install.rst
@@ -64,7 +64,7 @@ In brief, to install from git:
    #sudo apt-get install libgoogle-perftools-dev libjemalloc-dev numactl perl-doc
    #sudo apt-get install libfl2  # Ubuntu only (ignore if gives error)
    #sudo apt-get install libfl-dev  # Ubuntu only (ignore if gives error)
-   #sudo apt-get install zlibc zlib1g zlib1g-dev  # Ubuntu only (ignore if gives error)
+   #sudo apt-get install zlibc zlib1g zlib1g-dev liblz4 liblz4-dev  # Ubuntu only (ignore if gives error)
 
    git clone https://github.com/verilator/verilator   # Only first time
 
@@ -116,7 +116,7 @@ To build or run Verilator, you need these standard packages:
    sudo apt-get install libgz  # Non-Ubuntu (ignore if gives error)
    sudo apt-get install libfl2  # Ubuntu only (ignore if gives error)
    sudo apt-get install libfl-dev  # Ubuntu only (ignore if gives error)
-   sudo apt-get install zlibc zlib1g zlib1g-dev  # Ubuntu only (ignore if gives error)
+   sudo apt-get install zlibc zlib1g zlib1g-dev liblz4 liblz4-dev  # Ubuntu only (ignore if gives error)
 
 For SystemC:
 
diff --git a/include/fstcpp/fstcpp_writer.cpp b/include/fstcpp/fstcpp_writer.cpp
index 7f7b46458..2756e23b9 100644
--- a/include/fstcpp/fstcpp_writer.cpp
+++ b/include/fstcpp/fstcpp_writer.cpp
@@ -23,13 +23,13 @@
 #include "fstcpp/fstcpp_stream_write_helper.h"
 #include "fstcpp/fstcpp_variable_info.h"
 
-// AT(x) is used to access vector at index x, and it will throw exception if out of bound
+// AT(vec, x) accesses vector vec at index x; it throws an exception if out of bounds
 // in debug mode, but in release mode, it will not throw exception
+// Usually you should need AT(vec, x) only on very hot code paths.
+// Usually you should only need AT(vec, x) only at very hot code path.
 #ifndef NDEBUG
-#	define AT(x) .at(x)
+#	define AT(vec, x) (vec.at(x))
 #else
-#	define AT(x) [x]
+#	define AT(vec, x) (vec[x])
 #endif
 
 namespace fst {
@@ -228,7 +228,7 @@ void Writer::emitTimeChange(uint64_t tim) {
 template <typename... T>
 void Writer::emitValueChangeHelper_(Handle handle, T &&...val) {
 	// Let data prefetch go first
-	VariableInfo &var_info = m_value_change_data_.m_variable_infos AT(handle - 1);
+	VariableInfo &var_info = AT(m_value_change_data_.m_variable_infos, handle - 1);
 #if defined(__GNUC__) || defined(__clang__)
 	__builtin_prefetch(var_info.data_ptr() + var_info.size() - 1, 1, 0);
 #endif
@@ -256,7 +256,7 @@ void Writer::emitValueChange(Handle handle, uint64_t val) {
 
 void Writer::emitValueChange(Handle handle, const char *val) {
 	finalizeHierarchy_();
-	VariableInfo &var_info = m_value_change_data_.m_variable_infos AT(handle - 1);
+	VariableInfo &var_info = AT(m_value_change_data_.m_variable_infos, handle - 1);
 
 	// For double handles, const char* is interpreted as a double* (8B)
 	// This double shall be written out as raw IEEE 754 double
@@ -271,24 +271,23 @@ void Writer::emitValueChange(Handle handle, const char *val) {
 	FST_DCHECK_NE(bitwidth, 0);
 
 	val += bitwidth;
-	static std::vector<uint64_t> t_packed_value_buffer;
 	const unsigned num_words{(bitwidth + 63) / 64};
-	t_packed_value_buffer.assign(num_words, 0);
+	m_packed_value_buffer_.assign(num_words, 0);
 	for (unsigned i = 0; i < num_words; ++i) {
 		const char *start{val - std::min((i + 1) * 64, bitwidth)};
 		const char *end{val - 64 * i};
-		t_packed_value_buffer[i] = 0;
+		m_packed_value_buffer_[i] = 0;
 		for (const char *p = start; p < end; ++p) {
 			// No checking for invalid characters, follow original C implementation
-			t_packed_value_buffer[i] <<= 1;
-			t_packed_value_buffer[i] |= static_cast<uint64_t>(*p - '0');
+			m_packed_value_buffer_[i] <<= 1;
+			m_packed_value_buffer_[i] |= static_cast<uint64_t>(*p - '0');
 		}
 	}
 
 	if (bitwidth <= 64) {
-		emitValueChange(handle, t_packed_value_buffer.front());
+		emitValueChange(handle, m_packed_value_buffer_.front());
 	} else {
-		emitValueChange(handle, t_packed_value_buffer.data(), EncodingType::BINARY);
+		emitValueChange(handle, m_packed_value_buffer_.data(), EncodingType::BINARY);
 	}
 }
 
diff --git a/include/fstcpp/fstcpp_writer.h b/include/fstcpp/fstcpp_writer.h
index e50a74214..532fdff93 100644
--- a/include/fstcpp/fstcpp_writer.h
+++ b/include/fstcpp/fstcpp_writer.h
@@ -77,6 +77,9 @@ private:
 	std::ofstream m_main_fst_file_{};
 	std::vector<uint8_t> m_hierarchy_buffer_{};
 	std::vector<uint8_t> m_geometry_buffer_{};
+	// Temporary buffer for packing bit strings into words
+	// Only used in emitValueChange(Handle, const char*)
+	std::vector<uint64_t> m_packed_value_buffer_{};
 	Header m_header_{};
 	detail::BlackoutData m_blackout_data_{};  // Not implemented yet
 	detail::ValueChangeData m_value_change_data_{};
diff --git a/include/verilated.mk.in b/include/verilated.mk.in
index e517e9161..3ae635175 100644
--- a/include/verilated.mk.in
+++ b/include/verilated.mk.in
@@ -104,7 +104,6 @@ CPPFLAGS += -I. $(VK_CPPFLAGS_WALL) $(VK_CPPFLAGS_ALWAYS)
 VPATH += ..
 VPATH += $(VERILATOR_ROOT)/include
 VPATH += $(VERILATOR_ROOT)/include/vltstd
-VPATH += $(VERILATOR_ROOT)/include/fstcpp
 
 LDFLAGS += $(CFG_LDFLAGS_VERILATED)
 
diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index f3ac9d3e1..61fcd4897 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -395,61 +395,48 @@ VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitBit(uint32_t code, CData newval) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], newval ? 1 : 0);
+    m_fst->emitValueChange(m_symbolp[code], uint64_t(newval));
 }
 
 VL_ATTR_ALWINLINE
-void VerilatedFstBuffer::emitCData(uint32_t code, CData newval, int bits) {
+void VerilatedFstBuffer::emitCData(uint32_t code, CData newval, int) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
     m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
-void VerilatedFstBuffer::emitSData(uint32_t code, SData newval, int bits) {
+void VerilatedFstBuffer::emitSData(uint32_t code, SData newval, int) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
     m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
-void VerilatedFstBuffer::emitIData(uint32_t code, IData newval, int bits) {
+void VerilatedFstBuffer::emitIData(uint32_t code, IData newval, int) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
     m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
-void VerilatedFstBuffer::emitQData(uint32_t code, QData newval, int bits) {
+void VerilatedFstBuffer::emitQData(uint32_t code, QData newval, int) {
     VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
     m_fst->emitValueChange(m_symbolp[code], newval);
 }
 
 VL_ATTR_ALWINLINE
-void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int bits) {
-    // While emitValueChange has a uint32_t* version
-    // It does the same conversion, allocating a pointer and copying the data
-    // So I decide to use the verilator buffer directly.
-    // The buffer were designed to hold maxBits() char,
-    // so it is very safe to use it as uint64_t*.
-    int words = VL_WORDS_I(bits);
-    uint64_t* wp = reinterpret_cast<uint64_t*>(m_strbufp);
-    // cast newvalp (uint32_t[words]) to wp (uint64_t[ceil(words/2)])
-    for (int i = 0; i < words/2; ++i) {
-        wp[i] = newvalp[i*2+1];
-        wp[i] <<= 32;
-        wp[i] |= newvalp[i*2];
-    }
-    if (words % 2 == 1) {
-        wp[words/2] = newvalp[words-1];
-    }
+void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int) {
+    VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
-    m_fst->emitValueChange(m_symbolp[code], wp);
+    // call emitValueChange(handle, uint32_t*)
+    m_fst->emitValueChange(m_symbolp[code], newvalp);
 }
 
 VL_ATTR_ALWINLINE
 void VerilatedFstBuffer::emitDouble(uint32_t code, double newval) {
+    VL_DEBUG_IFDEF(assert(m_symbolp[code]););
     m_owner.emitTimeChangeMaybe();
     uint64_t newval_u64;
     std::memcpy(&newval_u64, &newval, sizeof(newval_u64));
diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h
index 387244d37..008698364 100644
--- a/include/verilated_fst_c.h
+++ b/include/verilated_fst_c.h
@@ -231,11 +231,11 @@ class VerilatedFstBuffer VL_NOT_FINAL {
     // called from only one place (the full* methods), so always inline them.
     VL_ATTR_ALWINLINE void emitEvent(uint32_t code);
     VL_ATTR_ALWINLINE void emitBit(uint32_t code, CData newval);
-    VL_ATTR_ALWINLINE void emitCData(uint32_t code, CData newval, int bits);
-    VL_ATTR_ALWINLINE void emitSData(uint32_t code, SData newval, int bits);
-    VL_ATTR_ALWINLINE void emitIData(uint32_t code, IData newval, int bits);
-    VL_ATTR_ALWINLINE void emitQData(uint32_t code, QData newval, int bits);
-    VL_ATTR_ALWINLINE void emitWData(uint32_t code, const WData* newvalp, int bits);
+    VL_ATTR_ALWINLINE void emitCData(uint32_t code, CData newval, int);
+    VL_ATTR_ALWINLINE void emitSData(uint32_t code, SData newval, int);
+    VL_ATTR_ALWINLINE void emitIData(uint32_t code, IData newval, int);
+    VL_ATTR_ALWINLINE void emitQData(uint32_t code, QData newval, int);
+    VL_ATTR_ALWINLINE void emitWData(uint32_t code, const WData* newvalp, int);
     VL_ATTR_ALWINLINE void emitDouble(uint32_t code, double newval);
 };
 

From c2540f05771961c21d95f77f35c80126e6f999d1 Mon Sep 17 00:00:00 2001
From: Yu-Sheng Lin <johnjohnlys@gmail.com>
Date: Thu, 26 Mar 2026 00:23:11 +0800
Subject: [PATCH 7/8] Make MSVC version testable

---
 include/fstcpp/fstcpp.h                     | 11 +++++++
 include/fstcpp/fstcpp_assertion.h           | 10 ++----
 include/fstcpp/fstcpp_stream_write_helper.h | 28 ++++++++--------
 include/fstcpp/fstcpp_variable_info.h       | 36 +++++++++++++--------
 4 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/include/fstcpp/fstcpp.h b/include/fstcpp/fstcpp.h
index 5eb0c7414..e17b33766 100644
--- a/include/fstcpp/fstcpp.h
+++ b/include/fstcpp/fstcpp.h
@@ -13,6 +13,17 @@
 #include <string>
 // Other libraries' .h files.
 // Your project's .h files.
+#if defined(_MSC_VER) || defined(FORCE_MSC_VER_)
+#	define USE_GCC_INTRINSIC 0
+// Note: no MSVC intrinsics yet; FORCE_MSC_VER_ forces this intrinsic-free path on any compiler
+#	define USE_MSVC_INTRINSIC 0
+#elif defined(__GNUC__) || defined(__clang__)
+#	define USE_GCC_INTRINSIC 1
+#	define USE_MSVC_INTRINSIC 0
+#else
+#	define USE_GCC_INTRINSIC 0
+#	define USE_MSVC_INTRINSIC 0
+#endif
 
 // Remove these when we upgrade to C++20
 #pragma GCC diagnostic ignored "-Wpragmas"
diff --git a/include/fstcpp/fstcpp_assertion.h b/include/fstcpp/fstcpp_assertion.h
index 1f7265b13..e9606f665 100644
--- a/include/fstcpp/fstcpp_assertion.h
+++ b/include/fstcpp/fstcpp_assertion.h
@@ -102,16 +102,12 @@
 
 // Compatibility layer for unreachable code hint
 #if defined(__cplusplus) && __cplusplus >= 202302L
-// Prefer the standard library version if available
 #	include <utility>
 #	define FST_UNREACHABLE std::unreachable()
-#elif defined(__GNUC__) || defined(__clang__)
-// --- GCC / Clang ---
+#elif USE_GCC_INTRINSIC  // NOTE(review): set in fstcpp/fstcpp.h; if undefined it reads as 0 here - confirm that header is included first
 #	define FST_UNREACHABLE __builtin_unreachable()
-#elif defined(_MSC_VER)
-// --- MSVC ---
-#	define FST_UNREACHABLE __assume(0)
+// TODO: implement MSVC version (the branch removed here used __assume(0))
+// #elif USE_MSVC_INTRINSIC
 #else
-// --- Fallback ---
 #	define FST_UNREACHABLE std::abort()
 #endif
diff --git a/include/fstcpp/fstcpp_stream_write_helper.h b/include/fstcpp/fstcpp_stream_write_helper.h
index e3b9158ba..ad2999962 100644
--- a/include/fstcpp/fstcpp_stream_write_helper.h
+++ b/include/fstcpp/fstcpp_stream_write_helper.h
@@ -6,10 +6,10 @@
 #pragma once
 // direct include
 // C system headers
-#ifdef _MSC_VER
-#	include <intrin.h>
-#endif
 // C++ standard library headers
+#if defined(__cplusplus) && __cplusplus >= 202302L
+#	include <bit>
+#endif
 #include <cstdint>
 #include <cstring>
 #include <vector>
@@ -28,22 +28,24 @@ namespace platform {
 // clang-format off
 template <typename U> U to_big_endian(U u) { return u; }
 #else
-#if defined(__GNUC__) || defined(__clang__)
+#if defined(__cplusplus) && __cplusplus >= 202302L
+template <typename U, size_t S>
+U to_big_endian(U u, std::integral_constant<size_t, S>) {
+	return std::byteswap(u);
+}
+#elif USE_GCC_INTRINSIC
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 1>) { return u; }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 2>) { return __builtin_bswap16(u); }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 4>) { return __builtin_bswap32(u); }
 template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 8>) { return __builtin_bswap64(u); }
-#elif defined(_MSC_VER) // MSVC
-template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 1>) { return u; }
-template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 2>) { return _byteswap_ushort(u); }
-template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 4>) { return _byteswap_ulong(u); }
-template<typename U> U to_big_endian(U u, std::integral_constant<size_t, 8>) { return _byteswap_uint64(u); }
+// TODO: implement MSVC version
+// #elif USE_MSVC_INTRINSIC
 #else
-template<typename U, size_t S> U to_big_endian(U u, std::integral_constant<size_t, S>) {
-	U ret{ 0 };
+template <typename U, size_t S>
+U to_big_endian(U u, std::integral_constant<size_t, S>) {
+	U ret{0};
 	for (size_t i = 0; i < S; ++i) {
-		ret |= u & 0xff;
-		ret <<= 8;
+		ret = (ret << 8) | (u & 0xff);
 		u >>= 8;
 	}
 	return ret;
diff --git a/include/fstcpp/fstcpp_variable_info.h b/include/fstcpp/fstcpp_variable_info.h
index f707aaaa8..470bb8ef1 100644
--- a/include/fstcpp/fstcpp_variable_info.h
+++ b/include/fstcpp/fstcpp_variable_info.h
@@ -7,10 +7,10 @@
 // direct include
 #include "fstcpp/fstcpp.h"
 // C system headers
-#ifdef _MSC_VER
-#	include <intrin.h>
-#endif
 // C++ standard library headers
+#if defined(__cplusplus) && __cplusplus >= 202002L
+#	include <bit>
+#endif
 #include <algorithm>
 #include <cstdint>
 #include <limits>
@@ -26,19 +26,29 @@ namespace platform {
 
 // Can be replaced with std::bit_width when C++20 is available
 inline uint64_t clog2(uint64_t x) {
-#if defined(__GNUC__) || defined(__clang__)
-	return 64 - __builtin_clzll(x - 1);
-#elif defined(_MSC_VER)  // MSVC
 	if (x <= 1) return 0;
-	unsigned long index;
-	_BitScanReverse64(&index, x - 1);
-	return static_cast<uint64_t>(index + 1);
+#if defined(__cplusplus) && __cplusplus >= 202002L
+	return std::bit_width(x - 1);  // x >= 2 here, so this is ceil(log2(x))
+#elif USE_GCC_INTRINSIC
+	return 64 - __builtin_clzll(x - 1);  // x - 1 is nonzero thanks to the x <= 1 guard above
+// TODO: implement MSVC version
+// #elif USE_MSVC_INTRINSIC
 #else
 	uint64_t r = 0;
-	while (x > 1) {
-		x >>= 1;
-		r++;
-	}
+	x -= 1;  // x >= 1 from here on; the result is the bit width of this value
+	auto CheckAndShift = [&](uint64_t shift) {  // binary-search step for the highest set bit
+		if (x >> shift) {  // some bit at position >= shift is set
+			r += shift;
+			x >>= shift;
+		}
+	};
+	CheckAndShift(32);
+	CheckAndShift(16);
+	CheckAndShift(8);
+	CheckAndShift(4);
+	CheckAndShift(2);
+	CheckAndShift(1);
+	r += x;  // x has been reduced to 1 here, turning floor(log2) into the bit width
 	return r;
 #endif
 }

From fc28f76ef41752597ec5946b4a8d3fdda1ac8273 Mon Sep 17 00:00:00 2001
From: github action <action@example.com>
Date: Wed, 25 Mar 2026 16:24:13 +0000
Subject: [PATCH 8/8] Apply 'make format'

---
 include/verilated_fst_c.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp
index 61fcd4897..7e84aec0f 100644
--- a/include/verilated_fst_c.cpp
+++ b/include/verilated_fst_c.cpp
@@ -180,23 +180,28 @@ void VerilatedFst::pushPrefix(const char* namep, VerilatedTracePrefixType type,
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_INTERFACE, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_PACKED:
-        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK,
+                            fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::STRUCT_UNPACKED:
-        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_UNPACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK,
+                            fst::Hierarchy::AttrSubType::PACK_UNPACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_STRUCT, name, std::string{});
         break;
     case VerilatedTracePrefixType::UNION_PACKED:
-        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK, fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::PACK,
+                            fst::Hierarchy::AttrSubType::PACK_PACKED, "members", l);
         m_fst->setScope(fst::Hierarchy::ScopeType::VCD_UNION, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_PACKED:
-        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY, fst::Hierarchy::AttrSubType::ARRAY_PACKED, "bounds", lr);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY,
+                            fst::Hierarchy::AttrSubType::ARRAY_PACKED, "bounds", lr);
         m_fst->setScope(fst::Hierarchy::ScopeType::SV_ARRAY, name, std::string{});
         break;
     case VerilatedTracePrefixType::ARRAY_UNPACKED:
-        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY, fst::Hierarchy::AttrSubType::ARRAY_UNPACKED, "bounds", lr);
+        m_fst->setAttrBegin(fst::Hierarchy::AttrType::ARRAY,
+                            fst::Hierarchy::AttrSubType::ARRAY_UNPACKED, "bounds", lr);
         m_fst->setScope(fst::Hierarchy::ScopeType::SV_ARRAY, name, std::string{});
         break;
     default: break;
@@ -230,9 +235,7 @@ void VerilatedFst::declare(uint32_t code, const char* name, int dtypenum,
     if (bussed) name_ss << " [" << msb << ":" << lsb << "]";
     const std::string name_str = name_ss.str();
 
-    if (dtypenum > 0) {
-        m_fst->emitEnumTableRef(m_local2fstdtype.at(initUserp()).at(dtypenum));
-    }
+    if (dtypenum > 0) { m_fst->emitEnumTableRef(m_local2fstdtype.at(initUserp()).at(dtypenum)); }
 
     fst::Hierarchy::VarDirection varDir = fst::Hierarchy::VarDirection::IMPLICIT;
     switch (direction) {