From 59cb53cfbc18b8317e06f07b13d0f9a9821b79ba Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 14 Mar 2025 14:06:51 +0000 Subject: [PATCH] Set trigger vector in whole words (#5857) Having many triggers still hits a bottleneck in LLVM leading to long compile times. Instead of setting triggers bit-wise, set them as a whole 64-bit word when possible. This improves C++ compile times by ~4x on some large designs and has minor run-time performance benefit. --- include/verilated_types.h | 7 ++- src/V3AstNodes.cpp | 3 +- src/V3Sched.cpp | 57 +++++++++++++++++++--- test_regress/t/t_json_only_debugcheck.out | 2 +- test_regress/t/t_scheduling_many_clocks.py | 20 ++++++++ test_regress/t/t_scheduling_many_clocks.v | 55 +++++++++++++++++++++ test_regress/t/t_xml_debugcheck.out | 2 +- 7 files changed, 135 insertions(+), 11 deletions(-) create mode 100755 test_regress/t/t_scheduling_many_clocks.py create mode 100644 test_regress/t/t_scheduling_many_clocks.v diff --git a/include/verilated_types.h b/include/verilated_types.h index cd5ea552e..bbda7da92 100644 --- a/include/verilated_types.h +++ b/include/verilated_types.h @@ -184,8 +184,11 @@ public: // Word at given 'wordIndex' uint64_t word(size_t wordIndex) const { return m_flags[wordIndex]; } - // Set specified flag to given value - void set(size_t index, bool value) { + // Set specified word to given value + void setWord(size_t wordIndex, uint64_t value) { m_flags[wordIndex] = value; } + + // Set specified bit to given value + void setBit(size_t index, bool value) { uint64_t& w = m_flags[index / 64]; const size_t bitIndex = index % 64; w &= ~(1ULL << bitIndex); diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 3172ac4ba..13c4b0f87 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -3018,7 +3018,8 @@ void AstCMethodHard::setPurity() { {"resume", false}, {"reverse", false}, {"rsort", false}, - {"set", false}, + {"setBit", false}, + {"setWord", false}, {"set_randmode", false}, {"shuffle", false}, {"size", true}, diff --git a/src/V3Sched.cpp b/src/V3Sched.cpp index 5a619e9b7..b0fcaf7c2 100644 --- a/src/V3Sched.cpp +++ b/src/V3Sched.cpp @@ -501,7 +501,7 @@ struct TriggerKit final { void addFirstIterationTriggerAssignment(AstVarScope* flagp, uint32_t index) const { FileLine* const flp = flagp->fileline(); AstVarRef* const vrefp = new AstVarRef{flp, m_vscp, VAccess::WRITE}; - AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "setBit"}; callp->addPinsp(new AstConst{flp, index}); callp->addPinsp(new AstVarRef{flp, flagp, VAccess::READ}); callp->dtypeSetVoid(); @@ -512,7 +512,7 @@ struct TriggerKit final { void addExtraTriggerAssignment(AstVarScope* extraTriggerVscp, uint32_t index) const { FileLine* const flp = extraTriggerVscp->fileline(); AstVarRef* const vrefp = new AstVarRef{flp, m_vscp, VAccess::WRITE}; - AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "setBit"}; callp->addPinsp(new AstConst{flp, index}); callp->addPinsp(new AstVarRef{flp, extraTriggerVscp, VAccess::READ}); callp->dtypeSetVoid(); @@ -651,9 +651,9 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, } // Set the given trigger to the given value - const auto setTrig = [&](uint32_t index, AstNodeExpr* valp) { + const auto setTrigBit = [&](uint32_t index, AstNodeExpr* valp) { AstVarRef* const vrefp = new AstVarRef{flp, vscp, VAccess::WRITE}; - AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "setBit"}; callp->addPinsp(new AstConst{flp, index}); callp->addPinsp(valp); callp->dtypeSetVoid(); @@ -694,9 +694,14 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, // Add trigger computation uint32_t triggerNumber = extraTriggers.size(); + uint32_t triggerBitIdx = triggerNumber; AstNodeStmt* initialTrigsp = nullptr; std::vector senItemIndex2TriggerIndex; senItemIndex2TriggerIndex.reserve(senItemps.size()); + constexpr uint32_t TRIG_VEC_WORD_SIZE_LOG2 = 6; // 64-bits + constexpr uint32_t TRIG_VEC_WORD_SIZE = 1 << TRIG_VEC_WORD_SIZE_LOG2; + std::vector trigExprps; + trigExprps.reserve(TRIG_VEC_WORD_SIZE); for (const AstSenItem* const senItemp : senItemps) { UASSERT_OBJ(senItemp->isClocked() || senItemp->isHybrid(), senItemp, "Cannot create trigger expression for non-clocked sensitivity"); @@ -706,12 +711,12 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, // Add the trigger computation const auto& pair = senExprBuilder.build(senItemp); - funcp->addStmtsp(setTrig(triggerNumber, pair.first)); + trigExprps.emplace_back(pair.first); // Add initialization time trigger if (pair.second || v3Global.opt.xInitialEdge()) { initialTrigsp - = AstNode::addNext(initialTrigsp, setTrig(triggerNumber, new AstConst{flp, 1})); + = AstNode::addNext(initialTrigsp, setTrigBit(triggerNumber, new AstConst{flp, 1})); } // Add a debug statement for this trigger @@ -723,7 +728,47 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, // ++triggerNumber; + + // Add statements on every word boundary + if (triggerNumber % TRIG_VEC_WORD_SIZE == 0) { + if (triggerBitIdx % TRIG_VEC_WORD_SIZE != 0) { + // Set leading triggers bit-wise + for (AstNodeExpr* const exprp : trigExprps) { + funcp->addStmtsp(setTrigBit(triggerBitIdx++, exprp)); + } + } else { + // Set whole word as a unit + UASSERT_OBJ(triggerNumber == triggerBitIdx + TRIG_VEC_WORD_SIZE, senItemp, + "Mismatched index"); + UASSERT_OBJ(trigExprps.size() == TRIG_VEC_WORD_SIZE, senItemp, + "There should be TRIG_VEC_WORD_SIZE expressions"); + // Concatenate all bits in a tree + for (uint32_t level = 0; level < TRIG_VEC_WORD_SIZE_LOG2; ++level) { + const uint32_t stride = 1 << level; + for (uint32_t i = 0; i < TRIG_VEC_WORD_SIZE; i += 2 * stride) { + trigExprps[i] = new AstConcat{trigExprps[i]->fileline(), + trigExprps[i + stride], trigExprps[i]}; + trigExprps[i + stride] = nullptr; + } + } + // Set the whole word in the trigger vector + AstVarRef* const vrefp = new AstVarRef{flp, vscp, VAccess::WRITE}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "setWord"}; + callp->addPinsp(new AstConst{flp, triggerBitIdx / TRIG_VEC_WORD_SIZE}); + callp->addPinsp(trigExprps[0]); + callp->dtypeSetVoid(); + funcp->addStmtsp(callp->makeStmt()); + triggerBitIdx += TRIG_VEC_WORD_SIZE; + } + UASSERT_OBJ(triggerNumber == triggerBitIdx, senItemp, "Mismatched index"); + trigExprps.clear(); + } } + // Set trailing triggers bit-wise + for (AstNodeExpr* const exprp : trigExprps) { + funcp->addStmtsp(setTrigBit(triggerBitIdx++, exprp)); + } + trigExprps.clear(); // Construct the map from old SenTrees to new SenTrees for (const AstSenTree* const senTreep : senTreeps) { diff --git a/test_regress/t/t_json_only_debugcheck.out b/test_regress/t/t_json_only_debugcheck.out index b8e816102..70189abaa 100644 --- a/test_regress/t/t_json_only_debugcheck.out +++ b/test_regress/t/t_json_only_debugcheck.out @@ -993,7 +993,7 @@ "stmtsp": [ {"type":"STMTEXPR","name":"","addr":"(CP)","loc":"d,11:8,11:9", "exprp": [ - {"type":"CMETHODHARD","name":"set","addr":"(DP)","loc":"d,11:8,11:9","dtypep":"(CB)", + {"type":"CMETHODHARD","name":"setBit","addr":"(DP)","loc":"d,11:8,11:9","dtypep":"(CB)", "fromp": [ {"type":"VARREF","name":"__VactTriggered","addr":"(EP)","loc":"d,11:8,11:9","dtypep":"(NB)","access":"WR","varp":"(U)","varScopep":"UNLINKED","classOrPackagep":"UNLINKED"} ], diff --git a/test_regress/t/t_scheduling_many_clocks.py b/test_regress/t/t_scheduling_many_clocks.py new file mode 100755 index 000000000..5c1d7aad8 --- /dev/null +++ b/test_regress/t/t_scheduling_many_clocks.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.sim_time = 100000 + +test.scenarios('simulator') + +test.compile() + +test.execute() + +test.passes() diff --git a/test_regress/t/t_scheduling_many_clocks.v b/test_regress/t/t_scheduling_many_clocks.v new file mode 100644 index 000000000..04aeccb45 --- /dev/null +++ b/test_regress/t/t_scheduling_many_clocks.v @@ -0,0 +1,55 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2025 by Wilson Snyder. +// SPDX-License-Identifier: CC0-1.0 + +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); + +module t(/*AUTOARG*/ + // Inputs + clk + ); + + input clk; + + localparam int ITERATIONS = 5; + localparam int N = 227; + + logic [N-1:0] gclk = {N{1'b0}}; + + // Not actually used, but creates an extra internal trigger + export "DPI-C" function toggle; + function void toggle(); + gclk = ~gclk; + endfunction + + int cyc = 0; + always @(posedge clk) begin + if (~|gclk) begin + gclk[0] = 1'b1; + end else begin + gclk = {gclk[N-2:0], gclk[N-1]}; + end + + cyc <= cyc + 32'd1; + if (cyc == ITERATIONS*N - 1) begin + $display("cyc"); + $write("*-* All Finished *-*\n"); + $finish; + end + end + + for (genvar n = 0; n < N; n++) begin : gen + int cnt = 0; + always @(posedge gclk[n]) cnt <= cnt + 1; + + wire int cnt_plus_one = cnt + 1; + + final begin + `checkh(cnt_plus_one, ITERATIONS + 1); + end + end + +endmodule diff --git a/test_regress/t/t_xml_debugcheck.out b/test_regress/t/t_xml_debugcheck.out index d962b2382..9b6d889fd 100644 --- a/test_regress/t/t_xml_debugcheck.out +++ b/test_regress/t/t_xml_debugcheck.out @@ -601,7 +601,7 @@ - +