verilator/src/V3SchedPartition.cpp

432 lines
17 KiB
C++
Raw Normal View History

// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: Scheduling - partitioning
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of either the GNU Lesser General Public License Version 3
// or the Perl Artistic License Version 2.0.
// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
//
// V3SchedPartition (and in particular V3Sched::partition) partitions all
// logic into two regions, the 'act' region contains all logic that might
// compute a clock via an update event that falls into the SystemVerilog Active
// scheduling region (that is: blocking and continuous assignments in
// particular). All other logic is assigned to the 'nba' region.
//
// To achieve this, we build a dependency graph of all logic in the design,
// and trace back from every AstSenItem through all logic that might (via an
// Active region update) feed into triggering that AstSenItem. Any such logic
// is then assigned to the 'act' region, and all other logic is assigned to
// the 'nba' region.
//
// For later practical purposes, AstAlwaysPre logic that would be assigned to
// the 'act' region is returned separately. Nevertheless, this logic is part of
// the 'act' region.
//
// For more details, please see the internals documentation.
//
//*************************************************************************
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
2022-08-05 13:15:59 +02:00
#include "V3EmitV.h"
#include "V3Graph.h"
2022-08-05 13:15:59 +02:00
#include "V3Sched.h"
#include <tuple>
#include <unordered_map>
#include <vector>
2022-09-22 18:28:42 +02:00
VL_DEFINE_DEBUG_FUNCTIONS;
namespace V3Sched {
namespace {
// Graph vertex standing for one AstSenItem (a sensitivity expression).
// 'colorActiveRegion' starts its backwards trace from vertices of this type.
class SchedSenVertex final : public V3GraphVertex {
    VL_RTTI_IMPL(SchedSenVertex, V3GraphVertex)
    const AstSenItem* const m_senItemp;  // The sensitivity item this vertex represents

public:
    // Create the vertex in 'graphp' for the given sensitivity item
    SchedSenVertex(V3Graph* graphp, const AstSenItem* senItemp)
        : V3GraphVertex{graphp}
        , m_senItemp{senItemp} {}

    // LCOV_EXCL_START // Debug code
    // Vertex label for graph dumps: the text V3EmitV emits for the sensitivity item
    string name() const override {
        std::ostringstream os;
        // NOTE(review): const_cast presumably needed because verilogForTree takes a
        // non-const node pointer - confirm against V3EmitV.h
        V3EmitV::verilogForTree(const_cast<AstSenItem*>(m_senItemp), os);
        return os.str();
    }
    string dotShape() const override { return "doubleoctagon"; }
    string dotColor() const override { return "red"; }
    // LCOV_EXCL_STOP
};
// Graph vertex standing for one piece of logic (the node passed to
// SchedGraphBuilder::visitLogic), together with the scope and sensitivity tree it was
// found under, so that 'partition' can re-home the logic into the right region.
class SchedLogicVertex final : public V3GraphVertex {
    VL_RTTI_IMPL(SchedLogicVertex, V3GraphVertex)
    AstScope* const m_scopep;  // Scope the logic belongs to
    AstSenTree* const m_senTreep;  // Sensitivity tree of the enclosing AstActive
    AstNode* const m_logicp;  // The logic node itself

public:
    // Create the vertex in 'graphp' for 'logicp', remembering its scope and sensitivities
    SchedLogicVertex(V3Graph* graphp, AstScope* scopep, AstSenTree* senTreep, AstNode* logicp)
        : V3GraphVertex{graphp}
        , m_scopep{scopep}
        , m_senTreep{senTreep}
        , m_logicp{logicp} {}

    // ACCESSORS
    AstScope* scopep() const { return m_scopep; }
    AstSenTree* senTreep() const { return m_senTreep; }
    AstNode* logicp() const { return m_logicp; }

    // LCOV_EXCL_START // Debug code
    // Vertex label for graph dumps: node type name and source location, on two lines
    string name() const override VL_MT_STABLE {
        return m_logicp->typeName() + ("\n" + m_logicp->fileline()->ascii());
    }
    string dotShape() const override { return "rectangle"; }
    // LCOV_EXCL_STOP
};
class SchedVarVertex final : public V3GraphVertex {
VL_RTTI_IMPL(SchedVarVertex, V3GraphVertex)
const AstVarScope* const m_vscp;
public:
SchedVarVertex(V3Graph* graphp, AstVarScope* vscp)
: V3GraphVertex{graphp}
, m_vscp{vscp} {}
2022-05-16 21:02:49 +02:00
Deprecate clocker attribute and --clk option (#6463) The only use for the clocker attribute and the AstVar::isUsedClock that is actually necessary today for correctness is to mark top level inputs of --lib-create blocks as being (or driving) a clock signal. Correctness of --lib-create (and hence hierarchical blocks) actually used to depend on having the right optimizations eliminate intermediate clocks (e.g.: V3Gate), when the top level port was not used directly in a sensitivity list, or marking top level signals manually via --clk or the clocker attribute. However V3Sched::partition already needs to trace through the logic to figure out what signals might drive a sensitivity list, so it can very easily mark all top level inputs as such. In this patch we remove the AstVar::attrClocker and AstVar::isUsedClock attributes, and replace them with AstVar::isPrimaryClock, automatically set by V3Sched::partition. This eliminates all need for manual annotation so we are deprecating the --clk/--no-clk options and the clocker/no_clocker attributes. This also eliminates the opportunity for any further mis-optimization similar to #6453. Regarding the other uses of the removed AstVar attributes: - As of 5.000, initial edges are triggered via a separate mechanism applied in V3Sched, so the use in V3EmitCFunc.cpp is redundant - Also as of 5.000, we can handle arbitrary sensitivity expressions, so the restriction on eliminating clock signals in V3Gate is unnecessary - Since the recent change when Dfg is applied after V3Scope, it does perform the equivalent of GateClkDecomp, so we can delete that pass.
2025-09-20 16:50:22 +02:00
AstVar* varp() const { return m_vscp->varp(); }
2022-05-16 21:02:49 +02:00
// LCOV_EXCL_START // Debug code
string name() const override VL_MT_STABLE { return m_vscp->name(); }
string dotShape() const override {
return m_vscp->scopep()->isTop() && m_vscp->varp()->isNonOutput() ? "invhouse" : "ellipse";
}
string dotColor() const override {
return m_vscp->scopep()->isTop() && m_vscp->varp()->isNonOutput() ? "green" : "black";
}
2022-05-16 21:02:49 +02:00
// LCOV_EXCL_STOP
};
class SchedGraphBuilder final : public VNVisitor {
// NODE STATE
// AstVarScope::user1() -> SchedVarVertex
// AstSenItem::user1p() -> SchedSenVertex
// AstVarScope::user2() -> bool: Read of this AstVarScope triggers this logic.
// Used only for hybrid logic.
const VNUser1InUse m_user1InUse;
const VNUser2InUse m_user2InUse;
// STATE
V3Graph* const m_graphp = new V3Graph; // The dataflow graph being built
// The vertices associated with a unique AstSenItem
std::unordered_map<VNRef<AstSenItem>, SchedSenVertex*> m_senVertices;
AstScope* m_scopep = nullptr; // AstScope of the current AstActive
AstSenTree* m_senTreep = nullptr; // AstSenTree of the current AstActive
// Predicate for whether a read of the given variable triggers this block
std::function<bool(AstVarScope*)> m_readTriggersThisLogic;
// The DPI export trigger variable, if any
AstVarScope* const m_dpiExportTriggerp = v3Global.rootp()->dpiExportTriggerp();
SchedVarVertex* getVarVertex(AstVarScope* vscp) const {
if (!vscp->user1p()) {
SchedVarVertex* const vtxp = new SchedVarVertex{m_graphp, vscp};
// If this variable can be written via a DPI export, add a source edge from the
// DPI export trigger vertex. This ensures calls to DPI exports that might write a
// clock end up in the 'act' region.
if (vscp->varp()->isWrittenByDpi()) {
new V3GraphEdge{m_graphp, getVarVertex(m_dpiExportTriggerp), vtxp, 1};
}
vscp->user1p(vtxp);
}
return vscp->user1u().to<SchedVarVertex*>();
}
SchedSenVertex* getSenVertex(AstSenItem* senItemp) {
if (!senItemp->user1p()) {
// There is a unique SchedSenVertex for each globally unique AstSenItem. Multiple
// AstSenTree might use the same AstSenItem (e.g.: posedge clk1 or rst, posedge clk2 or
// rst), so we use a hash map to get the unique SchedSenVertex. (Note: This creates
// separate vertices for ET_CHANGED and ET_HYBRID over the same expression, but that is
// OK for now).
const auto pair = m_senVertices.emplace(*senItemp, nullptr);
// If it does not exist, create it
if (pair.second) {
// Create the vertex
SchedSenVertex* const vtxp = new SchedSenVertex{m_graphp, senItemp};
// Connect up the variable references
if (senItemp->sensp()) {
senItemp->sensp()->foreach([&](AstVarRef* refp) {
new V3GraphEdge{m_graphp, getVarVertex(refp->varScopep()), vtxp, 1};
});
}
// Store back to hash map so we can find it next time
pair.first->second = vtxp;
}
// Cache sensitivity vertex
senItemp->user1p(pair.first->second);
}
return senItemp->user1u().to<SchedSenVertex*>();
}
void visitLogic(AstNode* nodep) {
UASSERT_OBJ(m_senTreep, nodep, "Should be under AstActive");
SchedLogicVertex* const logicVtxp
= new SchedLogicVertex{m_graphp, m_scopep, m_senTreep, nodep};
// Clocked or hybrid logic has explicit sensitivity, so add edge from sensitivity vertex
if (!m_senTreep->hasCombo()) {
2023-11-06 13:13:31 +01:00
m_senTreep->foreach([this, nodep, logicVtxp](AstSenItem* senItemp) {
UASSERT_OBJ(senItemp->isClocked() || senItemp->isHybrid(), nodep,
"Non-clocked SenItem under clocked SenTree");
V3GraphVertex* const eventVtxp = getSenVertex(senItemp);
new V3GraphEdge{m_graphp, eventVtxp, logicVtxp, 10};
});
}
// Add edges based on references
2023-11-06 13:13:31 +01:00
nodep->foreach([this, logicVtxp](const AstVarRef* vrefp) {
AstVarScope* const vscp = vrefp->varScopep();
if (vrefp->access().isReadOrRW() && m_readTriggersThisLogic(vscp)) {
new V3GraphEdge{m_graphp, getVarVertex(vscp), logicVtxp, 10};
}
if (vrefp->access().isWriteOrRW() && !vrefp->varp()->ignoreSchedWrite()) {
new V3GraphEdge{m_graphp, logicVtxp, getVarVertex(vscp), 10};
}
});
// If the logic calls a 'context' DPI import, it might fire the DPI Export trigger
if (m_dpiExportTriggerp) {
2023-11-06 13:13:31 +01:00
nodep->foreach([this, logicVtxp](const AstCCall* callp) {
if (!callp->funcp()->dpiImportWrapper()) return;
if (!callp->funcp()->dpiContext()) return;
new V3GraphEdge{m_graphp, logicVtxp, getVarVertex(m_dpiExportTriggerp), 10};
});
}
}
// VISIT methods
2022-09-16 17:15:10 +02:00
void visit(AstActive* nodep) override {
AstSenTree* const senTreep = nodep->sentreep();
UASSERT_OBJ(senTreep->hasClocked() || senTreep->hasCombo() || senTreep->hasHybrid(), nodep,
"Unhandled");
UASSERT_OBJ(!m_senTreep, nodep, "Should not nest");
// Mark explicit sensitivities as not triggering these blocks
if (senTreep->hasHybrid()) {
AstNode::user2ClearTree();
senTreep->foreach([](const AstVarRef* refp) { //
refp->varScopep()->user2(true);
});
}
VL_RESTORER(m_senTreep);
m_senTreep = senTreep;
iterateChildrenConst(nodep);
}
2022-09-16 17:15:10 +02:00
void visit(AstNodeProcedure* nodep) override { visitLogic(nodep); }
void visit(AstNodeAssign* nodep) override { visitLogic(nodep); }
void visit(AstCoverToggle* nodep) override { visitLogic(nodep); }
// Pre and Post logic are handled separately
void visit(AstAlwaysPre* nodep) override {}
2022-09-16 17:15:10 +02:00
void visit(AstAlwaysPost* nodep) override {}
2022-05-16 21:02:49 +02:00
// LCOV_EXCL_START
// Ignore
2022-09-16 17:15:10 +02:00
void visit(AstInitialStatic* nodep) override { nodep->v3fatalSrc("Should not need ordering"); }
void visit(AstInitial* nodep) override { //
nodep->v3fatalSrc("Should not need ordering");
}
2022-09-16 17:15:10 +02:00
void visit(AstFinal* nodep) override { //
nodep->v3fatalSrc("Should not need ordering");
2022-05-16 21:02:49 +02:00
}
2022-05-16 21:02:49 +02:00
// Default - Any other AstActive content not handled above will hit this
2022-09-16 17:15:10 +02:00
void visit(AstNode* nodep) override { //
2022-12-23 17:32:38 +01:00
nodep->v3fatalSrc("Should be handled above");
2022-05-16 21:02:49 +02:00
}
// LCOV_EXCL_STOP
SchedGraphBuilder(const LogicByScope& clockedLogic, const LogicByScope& combinationalLogic,
const LogicByScope& hybridLogic) {
// Build the data flow graph
const auto iter = [this](const LogicByScope& lbs) {
for (const auto& pair : lbs) {
m_scopep = pair.first;
iterate(pair.second);
m_scopep = nullptr;
}
};
// Clocked logic is never triggered by reads
m_readTriggersThisLogic = [](AstVarScope*) { return false; };
iter(clockedLogic);
// Combinational logic is always triggered by reads
m_readTriggersThisLogic = [](AstVarScope*) { return true; };
iter(combinationalLogic);
// Hybrid logic is triggered by all reads, except for reads of the explicit sensitivities
m_readTriggersThisLogic = [](AstVarScope* vscp) { return !vscp->user2(); };
iter(hybridLogic);
}
public:
// Build the dataflow graph for partitioning
static std::unique_ptr<V3Graph> build(const LogicByScope& clockedLogic,
const LogicByScope& combinationalLogic,
const LogicByScope& hybridLogic) {
SchedGraphBuilder visitor{clockedLogic, combinationalLogic, hybridLogic};
return std::unique_ptr<V3Graph>{visitor.m_graphp};
}
};
2024-03-26 00:06:25 +01:00
// Color (with color 1) every vertex of 'graph' that belongs to the active region.
// Starting from all SchedSenVertex, the graph is traced backwards along in-edges. In
// addition, from every reached logic vertex the variables it drives are also visited.
// As a side effect, every reached variable that is a primary IO is marked as a primary
// clock. Vertices that are never reached keep color 0.
void colorActiveRegion(V3Graph& graph) {
    // Work list for the depth first traversal (used as a LIFO stack)
    std::vector<V3GraphVertex*> queue{};

    // Trace from all SchedSenVertex
    for (V3GraphVertex& vtx : graph.vertices()) {
        if (const auto activeEventVtxp = vtx.cast<SchedSenVertex>()) {
            queue.push_back(activeEventVtxp);
        }
    }

    // Depth first traversal
    while (!queue.empty()) {
        // Pop next work item
        V3GraphVertex& vtx = *queue.back();
        queue.pop_back();
        // If not first encounter, move on
        if (vtx.color() != 0) continue;

        // Mark vertex as being in active region
        vtx.color(1);

        // Enqueue all parent vertices that feed this vertex.
        for (V3GraphEdge& edge : vtx.inEdges()) queue.push_back(edge.fromp());

        // Mark top level ports that drive a sensitivity list
        if (SchedVarVertex* const vvtxp = vtx.cast<SchedVarVertex>()) {
            AstVar* const varp = vvtxp->varp();
            if (varp->isPrimaryIO()) varp->setPrimaryClock();
        }

        // If this is a logic vertex, also enqueue all variable vertices that are driven from this
        // logic. This will ensure that if a variable is set in the active region, then all
        // settings of that variable will be in the active region.
        if (vtx.is<SchedLogicVertex>()) {
            for (V3GraphEdge& edge : vtx.outEdges()) {
                UASSERT(edge.top()->is<SchedVarVertex>(), "Should be var vertex");
                queue.push_back(edge.top());
            }
        }
    }
}
} // namespace
LogicRegions partition(LogicByScope& clockedLogic, LogicByScope& combinationalLogic,
LogicByScope& hybridLogic) {
UINFO(2, __FUNCTION__ << ":");
// Build the graph
const std::unique_ptr<V3Graph> graphp
= SchedGraphBuilder::build(clockedLogic, combinationalLogic, hybridLogic);
if (dumpGraphLevel() >= 6) graphp->dumpDotFilePrefixed("sched");
// Partition into Active and NBA regions
colorActiveRegion(*(graphp.get()));
if (dumpGraphLevel() >= 6) graphp->dumpDotFilePrefixed("sched-partitioned", true);
LogicRegions result;
2024-03-26 00:06:25 +01:00
for (V3GraphVertex& vtx : graphp->vertices()) {
if (const auto lvtxp = vtx.cast<SchedLogicVertex>()) {
Support #0 delays with IEEE-1800 compliant semantics (#7079) This patch adds IEEE-1800 compliant scheduling support for the Inactive scheduling region used for #0 delays. Implementing this requires that **all** IEEE-1800 active region events are placed in the internal 'act' section. This has simulation performance implications. It prevents some optimizations (e.g. V3LifePost), which reduces single threaded performance. It also reduces the available work and parallelism in the internal 'nba' section, which reduced the effectiveness of multi-threading severely. Performance impact on RTLMeter when using scheduling adjusted to support proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100% (2x slower) with --threads 4. To avoid paying this performance penalty unconditionally, the scheduling is only adjusted if either: 1. The input contains a statically known #0 delay 2. The input contains a variable #x delay unknown at compile time If no #0 is present, but #x variable delays are, a ZERODLY warning is issued advising the use of '--no-sched-zero-delay' which is a promise by the user that none of the variable delays will evaluate to a zero delay at run-time. This warning is turned off if '--sched-zero-delay' is explicitly given. This is similar to the '--timing' option. If '--no-sched-zero-delay' was used at compile time, then executing a zero delay will fail at runtime. A ZERODLY warning is also issued if a static #0 if found, but the user specified '--no-sched-zero-delay'. In this case the scheduling is not adjusted to support #0, so executing it will fail at runtime. Presumably the user knows it won't be executed. 
The intended behaviour with all this is the following: No #0, no #var in the design (#constant is OK) -> Same as current behaviour, scheduling not adjusted, same code generated as before Has static #0 and '--no-sched-zero-delay' is NOT given: -> No warnings, scheduling adjusted so it just works, runs slow Has static #0 and '--no-sched-zero-delay' is given: -> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit No static #0, but has #var and no option is given: -> ZERODLY on the #var advising use of '--no-sched-zero-delay' or '--sched-zero-delay' (similar to '--timing'), scheduling adjusted assuming it can be a zero delay and it just works No static #0, but has #var and '--no-sched-zero-delay' is given: -> No warning, scheduling not adjusted, fails at runtime if zero delay No static #0, but has #var and '--sched-zero-delay' is given: -> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
// Move to 'act'/'nba' based on coloring by default ...
bool toAct = lvtxp->color();
// ... however, if a #0 delay is possible, then the 'inact' region is required,
// in which case **EVERYTHING** that is not a Post block needs to go to 'act'.
// This severely limits downstream optimizations (e.g. V3LifePost), and severely
// reduces available parallelism in 'nba' for multi-threaded execution.
if (v3Global.usesZeroDelay() && !VN_IS(lvtxp->logicp(), AlwaysPost)) toAct = true;
LogicByScope& lbs = toAct ? result.m_act : result.m_nba;
AstNode* const logicp = lvtxp->logicp();
logicp->unlinkFrBack();
lbs.add(lvtxp->scopep(), lvtxp->senTreep(), logicp);
}
}
// Partition the Pre logic
{
const VNUser1InUse user1InUse; // AstVarScope::user1() -> bool: read in Active region
2022-12-23 17:32:38 +01:00
const VNUser2InUse user2InUse; // AstVarScope::user2() -> bool: written in Active region
const auto markVars = [](AstNode* nodep) {
nodep->foreach([](const AstNodeVarRef* vrefp) {
AstVarScope* const vscp = vrefp->varScopep();
if (vrefp->access().isReadOrRW()) vscp->user1(true);
if (vrefp->access().isWriteOrRW() && !vrefp->varp()->ignoreSchedWrite())
vscp->user2(true);
});
};
for (const auto& pair : result.m_act) {
AstActive* const activep = pair.second;
markVars(activep->sentreep());
markVars(activep);
}
// AstAlwaysPre and AstAlwaysPost should only appear under a clocked
// AstActive, and should be the only thing left at this point.
for (const auto& pair : clockedLogic) {
AstScope* const scopep = pair.first;
AstActive* const activep = pair.second;
for (AstNode *nodep = activep->stmtsp(), *nextp; nodep; nodep = nextp) {
nextp = nodep->nextp();
if (AstAlwaysPre* const logicp = VN_CAST(nodep, AlwaysPre)) {
bool toActiveRegion = false;
logicp->foreach([&](const AstNodeVarRef* vrefp) {
AstVarScope* const vscp = vrefp->varScopep();
if (vrefp->access().isReadOnly()) {
// Variable only read in Pre, and is written in active region
if (vscp->user2()) toActiveRegion = true;
} else {
// Variable written in Pre, and referenced in active region
if (vscp->user1() || vscp->user2()) toActiveRegion = true;
}
});
LogicByScope& lbs = toActiveRegion ? result.m_pre : result.m_nba;
logicp->unlinkFrBack();
lbs.add(scopep, activep->sentreep(), logicp);
} else {
UASSERT_OBJ(VN_IS(nodep, AlwaysPost), nodep,
"Unexpected node type " << nodep->typeName());
nodep->unlinkFrBack();
result.m_nba.add(scopep, activep->sentreep(), nodep);
}
}
}
}
// Clean up remains of inputs
clockedLogic.deleteActives();
combinationalLogic.deleteActives();
hybridLogic.deleteActives();
return result;
}
} // namespace V3Sched