2022-05-15 17:03:32 +02:00
|
|
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
// DESCRIPTION: Verilator: Code scheduling
|
|
|
|
|
//
|
|
|
|
|
// Code available from: https://verilator.org
|
|
|
|
|
//
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
//
|
2026-01-27 02:24:34 +01:00
|
|
|
// This program is free software; you can redistribute it and/or modify it
|
|
|
|
|
// under the terms of either the GNU Lesser General Public License Version 3
|
|
|
|
|
// or the Perl Artistic License Version 2.0.
|
|
|
|
|
// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder
|
2022-05-15 17:03:32 +02:00
|
|
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
|
|
|
//
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
//
|
|
|
|
|
// V3Sched::schedule is the top level entry-point to the scheduling algorithm;
|
|
|
|
|
// at a high level, the process is:
|
|
|
|
|
//
|
|
|
|
|
// - Gather and classify all logic in the design based on what triggers its execution
|
|
|
|
|
// - Schedule static, initial and final logic classes in source order
|
|
|
|
|
// - Break combinational cycles by introducing hybrid logic
|
|
|
|
|
// - Create 'settle' region that restores the combinational invariant
|
|
|
|
|
// - Partition the clocked and combinational (including hybrid) logic into pre/act/nba.
|
|
|
|
|
// All clocks (signals referenced in an AstSenTree) generated via a blocking assignment
|
|
|
|
|
// (including combinationally generated signals) are computed within the act region.
|
|
|
|
|
// - Replicate combinational logic
|
|
|
|
|
// - Create input combinational logic loop
|
|
|
|
|
// - Create the pre/act/nba triggers
|
|
|
|
|
// - Create the 'act' region evaluation function
|
|
|
|
|
// - Create the 'nba' region evaluation function
|
|
|
|
|
// - Bolt it all together to create the '_eval' function
|
|
|
|
|
//
|
|
|
|
|
// Details of the algorithm are described in the internals documentation docs/internals.rst
|
|
|
|
|
//
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
|
2023-10-18 12:37:46 +02:00
|
|
|
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
|
|
|
|
|
2022-08-05 13:15:59 +02:00
|
|
|
#include "V3Sched.h"
|
|
|
|
|
|
2025-10-27 11:41:30 +01:00
|
|
|
#include "V3Const.h"
|
2022-05-15 17:03:32 +02:00
|
|
|
#include "V3EmitCBase.h"
|
|
|
|
|
#include "V3EmitV.h"
|
|
|
|
|
#include "V3Order.h"
|
2022-09-05 16:17:51 +02:00
|
|
|
#include "V3SenExprBuilder.h"
|
2022-05-15 17:03:32 +02:00
|
|
|
#include "V3Stats.h"
|
|
|
|
|
|
2022-09-22 18:28:42 +02:00
|
|
|
VL_DEFINE_DEBUG_FUNCTIONS;
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
namespace V3Sched {
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
//============================================================================
|
|
|
|
|
// Utility functions
|
|
|
|
|
|
2022-07-30 18:49:30 +02:00
|
|
|
// Gather the AstSenTrees referenced by the AstActives of the given logic
// classes, keeping only those that are clocked or hybrid. Each AstSenTree is
// reported at most once, in the order it is first encountered.
std::vector<const AstSenTree*> getSenTreesUsedBy(const std::vector<const LogicByScope*>& lbsps) {
    // user1 is claimed for the duration of this function; it is used via
    // user1SetOnce() below to skip AstSenTrees that were already considered
    const VNUser1InUse user1InUse;
    std::vector<const AstSenTree*> usedSenTreeps;
    for (const LogicByScope* const logicp : lbsps) {
        for (const auto& entry : *logicp) {
            AstSenTree* const sentreep = entry.second->sentreep();
            // Skip if user1SetOnce() reports this AstSenTree as already marked
            const bool alreadySeen = sentreep->user1SetOnce();
            if (alreadySeen) continue;
            // Only clocked or hybrid sensitivities are of interest
            if (!sentreep->hasClocked() && !sentreep->hasHybrid()) continue;
            usedSenTreeps.push_back(sentreep);
        }
    }
    return usedSenTreeps;
}
|
|
|
|
|
|
2022-10-03 05:04:55 +02:00
|
|
|
void remapSensitivities(const LogicByScope& lbs,
|
2025-08-21 10:43:37 +02:00
|
|
|
const std::unordered_map<const AstSenTree*, AstSenTree*>& senTreeMap) {
|
2022-05-15 17:03:32 +02:00
|
|
|
for (const auto& pair : lbs) {
|
|
|
|
|
AstActive* const activep = pair.second;
|
2025-08-18 01:14:34 +02:00
|
|
|
AstSenTree* const senTreep = activep->sentreep();
|
2022-05-15 17:03:32 +02:00
|
|
|
if (senTreep->hasCombo()) continue;
|
2025-08-18 01:14:34 +02:00
|
|
|
activep->sentreep(senTreeMap.at(senTreep));
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-30 18:49:30 +02:00
|
|
|
void invertAndMergeSenTreeMap(
|
2024-03-09 13:43:09 +01:00
|
|
|
V3Order::TrigToSenMap& result,
|
2022-07-30 18:49:30 +02:00
|
|
|
const std::unordered_map<const AstSenTree*, AstSenTree*>& senTreeMap) {
|
2024-09-25 11:35:50 +02:00
|
|
|
for (const auto& pair : senTreeMap) result.emplace(pair.second, pair.first);
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
|
2025-08-23 16:45:13 +02:00
|
|
|
std::vector<AstSenTree*>
|
2026-02-06 23:20:10 +01:00
|
|
|
findTriggeredIface(const AstVarScope* vscp,
|
2025-08-23 16:45:13 +02:00
|
|
|
const VirtIfaceTriggers::IfaceMemberSensMap& vifMemberTriggered) {
|
2026-02-06 23:20:10 +01:00
|
|
|
const AstIface* ifacep;
|
|
|
|
|
if (vscp->varp()->isVirtIface()) {
|
|
|
|
|
// If `vscp->varp()->isVirtIface()` is true then the interface type that viface is pointing
|
|
|
|
|
// to is under `VN_AS(vscp->varp()->dtypep(), IfaceRefDType)->ifacep()`
|
|
|
|
|
|
|
|
|
|
ifacep = VN_AS(vscp->varp()->dtypep(), IfaceRefDType)->ifacep();
|
|
|
|
|
|
|
|
|
|
// Virtual interface is sensitive to a different interface type than it is a virtual type
|
|
|
|
|
// of - this may be a valid behaviour but this function does not expects that
|
|
|
|
|
UASSERT_OBJ(vscp->varp()->sensIfacep() == nullptr, vscp,
|
|
|
|
|
"Virtual interface has an ambiguous type - "
|
|
|
|
|
<< vscp->varp()->sensIfacep()->prettyTypeName()
|
|
|
|
|
<< " != " << ifacep->prettyTypeName());
|
|
|
|
|
} else {
|
|
|
|
|
// If `vscp->varp()` is of a non-virtual interface type it has `sensIfacep()` set to
|
|
|
|
|
// interface it is sensitive to
|
|
|
|
|
ifacep = vscp->varp()->sensIfacep();
|
|
|
|
|
}
|
|
|
|
|
UASSERT_OBJ(ifacep, vscp, "Variable is not sensitive for any interface");
|
2025-08-23 16:45:13 +02:00
|
|
|
std::vector<AstSenTree*> result;
|
2025-07-18 15:07:31 +02:00
|
|
|
for (const auto& memberIt : vifMemberTriggered) {
|
2026-02-06 23:20:10 +01:00
|
|
|
if (memberIt.first.m_ifacep == ifacep) result.push_back(memberIt.second);
|
2025-07-18 15:07:31 +02:00
|
|
|
}
|
2026-02-06 23:20:10 +01:00
|
|
|
UASSERT_OBJ(!result.empty(), vscp, "Did not find virtual interface trigger");
|
2025-08-23 16:45:13 +02:00
|
|
|
return result;
|
2025-07-18 15:07:31 +02:00
|
|
|
}
|
|
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
//============================================================================
|
2025-10-27 16:16:28 +01:00
|
|
|
// Eval loop builder
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2024-01-20 21:06:46 +01:00
|
|
|
struct EvalLoop final {
|
2025-10-31 19:29:11 +01:00
|
|
|
// Flag set to true on entry to the first iteration of the loop
|
2023-10-28 09:14:38 +02:00
|
|
|
AstVarScope* firstIterp;
|
|
|
|
|
// The loop itself and statements around it
|
2025-10-31 19:29:11 +01:00
|
|
|
AstNodeStmt* stmtsp;
|
2023-10-28 09:14:38 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Create an eval loop with all the trimmings.
|
2023-12-10 14:32:24 +01:00
|
|
|
EvalLoop createEvalLoop(
|
|
|
|
|
AstNetlist* netlistp, //
|
|
|
|
|
const std::string& tag, // Tag for current phase
|
|
|
|
|
const string& name, // Name of current phase
|
|
|
|
|
bool slow, // Should create slow functions
|
2025-10-31 19:29:11 +01:00
|
|
|
const TriggerKit& trigKit, // The trigger kit
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
AstVarScope* trigp, // The trigger vector - may be nullptr if no triggers or using 'condp'
|
|
|
|
|
AstNodeExpr* condp, // Explicit condition that must be true to run 'phaseWorkp'
|
2023-12-10 14:32:24 +01:00
|
|
|
AstNodeStmt* innerp, // The inner loop, if any
|
|
|
|
|
AstNodeStmt* phasePrepp, // Prep statements run before checking triggers
|
|
|
|
|
AstNodeStmt* phaseWorkp, // The work to do if anything triggered
|
|
|
|
|
// Extra statements to run after the work, even if no triggers fired. This function is
|
|
|
|
|
// passed a variable, which must be set to true if we must continue and loop again,
|
|
|
|
|
// and must be unmodified otherwise.
|
|
|
|
|
std::function<AstNodeStmt*(AstVarScope*)> phaseExtra = [](AstVarScope*) { return nullptr; } //
|
2023-10-28 09:14:38 +02:00
|
|
|
) {
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
UASSERT(!trigp || !condp, "Cannot use both 'trigp' and 'condp' in 'createEvalLoop'");
|
|
|
|
|
|
|
|
|
|
// All work is under a trigger or condition, so if there are none,
|
|
|
|
|
// there is nothing to do besides executing the inner loop.
|
|
|
|
|
if (!trigp && !condp) return {nullptr, innerp};
|
2025-10-31 19:29:11 +01:00
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
const std::string varPrefix = "__V" + tag;
|
|
|
|
|
AstScope* const scopeTopp = netlistp->topScopep()->scopep();
|
|
|
|
|
FileLine* const flp = netlistp->fileline();
|
|
|
|
|
|
|
|
|
|
// We wrap the prep/cond/work in a function for readability
|
2025-10-27 16:16:28 +01:00
|
|
|
AstCFunc* const phaseFuncp = util::makeTopFunction(netlistp, "_eval_phase__" + tag, slow);
|
2023-10-28 09:14:38 +02:00
|
|
|
{
|
|
|
|
|
// Add the preparatory statements
|
|
|
|
|
phaseFuncp->addStmtsp(phasePrepp);
|
|
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
// The execute flag
|
|
|
|
|
AstVarScope* const executeFlagp = scopeTopp->createTemp(varPrefix + "Execute", 1);
|
|
|
|
|
executeFlagp->varp()->noReset(true);
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
// If there is work in this phase, execute it if any triggers fired
|
|
|
|
|
if (phaseWorkp) {
|
|
|
|
|
AstNodeExpr* const lhsp = new AstVarRef{flp, executeFlagp, VAccess::WRITE};
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
// If using explicit condition, that directly determines whether to execute,
|
|
|
|
|
// otherwise check if any triggers are fired
|
|
|
|
|
AstNodeExpr* const rhsp = condp ? condp : trigKit.newAnySetCall(trigp);
|
2025-10-31 19:29:11 +01:00
|
|
|
phaseFuncp->addStmtsp(new AstAssign{flp, lhsp, rhsp});
|
|
|
|
|
|
|
|
|
|
// Add the work
|
|
|
|
|
AstIf* const ifp = new AstIf{flp, new AstVarRef{flp, executeFlagp, VAccess::READ}};
|
|
|
|
|
ifp->addThensp(phaseWorkp);
|
|
|
|
|
phaseFuncp->addStmtsp(ifp);
|
|
|
|
|
}
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2023-12-10 14:32:24 +01:00
|
|
|
// Construct the extra statements
|
2025-10-31 19:29:11 +01:00
|
|
|
AstNodeStmt* const extraWorkp = phaseExtra(executeFlagp);
|
|
|
|
|
if (extraWorkp) phaseFuncp->addStmtsp(extraWorkp);
|
2023-12-10 14:32:24 +01:00
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
// The function returns ture iff it did run work
|
2023-10-28 09:14:38 +02:00
|
|
|
phaseFuncp->rtnType("bool");
|
2025-10-31 19:29:11 +01:00
|
|
|
AstNodeExpr* const retp
|
|
|
|
|
= phaseWorkp || extraWorkp
|
|
|
|
|
? static_cast<AstNodeExpr*>(new AstVarRef{flp, executeFlagp, VAccess::READ})
|
|
|
|
|
: static_cast<AstNodeExpr*>(new AstConst{flp, AstConst::BitFalse{}});
|
|
|
|
|
phaseFuncp->addStmtsp(new AstCReturn{flp, retp});
|
2023-10-28 09:14:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The result statements
|
|
|
|
|
AstNodeStmt* stmtps = nullptr;
|
|
|
|
|
|
|
|
|
|
// Prof-exec section push
|
2025-11-25 06:53:59 +01:00
|
|
|
if (v3Global.opt.profExec()) { //
|
|
|
|
|
stmtps = AstCStmt::profExecSectionPush(flp, "loop " + tag);
|
|
|
|
|
}
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2026-01-23 18:53:40 +01:00
|
|
|
const auto addVar = [&](const std::string& name, int width, uint32_t initVal, bool init) {
|
|
|
|
|
const string tempName{"__V" + tag + name};
|
|
|
|
|
AstVarScope* const vscp = tempName == "__VstlFirstIteration"
|
|
|
|
|
? netlistp->stlFirstIterationp()
|
|
|
|
|
: scopeTopp->createTemp(tempName, width);
|
2023-10-28 09:14:38 +02:00
|
|
|
vscp->varp()->noReset(true);
|
2025-10-31 19:29:11 +01:00
|
|
|
vscp->varp()->isInternal(true);
|
2026-01-23 18:53:40 +01:00
|
|
|
if (init) stmtps = AstNode::addNext(stmtps, util::setVar(vscp, initVal));
|
2023-10-28 09:14:38 +02:00
|
|
|
return vscp;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// The iteration counter
|
2026-01-23 18:53:40 +01:00
|
|
|
AstVarScope* const counterp = addVar("IterCount", 32, 0, true);
|
2025-10-31 19:29:11 +01:00
|
|
|
// The first iteration flag - cleared in 'phasePrepp' if used
|
2026-01-23 18:53:40 +01:00
|
|
|
AstVarScope* const firstIterFlagp = addVar("FirstIteration", 1, 1, true);
|
|
|
|
|
// Phase function result
|
|
|
|
|
AstVarScope* const phaseResultp = addVar("PhaseResult", 1, 0, false);
|
2023-10-28 09:14:38 +02:00
|
|
|
|
|
|
|
|
// The loop
|
|
|
|
|
{
|
2025-09-29 16:25:25 +02:00
|
|
|
AstLoop* const loopp = new AstLoop{flp};
|
2025-10-31 19:29:11 +01:00
|
|
|
stmtps->addNext(loopp);
|
2023-10-28 09:14:38 +02:00
|
|
|
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
// Check the iteration limit (aborts if exceeded). Dump triggers if using triggers.
|
|
|
|
|
AstNodeStmt* dumpCallp = trigp ? trigKit.newDumpCall(trigp, tag, false) : nullptr;
|
2025-10-31 19:29:11 +01:00
|
|
|
loopp->addStmtsp(util::checkIterationLimit(netlistp, name, counterp, dumpCallp));
|
2023-10-28 09:14:38 +02:00
|
|
|
// Increment the iteration counter
|
2025-10-27 16:16:28 +01:00
|
|
|
loopp->addStmtsp(util::incrementVar(counterp));
|
2023-10-28 09:14:38 +02:00
|
|
|
|
|
|
|
|
// Execute the inner loop
|
|
|
|
|
loopp->addStmtsp(innerp);
|
|
|
|
|
|
|
|
|
|
// Call the phase function to execute the current work. If we did
|
2025-10-31 19:29:11 +01:00
|
|
|
// work, then need to loop again, so set the continuation flag.
|
|
|
|
|
// If used, the first iteration flag is cleared when consumed, no
|
|
|
|
|
// need to reset it
|
2023-10-28 09:14:38 +02:00
|
|
|
AstCCall* const callp = new AstCCall{flp, phaseFuncp};
|
|
|
|
|
callp->dtypeSetBit();
|
2026-01-23 18:53:40 +01:00
|
|
|
AstAssign* const resultAssignp
|
|
|
|
|
= new AstAssign{flp, new AstVarRef{flp, phaseResultp, VAccess::WRITE}, callp};
|
|
|
|
|
loopp->addStmtsp(resultAssignp);
|
|
|
|
|
// Clear FirstIteration flag
|
|
|
|
|
AstAssign* const firstClearp
|
|
|
|
|
= new AstAssign{flp, new AstVarRef{flp, firstIterFlagp, VAccess::WRITE},
|
|
|
|
|
new AstConst{flp, AstConst::BitFalse()}};
|
|
|
|
|
loopp->addStmtsp(firstClearp);
|
2025-10-31 19:29:11 +01:00
|
|
|
// Continues until the continuation flag is clear
|
2026-01-23 18:53:40 +01:00
|
|
|
loopp->addStmtsp(
|
|
|
|
|
new AstLoopTest{flp, loopp, new AstVarRef{flp, phaseResultp, VAccess::READ}});
|
2023-10-28 09:14:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Prof-exec section pop
|
2025-11-25 06:53:59 +01:00
|
|
|
if (v3Global.opt.profExec()) {
|
|
|
|
|
stmtps->addNext(AstCStmt::profExecSectionPop(flp, "loop " + tag));
|
|
|
|
|
}
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2023-12-10 14:32:24 +01:00
|
|
|
return {firstIterFlagp, stmtps};
|
2023-10-28 09:14:38 +02:00
|
|
|
}
|
|
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
//============================================================================
|
|
|
|
|
// Collect and classify all logic in the design
|
|
|
|
|
|
|
|
|
|
// Walk every AstActive under every AstScope of the netlist and sort it into
// one of the logic classes according to its sensitivity: static, initial,
// final, combinational (split into postponed and plain combinational), or
// clocked (split into observed, reactive and plain clocked). Each entry is
// recorded as a (scope, active) pair.
LogicClasses gatherLogicClasses(AstNetlist* netlistp) {
    LogicClasses classes;

    // True if the sensitivity list holds no item beyond the first one
    const auto hasSingleItem = [](AstSenTree* sentreep) { return !sentreep->sensesp()->nextp(); };

    netlistp->foreach([&](AstScope* scopep) {
        scopep->foreach([&](AstActive* activep) {
            AstSenTree* const sentreep = activep->sentreep();

            if (sentreep->hasStatic()) {
                UASSERT_OBJ(hasSingleItem(sentreep), activep,
                            "static initializer with additional sensitivities");
                classes.m_static.emplace_back(scopep, activep);
                return;
            }

            if (sentreep->hasInitial()) {
                UASSERT_OBJ(hasSingleItem(sentreep), activep,
                            "'initial' logic with additional sensitivities");
                classes.m_initial.emplace_back(scopep, activep);
                return;
            }

            if (sentreep->hasFinal()) {
                UASSERT_OBJ(hasSingleItem(sentreep), activep,
                            "'final' logic with additional sensitivities");
                classes.m_final.emplace_back(scopep, activep);
                return;
            }

            if (sentreep->hasCombo()) {
                UASSERT_OBJ(hasSingleItem(sentreep), activep,
                            "combinational logic with additional sensitivities");
                // The first statement tells postponed logic apart
                if (VN_IS(activep->stmtsp(), AlwaysPostponed)) {
                    classes.m_postponed.emplace_back(scopep, activep);
                } else {
                    classes.m_comb.emplace_back(scopep, activep);
                }
                return;
            }

            // Anything left must be clocked
            UASSERT_OBJ(sentreep->hasClocked(), activep, "What else could it be?");
            // The first statement tells observed/reactive logic apart
            if (VN_IS(activep->stmtsp(), AlwaysObserved)) {
                classes.m_observed.emplace_back(scopep, activep);
            } else if (VN_IS(activep->stmtsp(), AlwaysReactive)) {
                classes.m_reactive.emplace_back(scopep, activep);
            } else {
                classes.m_clocked.emplace_back(scopep, activep);
            }
        });
    });

    return classes;
}
|
|
|
|
|
|
|
|
|
|
//============================================================================
|
|
|
|
|
// Simple ordering in source order
|
|
|
|
|
|
|
|
|
|
void orderSequentially(AstCFunc* funcp, const LogicByScope& lbs) {
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
// Create new subfunc for scope
|
|
|
|
|
const auto createNewSubFuncp = [&](AstScope* const scopep) {
|
|
|
|
|
const string subName{funcp->name() + "__" + scopep->nameDotless()};
|
|
|
|
|
AstCFunc* const subFuncp = new AstCFunc{scopep->fileline(), subName, scopep};
|
|
|
|
|
subFuncp->isLoose(true);
|
|
|
|
|
subFuncp->isConst(false);
|
|
|
|
|
subFuncp->declPrivate(true);
|
|
|
|
|
subFuncp->slow(funcp->slow());
|
2022-09-15 20:43:56 +02:00
|
|
|
scopep->addBlocksp(subFuncp);
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
// Call it from the top function
|
2025-10-27 16:16:28 +01:00
|
|
|
funcp->addStmtsp(util::callVoidFunc(subFuncp));
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
return subFuncp;
|
|
|
|
|
};
|
2022-05-15 17:03:32 +02:00
|
|
|
const VNUser1InUse user1InUse; // AstScope -> AstCFunc: the sub-function for the scope
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
const VNUser2InUse user2InUse; // AstScope -> int: sub-function counter used for names
|
2022-05-15 17:03:32 +02:00
|
|
|
for (const auto& pair : lbs) {
|
|
|
|
|
AstScope* const scopep = pair.first;
|
|
|
|
|
AstActive* const activep = pair.second;
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
// Create a sub-function per scope so we can V3Combine them later
|
|
|
|
|
if (!scopep->user1p()) scopep->user1p(createNewSubFuncp(scopep));
|
2022-05-15 17:03:32 +02:00
|
|
|
// Add statements to sub-function
|
|
|
|
|
for (AstNode *logicp = activep->stmtsp(), *nextp; logicp; logicp = nextp) {
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
auto* subFuncp = VN_AS(scopep->user1p(), CFunc);
|
2022-05-15 17:03:32 +02:00
|
|
|
nextp = logicp->nextp();
|
|
|
|
|
if (AstNodeProcedure* const procp = VN_CAST(logicp, NodeProcedure)) {
|
2022-09-15 20:43:56 +02:00
|
|
|
if (AstNode* bodyp = procp->stmtsp()) {
|
2022-05-15 17:03:32 +02:00
|
|
|
bodyp->unlinkFrBackWithNext();
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
// If the process is suspendable, we need a separate function (a coroutine)
|
|
|
|
|
if (procp->isSuspendable()) {
|
2022-12-11 20:44:18 +01:00
|
|
|
funcp->slow(false);
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
subFuncp = createNewSubFuncp(scopep);
|
2023-11-21 03:02:56 +01:00
|
|
|
subFuncp->name(subFuncp->name() + "__Vtiming__"
|
|
|
|
|
+ cvtToStr(scopep->user2Inc()));
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
subFuncp->rtnType("VlCoroutine");
|
|
|
|
|
if (VN_IS(procp, Always)) {
|
|
|
|
|
subFuncp->slow(false);
|
|
|
|
|
FileLine* const flp = procp->fileline();
|
2025-09-29 16:25:25 +02:00
|
|
|
AstNodeExpr* const condp = new AstCExpr{
|
2025-10-19 10:44:33 +02:00
|
|
|
flp, "VL_LIKELY(!vlSymsp->_vm_contextp__->gotFinish())", 1};
|
2025-09-29 16:25:25 +02:00
|
|
|
AstLoop* const loopp = new AstLoop{flp};
|
|
|
|
|
loopp->addStmtsp(new AstLoopTest{flp, loopp, condp});
|
|
|
|
|
loopp->addStmtsp(bodyp);
|
|
|
|
|
bodyp = loopp;
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
}
|
|
|
|
|
}
|
2022-05-15 17:03:32 +02:00
|
|
|
subFuncp->addStmtsp(bodyp);
|
2023-07-14 17:12:02 +02:00
|
|
|
if (procp->needProcess()) subFuncp->setNeedProcess();
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(subFuncp);
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
logicp->unlinkFrBack();
|
|
|
|
|
subFuncp->addStmtsp(logicp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (activep->backp()) activep->unlinkFrBack();
|
|
|
|
|
VL_DO_DANGLING(activep->deleteTree(), activep);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//============================================================================
|
|
|
|
|
// Create simply ordered functions
|
|
|
|
|
|
2025-03-18 14:34:04 +01:00
|
|
|
// Creates the '_eval_static' top function (flagged slow) and hands it, together
// with the static logic class, to orderSequentially.
// Returns the created function; util::splitCheck is deliberately not run here,
// because the function is not final yet at this point.
AstCFunc* createStatic(AstNetlist* netlistp, const LogicClasses& logicClasses) {
    AstCFunc* const staticFuncp
        = util::makeTopFunction(netlistp, "_eval_static", /* slow: */ true);
    orderSequentially(staticFuncp, logicClasses.m_static);
    // Not splitting yet as it is not final
    return staticFuncp;
}
|
|
|
|
|
|
2025-03-18 14:34:04 +01:00
|
|
|
// Creates the '_eval_initial' top function (flagged slow), fills it from the
// initial logic class via orderSequentially, then runs util::splitCheck on it.
void createInitial(AstNetlist* netlistp, const LogicClasses& logicClasses) {
    AstCFunc* const initialFuncp
        = util::makeTopFunction(netlistp, "_eval_initial", /* slow: */ true);
    orderSequentially(initialFuncp, logicClasses.m_initial);
    util::splitCheck(initialFuncp);
}
|
|
|
|
|
|
2022-10-13 21:04:43 +02:00
|
|
|
// Creates the '_eval_postponed' top function (flagged slow) from the postponed
// logic class, and runs util::splitCheck on it.
// Returns the created function, or nullptr when there is no postponed logic
// (in which case no function is created at all).
AstCFunc* createPostponed(AstNetlist* netlistp, const LogicClasses& logicClasses) {
    AstCFunc* postponedFuncp = nullptr;
    if (!logicClasses.m_postponed.empty()) {
        postponedFuncp = util::makeTopFunction(netlistp, "_eval_postponed", /* slow: */ true);
        orderSequentially(postponedFuncp, logicClasses.m_postponed);
        util::splitCheck(postponedFuncp);
    }
    return postponedFuncp;
}
|
|
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Creates the '_eval_final' top function (flagged slow), fills it from the
// final logic class via orderSequentially, then runs util::splitCheck on it.
void createFinal(AstNetlist* netlistp, const LogicClasses& logicClasses) {
    AstCFunc* const finalFuncp
        = util::makeTopFunction(netlistp, "_eval_final", /* slow: */ true);
    orderSequentially(finalFuncp, logicClasses.m_final);
    util::splitCheck(finalFuncp);
}
|
|
|
|
|
|
2023-12-05 04:11:07 +01:00
|
|
|
//============================================================================
|
|
|
|
|
// Helper that creates virtual interface trigger resets
|
|
|
|
|
|
|
|
|
|
void addVirtIfaceTriggerAssignments(const VirtIfaceTriggers& virtIfaceTriggers,
|
2025-10-31 19:29:11 +01:00
|
|
|
uint32_t vifTriggerIndex, uint32_t vifMemberTriggerIndex,
|
|
|
|
|
const TriggerKit& trigKit) {
|
2025-10-29 22:27:15 +01:00
|
|
|
for (const auto& p : virtIfaceTriggers.m_memberTriggers) {
|
2025-10-31 19:29:11 +01:00
|
|
|
trigKit.addExtraTriggerAssignment(p.second, vifMemberTriggerIndex);
|
2025-10-29 22:27:15 +01:00
|
|
|
++vifMemberTriggerIndex;
|
|
|
|
|
}
|
2023-12-05 04:11:07 +01:00
|
|
|
}
|
|
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Order the combinational logic to create the settle loop
|
2022-10-22 16:05:39 +02:00
|
|
|
void createSettle(AstNetlist* netlistp, AstCFunc* const initFuncp, SenExprBuilder& senExprBulider,
|
2022-05-15 17:03:32 +02:00
|
|
|
LogicClasses& logicClasses) {
|
2025-10-27 16:16:28 +01:00
|
|
|
AstCFunc* const funcp = util::makeTopFunction(netlistp, "_eval_settle", true);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Clone, because ordering is destructive, but we still need them for "_eval"
|
|
|
|
|
LogicByScope comb = logicClasses.m_comb.clone();
|
|
|
|
|
LogicByScope hybrid = logicClasses.m_hybrid.clone();
|
|
|
|
|
|
|
|
|
|
// Nothing to do if there is no logic.
|
|
|
|
|
// While this is rare in real designs, it reduces noise in small tests.
|
|
|
|
|
if (comb.empty() && hybrid.empty()) return;
|
|
|
|
|
|
|
|
|
|
// We have an extra trigger denoting this is the first iteration of the settle loop
|
2025-10-31 19:29:11 +01:00
|
|
|
TriggerKit::ExtraTriggers extraTriggers;
|
|
|
|
|
const uint32_t firstIterationTrigger = extraTriggers.allocate("first iteration");
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Gather the relevant sensitivity expressions and create the trigger kit
|
|
|
|
|
const auto& senTreeps = getSenTreesUsedBy({&comb, &hybrid});
|
2025-11-01 16:43:20 +01:00
|
|
|
const TriggerKit trigKit = TriggerKit::create(netlistp, initFuncp, senExprBulider, {},
|
2026-02-11 19:35:59 +01:00
|
|
|
senTreeps, "stl", extraTriggers, true, false);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Remap sensitivities (comb has none, so only do the hybrid)
|
2025-11-01 16:43:20 +01:00
|
|
|
remapSensitivities(hybrid, trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create the inverse map from trigger ref AstSenTree to original AstSenTree
|
2024-03-09 13:43:09 +01:00
|
|
|
V3Order::TrigToSenMap trigToSen;
|
2025-11-01 16:43:20 +01:00
|
|
|
invertAndMergeSenTreeMap(trigToSen, trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// First trigger is for pure combinational triggers (first iteration)
|
2022-07-21 18:34:12 +02:00
|
|
|
AstSenTree* const inputChanged
|
2025-11-01 16:43:20 +01:00
|
|
|
= trigKit.newExtraTriggerSenTree(trigKit.vscp(), firstIterationTrigger);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create and the body function
|
2022-07-14 12:06:20 +02:00
|
|
|
AstCFunc* const stlFuncp = V3Order::order(
|
|
|
|
|
netlistp, {&comb, &hybrid}, trigToSen, "stl", false, true,
|
|
|
|
|
[=](const AstVarScope*, std::vector<AstSenTree*>& out) { out.push_back(inputChanged); });
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(stlFuncp);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create the eval loop
|
2023-10-28 09:14:38 +02:00
|
|
|
const EvalLoop stlLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "stl", "Settle", /* slow: */ true, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
trigKit.vscp(), nullptr,
|
|
|
|
|
// Explicit condition
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Prep statements: Compute the current 'stl' triggers
|
2026-02-11 19:35:59 +01:00
|
|
|
[&trigKit] {
|
|
|
|
|
AstNodeStmt* const stmtp = trigKit.newCompBaseCall();
|
|
|
|
|
if (stmtp) stmtp->addNext(trigKit.newDumpCall(trigKit.vscp(), trigKit.name(), true));
|
|
|
|
|
return stmtp;
|
|
|
|
|
}(),
|
2023-10-28 09:14:38 +02:00
|
|
|
// Work statements: Invoke the 'stl' function
|
2025-10-27 16:16:28 +01:00
|
|
|
util::callVoidFunc(stlFuncp));
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Add the first iteration trigger to the trigger computation function
|
2026-01-23 18:53:40 +01:00
|
|
|
trigKit.addExtraTriggerAssignment(stlLoop.firstIterp, firstIterationTrigger, false);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Add the eval loop to the top function
|
2023-10-28 09:14:38 +02:00
|
|
|
funcp->addStmtsp(stlLoop.stmtsp);
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//============================================================================
|
|
|
|
|
// Order the replicated combinational logic to create the 'ico' region
|
|
|
|
|
|
2022-10-22 16:05:39 +02:00
|
|
|
AstNode* createInputCombLoop(AstNetlist* netlistp, AstCFunc* const initFuncp,
|
2023-12-05 04:11:07 +01:00
|
|
|
SenExprBuilder& senExprBuilder, LogicByScope& logic,
|
|
|
|
|
const VirtIfaceTriggers& virtIfaceTriggers) {
|
2022-05-15 17:03:32 +02:00
|
|
|
// Nothing to do if no combinational logic is sensitive to top level inputs
|
|
|
|
|
if (logic.empty()) return nullptr;
|
|
|
|
|
|
|
|
|
|
// SystemC only: Any top level inputs feeding a combinational logic must be marked,
|
|
|
|
|
// so we can make them sc_sensitive
|
|
|
|
|
if (v3Global.opt.systemC()) {
|
|
|
|
|
logic.foreachLogic([](AstNode* logicp) {
|
2022-10-20 14:48:44 +02:00
|
|
|
logicp->foreach([](AstVarRef* refp) {
|
2022-05-15 17:03:32 +02:00
|
|
|
if (refp->access().isWriteOnly()) return;
|
|
|
|
|
AstVarScope* const vscp = refp->varScopep();
|
|
|
|
|
if (vscp->scopep()->isTop() && vscp->varp()->isNonOutput()) {
|
|
|
|
|
vscp->varp()->scSensitive(true);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-14 13:35:44 +02:00
|
|
|
// We have some extra trigger denoting external conditions
|
|
|
|
|
AstVarScope* const dpiExportTriggerVscp = netlistp->dpiExportTriggerp();
|
|
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
TriggerKit::ExtraTriggers extraTriggers;
|
|
|
|
|
const uint32_t firstIterationTrigger = extraTriggers.allocate("first iteration");
|
|
|
|
|
const uint32_t dpiExportTriggerIndex = dpiExportTriggerVscp
|
|
|
|
|
? extraTriggers.allocate("DPI export trigger")
|
|
|
|
|
: std::numeric_limits<uint32_t>::max();
|
2023-12-05 04:11:07 +01:00
|
|
|
const size_t firstVifTriggerIndex = extraTriggers.size();
|
2025-10-29 22:27:15 +01:00
|
|
|
const size_t firstVifMemberTriggerIndex = extraTriggers.size();
|
|
|
|
|
for (const auto& p : virtIfaceTriggers.m_memberTriggers) {
|
|
|
|
|
const auto& item = p.first;
|
|
|
|
|
extraTriggers.allocate("virtual interface member: " + item.m_ifacep->name() + "."
|
|
|
|
|
+ item.m_memberp->name());
|
|
|
|
|
}
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Gather the relevant sensitivity expressions and create the trigger kit
|
|
|
|
|
const auto& senTreeps = getSenTreesUsedBy({&logic});
|
2025-11-01 16:43:20 +01:00
|
|
|
const TriggerKit trigKit = TriggerKit::create(netlistp, initFuncp, senExprBuilder, {},
|
2026-02-11 19:35:59 +01:00
|
|
|
senTreeps, "ico", extraTriggers, false, false);
|
|
|
|
|
std::ignore = senExprBuilder.getAndClearResults();
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2022-07-14 13:35:44 +02:00
|
|
|
if (dpiExportTriggerVscp) {
|
2025-10-31 19:29:11 +01:00
|
|
|
trigKit.addExtraTriggerAssignment(dpiExportTriggerVscp, dpiExportTriggerIndex);
|
2022-07-14 13:35:44 +02:00
|
|
|
}
|
2025-10-29 22:27:15 +01:00
|
|
|
addVirtIfaceTriggerAssignments(virtIfaceTriggers, firstVifTriggerIndex,
|
2025-10-31 19:29:11 +01:00
|
|
|
firstVifMemberTriggerIndex, trigKit);
|
2022-07-14 13:35:44 +02:00
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Remap sensitivities
|
2025-11-01 16:43:20 +01:00
|
|
|
remapSensitivities(logic, trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create the inverse map from trigger ref AstSenTree to original AstSenTree
|
2024-03-09 13:43:09 +01:00
|
|
|
V3Order::TrigToSenMap trigToSen;
|
2025-11-01 16:43:20 +01:00
|
|
|
invertAndMergeSenTreeMap(trigToSen, trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2022-07-14 13:35:44 +02:00
|
|
|
// The trigger top level inputs (first iteration)
|
2022-07-21 18:34:12 +02:00
|
|
|
AstSenTree* const inputChanged
|
2025-11-01 16:43:20 +01:00
|
|
|
= trigKit.newExtraTriggerSenTree(trigKit.vscp(), firstIterationTrigger);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2022-07-14 13:35:44 +02:00
|
|
|
// The DPI Export trigger
|
|
|
|
|
AstSenTree* const dpiExportTriggered
|
2025-11-01 16:43:20 +01:00
|
|
|
= dpiExportTriggerVscp
|
|
|
|
|
? trigKit.newExtraTriggerSenTree(trigKit.vscp(), dpiExportTriggerIndex)
|
|
|
|
|
: nullptr;
|
2025-10-31 19:29:11 +01:00
|
|
|
const auto& vifMemberTriggeredIco = virtIfaceTriggers.makeMemberToSensMap(
|
|
|
|
|
trigKit, firstVifMemberTriggerIndex, trigKit.vscp());
|
2022-07-14 13:35:44 +02:00
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Create and Order the body function
|
2025-08-23 16:45:13 +02:00
|
|
|
AstCFunc* const icoFuncp = V3Order::order(
|
|
|
|
|
netlistp, {&logic}, trigToSen, "ico", false, false,
|
|
|
|
|
[=](const AstVarScope* vscp, std::vector<AstSenTree*>& out) {
|
|
|
|
|
AstVar* const varp = vscp->varp();
|
|
|
|
|
if (varp->isPrimaryInish() || varp->isSigUserRWPublic()) {
|
|
|
|
|
out.push_back(inputChanged);
|
|
|
|
|
}
|
|
|
|
|
if (varp->isWrittenByDpi()) out.push_back(dpiExportTriggered);
|
2026-02-06 23:20:10 +01:00
|
|
|
if (vscp->varp()->isVirtIface()) {
|
2025-08-23 16:45:13 +02:00
|
|
|
std::vector<AstSenTree*> ifaceTriggered
|
2026-02-06 23:20:10 +01:00
|
|
|
= findTriggeredIface(vscp, vifMemberTriggeredIco);
|
2025-08-23 16:45:13 +02:00
|
|
|
out.insert(out.end(), ifaceTriggered.begin(), ifaceTriggered.end());
|
|
|
|
|
}
|
|
|
|
|
});
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(icoFuncp);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create the eval loop
|
2023-10-28 09:14:38 +02:00
|
|
|
const EvalLoop icoLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 if found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "ico", "Input combinational", /* slow: */ false, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
trigKit.vscp(), nullptr,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Prep statements: Compute the current 'ico' triggers
|
2026-02-11 19:35:59 +01:00
|
|
|
[&trigKit] {
|
|
|
|
|
AstNodeStmt* const stmtp = trigKit.newCompBaseCall();
|
|
|
|
|
if (stmtp) stmtp->addNext(trigKit.newDumpCall(trigKit.vscp(), trigKit.name(), true));
|
|
|
|
|
return stmtp;
|
|
|
|
|
}(),
|
2023-10-28 09:14:38 +02:00
|
|
|
// Work statements: Invoke the 'ico' function
|
2025-10-27 16:16:28 +01:00
|
|
|
util::callVoidFunc(icoFuncp));
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Add the first iteration trigger to the trigger computation function
|
2026-01-23 18:53:40 +01:00
|
|
|
trigKit.addExtraTriggerAssignment(icoLoop.firstIterp, firstIterationTrigger, false);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
return icoLoop.stmtsp;
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//============================================================================
|
2025-10-31 19:29:11 +01:00
|
|
|
// EvalKit groups items that have to be passed to createEval() for a given eval region
|
2022-12-23 13:34:49 +01:00
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
struct EvalKit final {
|
|
|
|
|
// The AstVarScope representing the region's trigger vector
|
|
|
|
|
AstVarScope* const m_vscp = nullptr;
|
|
|
|
|
// The AstCFunc that evaluates the region's logic
|
|
|
|
|
AstCFunc* const m_funcp = nullptr;
|
|
|
|
|
// Is this kit used/required?
|
|
|
|
|
bool empty() const { return !m_funcp; }
|
|
|
|
|
};
|
2022-12-23 13:34:49 +01:00
|
|
|
|
|
|
|
|
//============================================================================
|
|
|
|
|
// Bolt together parts to create the top level _eval function
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
void createEval(AstNetlist* netlistp, //
|
|
|
|
|
AstNode* icoLoop, //
|
2025-10-31 19:29:11 +01:00
|
|
|
const TriggerKit& trigKit, //
|
2022-12-23 13:34:49 +01:00
|
|
|
const EvalKit& actKit, //
|
|
|
|
|
const EvalKit& nbaKit, //
|
|
|
|
|
const EvalKit& obsKit, //
|
|
|
|
|
const EvalKit& reactKit, //
|
2022-10-13 21:04:43 +02:00
|
|
|
AstCFunc* postponedFuncp, //
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
TimingKit& timingKit //
|
2022-05-15 17:03:32 +02:00
|
|
|
) {
|
|
|
|
|
FileLine* const flp = netlistp->fileline();
|
|
|
|
|
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
// Grab the delay scheduler variable, if any
|
|
|
|
|
AstVarScope* const delaySchedVscp = timingKit.getDelayScheduler(netlistp);
|
|
|
|
|
|
2026-02-11 19:35:59 +01:00
|
|
|
// 'createResume' consumes the contents that 'createReady' needs, so do the right order
|
|
|
|
|
AstCCall* const timingReadyp = timingKit.createReady(netlistp);
|
2025-10-27 11:41:30 +01:00
|
|
|
AstCCall* const timingResumep = timingKit.createResume(netlistp);
|
|
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Create the active eval loop
|
2025-10-31 19:29:11 +01:00
|
|
|
EvalLoop topLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "act", "Active", /* slow: */ false, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
actKit.m_vscp, nullptr,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Prep statements
|
|
|
|
|
[&]() {
|
2025-11-01 16:43:20 +01:00
|
|
|
// Compute the current 'act' triggers - the NBA triggers are the latched value
|
2026-02-11 19:35:59 +01:00
|
|
|
AstNodeStmt* stmtsp = trigKit.newCompBaseCall();
|
|
|
|
|
AstNodeStmt* const dumpp
|
|
|
|
|
= stmtsp ? trigKit.newDumpCall(trigKit.vscp(), trigKit.name(), true) : nullptr;
|
|
|
|
|
// Mark as ready for triggered awaits
|
|
|
|
|
if (timingReadyp) stmtsp = AstNode::addNext(stmtsp, timingReadyp->makeStmt());
|
|
|
|
|
if (AstVarScope* const vscAccp = trigKit.vscAccp()) {
|
|
|
|
|
stmtsp = AstNode::addNext(stmtsp, trigKit.newOrIntoCall(actKit.m_vscp, vscAccp));
|
|
|
|
|
}
|
|
|
|
|
stmtsp = AstNode::addNext(stmtsp, trigKit.newCompExtCall(nbaKit.m_vscp));
|
|
|
|
|
stmtsp = AstNode::addNext(stmtsp, dumpp);
|
2025-10-31 19:29:11 +01:00
|
|
|
// Latch the 'act' triggers under the 'nba' triggers
|
|
|
|
|
stmtsp = AstNode::addNext(stmtsp, trigKit.newOrIntoCall(nbaKit.m_vscp, actKit.m_vscp));
|
2023-10-28 09:14:38 +02:00
|
|
|
//
|
|
|
|
|
return stmtsp;
|
|
|
|
|
}(),
|
|
|
|
|
// Work statements
|
|
|
|
|
[&]() {
|
2025-10-31 19:29:11 +01:00
|
|
|
AstNodeStmt* workp = nullptr;
|
2026-02-11 19:35:59 +01:00
|
|
|
if (AstVarScope* const actAccp = trigKit.vscAccp()) {
|
|
|
|
|
AstCMethodHard* const cCallp = new AstCMethodHard{
|
|
|
|
|
flp, new AstVarRef{flp, actAccp, VAccess::WRITE}, VCMethod::UNPACKED_FILL,
|
|
|
|
|
new AstConst{flp, AstConst::Unsized64{}, 0}};
|
|
|
|
|
cCallp->dtypeSetVoid();
|
|
|
|
|
workp = AstNode::addNext(workp, cCallp->makeStmt());
|
|
|
|
|
}
|
2023-10-21 02:01:45 +02:00
|
|
|
// Resume triggered timing schedulers
|
2026-02-11 19:35:59 +01:00
|
|
|
if (timingResumep) workp = AstNode::addNext(workp, timingResumep->makeStmt());
|
2023-10-28 09:14:38 +02:00
|
|
|
// Invoke the 'act' function
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = AstNode::addNext(workp, util::callVoidFunc(actKit.m_funcp));
|
2023-10-28 09:14:38 +02:00
|
|
|
//
|
|
|
|
|
return workp;
|
|
|
|
|
}());
|
|
|
|
|
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
// Create if there are any delays, so we can check at runtime if a #0 is unexpected
|
|
|
|
|
if (delaySchedVscp) {
|
|
|
|
|
topLoop = createEvalLoop( //
|
|
|
|
|
netlistp, "inact", "Inactive", /* slow: */ false, trigKit,
|
|
|
|
|
// Use explicit condition
|
|
|
|
|
nullptr,
|
|
|
|
|
[&]() {
|
|
|
|
|
// Run if any zero delays are pending
|
|
|
|
|
AstNodeExpr* const callp
|
|
|
|
|
= new AstCMethodHard{flp, new AstVarRef{flp, delaySchedVscp, VAccess::READ},
|
|
|
|
|
VCMethod::SCHED_AWAITING_ZERO_DELAY};
|
|
|
|
|
callp->dtypeSetBit();
|
|
|
|
|
return callp;
|
|
|
|
|
}(),
|
|
|
|
|
// Inner loop statements
|
|
|
|
|
topLoop.stmtsp,
|
|
|
|
|
// Prep statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Work statements
|
|
|
|
|
[&]() -> AstNodeStmt* {
|
|
|
|
|
if (v3Global.usesZeroDelay()) {
|
|
|
|
|
// Resume processes waiting for #0 delay
|
|
|
|
|
AstCMethodHard* const callp = new AstCMethodHard{
|
|
|
|
|
flp, new AstVarRef{flp, delaySchedVscp, VAccess::READWRITE},
|
|
|
|
|
VCMethod::SCHED_RESUME_ZERO_DELAY};
|
|
|
|
|
callp->dtypeSetVoid();
|
|
|
|
|
return callp->makeStmt();
|
|
|
|
|
} else {
|
|
|
|
|
// Assumption was that the design doesn't use #0 delays.
|
|
|
|
|
// Die at run-time if it does.
|
|
|
|
|
AstCStmt* const stmtp = new AstCStmt{flp};
|
|
|
|
|
const FileLine* const locp = netlistp->topModulep()->fileline();
|
|
|
|
|
const std::string& file = VIdProtect::protect(locp->filename());
|
|
|
|
|
const std::string& line = std::to_string(locp->lineno());
|
|
|
|
|
stmtp->add(
|
|
|
|
|
"VL_FATAL_MT(\"" + V3OutFormatter::quoteNameControls(file) + "\", " + line
|
|
|
|
|
+ ", \"\", \"ZERODLY: Design Verilated with '--no-sched-zero-delay', "
|
|
|
|
|
+ "but #0 delay executed at runtime\");");
|
|
|
|
|
return stmtp;
|
|
|
|
|
}
|
|
|
|
|
}());
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
// Create the NBA eval loop, which is the default top level loop.
|
2025-10-31 19:29:11 +01:00
|
|
|
topLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "nba", "NBA", /* slow: */ false, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
nbaKit.m_vscp, nullptr,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
2025-10-31 19:29:11 +01:00
|
|
|
topLoop.stmtsp,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Prep statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Work statements
|
|
|
|
|
[&]() {
|
|
|
|
|
AstNodeStmt* workp = nullptr;
|
|
|
|
|
// Latch the 'nba' trigger flags under the following region's trigger flags
|
|
|
|
|
if (!obsKit.empty()) {
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = trigKit.newOrIntoCall(obsKit.m_vscp, nbaKit.m_vscp);
|
2023-10-28 09:14:38 +02:00
|
|
|
} else if (!reactKit.empty()) {
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = trigKit.newOrIntoCall(reactKit.m_vscp, nbaKit.m_vscp);
|
2023-10-21 02:01:45 +02:00
|
|
|
}
|
2023-10-28 09:14:38 +02:00
|
|
|
// Invoke the 'nba' function
|
2025-10-27 16:16:28 +01:00
|
|
|
workp = AstNode::addNext(workp, util::callVoidFunc(nbaKit.m_funcp));
|
2023-10-28 09:14:38 +02:00
|
|
|
// Clear the 'nba' triggers
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = AstNode::addNext(workp, trigKit.newClearCall(nbaKit.m_vscp));
|
2023-10-28 09:14:38 +02:00
|
|
|
//
|
|
|
|
|
return workp;
|
2023-12-10 14:32:24 +01:00
|
|
|
}(),
|
|
|
|
|
// Extra work (not conditional on having had a fired trigger)
|
|
|
|
|
[&](AstVarScope* continuep) -> AstNodeStmt* {
|
|
|
|
|
// Check if any dynamic NBAs are pending, if there are any in the design
|
|
|
|
|
if (!netlistp->nbaEventp()) return nullptr;
|
|
|
|
|
AstVarScope* const nbaEventp = netlistp->nbaEventp();
|
|
|
|
|
AstVarScope* const nbaEventTriggerp = netlistp->nbaEventTriggerp();
|
|
|
|
|
UASSERT(nbaEventTriggerp, "NBA event trigger var should exist");
|
|
|
|
|
netlistp->nbaEventp(nullptr);
|
|
|
|
|
netlistp->nbaEventTriggerp(nullptr);
|
|
|
|
|
|
2026-02-11 19:35:59 +01:00
|
|
|
// If a dynamic NBA is pending, clear the pending flag and fire the ready event
|
2023-12-10 14:32:24 +01:00
|
|
|
AstIf* const ifp = new AstIf{flp, new AstVarRef{flp, nbaEventTriggerp, VAccess::READ}};
|
2025-10-27 16:16:28 +01:00
|
|
|
ifp->addThensp(util::setVar(continuep, 1));
|
|
|
|
|
ifp->addThensp(util::setVar(nbaEventTriggerp, 0));
|
2025-09-27 14:22:17 +02:00
|
|
|
AstCMethodHard* const firep = new AstCMethodHard{
|
|
|
|
|
flp, new AstVarRef{flp, nbaEventp, VAccess::WRITE}, VCMethod::EVENT_FIRE};
|
2023-12-10 14:32:24 +01:00
|
|
|
firep->dtypeSetVoid();
|
|
|
|
|
ifp->addThensp(firep->makeStmt());
|
|
|
|
|
return ifp;
|
|
|
|
|
});
|
2023-10-21 02:01:45 +02:00
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
if (!obsKit.empty()) {
|
|
|
|
|
// Create the Observed eval loop, which becomes the top level loop.
|
|
|
|
|
topLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduced the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "obs", "Observed", /* slow: */ false, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
obsKit.m_vscp, nullptr,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
|
|
|
|
topLoop.stmtsp,
|
|
|
|
|
// Prep statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Work statements
|
|
|
|
|
[&]() {
|
|
|
|
|
AstNodeStmt* workp = nullptr;
|
|
|
|
|
// Latch the Observed trigger flags under the Reactive trigger flags
|
|
|
|
|
if (!reactKit.empty()) {
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = trigKit.newOrIntoCall(reactKit.m_vscp, obsKit.m_vscp);
|
2023-10-28 09:14:38 +02:00
|
|
|
}
|
|
|
|
|
// Invoke the 'obs' function
|
2025-10-27 16:16:28 +01:00
|
|
|
workp = AstNode::addNext(workp, util::callVoidFunc(obsKit.m_funcp));
|
2023-10-28 09:14:38 +02:00
|
|
|
// Clear the 'obs' triggers
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = AstNode::addNext(workp, trigKit.newClearCall(obsKit.m_vscp));
|
2023-10-28 09:14:38 +02:00
|
|
|
//
|
|
|
|
|
return workp;
|
|
|
|
|
}());
|
2022-12-23 13:34:49 +01:00
|
|
|
}
|
|
|
|
|
|
2023-10-28 09:14:38 +02:00
|
|
|
if (!reactKit.empty()) {
|
|
|
|
|
// Create the Reactive eval loop, which becomes the top level loop.
|
|
|
|
|
topLoop = createEvalLoop( //
|
Support #0 delays with IEEE-1800 compliant semantics (#7079)
This patch adds IEEE-1800 compliant scheduling support for the Inactive
scheduling region used for #0 delays.
Implementing this requires that **all** IEEE-1800 active region events
are placed in the internal 'act' section. This has simulation
performance implications. It prevents some optimizations (e.g.
V3LifePost), which reduces single threaded performance. It also reduces
the available work and parallelism in the internal 'nba' section, which
reduces the effectiveness of multi-threading severely.
Performance impact on RTLMeter when using scheduling adjusted to support
proper #0 delays is ~10-20% slowdown in single-threaded mode, and ~100%
(2x slower) with --threads 4.
To avoid paying this performance penalty unconditionally, the scheduling
is only adjusted if either:
1. The input contains a statically known #0 delay
2. The input contains a variable #x delay unknown at compile time
If no #0 is present, but #x variable delays are, a ZERODLY warning is
issued advising the use of '--no-sched-zero-delay' which is a promise
by the user that none of the variable delays will evaluate to a zero
delay at run-time. This warning is turned off if '--sched-zero-delay'
is explicitly given. This is similar to the '--timing' option.
If '--no-sched-zero-delay' was used at compile time, then executing
a zero delay will fail at runtime.
A ZERODLY warning is also issued if a static #0 is found, but the user
specified '--no-sched-zero-delay'. In this case the scheduling is not
adjusted to support #0, so executing it will fail at runtime. Presumably
the user knows it won't be executed.
The intended behaviour with all this is the following:
No #0, no #var in the design (#constant is OK)
-> Same as current behaviour, scheduling not adjusted,
same code generated as before
Has static #0 and '--no-sched-zero-delay' is NOT given:
-> No warnings, scheduling adjusted so it just works, runs slow
Has static #0 and '--no-sched-zero-delay' is given:
-> ZERODLY on the #0, scheduling not adjusted, fails at runtime if hit
No static #0, but has #var and no option is given:
-> ZERODLY on the #var advising use of '--no-sched-zero-delay' or
'--sched-zero-delay' (similar to '--timing'), scheduling adjusted
assuming it can be a zero delay and it just works
No static #0, but has #var and '--no-sched-zero-delay' is given:
-> No warning, scheduling not adjusted, fails at runtime if zero delay
No static #0, but has #var and '--sched-zero-delay' is given:
-> No warning, scheduling adjusted so it just works
2026-02-16 04:55:55 +01:00
|
|
|
netlistp, "react", "Reactive", /* slow: */ false, trigKit,
|
|
|
|
|
// Use trigger
|
|
|
|
|
reactKit.m_vscp, nullptr,
|
2023-10-28 09:14:38 +02:00
|
|
|
// Inner loop statements
|
|
|
|
|
topLoop.stmtsp,
|
|
|
|
|
// Prep statements
|
|
|
|
|
nullptr,
|
|
|
|
|
// Work statements
|
|
|
|
|
[&]() {
|
|
|
|
|
// Invoke the 'react' function
|
2025-10-31 19:29:11 +01:00
|
|
|
AstNodeStmt* workp = util::callVoidFunc(reactKit.m_funcp);
|
2023-10-28 09:14:38 +02:00
|
|
|
// Clear the 'react' triggers
|
2025-10-31 19:29:11 +01:00
|
|
|
workp = AstNode::addNext(workp, trigKit.newClearCall(reactKit.m_vscp));
|
2023-10-28 09:14:38 +02:00
|
|
|
return workp;
|
|
|
|
|
}());
|
2022-12-23 13:34:49 +01:00
|
|
|
}
|
2023-10-28 09:14:38 +02:00
|
|
|
|
|
|
|
|
// Now that we have built the loops, create the main 'eval' function
|
2025-10-27 16:16:28 +01:00
|
|
|
AstCFunc* const funcp = util::makeTopFunction(netlistp, "_eval", false);
|
2023-10-28 09:14:38 +02:00
|
|
|
netlistp->evalp(funcp);
|
|
|
|
|
|
2025-11-25 06:53:59 +01:00
|
|
|
if (v3Global.opt.profExec()) funcp->addStmtsp(AstCStmt::profExecSectionPush(flp, "eval"));
|
2023-10-28 09:14:38 +02:00
|
|
|
|
|
|
|
|
// Start with the ico loop, if any
|
|
|
|
|
if (icoLoop) funcp->addStmtsp(icoLoop);
|
|
|
|
|
|
|
|
|
|
// Execute the top level eval loop
|
|
|
|
|
funcp->addStmtsp(topLoop.stmtsp);
|
2022-10-13 21:04:43 +02:00
|
|
|
|
|
|
|
|
// Add the Postponed eval call
|
2025-10-27 16:16:28 +01:00
|
|
|
if (postponedFuncp) funcp->addStmtsp(util::callVoidFunc(postponedFuncp));
|
2023-10-28 09:14:38 +02:00
|
|
|
|
2025-11-25 06:53:59 +01:00
|
|
|
if (v3Global.opt.profExec()) funcp->addStmtsp(AstCStmt::profExecSectionPop(flp, "eval"));
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
2023-12-05 04:11:07 +01:00
|
|
|
//============================================================================
|
|
|
|
|
// Helper that builds virtual interface trigger sentrees
|
|
|
|
|
|
2025-07-12 03:04:51 +02:00
|
|
|
// Builds a map keyed by the first element of each 'm_memberTriggers' entry. The mapped
// value is the sensitivity tree returned by TriggerKit::newExtraTriggerSenTree for the
// given trigger vector variable 'trigVscp'. Entries are visited in 'm_memberTriggers'
// order and are assigned consecutive extra trigger indices starting at 'vifTriggerIndex'.
VirtIfaceTriggers::IfaceMemberSensMap VirtIfaceTriggers::makeMemberToSensMap(
    const TriggerKit& trigKit, uint32_t vifTriggerIndex, AstVarScope* trigVscp) const {
    IfaceMemberSensMap memberToSens;
    uint32_t triggerIndex = vifTriggerIndex;
    for (const auto& memberTrigger : m_memberTriggers) {
        // The index advances once per entry, regardless of whether emplace inserted
        memberToSens.emplace(memberTrigger.first,
                             trigKit.newExtraTriggerSenTree(trigVscp, triggerIndex));
        ++triggerIndex;
    }
    return memberToSens;
}
|
|
|
|
|
|
2025-11-01 16:43:20 +01:00
|
|
|
std::unordered_map<const AstSenTree*, AstSenTree*>
|
|
|
|
|
cloneMapWithNewTriggerReferences(const std::unordered_map<const AstSenTree*, AstSenTree*>& map,
|
|
|
|
|
AstVarScope* vscp) {
|
|
|
|
|
AstTopScope* const topScopep = v3Global.rootp()->topScopep();
|
|
|
|
|
// Copy map
|
|
|
|
|
std::unordered_map<const AstSenTree*, AstSenTree*> newMap{map};
|
|
|
|
|
// Replace references in each mapped value with a reference to the given vscp
|
|
|
|
|
for (auto& pair : newMap) {
|
|
|
|
|
pair.second = pair.second->cloneTree(false);
|
|
|
|
|
pair.second->foreach([&](AstVarRef* refp) {
|
|
|
|
|
UASSERT_OBJ(refp->access() == VAccess::READ, refp, "Should be read ref");
|
|
|
|
|
refp->replaceWith(new AstVarRef{refp->fileline(), vscp, VAccess::READ});
|
|
|
|
|
VL_DO_DANGLING(refp->deleteTree(), refp);
|
|
|
|
|
});
|
|
|
|
|
topScopep->addSenTreesp(pair.second);
|
|
|
|
|
}
|
|
|
|
|
return newMap;
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-27 11:41:30 +01:00
|
|
|
//============================================================================
|
2022-05-15 17:03:32 +02:00
|
|
|
// Top level entry-point to scheduling
|
|
|
|
|
|
|
|
|
|
void schedule(AstNetlist* netlistp) {
|
2022-07-30 18:49:30 +02:00
|
|
|
const auto addSizeStat = [](const string& name, const LogicByScope& lbs) {
|
2022-05-15 17:03:32 +02:00
|
|
|
uint64_t size = 0;
|
|
|
|
|
lbs.foreachLogic([&](AstNode* nodep) { size += nodep->nodeCount(); });
|
|
|
|
|
V3Stats::addStat("Scheduling, " + name, size);
|
|
|
|
|
};
|
|
|
|
|
|
2023-12-05 04:11:07 +01:00
|
|
|
// Step 0. Prepare external domains for timing and virtual interfaces
|
|
|
|
|
// Create extra triggers for virtual interfaces
|
|
|
|
|
const auto& virtIfaceTriggers = makeVirtIfaceTriggers(netlistp);
|
|
|
|
|
// Prepare timing-related logic and external domains
|
2023-10-28 09:14:38 +02:00
|
|
|
TimingKit timingKit = prepareTiming(netlistp);
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
// Step 1. Gather and classify all logic in the design
|
|
|
|
|
LogicClasses logicClasses = gatherLogicClasses(netlistp);
|
|
|
|
|
|
|
|
|
|
if (v3Global.opt.stats()) {
|
|
|
|
|
V3Stats::statsStage("sched-gather");
|
|
|
|
|
addSizeStat("size of class: static", logicClasses.m_static);
|
|
|
|
|
addSizeStat("size of class: initial", logicClasses.m_initial);
|
|
|
|
|
addSizeStat("size of class: final", logicClasses.m_final);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 2. Schedule static, initial and final logic classes in source order
|
2025-03-18 14:34:04 +01:00
|
|
|
AstCFunc* const staticp = createStatic(netlistp, logicClasses);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-static");
|
|
|
|
|
|
2025-03-18 14:34:04 +01:00
|
|
|
createInitial(netlistp, logicClasses);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-initial");
|
|
|
|
|
|
|
|
|
|
createFinal(netlistp, logicClasses);
|
|
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-final");
|
|
|
|
|
|
|
|
|
|
// Step 3: Break combinational cycles by introducing hybrid logic
|
|
|
|
|
// Note: breakCycles also removes corresponding logic from logicClasses.m_comb;
|
|
|
|
|
logicClasses.m_hybrid = breakCycles(netlistp, logicClasses.m_comb);
|
|
|
|
|
if (v3Global.opt.stats()) {
|
|
|
|
|
addSizeStat("size of class: clocked", logicClasses.m_clocked);
|
|
|
|
|
addSizeStat("size of class: combinational", logicClasses.m_comb);
|
|
|
|
|
addSizeStat("size of class: hybrid", logicClasses.m_hybrid);
|
|
|
|
|
V3Stats::statsStage("sched-break-cycles");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// We pass around a single SenExprBuilder instance, as we only need one set of 'prev' variables
|
|
|
|
|
// for edge/change detection in sensitivity expressions, which this keeps track of.
|
2022-10-22 16:05:39 +02:00
|
|
|
AstTopScope* const topScopep = netlistp->topScopep();
|
|
|
|
|
AstScope* const scopeTopp = topScopep->scopep();
|
|
|
|
|
SenExprBuilder senExprBuilder{scopeTopp};
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Step 4: Create 'settle' region that restores the combinational invariant
|
2025-03-18 14:34:04 +01:00
|
|
|
createSettle(netlistp, staticp, senExprBuilder, logicClasses);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-settle");
|
|
|
|
|
|
|
|
|
|
// Step 5: Partition the clocked and combinational (including hybrid) logic into pre/act/nba.
|
|
|
|
|
// All clocks (signals referenced in an AstSenTree) generated via a blocking assignment
|
|
|
|
|
// (including combinationally generated signals) are computed within the act region.
|
|
|
|
|
LogicRegions logicRegions
|
|
|
|
|
= partition(logicClasses.m_clocked, logicClasses.m_comb, logicClasses.m_hybrid);
|
2024-07-10 00:31:58 +02:00
|
|
|
logicRegions.m_obs = logicClasses.m_observed;
|
|
|
|
|
logicRegions.m_react = logicClasses.m_reactive;
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) {
|
|
|
|
|
addSizeStat("size of region: Active Pre", logicRegions.m_pre);
|
|
|
|
|
addSizeStat("size of region: Active", logicRegions.m_act);
|
|
|
|
|
addSizeStat("size of region: NBA", logicRegions.m_nba);
|
2024-07-10 00:31:58 +02:00
|
|
|
addSizeStat("size of region: Observed", logicRegions.m_obs);
|
|
|
|
|
addSizeStat("size of region: Reactive", logicRegions.m_react);
|
2022-05-15 17:03:32 +02:00
|
|
|
V3Stats::statsStage("sched-partition");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 6: Replicate combinational logic
|
|
|
|
|
LogicReplicas logicReplicas = replicateLogic(logicRegions);
|
|
|
|
|
if (v3Global.opt.stats()) {
|
|
|
|
|
addSizeStat("size of replicated logic: Input", logicReplicas.m_ico);
|
|
|
|
|
addSizeStat("size of replicated logic: Active", logicReplicas.m_act);
|
|
|
|
|
addSizeStat("size of replicated logic: NBA", logicReplicas.m_nba);
|
2024-07-10 00:31:58 +02:00
|
|
|
addSizeStat("size of replicated logic: Observed", logicReplicas.m_obs);
|
|
|
|
|
addSizeStat("size of replicated logic: Reactive", logicReplicas.m_react);
|
2022-05-15 17:03:32 +02:00
|
|
|
V3Stats::statsStage("sched-replicate");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 7: Create input combinational logic loop
|
2025-03-18 14:34:04 +01:00
|
|
|
AstNode* const icoLoopp = createInputCombLoop(netlistp, staticp, senExprBuilder,
|
2023-12-05 04:11:07 +01:00
|
|
|
logicReplicas.m_ico, virtIfaceTriggers);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-create-ico");
|
|
|
|
|
|
2025-11-01 16:43:20 +01:00
|
|
|
// Step 8: Create the triggers
|
2022-07-14 13:35:44 +02:00
|
|
|
AstVarScope* const dpiExportTriggerVscp = netlistp->dpiExportTriggerp();
|
2025-10-26 10:25:34 +01:00
|
|
|
netlistp->dpiExportTriggerp(nullptr); // Finished with this here
|
2022-07-14 13:35:44 +02:00
|
|
|
|
|
|
|
|
// We may have an extra trigger for variable updated in DPI exports
|
2025-10-31 19:29:11 +01:00
|
|
|
TriggerKit::ExtraTriggers extraTriggers;
|
|
|
|
|
const uint32_t dpiExportTriggerIndex = dpiExportTriggerVscp
|
|
|
|
|
? extraTriggers.allocate("DPI export trigger")
|
|
|
|
|
: std::numeric_limits<uint32_t>::max();
|
|
|
|
|
const uint32_t firstVifTriggerIndex = extraTriggers.size();
|
|
|
|
|
const uint32_t firstVifMemberTriggerIndex = extraTriggers.size();
|
2025-10-29 22:27:15 +01:00
|
|
|
for (const auto& p : virtIfaceTriggers.m_memberTriggers) {
|
|
|
|
|
const auto& item = p.first;
|
|
|
|
|
extraTriggers.allocate("virtual interface member: " + item.m_ifacep->name() + "."
|
|
|
|
|
+ item.m_memberp->name());
|
|
|
|
|
}
|
2022-07-14 13:35:44 +02:00
|
|
|
|
2025-11-01 16:43:20 +01:00
|
|
|
const auto& preTreeps = getSenTreesUsedBy({&logicRegions.m_pre});
|
|
|
|
|
const auto& senTreeps = getSenTreesUsedBy({&logicRegions.m_act, //
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
&logicRegions.m_nba, //
|
2024-07-10 00:31:58 +02:00
|
|
|
&logicRegions.m_obs, //
|
|
|
|
|
&logicRegions.m_react, //
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
&timingKit.m_lbs});
|
2026-02-11 19:35:59 +01:00
|
|
|
const TriggerKit trigKit
|
|
|
|
|
= TriggerKit::create(netlistp, staticp, senExprBuilder, preTreeps, senTreeps, "act",
|
|
|
|
|
extraTriggers, false, v3Global.usesTiming());
|
2022-10-22 16:05:39 +02:00
|
|
|
|
|
|
|
|
// Add post updates from the timing kit
|
2026-02-11 19:35:59 +01:00
|
|
|
if (timingKit.m_postUpdates) trigKit.compBasep()->addStmtsp(timingKit.m_postUpdates);
|
2022-07-14 13:35:44 +02:00
|
|
|
|
|
|
|
|
if (dpiExportTriggerVscp) {
|
2025-10-31 19:29:11 +01:00
|
|
|
trigKit.addExtraTriggerAssignment(dpiExportTriggerVscp, dpiExportTriggerIndex);
|
2022-07-14 13:35:44 +02:00
|
|
|
}
|
2025-10-29 22:27:15 +01:00
|
|
|
addVirtIfaceTriggerAssignments(virtIfaceTriggers, firstVifTriggerIndex,
|
2025-10-31 19:29:11 +01:00
|
|
|
firstVifMemberTriggerIndex, trigKit);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-create-triggers");
|
|
|
|
|
|
|
|
|
|
// Note: Experiments so far show that running the Act (or Ico) regions on
|
|
|
|
|
// multiple threads is always a net loss, so only use multi-threading for
|
|
|
|
|
// NBA for now. This can be revised if evidence is available that it would
|
|
|
|
|
// be beneficial
|
|
|
|
|
|
|
|
|
|
// Step 9: Create the 'act' region evaluation function
|
|
|
|
|
|
|
|
|
|
// Remap sensitivities of the input logic to the triggers
|
2025-11-01 16:43:20 +01:00
|
|
|
remapSensitivities(logicRegions.m_pre, trigKit.mapPre());
|
|
|
|
|
remapSensitivities(logicRegions.m_act, trigKit.mapVec());
|
|
|
|
|
remapSensitivities(logicReplicas.m_act, trigKit.mapVec());
|
|
|
|
|
remapSensitivities(timingKit.m_lbs, trigKit.mapVec());
|
|
|
|
|
const std::map<const AstVarScope*, std::vector<AstSenTree*>> actTimingDomains
|
|
|
|
|
= timingKit.remapDomains(trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
|
|
|
|
// Create the inverse map from trigger ref AstSenTree to original AstSenTree
|
2024-03-09 13:43:09 +01:00
|
|
|
V3Order::TrigToSenMap trigToSenAct;
|
2025-11-01 16:43:20 +01:00
|
|
|
invertAndMergeSenTreeMap(trigToSenAct, trigKit.mapPre());
|
|
|
|
|
invertAndMergeSenTreeMap(trigToSenAct, trigKit.mapVec());
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2022-07-14 13:35:44 +02:00
|
|
|
// The DPI Export trigger AstSenTree
|
2022-07-21 18:34:12 +02:00
|
|
|
AstSenTree* const dpiExportTriggeredAct
|
2025-11-01 16:43:20 +01:00
|
|
|
= dpiExportTriggerVscp
|
|
|
|
|
? trigKit.newExtraTriggerSenTree(trigKit.vscp(), dpiExportTriggerIndex)
|
|
|
|
|
: nullptr;
|
2022-07-14 13:35:44 +02:00
|
|
|
|
2025-10-29 22:27:15 +01:00
|
|
|
const auto& vifMemberTriggeredAct = virtIfaceTriggers.makeMemberToSensMap(
|
2025-10-31 19:29:11 +01:00
|
|
|
trigKit, firstVifMemberTriggerIndex, trigKit.vscp());
|
2023-12-05 04:11:07 +01:00
|
|
|
|
2022-05-15 17:03:32 +02:00
|
|
|
AstCFunc* const actFuncp = V3Order::order(
|
|
|
|
|
netlistp, {&logicRegions.m_pre, &logicRegions.m_act, &logicReplicas.m_act}, trigToSenAct,
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
"act", false, false, [&](const AstVarScope* vscp, std::vector<AstSenTree*>& out) {
|
|
|
|
|
auto it = actTimingDomains.find(vscp);
|
|
|
|
|
if (it != actTimingDomains.end()) out = it->second;
|
2022-07-21 18:34:12 +02:00
|
|
|
if (vscp->varp()->isWrittenByDpi()) out.push_back(dpiExportTriggeredAct);
|
2026-02-06 23:20:10 +01:00
|
|
|
if (vscp->varp()->isVirtIface()) {
|
2025-08-23 16:45:13 +02:00
|
|
|
std::vector<AstSenTree*> ifaceTriggered
|
2026-02-06 23:20:10 +01:00
|
|
|
= findTriggeredIface(vscp, vifMemberTriggeredAct);
|
2025-08-23 16:45:13 +02:00
|
|
|
out.insert(out.end(), ifaceTriggered.begin(), ifaceTriggered.end());
|
2023-12-05 04:11:07 +01:00
|
|
|
}
|
2022-07-14 13:35:44 +02:00
|
|
|
});
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(actFuncp);
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-create-act");
|
|
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
const EvalKit actKit{trigKit.vscp(), actFuncp};
|
2022-12-23 13:34:49 +01:00
|
|
|
|
|
|
|
|
// Orders a region's logic and creates the region eval function
|
|
|
|
|
const auto order = [&](const std::string& name,
|
|
|
|
|
const std::vector<V3Sched::LogicByScope*>& logic) -> EvalKit {
|
2025-05-23 02:29:32 +02:00
|
|
|
UINFO(2, "Scheduling " << name << " #logic = " << logic.size());
|
2025-10-31 19:29:11 +01:00
|
|
|
AstVarScope* const trigVscp = trigKit.newTrigVec(name);
|
2025-11-01 16:43:20 +01:00
|
|
|
const auto trigMap = cloneMapWithNewTriggerReferences(trigKit.mapVec(), trigVscp);
|
2022-12-23 13:34:49 +01:00
|
|
|
// Remap sensitivities of the input logic to the triggers
|
|
|
|
|
for (LogicByScope* lbs : logic) remapSensitivities(*lbs, trigMap);
|
|
|
|
|
|
|
|
|
|
// Create the inverse map from trigger ref AstSenTree to original AstSenTree
|
2024-03-09 13:43:09 +01:00
|
|
|
V3Order::TrigToSenMap trigToSen;
|
2022-12-23 13:34:49 +01:00
|
|
|
invertAndMergeSenTreeMap(trigToSen, trigMap);
|
|
|
|
|
|
|
|
|
|
AstSenTree* const dpiExportTriggered
|
2025-11-01 16:43:20 +01:00
|
|
|
= dpiExportTriggerVscp
|
|
|
|
|
? trigKit.newExtraTriggerSenTree(trigVscp, dpiExportTriggerIndex)
|
|
|
|
|
: nullptr;
|
2025-10-31 19:29:11 +01:00
|
|
|
const auto& vifMemberTriggered
|
|
|
|
|
= virtIfaceTriggers.makeMemberToSensMap(trigKit, firstVifMemberTriggerIndex, trigVscp);
|
2022-12-23 13:34:49 +01:00
|
|
|
|
|
|
|
|
const auto& timingDomains = timingKit.remapDomains(trigMap);
|
|
|
|
|
AstCFunc* const funcp = V3Order::order(
|
|
|
|
|
netlistp, logic, trigToSen, name, name == "nba" && v3Global.opt.mtasks(), false,
|
|
|
|
|
[&](const AstVarScope* vscp, std::vector<AstSenTree*>& out) {
|
|
|
|
|
auto it = timingDomains.find(vscp);
|
|
|
|
|
if (it != timingDomains.end()) out = it->second;
|
|
|
|
|
if (vscp->varp()->isWrittenByDpi()) out.push_back(dpiExportTriggered);
|
2026-02-06 23:20:10 +01:00
|
|
|
// Sometimes virtual interfaces are mixed with non-virtual ones, so both have to be
|
|
|
|
|
// detected - see test `t_virtual_interface_nba_assign`
|
|
|
|
|
if (vscp->varp()->sensIfacep() || vscp->varp()->isVirtIface()) {
|
2025-08-23 16:45:13 +02:00
|
|
|
std::vector<AstSenTree*> ifaceTriggered
|
2026-02-06 23:20:10 +01:00
|
|
|
= findTriggeredIface(vscp, vifMemberTriggered);
|
2025-08-23 16:45:13 +02:00
|
|
|
out.insert(out.end(), ifaceTriggered.begin(), ifaceTriggered.end());
|
2023-12-05 04:11:07 +01:00
|
|
|
}
|
2022-12-23 13:34:49 +01:00
|
|
|
});
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2025-10-31 19:29:11 +01:00
|
|
|
return {trigVscp, funcp};
|
2022-12-23 13:34:49 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Step 10: Create the 'nba' region evaluation function
|
2025-10-31 19:29:11 +01:00
|
|
|
const EvalKit nbaKit = order("nba", {&logicRegions.m_nba, &logicReplicas.m_nba});
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(nbaKit.m_funcp);
|
2022-12-23 13:34:49 +01:00
|
|
|
netlistp->evalNbap(nbaKit.m_funcp); // Remember for V3LifePost
|
2022-05-15 17:03:32 +02:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-create-nba");
|
|
|
|
|
|
2022-12-23 13:34:49 +01:00
|
|
|
// Orders a region's logic and creates the region eval function (only if there is any logic in
|
|
|
|
|
// the region)
|
2024-07-10 00:31:58 +02:00
|
|
|
const auto orderIfNonEmpty
|
|
|
|
|
= [&](const std::string& name, const std::vector<LogicByScope*>& logic) -> EvalKit {
|
|
|
|
|
if (logic[0]->empty())
|
|
|
|
|
return {}; // if region is empty, replica is supposed to be empty as well
|
|
|
|
|
const auto& kit = order(name, logic);
|
2022-12-23 13:34:49 +01:00
|
|
|
if (v3Global.opt.stats()) V3Stats::statsStage("sched-create-" + name);
|
|
|
|
|
return kit;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Step 11: Create the 'obs' region evaluation function
|
2025-10-31 19:29:11 +01:00
|
|
|
const EvalKit obsKit = orderIfNonEmpty("obs", {&logicRegions.m_obs, &logicReplicas.m_obs});
|
2022-12-23 13:34:49 +01:00
|
|
|
|
|
|
|
|
// Step 12: Create the 'react' region evaluation function
|
2025-10-31 19:29:11 +01:00
|
|
|
const EvalKit reactKit
|
2024-07-10 00:31:58 +02:00
|
|
|
= orderIfNonEmpty("react", {&logicRegions.m_react, &logicReplicas.m_react});
|
2022-12-23 13:34:49 +01:00
|
|
|
|
|
|
|
|
// Step 13: Create the 'postponed' region evaluation function
|
2022-10-13 21:04:43 +02:00
|
|
|
auto* const postponedFuncp = createPostponed(netlistp, logicClasses);
|
|
|
|
|
|
2022-12-23 13:34:49 +01:00
|
|
|
// Step 14: Bolt it all together to create the '_eval' function
|
2025-11-01 16:43:20 +01:00
|
|
|
createEval(netlistp, icoLoopp, trigKit, actKit, nbaKit, obsKit, reactKit, postponedFuncp,
|
|
|
|
|
timingKit);
|
Timing support (#3363)
Adds timing support to Verilator. It makes it possible to use delays,
event controls within processes (not just at the start), wait
statements, and forks.
Building a design with those constructs requires a compiler that
supports C++20 coroutines (GCC 10, Clang 5).
The basic idea is to have processes and tasks with delays/event controls
implemented as C++20 coroutines. This allows us to suspend and resume
them at any time.
There are five main runtime classes responsible for managing suspended
coroutines:
* `VlCoroutineHandle`, a wrapper over C++20's `std::coroutine_handle`
with move semantics and automatic cleanup.
* `VlDelayScheduler`, for coroutines suspended by delays. It resumes
them at a proper simulation time.
* `VlTriggerScheduler`, for coroutines suspended by event controls. It
resumes them if its corresponding trigger was set.
* `VlForkSync`, used for syncing `fork..join` and `fork..join_any`
blocks.
* `VlCoroutine`, the return type of all verilated coroutines. It allows
for suspending a stack of coroutines (normally, C++ coroutines are
stackless).
There is a new visitor in `V3Timing.cpp` which:
* scales delays according to the timescale,
* simplifies intra-assignment timing controls and net delays into
regular timing controls and assignments,
* simplifies wait statements into loops with event controls,
* marks processes and tasks with timing controls in them as
suspendable,
* creates delay, trigger scheduler, and fork sync variables,
* transforms timing controls and fork joins into C++ awaits
There are new functions in `V3SchedTiming.cpp` (used by `V3Sched.cpp`)
that integrate static scheduling with timing. This involves providing
external domains for variables, so that the necessary combinational
logic gets triggered after coroutine resumption, as well as statements
that need to be injected into the design eval function to perform this
resumption at the correct time.
There is also a function that transforms forked processes into separate
functions.
See the comments in `verilated_timing.h`, `verilated_timing.cpp`,
`V3Timing.cpp`, and `V3SchedTiming.cpp`, as well as the internals
documentation for more details.
Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
2022-08-22 14:26:32 +02:00
|
|
|
|
2026-02-11 19:35:59 +01:00
|
|
|
// Step 15: Add necessary evaluation before awaits
|
|
|
|
|
if (AstCCall* const readyp = timingKit.createReady(netlistp)) {
|
|
|
|
|
staticp->addStmtsp(readyp->makeStmt());
|
|
|
|
|
beforeTrigVisitor(netlistp, senExprBuilder, trigKit);
|
|
|
|
|
} else {
|
|
|
|
|
// beforeTrigVisitor clears Sentree pointers in AstCAwaits (as these sentrees will get
|
|
|
|
|
// deleted later). If there was no need to call it, SenTrees have to be cleared manually
|
|
|
|
|
netlistp->foreach([](AstCAwait* const cAwaitp) { cAwaitp->clearSentreep(); });
|
|
|
|
|
}
|
|
|
|
|
if (AstVarScope* const trigAccp = trigKit.vscAccp()) {
|
|
|
|
|
// Copy trigger vector to accumulator at the end of static initialization, so
|
|
|
|
|
// triggers fired during initialization persist to the first resume.
|
|
|
|
|
const AstUnpackArrayDType* const trigAccDTypep
|
|
|
|
|
= VN_AS(trigAccp->dtypep(), UnpackArrayDType);
|
|
|
|
|
UASSERT_OBJ(
|
|
|
|
|
trigAccDTypep->right() == 0, trigAccp,
|
|
|
|
|
"Expected that trigger vector and accumulator start elements enumeration from 0");
|
|
|
|
|
UASSERT_OBJ(trigAccDTypep->left() >= 0, trigAccp,
|
|
|
|
|
"Expected that trigger vector and accumulator has no negative indexes");
|
|
|
|
|
FileLine* const flp = trigAccp->fileline();
|
|
|
|
|
AstVarScope* const vscp = netlistp->topScopep()->scopep()->createTemp("__Vi", 32);
|
|
|
|
|
AstLoop* const loopp = new AstLoop{flp};
|
|
|
|
|
loopp->addStmtsp(
|
|
|
|
|
new AstAssign{flp,
|
|
|
|
|
new AstArraySel{flp, new AstVarRef{flp, trigAccp, VAccess::WRITE},
|
|
|
|
|
new AstVarRef{flp, vscp, VAccess::READ}},
|
|
|
|
|
new AstArraySel{flp, new AstVarRef{flp, actKit.m_vscp, VAccess::READ},
|
|
|
|
|
new AstVarRef{flp, vscp, VAccess::READ}}});
|
|
|
|
|
loopp->addStmtsp(util::incrementVar(vscp));
|
|
|
|
|
loopp->addStmtsp(new AstLoopTest{
|
|
|
|
|
flp, loopp,
|
|
|
|
|
new AstLte{flp, new AstVarRef{flp, vscp, VAccess::READ},
|
|
|
|
|
new AstConst{flp, AstConst::WidthedValue{}, 32,
|
|
|
|
|
static_cast<uint32_t>(trigAccDTypep->left())}}});
|
|
|
|
|
staticp->addStmtsp(loopp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 16: Clean up
|
2026-01-23 18:53:40 +01:00
|
|
|
netlistp->clearStlFirstIterationp();
|
|
|
|
|
|
2025-10-26 10:25:34 +01:00
|
|
|
// Haven't split static initializer yet
|
2025-10-27 16:16:28 +01:00
|
|
|
util::splitCheck(staticp);
|
2022-05-15 17:03:32 +02:00
|
|
|
|
2025-10-26 10:25:34 +01:00
|
|
|
// Dump
|
2024-01-09 16:35:13 +01:00
|
|
|
V3Global::dumpCheckGlobalTree("sched", 0, dumpTreeEitherLevel() >= 3);
|
2022-05-15 17:03:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace V3Sched
|