// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: Threading's logic to mtask partitioner
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// Copyright 2003-2021 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************

#include "config_build.h"
|
|
|
|
|
#include "verilatedos.h"
|
|
|
|
|
|
2021-06-16 13:18:56 +02:00
|
|
|
#include "V3EmitCBase.h"
|
2021-09-27 04:51:11 +02:00
|
|
|
#include "V3Config.h"
|
2018-07-23 02:54:28 +02:00
|
|
|
#include "V3Os.h"
|
|
|
|
|
#include "V3File.h"
|
|
|
|
|
#include "V3GraphAlg.h"
|
|
|
|
|
#include "V3GraphStream.h"
|
|
|
|
|
#include "V3InstrCount.h"
|
|
|
|
|
#include "V3Partition.h"
|
|
|
|
|
#include "V3PartitionGraph.h"
|
|
|
|
|
#include "V3Scoreboard.h"
|
|
|
|
|
#include "V3Stats.h"
|
2021-09-27 04:51:11 +02:00
|
|
|
#include "V3UniqueNames.h"
|
2018-10-14 19:43:24 +02:00
|
|
|
|
|
|
|
|
#include <list>
|
|
|
|
|
#include <memory>
|
2020-08-15 16:03:34 +02:00
|
|
|
#include <unordered_set>
|
2018-07-23 02:54:28 +02:00
|
|
|
|
|
|
|
|
class MergeCandidate;
|
|
|
|
|
|
|
|
|
|
//######################################################################
// Partitioner tunable settings:
//
// Before describing these settings, a bit of background:
//
// Early during the development of the partitioner, V3Split was failing to
// split large always blocks (with ~100K assignments), so we had to handle
// very large vertices with ~100K incoming and outgoing edges.
//
// The partitioner attempts to deal with such densely connected
// graphs. Some of the tuning parameters below reference "huge vertices";
// that's what they're talking about: vertices with tens of thousands of
// edges in and out, whereas most graphs have only tens of edges in and
// out of most vertices.
//
// V3Split has since been fixed to more reliably split large always
// blocks. It's kind of an open question whether the partitioner must
// handle huge nodes gracefully. Maybe not! But it still can, given
// appropriate tuning.

// PART_SIBLING_EDGE_LIMIT (integer)
//
// Arbitrarily limit the number of edges on a single vertex that will be
// considered when enumerating siblings, to the given value. This protects
// the partitioner runtime in the presence of huge vertices.
//
// The sibling-merge is less important than the edge merge. (You can
// totally disable the sibling merge and get halfway decent partitions; you
// can't disable edge merges, those are fundamental to the process.) So,
// skipping the enumeration of some siblings on a few vertices does not
// have a large impact on the result of the partitioner.
//
// If your vertices are small, the limit (at 25) approaches a no-op. Hence
// there's basically no cost to applying this limit even when we don't
// expect huge vertices.
//
// If you don't care about partitioner runtime and you want the most
// aggressive partition, set the limit very high. If you have huge
// vertices, leave this as is.
constexpr unsigned PART_SIBLING_EDGE_LIMIT = 25;

// PART_STEPPED_COST (true/false)
//
// When computing critical path costs, use a step function on the actual
// underlying vertex cost.
//
// If there are huge vertices, when a tiny vertex merges into a huge
// vertex, we can often avoid increasing the huge vertex's stepped cost.
// If the stepped cost hasn't increased, and the critical path into the huge
// vertex hasn't increased, we can avoid propagating a new critical path to
// vertices past the huge vertex. Since huge vertices tend to have huge lists
// of children and parents, this can be a substantial savings.
//
// Does not seem to reduce the quality of the partitioner's output.
//
// If you have huge vertices, leave this 'true'; it is the major setting
// that allows the partitioner to handle such difficult graphs on anything
// like a human time scale.
//
// If you don't have huge vertices, the 'true' value doesn't help much but
// should cost almost nothing in terms of partitioner quality.
//
// If you want the most aggressive possible partition, set it 'false' and
// be prepared to be disappointed when the improvement in the partition is
// negligible / in the noise.
//
// Q) Why retain the control, if there is really no downside?
//
// A) Cost stepping can lead to corner cases. A developer may wish to
//    disable cost stepping to rule it out as the cause of unexpected
//    behavior.
#define PART_STEPPED_COST true

// Don't produce more than a certain maximum number of MTasks. This helps
// the TSP variable sort not to blow up (a concern for some of the tests)
// and we probably don't want a huge number of mtasks in practice anyway
// (50 to 100 is typical).
//
// If the user doesn't give one with '--threads-max-mtasks', we'll set the
// maximum # of MTasks to
//   (# of threads * PART_DEFAULT_MAX_MTASKS_PER_THREAD)
constexpr unsigned PART_DEFAULT_MAX_MTASKS_PER_THREAD = 50;
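// Example (illustrative): with '--threads 4' and no explicit
// '--threads-max-mtasks', PartContraction::go() below caps the graph at
// 4 * 50 = 200 mtasks.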

// end tunables.

//######################################################################
// Misc graph and assertion utilities

static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) {
#if PART_STEPPED_COST
    // Cached CP might be a little bigger than actual, due to stepped CPs.
    // Example:
    // Let's say we have a parent with stepped_cost 40 and a grandparent
    // with stepped_cost 27. Our forward-cp is 67. Then our parent and
    // grandparent get merged; the merged node has stepped cost 66. We
    // won't propagate that new CP to children as it hasn't grown. So,
    // children may continue to think that the CP coming through this path
    // is a little higher than it really is; permit that.
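    //
    // The assert below allows roughly 10% slack in either direction,
    // i.e. cached must lie within [actual * 10/11, actual * 11/10].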
    UASSERT(((cached * 10) <= (actual * 11)) && ((cached * 11) >= (actual * 10)),
            "Calculation error in scoring (approximate, may need tweak)");
#else
    UASSERT(cached == actual, "Calculation error in scoring");
#endif
}

//######################################################################
// PartPropagateCp

// Propagate increasing critical path (CP) costs through a graph.
//
// Usage:
//  * Client increases the cost and/or CP at a node or small set of nodes
//    (often a pair in practice, e.g. edge contraction.)
//  * Client instantiates a PartPropagateCp object
//  * Client calls PartPropagateCp::cpHasIncreased() one or more times.
//    Each call indicates that the inclusive CP of some "seed" vertex
//    has increased to a given value.
//    * NOTE: PartPropagateCp will neither read nor modify the cost
//      or CPs at the seed vertices; it only accesses and modifies
//      vertices wayward from the seeds.
//  * Client calls PartPropagateCp::go(). Internally, this iteratively
//    propagates the new CPs wayward through the graph.
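//    (PartPropagateCpSelfTest below exercises this exact sequence
//    end-to-end and serves as a complete usage example.)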
//
template <class T_CostAccessor> class PartPropagateCp : GraphAlg<> {
private:
    // MEMBERS
    GraphWay m_way;  // CPs oriented in this direction: either FORWARD
                     //  from graph-start to current node, or REVERSE
                     //  from graph-end to current node.
    T_CostAccessor* m_accessp;  // Access cost and CPs on V3GraphVertex's.
    vluint64_t m_generation = 0;  // Mark each vertex with this number;
                                  //  confirm we only process each vertex once.
    bool m_slowAsserts;  // Enable nontrivial asserts
    SortByValueMap<V3GraphVertex*, uint32_t> m_pending;  // Pending rescores

public:
    // CONSTRUCTORS
    PartPropagateCp(V3Graph* graphp, GraphWay way, T_CostAccessor* accessp, bool slowAsserts,
                    V3EdgeFuncP edgeFuncp = &V3GraphEdge::followAlwaysTrue)
        : GraphAlg<>{graphp, edgeFuncp}
        , m_way{way}
        , m_accessp{accessp}
        , m_slowAsserts{slowAsserts} {}

    // METHODS
    void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) {
        // For *vxp, whose CP-inclusive has just increased to
        // newInclusiveCp, iterate to all wayward nodes, update the edges
        // of each, and add each to m_pending if its overall CP has grown.
        for (V3GraphEdge* edgep = vxp->beginp(m_way); edgep; edgep = edgep->nextp(m_way)) {
            if (!m_edgeFuncp(edgep)) continue;
            V3GraphVertex* const relativep = edgep->furtherp(m_way);
            m_accessp->notifyEdgeCp(relativep, m_way, vxp, newInclusiveCp);

            if (m_accessp->critPathCost(relativep, m_way) < newInclusiveCp) {
                // relativep's critPathCost() is out of step with its
                // longest !wayward edge. Schedule that to be resolved.
                const uint32_t newPendingVal
                    = newInclusiveCp - m_accessp->critPathCost(relativep, m_way);
                if (m_pending.has(relativep)) {
                    if (newPendingVal > m_pending.at(relativep)) {
                        m_pending.set(relativep, newPendingVal);
                    }
                } else {
                    m_pending.set(relativep, newPendingVal);
                }
            }
        }
    }

    void go() {
        // m_pending maps each pending vertex to the amount that its wayward
        // CP will grow.
        //
        // We can iterate over the pending set in reverse order, always
        // choosing the nodes with the largest pending CP-growth.
        //
        // The intuition is: if the original seed node had its CP grow by
        // 50, the most any wayward node can possibly grow is also 50. So
        // for anything pending to grow by 50, we know we can process it
        // once and we won't have to grow its CP again on the current pass.
        // After we're done with all the grow-by-50s, nothing else will
        // grow by 50 again on the current pass, and we can process the
        // grow-by-49s and we know we'll only have to process each one
        // once. And so on.
        //
        // This generalizes to multiple seed nodes also.
        while (!m_pending.empty()) {
            const auto it = m_pending.rbegin();
            V3GraphVertex* const updateMep = (*it).key();
            const uint32_t cpGrowBy = (*it).value();
            m_pending.erase(it);

            // For *updateMep, whose critPathCost was out-of-date with respect
            // to its edges, update the critPathCost.
            const uint32_t startCp = m_accessp->critPathCost(updateMep, m_way);
            const uint32_t newCp = startCp + cpGrowBy;
            if (m_slowAsserts) m_accessp->checkNewCpVersusEdges(updateMep, m_way, newCp);

            m_accessp->setCritPathCost(updateMep, m_way, newCp);
            cpHasIncreased(updateMep, newCp + m_accessp->cost(updateMep));
        }
    }

private:
    VL_DEBUG_FUNC;
    VL_UNCOPYABLE(PartPropagateCp);
};

class PartPropagateCpSelfTest final {
private:
    // MEMBERS
    V3Graph m_graph;  // A graph
    V3GraphVertex* m_vx[50];  // All vertices within the graph
    using CpMap = std::unordered_map<V3GraphVertex*, uint32_t>;
    CpMap m_cp;  // Vertex-to-CP map
    CpMap m_seen;  // Set of vertices we've seen

    // CONSTRUCTORS
    PartPropagateCpSelfTest() = default;
    ~PartPropagateCpSelfTest() = default;

    // METHODS
protected:
    friend class PartPropagateCp<PartPropagateCpSelfTest>;

    void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throughp,
                      uint32_t cp) const {
        const uint32_t throughCost = critPathCost(throughp, way);
        UASSERT_SELFTEST(uint32_t, cp, (1 + throughCost));
    }

private:
    void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const {
        // Don't need to check this in the self test; it supports an assert
        // that runs in production code.
    }
    void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) {
        m_cp[vxp] = cost;
        // Confirm that we only set each node's CP once. That's an
        // important property of PartPropagateCp which allows it to be far
        // faster than a recursive algorithm on some graphs.
        const auto it = m_seen.find(vxp);
        UASSERT_OBJ(it == m_seen.end(), vxp, "Set CP on node twice");
        m_seen[vxp] = cost;
    }
    uint32_t critPathCost(V3GraphVertex* vxp, GraphWay way) const {
        const auto it = m_cp.find(vxp);
        if (it != m_cp.end()) return it->second;
        return 0;
    }
    static uint32_t cost(const V3GraphVertex*) { return 1; }
    void partInitCriticalPaths(bool checkOnly) {
        // Set up the FORWARD cp's only. This test only looks in one
        // direction; it assumes REVERSE is symmetrical and would be
        // redundant to test.
        GraphStreamUnordered order(&m_graph);
        while (const V3GraphVertex* cvxp = order.nextp()) {
            V3GraphVertex* const vxp = const_cast<V3GraphVertex*>(cvxp);
            uint32_t cpCost = 0;
            for (V3GraphEdge* edgep = vxp->inBeginp(); edgep; edgep = edgep->inNextp()) {
                V3GraphVertex* const parentp = edgep->fromp();
                cpCost = std::max(cpCost, critPathCost(parentp, GraphWay::FORWARD) + 1);
            }
            if (checkOnly) {
                UASSERT_SELFTEST(uint32_t, cpCost, critPathCost(vxp, GraphWay::FORWARD));
            } else {
                setCritPathCost(vxp, GraphWay::FORWARD, cpCost);
            }
        }
    }
    void go() {
        // Generate a pseudo-random graph
        std::array<vluint64_t, 2> rngState
            = {{0x12345678ULL, 0x9abcdef0ULL}};  // GCC 3.8.0 wants {{}}
        // Create 50 vertices
        for (auto& i : m_vx) i = new V3GraphVertex(&m_graph);
        // Create 250 edges at random. Edges must go from
        // lower-to-higher index vertices, so we get a DAG.
        for (unsigned i = 0; i < 250; ++i) {
            const unsigned idx1 = V3Os::rand64(rngState) % 50;
            const unsigned idx2 = V3Os::rand64(rngState) % 50;
            if (idx1 > idx2) {
                new V3GraphEdge(&m_graph, m_vx[idx2], m_vx[idx1], 1);
            } else if (idx2 > idx1) {
                new V3GraphEdge(&m_graph, m_vx[idx1], m_vx[idx2], 1);
            }
        }

        partInitCriticalPaths(false);

        // This SelfTest class is also the T_CostAccessor
        PartPropagateCp<PartPropagateCpSelfTest> prop(&m_graph, GraphWay::FORWARD, this, true);

        // Seed the propagator with every input node;
        // this should result in the complete graph getting all CPs assigned.
        for (const auto& i : m_vx) {
            if (!i->inBeginp()) prop.cpHasIncreased(i, 1 /* inclusive CP starts at 1 */);
        }

        // Run the propagator.
        //  * The setCritPathCost() routine checks that each node's CP changes
        //    at most once.
        //  * The notifyEdgeCp routine is also self checking.
        m_seen.clear();
        prop.go();

        // Finally, confirm that the entire graph appears to have correct CPs.
        partInitCriticalPaths(true);
    }

public:
    static void selfTest() { PartPropagateCpSelfTest().go(); }
};

//######################################################################
// LogicMTask

class LogicMTask final : public AbstractLogicMTask {
public:
    // TYPES
    using VxList = std::list<MTaskMoveVertex*>;

    struct CmpLogicMTask {
        bool operator()(const LogicMTask* ap, const LogicMTask* bp) const {
            return ap->id() < bp->id();
        }
    };

    // This adaptor class allows the PartPropagateCp class to be somewhat
    // independent of the LogicMTask class
    //  - PartPropagateCp can thus be declared before LogicMTask
    //  - PartPropagateCp could be reused with graphs of other node types
    //    in the future, using another Accessor adaptor.
    class CpCostAccessor final {
    public:
        CpCostAccessor() = default;
        ~CpCostAccessor() = default;
        // Return cost of this node
        uint32_t cost(const V3GraphVertex* vxp) const {
            const LogicMTask* const mtaskp = dynamic_cast<const LogicMTask*>(vxp);
            return mtaskp->stepCost();
        }
        // Return stored CP to this node
        uint32_t critPathCost(const V3GraphVertex* vxp, GraphWay way) const {
            const LogicMTask* const mtaskp = dynamic_cast<const LogicMTask*>(vxp);
            return mtaskp->critPathCost(way);
        }
        // Store a new CP to this node
        void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) const {
            LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(vxp);
            mtaskp->setCritPathCost(way, cost);
        }
        // Notify vxp that the wayward CP at the throughp-->vxp edge
        // has increased to 'cp'. (vxp is wayward from throughp.)
        // This is our cue to update vxp's m_edges[!way][throughp].
        void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throughVxp,
                          uint32_t cp) const {
            LogicMTask* const updateVxp = dynamic_cast<LogicMTask*>(vxp);
            LogicMTask* const lThroughVxp = dynamic_cast<LogicMTask*>(throughVxp);
            EdgeSet& edges = updateVxp->m_edges[way.invert()];
            const uint32_t edgeCp = edges.at(lThroughVxp);
            if (cp > edgeCp) edges.set(lThroughVxp, cp);
        }
        // Check that CP matches that of the longest edge wayward of vxp.
        void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const {
            LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(vxp);
            EdgeSet& edges = mtaskp->m_edges[way.invert()];
            // This is mtaskp's relative with longest !wayward inclusive CP:
            const auto edgeIt = edges.rbegin();
            const uint32_t edgeCp = (*edgeIt).value();
            UASSERT_OBJ(edgeCp == cp, vxp, "CP doesn't match longest wayward edge");
        }

    private:
        VL_UNCOPYABLE(CpCostAccessor);
    };

private:
    // MEMBERS

    // Set of MTaskMoveVertex's assigned to this mtask. LogicMTask does not
    // own the MTaskMoveVertex objects; we merely keep pointers to them
    // here.
    VxList m_vertices;

    // Cost estimate for this LogicMTask, derived from V3InstrCount.
    // In abstract time units.
    uint32_t m_cost = 0;

    // Cost of critical paths going FORWARD from graph-start to the start
    // of this vertex, and also going REVERSE from the end of the graph to
    // the end of the vertex. Same units as m_cost.
    std::array<uint32_t, GraphWay::NUM_WAYS> m_critPathCost;

    uint32_t m_serialId;  // Unique MTask ID number

    // Count "generations", which are just operations that scan through the
    // graph. We'll mark each node with the last generation that scanned
    // it. We can use this to avoid recursing through the same node twice
    // while searching for a path.
    vluint64_t m_generation = 0;

    // Redundant with the V3GraphEdge's, store a map of relatives so we can
    // quickly check if we have a given parent or child.
    //
    // 'm_edges[way]' maps a wayward relative to the !way critical path at
    // our edge with them. The SortByValueMap supports iterating over
    // relatives in longest-to-shortest CP order. We rely on this ordering
    // in more than one place.
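    //
    // For example (per addRelative() below), m_edges[GraphWay::FORWARD]
    // maps each child to (child's stepCost + child's REVERSE critical
    // path), i.e. the cost of everything at and beyond that child.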
    using EdgeSet = SortByValueMap<LogicMTask*, uint32_t, CmpLogicMTask>;
    std::array<EdgeSet, GraphWay::NUM_WAYS> m_edges;

public:
    // CONSTRUCTORS
    LogicMTask(V3Graph* graphp, MTaskMoveVertex* mtmvVxp)
        : AbstractLogicMTask{graphp} {
        for (unsigned int& i : m_critPathCost) i = 0;
        if (mtmvVxp) {  // Else null for test
            m_vertices.push_back(mtmvVxp);
            if (OrderLogicVertex* const olvp = mtmvVxp->logicp()) {
                m_cost += V3InstrCount::count(olvp->nodep(), true);
            }
        }
        // Start at 1, so that 0 indicates no mtask ID.
        static uint32_t s_nextId = 1;
        m_serialId = s_nextId++;
        UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mtasks");
    }

    // METHODS
    void moveAllVerticesFrom(LogicMTask* otherp) {
        // splice() is constant time
        m_vertices.splice(m_vertices.end(), otherp->m_vertices);
        m_cost += otherp->m_cost;
    }
    virtual const VxList* vertexListp() const override { return &m_vertices; }
    static vluint64_t incGeneration() {
        static vluint64_t s_generation = 0;
        ++s_generation;
        return s_generation;
    }

    // Use this instead of pointer-compares to compare LogicMTasks. Avoids
    // nondeterministic output. Also name mtasks based on this number in
    // the final C++ output.
    virtual uint32_t id() const override { return m_serialId; }
    void id(uint32_t id) { m_serialId = id; }
    // Abstract cost of every logic mtask
    virtual uint32_t cost() const override { return m_cost; }
    void setCost(uint32_t cost) { m_cost = cost; }  // For tests only
    uint32_t stepCost() const { return stepCost(m_cost); }
    static uint32_t stepCost(uint32_t cost) {
#if PART_STEPPED_COST
        // Round cost up to the nearest 5%. Use this when computing all
        // critical paths. The idea is that critical path changes don't
        // need to propagate when they don't exceed the next step, saving a
        // lot of recursion.
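        //
        // Worked example (illustrative): for cost = 1000, log(1000) ~= 6.9078;
        // *20 gives 138.16; ceil gives 139; /20 gives 6.95; exp(6.95) ~= 1043.
        // So stepCost(1000) returns ~1043, within the <=10% overshoot that
        // the asserts below allow.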
        if (cost == 0) return 0;

        double logcost = log(cost);
        // log(1.05) is about 0.05
        // So, round logcost up to the next 0.05 boundary
        logcost *= 20.0;
        logcost = ceil(logcost);
        logcost = logcost / 20.0;

        const uint32_t stepCost = static_cast<uint32_t>(exp(logcost));
        UASSERT_STATIC(stepCost >= cost, "stepped cost error exceeded");
        UASSERT_STATIC(stepCost <= ((cost * 11 / 10)), "stepped cost error exceeded");
        return stepCost;
#else
        return cost;
#endif
    }

    void addRelative(GraphWay way, LogicMTask* relativep) {
        EdgeSet& edges = m_edges[way];
        UASSERT(!edges.has(relativep), "Adding existing edge");
        // value is the !way cp to this edge
        edges.set(relativep, relativep->stepCost() + relativep->critPathCost(way.invert()));
    }
    void removeRelative(GraphWay way, LogicMTask* relativep) {
        EdgeSet& edges = m_edges[way];
        edges.erase(relativep);
    }
    bool hasRelative(GraphWay way, LogicMTask* relativep) {
        const EdgeSet& edges = m_edges[way];
        return edges.has(relativep);
    }
    void checkRelativesCp(GraphWay way) const {
        const EdgeSet& edges = m_edges[way];
        for (EdgeSet::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
            LogicMTask* const relativep = (*it).key();
            const uint32_t cachedCp = (*it).value();
            partCheckCachedScoreVsActual(cachedCp, relativep->critPathCost(way.invert())
                                                       + relativep->stepCost());
        }
    }

    virtual string name() const override {
        // Display forward and reverse critical path costs. This gives a quick
        // read on whether graph partitioning looks reasonable or bad.
        std::ostringstream out;
        out << "mt" << m_serialId << "." << this << " [b" << m_critPathCost[GraphWay::FORWARD]
            << " a" << m_critPathCost[GraphWay::REVERSE] << " c" << cost() << "]";
        return out.str();
    }

    void setCritPathCost(GraphWay way, uint32_t cost) { m_critPathCost[way] = cost; }
    uint32_t critPathCost(GraphWay way) const { return m_critPathCost[way]; }
    uint32_t critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const {
        // Compute the critical path cost wayward to this node, without
        // considering edge 'withoutp'
        UASSERT(this == withoutp->furtherp(way),
                "In critPathCostWithout(), edge 'withoutp' must point further to 'this'");

        // Iterate through edges until we get a relative other than
        // wayEdgeEndp(way, withoutp). This should take 2 iterations max.
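        // (At most one entry can be withoutp's far-end relative, so the
        // first or second entry of the longest-first iteration suffices.)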
        const EdgeSet& edges = m_edges[way.invert()];
        uint32_t result = 0;
        for (EdgeSet::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
            if ((*it).key() != withoutp->furtherp(way.invert())) {
                // Use the cached cost. It could be a small overestimate
                // due to stepping. This is consistent with critPathCost()
                // which also returns the cached cost.
                result = (*it).value();
                break;
            }
        }
        return result;
    }

private:
    static bool pathExistsFromInternal(LogicMTask* fromp, LogicMTask* top,
                                       const V3GraphEdge* excludedEdgep, vluint64_t generation) {
        // Q) Why does this take LogicMTask instead of generic V3GraphVertex?
        // A) We'll use the critical paths known to LogicMTask to prune the
        //    recursion for speed. Also store 'generation' in
        //    LogicMTask::m_generation so we can prune the search and avoid
        //    recursing through the same node more than once in a single
        //    search.

        if (fromp->m_generation == generation) {
            // Already looked at this node in the current search.
            // Since we're back again, we must not have found a path on the
            // first go.
            return false;
        }
        fromp->m_generation = generation;

        // Base case: we found a path.
        if (fromp == top) return true;

        // Base case: fromp is too late, cannot possibly be a prereq for top.
        if (fromp->critPathCost(GraphWay::REVERSE)
            < (top->critPathCost(GraphWay::REVERSE) + top->stepCost())) {
            return false;
        }
        if ((fromp->critPathCost(GraphWay::FORWARD) + fromp->stepCost())
            > top->critPathCost(GraphWay::FORWARD)) {
            return false;
        }
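        // (Rationale for both prunes: if a fromp->top path existed, fromp's
        // REVERSE CP would have to cover top plus everything after it, and
        // fromp's FORWARD CP plus fromp's own cost would have to fit before
        // top. Either inequality failing rules out any such path.)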

        // Recursively look for a path
        for (const V3GraphEdge* followp = fromp->outBeginp(); followp;
             followp = followp->outNextp()) {
            if (followp == excludedEdgep) continue;
            LogicMTask* const nextp = dynamic_cast<LogicMTask*>(followp->top());
            if (pathExistsFromInternal(nextp, top, nullptr, generation)) return true;
        }
        return false;
    }

    // True if there's a path from 'fromp' to 'top' excluding
    // 'excludedEdgep', false otherwise.
    //
    // 'excludedEdgep' may be nullptr in which case no edge is excluded. If
    // 'excludedEdgep' is non-nullptr it must connect fromp and top.
    //
    // TODO: consider changing this API to the 'isTransitiveEdge' API
    // used by GraphPathChecker
public:
    static bool pathExistsFrom(LogicMTask* fromp, LogicMTask* top,
                               const V3GraphEdge* excludedEdgep) {
        return pathExistsFromInternal(fromp, top, excludedEdgep, incGeneration());
    }

    static void dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment) {
        const string filename = v3Global.debugFilename(nameComment) + ".txt";
        UINFO(1, "Writing " << filename << endl);
        std::unique_ptr<std::ofstream> ofp{V3File::new_ofstream(filename)};
        std::ostream* const osp = &(*ofp);  // &* needed to deref unique_ptr
        if (osp->fail()) v3fatalStatic("Can't write " << filename);

        // Find start vertex with longest CP
        const LogicMTask* startp = nullptr;
        for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp;
             vxp = vxp->verticesNextp()) {
            const LogicMTask* const mtaskp = dynamic_cast<const LogicMTask*>(vxp);
            if (!startp) {
                startp = mtaskp;
                continue;
            }
            if (mtaskp->cost() + mtaskp->critPathCost(GraphWay::REVERSE)
                > startp->cost() + startp->critPathCost(GraphWay::REVERSE)) {
                startp = mtaskp;
            }
        }

        // Follow the entire critical path
        std::vector<const LogicMTask*> path;
        uint32_t totalCost = 0;
        for (const LogicMTask* nextp = startp; nextp;) {
            path.push_back(nextp);
            totalCost += nextp->cost();

            const EdgeSet& children = nextp->m_edges[GraphWay::FORWARD];
            EdgeSet::const_reverse_iterator it = children.rbegin();
            if (it == children.rend()) {
                nextp = nullptr;
            } else {
                nextp = (*it).key();
            }
        }

        *osp << "totalCost = " << totalCost
             << " (should match the computed critical path cost (CP) for the graph)\n";

        // Dump
        for (const LogicMTask* mtaskp : path) {
            *osp << "begin mtask with cost " << mtaskp->cost() << '\n';
            for (VxList::const_iterator lit = mtaskp->vertexListp()->begin();
                 lit != mtaskp->vertexListp()->end(); ++lit) {
                const OrderLogicVertex* const logicp = (*lit)->logicp();
                if (!logicp) continue;
                if (false) {
                    // Show nodes only
                    *osp << "> ";
                    logicp->nodep()->dumpTree(*osp);
                } else {
                    // Show nodes with hierarchical costs
                    V3InstrCount::count(logicp->nodep(), false, osp);
                }
            }
        }
    }

private:
    VL_DEBUG_FUNC;  // Declare debug()
    VL_UNCOPYABLE(LogicMTask);
};

//######################################################################
// MTask utility classes

// Sort AbstractMTask objects into deterministic order by calling id()
// which is a unique and stable serial number.
class MTaskIdLessThan final {
public:
    MTaskIdLessThan() = default;
    virtual ~MTaskIdLessThan() = default;
    virtual bool operator()(const AbstractMTask* lhsp, const AbstractMTask* rhsp) const {
        return lhsp->id() < rhsp->id();
    }
};

// Information associated with scoreboarding an MTask
class MergeCandidate VL_NOT_FINAL {
private:
    bool m_removedFromSb = false;  // Not on scoreboard, generally ignore
    vluint64_t m_id;  // Serial number for ordering
public:
    // CONSTRUCTORS
    MergeCandidate() {
        static vluint64_t serial = 0;
        ++serial;
        m_id = serial;
    }
    virtual ~MergeCandidate() = default;
    virtual bool mergeWouldCreateCycle() const = 0;
    // METHODS
    bool removedFromSb() const { return m_removedFromSb; }
    void removedFromSb(bool removed) { m_removedFromSb = removed; }
    bool operator<(const MergeCandidate& other) const { return m_id < other.m_id; }
};

// A pair of associated LogicMTask's that are merge candidates for sibling
// contraction
class SiblingMC final : public MergeCandidate {
private:
    LogicMTask* m_ap;
    LogicMTask* m_bp;

public:
    // CONSTRUCTORS
    SiblingMC() = delete;
    SiblingMC(LogicMTask* ap, LogicMTask* bp) {
        // Assign 'ap' and 'bp' in a canonical order, so we can more easily
        // compare pairs of SiblingMCs
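        // (Canonically, m_ap receives whichever mtask has the larger id
        // and m_bp the smaller, as the branch below arranges.)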
        if (ap->id() > bp->id()) {
            m_ap = ap;
            m_bp = bp;
        } else {
            m_ap = bp;
            m_bp = ap;
        }
    }
    virtual ~SiblingMC() = default;
    // METHODS
    LogicMTask* ap() const { return m_ap; }
    LogicMTask* bp() const { return m_bp; }
    bool mergeWouldCreateCycle() const override {
        return (LogicMTask::pathExistsFrom(m_ap, m_bp, nullptr)
                || LogicMTask::pathExistsFrom(m_bp, m_ap, nullptr));
    }
    bool operator<(const SiblingMC& other) const {
        if (m_ap->id() < other.m_ap->id()) return true;
        if (m_ap->id() > other.m_ap->id()) return false;
        return m_bp->id() < other.m_bp->id();
    }
};

// GraphEdge for the MTask graph
class MTaskEdge final : public V3GraphEdge, public MergeCandidate {
public:
    // CONSTRUCTORS
    MTaskEdge(V3Graph* graphp, LogicMTask* fromp, LogicMTask* top, int weight)
        : V3GraphEdge{graphp, fromp, top, weight} {
        fromp->addRelative(GraphWay::FORWARD, top);
        top->addRelative(GraphWay::REVERSE, fromp);
    }
    virtual ~MTaskEdge() override {
        fromMTaskp()->removeRelative(GraphWay::FORWARD, toMTaskp());
        toMTaskp()->removeRelative(GraphWay::REVERSE, fromMTaskp());
    }
    // METHODS
    LogicMTask* furtherMTaskp(GraphWay way) const {
        return dynamic_cast<LogicMTask*>(this->furtherp(way));
    }
    LogicMTask* fromMTaskp() const { return dynamic_cast<LogicMTask*>(fromp()); }
    LogicMTask* toMTaskp() const { return dynamic_cast<LogicMTask*>(top()); }
    virtual bool mergeWouldCreateCycle() const override {
        return LogicMTask::pathExistsFrom(fromMTaskp(), toMTaskp(), this);
    }
    static MTaskEdge* cast(V3GraphEdge* edgep) {
        if (!edgep) return nullptr;
        MTaskEdge* const resultp = dynamic_cast<MTaskEdge*>(edgep);
        UASSERT(resultp, "Failed to cast in MTaskEdge::cast");
        return resultp;
    }
    // Following initial assignment of critical paths, clear this MTaskEdge
    // out of the edge-map for each node and reinsert at a new location
    // with updated critical path.
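    // (Remove-then-add re-keys the relative under its now-correct
    // stepCost + critical path value; see LogicMTask::addRelative().)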
    void resetCriticalPaths() {
        LogicMTask* const fromp = fromMTaskp();
        LogicMTask* const top = toMTaskp();
        fromp->removeRelative(GraphWay::FORWARD, top);
        top->removeRelative(GraphWay::REVERSE, fromp);
        fromp->addRelative(GraphWay::FORWARD, top);
        top->addRelative(GraphWay::REVERSE, fromp);
    }

private:
    VL_UNCOPYABLE(MTaskEdge);
};

//######################################################################
// Vertex utility classes

class OrderByPtrId final {
    PartPtrIdMap m_ids;

public:
    virtual bool operator()(const OrderVarStdVertex* lhsp, const OrderVarStdVertex* rhsp) const {
        const vluint64_t l_id = m_ids.findId(lhsp);
        const vluint64_t r_id = m_ids.findId(rhsp);
        return l_id < r_id;
    }
};

//######################################################################
// PartParallelismEst - Estimate parallelism of graph

class PartParallelismEst final {
    // MEMBERS
    const V3Graph* m_graphp;  // Mtask-containing graph

    // Total cost of evaluating the whole graph.
    // The ratio of m_totalGraphCost to longestCpCost gives us an estimate
    // of the parallelizability of this graph, which is only as good as the
    // guess returned by LogicMTask::cost().
    uint32_t m_totalGraphCost = 0;

    // Cost of the longest critical path, in abstract units (the same units
    // returned by the vertexCost)
    uint32_t m_longestCpCost = 0;

    size_t m_vertexCount = 0;  // Number of vertices calculated
    size_t m_edgeCount = 0;  // Number of edges calculated

public:
    // CONSTRUCTORS
    explicit PartParallelismEst(const V3Graph* graphp)
        : m_graphp{graphp} {}

    // METHODS
    uint32_t totalGraphCost() const { return m_totalGraphCost; }
    uint32_t longestCritPathCost() const { return m_longestCpCost; }
    size_t vertexCount() const { return m_vertexCount; }
    size_t edgeCount() const { return m_edgeCount; }
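    // Total graph cost over critical path cost. Example (illustrative):
    // a graph of total cost 1000 whose longest critical path costs 250
    // has a parallelism factor of 4.0.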
    double parallelismFactor() const {
        return (static_cast<double>(m_totalGraphCost) / m_longestCpCost);
    }
    void traverse() {
        // For each node, record the critical path cost from the start
        // of the graph through the end of the node.
        std::unordered_map<const V3GraphVertex*, uint32_t> critPaths;
        GraphStreamUnordered serialize(m_graphp);
        for (const V3GraphVertex* vertexp; (vertexp = serialize.nextp());) {
            m_vertexCount++;
            uint32_t cpCostToHere = 0;
            for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) {
                ++m_edgeCount;
                // For each upstream item, add its critical path cost to
                // the cost of this edge, to form a new candidate critical
                // path cost to the current node. Whichever is largest is
                // the critical path to reach the start of this node.
                cpCostToHere = std::max(cpCostToHere, critPaths[edgep->fromp()]);
            }
            // Include the cost of the current vertex in the critical
            // path, so it represents the critical path to the end of
            // this vertex.
            cpCostToHere += vertexCost(vertexp);
            critPaths[vertexp] = cpCostToHere;
            m_longestCpCost = std::max(m_longestCpCost, cpCostToHere);
            // Tally the total cost contributed by vertices.
            m_totalGraphCost += vertexCost(vertexp);
        }
    }
    void statsReport(const string& stage) const {
        V3Stats::addStat("MTask graph, " + stage + ", critical path cost", m_longestCpCost);
        V3Stats::addStat("MTask graph, " + stage + ", total graph cost", m_totalGraphCost);
        V3Stats::addStat("MTask graph, " + stage + ", mtask count", m_vertexCount);
        V3Stats::addStat("MTask graph, " + stage + ", edge count", m_edgeCount);
        V3Stats::addStat("MTask graph, " + stage + ", parallelism factor", parallelismFactor());
    }
    void debugReport() const {
        UINFO(0, "    Critical path cost = " << m_longestCpCost << endl);
        UINFO(0, "    Total graph cost = " << m_totalGraphCost << endl);
        UINFO(0, "    MTask vertex count = " << m_vertexCount << endl);
        UINFO(0, "    Edge count = " << m_edgeCount << endl);
        UINFO(0, "    Parallelism factor = " << parallelismFactor() << endl);
    }
    static uint32_t vertexCost(const V3GraphVertex* vertexp) {
        return dynamic_cast<const AbstractMTask*>(vertexp)->cost();
    }

private:
    VL_DEBUG_FUNC;  // Declare debug()
    VL_UNCOPYABLE(PartParallelismEst);
};

//######################################################################

// Look at vertex costs (in one way) to form critical paths for each
// vertex.
static void partInitHalfCriticalPaths(GraphWay way, V3Graph* mtasksp, bool checkOnly) {
    GraphStreamUnordered order(mtasksp, way);
    const GraphWay rev = way.invert();
    for (const V3GraphVertex* vertexp; (vertexp = order.nextp());) {
        const LogicMTask* const mtaskcp = dynamic_cast<const LogicMTask*>(vertexp);
        LogicMTask* const mtaskp = const_cast<LogicMTask*>(mtaskcp);
        uint32_t cpCost = 0;
#if VL_DEBUG
        std::unordered_set<V3GraphVertex*> relatives;
#endif
        for (V3GraphEdge* edgep = vertexp->beginp(rev); edgep; edgep = edgep->nextp(rev)) {
#if VL_DEBUG
            // Run a few asserts on the initial mtask graph,
            // while we're iterating through...
            UASSERT_OBJ(edgep->weight() != 0, mtaskp, "Should be no cut edges in mtasks graph");
            UASSERT_OBJ(relatives.find(edgep->furtherp(rev)) == relatives.end(), mtaskp,
                        "Should be no redundant edges in mtasks graph");
            relatives.insert(edgep->furtherp(rev));
#endif
            LogicMTask* const relativep = dynamic_cast<LogicMTask*>(edgep->furtherp(rev));
            cpCost = std::max(cpCost, (relativep->critPathCost(way)
                                       + static_cast<uint32_t>(relativep->stepCost())));
        }
        if (checkOnly) {
            partCheckCachedScoreVsActual(mtaskp->critPathCost(way), cpCost);
        } else {
            mtaskp->setCritPathCost(way, cpCost);
        }
    }
}

// Look at vertex costs to form critical paths for each vertex.
static void partInitCriticalPaths(V3Graph* mtasksp) {
    partInitHalfCriticalPaths(GraphWay::FORWARD, mtasksp, false);
    partInitHalfCriticalPaths(GraphWay::REVERSE, mtasksp, false);

    // Reset all MTaskEdges so that 'm_edges' will show correct CP numbers.
    // They would have been all zeroes on initial creation of the MTaskEdges.
    for (V3GraphVertex* vxp = mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
        for (V3GraphEdge* edgep = vxp->outBeginp(); edgep; edgep = edgep->outNextp()) {
            MTaskEdge* const mtedgep = dynamic_cast<MTaskEdge*>(edgep);
            mtedgep->resetCriticalPaths();
        }
    }
}

// Do an EXPENSIVE check to make sure that all incremental CP updates have
// gone correctly.
static void partCheckCriticalPaths(V3Graph* mtasksp) {
    partInitHalfCriticalPaths(GraphWay::FORWARD, mtasksp, true);
    partInitHalfCriticalPaths(GraphWay::REVERSE, mtasksp, true);
    for (V3GraphVertex* vxp = mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
        LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(vxp);
        mtaskp->checkRelativesCp(GraphWay::FORWARD);
        mtaskp->checkRelativesCp(GraphWay::REVERSE);
    }
}

// Advance to nextp(way) and delete edge
static V3GraphEdge* partBlastEdgep(GraphWay way, V3GraphEdge* edgep) {
    V3GraphEdge* const nextp = edgep->nextp(way);
    VL_DO_DANGLING(edgep->unlinkDelete(), edgep);
    return nextp;
}

// Merge edges from a LogicMTask.
//
// This code removes 'hasRelative' edges. When this occurs, mark it in need
// of a rescore, in case its score has fallen and we need to move it up
// toward the front of the scoreboard.
//
// Wait, whaaat? Shouldn't the scores only increase as we merge nodes? Well,
// that's almost true. But there is one exception.
//
// Suppose we have A->B, B->C, and A->C.
//
// The A->C edge is a "transitive" edge. It's ineligible to be merged, as
// the merge would create a cycle. We score it on the scoreboard like any
// other edge.
//
// However, our "score" estimate for A->C is bogus, because the forward
// critical path to C and the reverse critical path to A both contain the
// same node (B), so we overestimate the score of A->C. At first this
// doesn't matter, since transitive edges aren't eligible to merge anyway.
//
// Later, suppose the edge contractor decides to merge the B->C edge, with
// B donating all its incoming edges into C, say. (So we reach this
// function.)
//
// With B going away, the A->C edge will no longer be transitive and it
// will become eligible to merge. But if we don't mark it for rescore,
// it'll stay in the scoreboard with its old (overestimated) score. We'll
// merge it too late due to the bogus score. When we finally merge it, we
// fail the assert in the main edge contraction loop which checks that the
// actual score did not fall below the scoreboard's score.
//
// Another way of stating this: this code ensures that scores of
// non-transitive edges only ever increase.
static void partMergeEdgesFrom(V3Graph* mtasksp, LogicMTask* recipientp, LogicMTask* donorp,
                               V3Scoreboard<MergeCandidate, uint32_t>* sbp) {
    for (const auto& way : {GraphWay::FORWARD, GraphWay::REVERSE}) {
        for (V3GraphEdge* edgep = donorp->beginp(way); edgep; edgep = partBlastEdgep(way, edgep)) {
            MTaskEdge* const tedgep = MTaskEdge::cast(edgep);
            if (sbp && !tedgep->removedFromSb()) sbp->removeElem(tedgep);
            // Existing edge; mark it in need of a rescore
            if (recipientp->hasRelative(way, tedgep->furtherMTaskp(way))) {
                if (sbp) {
                    MTaskEdge* const existMTaskEdgep = MTaskEdge::cast(
                        recipientp->findConnectingEdgep(way, tedgep->furtherMTaskp(way)));
                    UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge");
                    if (!existMTaskEdgep->removedFromSb()) {
                        sbp->hintScoreChanged(existMTaskEdgep);
                    }
                }
            } else {
                // No existing edge into *this, make one.
                MTaskEdge* newEdgep;
                if (way == GraphWay::REVERSE) {
                    newEdgep = new MTaskEdge(mtasksp, tedgep->fromMTaskp(), recipientp, 1);
                } else {
                    newEdgep = new MTaskEdge(mtasksp, recipientp, tedgep->toMTaskp(), 1);
                }
                if (sbp) sbp->addElem(newEdgep);
            }
        }
    }
}

//######################################################################
// PartContraction

// Perform edge or sibling contraction on the partition graph
class PartContraction final {
private:
    // TYPES

    // TODO: might get a little more speed by making this a
    // std::unordered_set and defining hash and equal_to functors for the
    // SiblingMC.
    using SibSet = std::set<SiblingMC>;
    using SibpSet = std::unordered_set<const SiblingMC*>;
    using MTask2Sibs = std::unordered_map<const LogicMTask*, SibpSet>;

    // New CP information for mtaskp reflecting an upcoming merge
    struct NewCp {
        uint32_t cp;
        uint32_t propagateCp;
        bool propagate;
    };

    // MEMBERS
    V3Graph* m_mtasksp;  // Mtask graph
    uint32_t m_scoreLimit;  // Sloppy score allowed when picking merges
    uint32_t m_scoreLimitBeforeRescore = 0xffffffff;  // Next score rescore at
    unsigned m_mergesSinceRescore = 0;  // Merges since last rescore
    bool m_slowAsserts;  // Take extra time to validate algorithm
    V3Scoreboard<MergeCandidate, uint32_t> m_sb;  // Scoreboard
    SibSet m_pairs;  // Storage for each SiblingMC
    MTask2Sibs m_mtask2sibs;  // SiblingMC set for each mtask
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
// CONSTRUCTORS
|
|
|
|
|
PartContraction(V3Graph* mtasksp, uint32_t scoreLimit, bool slowAsserts)
|
2020-08-16 15:55:36 +02:00
|
|
|
: m_mtasksp{mtasksp}
|
|
|
|
|
, m_scoreLimit{scoreLimit}
|
|
|
|
|
, m_slowAsserts{slowAsserts}
|
|
|
|
|
, m_sb{&mergeCandidateScore, slowAsserts} {}
|
2018-07-23 02:54:28 +02:00
|
|
|
|
|
|
|
|
// METHODS
|
|
|
|
|
void go() {
|
|
|
|
|
unsigned maxMTasks = v3Global.opt.threadsMaxMTasks();
|
|
|
|
|
if (maxMTasks == 0) { // Unspecified so estimate
|
|
|
|
|
if (v3Global.opt.threads() > 1) {
|
2020-04-15 13:58:34 +02:00
|
|
|
maxMTasks = (PART_DEFAULT_MAX_MTASKS_PER_THREAD * v3Global.opt.threads());
|
2018-07-23 02:54:28 +02:00
|
|
|
} else {
|
|
|
|
|
// Running PartContraction with --threads <= 1 means self-test
|
|
|
|
|
maxMTasks = 500;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// OPTIMIZATION PASS: Edge contraction and sibling contraction.
|
|
|
|
|
// - Score each pair of mtasks which is a candidate to merge.
|
|
|
|
|
// * Each edge defines such a candidate pair
|
|
|
|
|
// * Two mtasks that are prereqs or postreqs of a common third
|
|
|
|
|
// vertex are "siblings", these are also a candidate pair.
|
|
|
|
|
// - Build a list of MergeCandidates, sorted by score.
|
|
|
|
|
// - Merge the best pair.
|
|
|
|
|
// - Incrementally recompute critical paths near the merged mtask.
|
|
|
|
|
|
2020-04-15 13:58:34 +02:00
|
|
|
for (V3GraphVertex* itp = m_mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) {
|
2020-08-15 16:03:34 +02:00
|
|
|
std::unordered_set<const V3GraphVertex*> neighbors;
|
2020-04-15 13:58:34 +02:00
|
|
|
for (V3GraphEdge* edgep = itp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
2018-07-23 02:54:28 +02:00
|
|
|
m_sb.addElem(MTaskEdge::cast(edgep));
|
2021-11-04 00:19:23 +01:00
|
|
|
if (m_slowAsserts) {
|
|
|
|
|
UASSERT_OBJ(neighbors.find(edgep->top()) == neighbors.end(), itp,
|
|
|
|
|
"Redundant edge found in input to PartContraction()");
|
|
|
|
|
}
|
2018-07-23 02:54:28 +02:00
|
|
|
neighbors.insert(edgep->top());
|
|
|
|
|
}
|
|
|
|
|
siblingPairFromRelatives(GraphWay::REVERSE, itp, true);
|
|
|
|
|
siblingPairFromRelatives(GraphWay::FORWARD, itp, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
doRescore(); // Set initial scores in scoreboard
|
|
|
|
|
|
2020-04-04 04:31:54 +02:00
|
|
|
while (true) {
|
2018-07-23 02:54:28 +02:00
|
|
|
// This is the best edge to merge, with the lowest
|
|
|
|
|
// score (shortest local critical path)
|
2021-11-03 22:49:19 +01:00
|
|
|
MergeCandidate* const mergeCanp = const_cast<MergeCandidate*>(m_sb.bestp());
|
2018-07-23 02:54:28 +02:00
|
|
|
if (!mergeCanp) {
|
|
|
|
|
// Scoreboard found no eligible merges. Maybe a rescore
|
|
|
|
|
// will produce some merge-able pairs?
|
|
|
|
|
if (m_sb.needsRescore()) {
|
|
|
|
|
doRescore();
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (m_slowAsserts) {
|
|
|
|
|
UASSERT(!m_sb.needsRescore(mergeCanp),
|
|
|
|
|
"Need-rescore items should not be returned by bestp");
|
|
|
|
|
}
|
2021-11-03 22:49:19 +01:00
|
|
|
const uint32_t cachedScore = m_sb.cachedScore(mergeCanp);
|
|
|
|
|
const uint32_t actualScore = mergeCandidateScore(mergeCanp);
|
2018-07-23 02:54:28 +02:00
|
|
|
|
|
|
|
|
if (actualScore > cachedScore) {
|
|
|
|
|
// Cached score is out-of-date.
|
|
|
|
|
// Mark this elem as in need of a rescore and continue.
|
|
|
|
|
m_sb.hintScoreChanged(mergeCanp);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// ... we'll also confirm that actualScore hasn't shrunk relative
|
|
|
|
|
// to cached score, after the mergeWouldCreateCycle() check.
|
|
|
|
|
|
|
|
|
|
if (actualScore > m_scoreLimit) {
|
|
|
|
|
// Our best option isn't good enough
|
|
|
|
|
if (m_sb.needsRescore()) {
|
|
|
|
|
// Some pairs need a rescore, maybe those will be
|
|
|
|
|
// eligible to merge afterward.
|
|
|
|
|
doRescore();
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
|
|
|
|
// We've exhausted everything below m_scoreLimit; stop.
|
|
|
|
|
|
|
|
|
|
// Except, if we have too many mtasks, raise the score
|
|
|
|
|
// limit and keep going...
|
|
|
|
|
unsigned mtaskCount = 0;
|
2020-04-15 13:58:34 +02:00
|
|
|
for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp;
|
|
|
|
|
vxp = vxp->verticesNextp()) {
|
2018-07-23 02:54:28 +02:00
|
|
|
++mtaskCount;
|
|
|
|
|
}
|
|
|
|
|
if (mtaskCount > maxMTasks) {
|
|
|
|
|
uint32_t oldLimit = m_scoreLimit;
|
|
|
|
|
m_scoreLimit = (m_scoreLimit * 120) / 100;
|
2018-11-17 02:48:57 +01:00
|
|
|
v3Global.rootp()->fileline()->v3warn(
|
2020-04-15 13:58:34 +02:00
|
|
|
UNOPTTHREADS, "Thread scheduler is unable to provide requested "
|
2020-04-29 03:15:27 +02:00
|
|
|
"parallelism; suggest asking for fewer threads.");
|
2020-04-15 13:58:34 +02:00
|
|
|
UINFO(1, "Critical path limit was=" << oldLimit << " now=" << m_scoreLimit
|
|
|
|
|
<< endl);
|
2018-07-23 02:54:28 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// Really stop
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (actualScore > m_scoreLimitBeforeRescore) {
|
|
|
|
|
// Time to rescore, that will result in a higher
|
|
|
|
|
// scoreLimitBeforeRescore, and possibly lower-scoring
|
|
|
|
|
// elements returned from bestp().
|
|
|
|
|
doRescore();
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Avoid merging any edge that would create a cycle.
|
|
|
|
|
//
|
|
|
|
|
// For example suppose we begin with vertices A, B, C and edges
|
|
|
|
|
// A->B, B->C, A->C.
|
|
|
|
|
//
|
|
|
|
|
// Suppose we want to merge A->C into a single vertex.
|
|
|
|
|
// New edges would be AC->B and B->AC which is not a DAG.
|
|
|
|
|
// Do not allow this.
|
|
|
|
|
if (mergeCanp->mergeWouldCreateCycle()) {
|
|
|
|
|
// Remove this edge from scoreboard so we don't keep
|
|
|
|
|
// reconsidering it on every loop.
|
|
|
|
|
m_sb.removeElem(mergeCanp);
|
|
|
|
|
mergeCanp->removedFromSb(true);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
partCheckCachedScoreVsActual(cachedScore, actualScore);
|
|
|
|
|
|
|
|
|
|
// Finally there's no cycle risk, no need to rescore, we're
|
|
|
|
|
// within m_scoreLimit and m_scoreLimitBeforeRescore.
|
|
|
|
|
// This is the edge to merge.
|
|
|
|
|
//
|
|
|
|
|
// Bookkeeping: if this is the first edge we'll merge since
|
|
|
|
|
// the last rescore, compute the new m_scoreLimitBeforeRescore
|
|
|
|
|
// to be somewhat higher than this edge's score.
|
|
|
|
|
if (m_mergesSinceRescore == 0) {
|
|
|
|
|
#if PART_STEPPED_RESCORELIMIT
|
|
|
|
|
m_scoreLimitBeforeRescore = (actualScore * 105) / 100;
|
|
|
|
|
#else
|
|
|
|
|
m_scoreLimitBeforeRescore = actualScore;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// This print can serve as a progress indicator, as it
|
|
|
|
|
// increases from low numbers up toward cpLimit. It may be
|
|
|
|
|
// helpful to see progress during slow partitions. Maybe
|
|
|
|
|
// display something by default even?
|
2020-04-15 13:58:34 +02:00
|
|
|
UINFO(6, "New scoreLimitBeforeRescore: " << m_scoreLimitBeforeRescore << endl);
|
2018-07-23 02:54:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Finally merge this candidate.
|
|
|
|
|
contract(mergeCanp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
2020-04-15 13:58:34 +02:00
|
|
|
NewCp newCp(GraphWay way, LogicMTask* mtaskp, LogicMTask* otherp, MTaskEdge* mergeEdgep) {
|
2018-07-23 02:54:28 +02:00
|
|
|
// Return new wayward-CP for mtaskp reflecting its upcoming merge
|
|
|
|
|
// with otherp. Set 'result.propagate' if mtaskp's wayward
|
|
|
|
|
// relatives will see a new wayward CP from this merge.
|
|
|
|
|
uint32_t newCp;
|
|
|
|
|
if (mergeEdgep) {
|
|
|
|
|
if (mtaskp == mergeEdgep->furtherp(way)) {
|
|
|
|
|
newCp = std::max(otherp->critPathCost(way),
|
|
|
|
|
mtaskp->critPathCostWithout(way, mergeEdgep));
|
|
|
|
|
} else {
|
|
|
|
|
newCp = std::max(mtaskp->critPathCost(way),
|
|
|
|
|
otherp->critPathCostWithout(way, mergeEdgep));
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
newCp = std::max(otherp->critPathCost(way), mtaskp->critPathCost(way));
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-03 22:49:19 +01:00
|
|
|
const uint32_t origRelativesCp = mtaskp->critPathCost(way) + mtaskp->stepCost();
|
|
|
|
|
const uint32_t newRelativesCp
|
|
|
|
|
= newCp + LogicMTask::stepCost(mtaskp->cost() + otherp->cost());
|
2018-07-23 02:54:28 +02:00
|
|
|
|
|
|
|
|
NewCp result;
|
|
|
|
|
result.cp = newCp;
|
|
|
|
|
result.propagate = (newRelativesCp > origRelativesCp);
|
|
|
|
|
result.propagateCp = newRelativesCp;
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
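    // Remove every sibling merge candidate that references mtaskp from the
    // scoreboard and from the bookkeeping structures (m_mtask2sibs, m_pairs).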
    void removeSiblingMCsWith(LogicMTask* mtaskp) {
        for (SibpSet::iterator it = m_mtask2sibs[mtaskp].begin(); it != m_mtask2sibs[mtaskp].end();
             ++it) {
            const SiblingMC* const pairp = *it;
            if (!pairp->removedFromSb()) m_sb.removeElem(pairp);
            LogicMTask* const otherp = (pairp->bp() == mtaskp) ? pairp->ap() : pairp->bp();
            size_t erased = m_mtask2sibs[otherp].erase(pairp);
            UASSERT_OBJ(erased > 0, otherp, "Expected existing mtask");
            erased = m_pairs.erase(*pairp);
            UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask");
        }
        const size_t erased = m_mtask2sibs.erase(mtaskp);
        UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask");
    }

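    // Merge the given candidate (either an MTaskEdge or a SiblingMC) into a
    // single mtask, then update critical paths, the scoreboard, and the
    // sibling candidate sets to reflect the merge.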
    void contract(MergeCandidate* mergeCanp) {
        LogicMTask* top = nullptr;
        LogicMTask* fromp = nullptr;
        MTaskEdge* mergeEdgep = dynamic_cast<MTaskEdge*>(mergeCanp);
        SiblingMC* mergeSibsp = nullptr;
        if (mergeEdgep) {
            top = dynamic_cast<LogicMTask*>(mergeEdgep->top());
            fromp = dynamic_cast<LogicMTask*>(mergeEdgep->fromp());
        } else {
            mergeSibsp = dynamic_cast<SiblingMC*>(mergeCanp);
            UASSERT(mergeSibsp, "Failed to cast mergeCanp to either MTaskEdge or SiblingMC");
            top = mergeSibsp->ap();
            fromp = mergeSibsp->bp();
        }

        // Merge the smaller mtask into the larger mtask. If one of them
        // is much larger, this will save time in partMergeEdgesFrom().
        // Assume the more costly mtask has more edges.
        //
        // [TODO: now that we have edge maps, we could count the edges
        //  exactly without a linear search.]
        LogicMTask* recipientp;
        LogicMTask* donorp;
        if (fromp->cost() > top->cost()) {
            recipientp = fromp;
            donorp = top;
        } else {
            donorp = fromp;
            recipientp = top;
        }
        VL_DANGLING(fromp);
        VL_DANGLING(top);  // Use donorp and recipientp now instead

        // Recursively update forward and reverse CP numbers.
        //
        // Doing this before merging the mtasks lets us often avoid
        // recursing through either incoming or outgoing edges on one or
        // both mtasks.
        //
        // These 'NewCp' objects carry a bit indicating whether we must
        // propagate CP for each of the four cases:
        const NewCp recipientNewCpFwd = newCp(GraphWay::FORWARD, recipientp, donorp, mergeEdgep);
        const NewCp donorNewCpFwd = newCp(GraphWay::FORWARD, donorp, recipientp, mergeEdgep);
        const NewCp recipientNewCpRev = newCp(GraphWay::REVERSE, recipientp, donorp, mergeEdgep);
        const NewCp donorNewCpRev = newCp(GraphWay::REVERSE, donorp, recipientp, mergeEdgep);

        if (mergeEdgep) {
            // Remove and free the connecting edge. Must do this before
            // propagating CP's below.
            m_sb.removeElem(mergeCanp);
            VL_DO_CLEAR(mergeEdgep->unlinkDelete(), mergeEdgep = nullptr);
        }

        // This also updates cost and stepCost on recipientp
        recipientp->moveAllVerticesFrom(donorp);

        UINFO(9, "recipient = " << recipientp->id() << ", donor = " << donorp->id()
                                << ", mergeEdgep = " << mergeEdgep << "\n"
                                << "recipientNewCpFwd = " << recipientNewCpFwd.cp
                                << (recipientNewCpFwd.propagate ? " true " : " false ")
                                << recipientNewCpFwd.propagateCp << "\n"
                                << "donorNewCpFwd = " << donorNewCpFwd.cp
                                << (donorNewCpFwd.propagate ? " true " : " false ")
                                << donorNewCpFwd.propagateCp << endl);

        LogicMTask::CpCostAccessor cpAccess;
        PartPropagateCp<LogicMTask::CpCostAccessor> forwardPropagator(m_mtasksp, GraphWay::FORWARD,
                                                                      &cpAccess, m_slowAsserts);
        PartPropagateCp<LogicMTask::CpCostAccessor> reversePropagator(m_mtasksp, GraphWay::REVERSE,
                                                                      &cpAccess, m_slowAsserts);

        recipientp->setCritPathCost(GraphWay::FORWARD, recipientNewCpFwd.cp);
        if (recipientNewCpFwd.propagate) {
            forwardPropagator.cpHasIncreased(recipientp, recipientNewCpFwd.propagateCp);
        }
        recipientp->setCritPathCost(GraphWay::REVERSE, recipientNewCpRev.cp);
        if (recipientNewCpRev.propagate) {
            reversePropagator.cpHasIncreased(recipientp, recipientNewCpRev.propagateCp);
        }
        if (donorNewCpFwd.propagate) {
            forwardPropagator.cpHasIncreased(donorp, donorNewCpFwd.propagateCp);
        }
        if (donorNewCpRev.propagate) {
            reversePropagator.cpHasIncreased(donorp, donorNewCpRev.propagateCp);
        }
        forwardPropagator.go();
        reversePropagator.go();

        // Remove all SiblingMCs that include donorp. This includes the one
        // we're merging, if we're merging a SiblingMC.
        removeSiblingMCsWith(donorp);
        // Remove all SiblingMCs that include recipientp also, so we can't
        // get huge numbers of SiblingMCs. We'll recreate them below, up
        // to a bounded number.
        removeSiblingMCsWith(recipientp);

        // Merge all edges
        partMergeEdgesFrom(m_mtasksp, recipientp, donorp, &m_sb);

        // Delete the donorp mtask from the graph
        VL_DO_CLEAR(donorp->unlinkDelete(m_mtasksp), donorp = nullptr);

        m_mergesSinceRescore++;

        // Do an expensive check, confirm we haven't botched the CP
        // updates.
        if (m_slowAsserts) partCheckCriticalPaths(m_mtasksp);

        // Finally, make new sibling pairs as needed:
        //  - prereqs and postreqs of recipientp
        //  - prereqs of recipientp's postreqs
        //  - postreqs of recipientp's prereqs
        // Note that this depends on the updated critical paths (above).
        siblingPairFromRelatives(GraphWay::REVERSE, recipientp, true);
        siblingPairFromRelatives(GraphWay::FORWARD, recipientp, true);
        unsigned edges = 0;
        for (V3GraphEdge* edgep = recipientp->outBeginp(); edgep; edgep = edgep->outNextp()) {
            LogicMTask* const postreqp = dynamic_cast<LogicMTask*>(edgep->top());
            siblingPairFromRelatives(GraphWay::REVERSE, postreqp, false);
            edges++;
            if (edges > PART_SIBLING_EDGE_LIMIT) break;
        }
        edges = 0;
        for (V3GraphEdge* edgep = recipientp->inBeginp(); edgep; edgep = edgep->inNextp()) {
            LogicMTask* const prereqp = dynamic_cast<LogicMTask*>(edgep->fromp());
            siblingPairFromRelatives(GraphWay::FORWARD, prereqp, false);
            edges++;
            if (edges > PART_SIBLING_EDGE_LIMIT) break;
        }
    }

    void doRescore() {
        // During rescore, we know the graph isn't changing, so allow
        // the critPathCost*Without() routines to cache some data in
        // each LogicMTask. This is just an optimization; things should
        // behave identically without the caching (just slower).

        m_sb.rescore();
        UINFO(6, "Did rescore. Merges since previous = " << m_mergesSinceRescore << endl);

        m_mergesSinceRescore = 0;
        m_scoreLimitBeforeRescore = 0xffffffff;
    }

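    // Dispatch scoring: lower scores merge first. The '1 +' below biases
    // ties toward sibling merges; for example, an edge merge and a sibling
    // merge whose merged nodes would both carry a local critical path of
    // 100 score 101 and 100 respectively, so the sibling merge wins.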
    static uint32_t mergeCandidateScore(const MergeCandidate* pairp) {
        if (const MTaskEdge* const edgep = dynamic_cast<const MTaskEdge*>(pairp)) {
            // The '1 +' favors merging a SiblingMC over an otherwise-
            // equal-scoring MTaskEdge. The comment on selfTest() talks
            // about why.
            return 1 + edgeScore(edgep);
        }
        if (const SiblingMC* const sibsp = dynamic_cast<const SiblingMC*>(pairp)) {
            return siblingScore(sibsp);
        }
        v3fatalSrc("Failed to cast pairp to either MTaskEdge or SiblingMC in mergeCandidateScore");
        return 0;
    }

    static uint32_t siblingScore(const SiblingMC* sibsp) {
        const LogicMTask* const ap = sibsp->ap();
        const LogicMTask* const bp = sibsp->bp();
        const uint32_t mergedCpCostFwd
            = std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD));
        const uint32_t mergedCpCostRev
            = std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE));
        return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost());
    }

    static uint32_t edgeScore(const V3GraphEdge* edgep) {
        // Score this edge. Lower is better. The score is the new local CP
        // length if we merge these mtasks. ("Local" means the longest
        // critical path running through the merged node.)
        LogicMTask* const top = dynamic_cast<LogicMTask*>(edgep->top());
        LogicMTask* const fromp = dynamic_cast<LogicMTask*>(edgep->fromp());
        const uint32_t mergedCpCostFwd
            = std::max(fromp->critPathCost(GraphWay::FORWARD),
                       top->critPathCostWithout(GraphWay::FORWARD, edgep));
        const uint32_t mergedCpCostRev
            = std::max(fromp->critPathCostWithout(GraphWay::REVERSE, edgep),
                       top->critPathCost(GraphWay::REVERSE));
        return mergedCpCostRev + mergedCpCostFwd
               + LogicMTask::stepCost(fromp->cost() + top->cost());
    }

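    // Create and register a sibling merge candidate for (ap, bp), unless an
    // identical pair already exists.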
    void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) {
        SiblingMC newSibs(ap, bp);
        std::pair<SibSet::iterator, bool> insertResult = m_pairs.insert(newSibs);
        if (insertResult.second) {
            const SiblingMC* const newSibsp = &(*insertResult.first);
            m_mtask2sibs[ap].insert(newSibsp);
            m_mtask2sibs[bp].insert(newSibsp);
            m_sb.addElem(newSibsp);
        } else if (m_slowAsserts) {
            // It's fine if we already have this SiblingMC, we may have
            // created it earlier. Just confirm that we have associated data.
            UASSERT_OBJ(m_mtask2sibs.find(ap) != m_mtask2sibs.end(), ap, "Sibling not found");
            UASSERT_OBJ(m_mtask2sibs.find(bp) != m_mtask2sibs.end(), bp, "Sibling not found");
            bool found = false;
            for (SibpSet::iterator it = m_mtask2sibs[ap].begin(); it != m_mtask2sibs[ap].end();
                 ++it) {
                const SiblingMC* const sibsp = *it;
                UASSERT_OBJ(!(!sibsp->removedFromSb() && !m_sb.contains(sibsp)), ap,
                            "One sibling must be the one we collided with");
                if ((sibsp->ap() == ap && sibsp->bp() == bp)
                    || (sibsp->bp() == ap && sibsp->ap() == bp))
                    found = true;
            }
            UASSERT_OBJ(found, ap, "Sibling not found");
        }
    }

    static const GraphWay* s_shortestWaywardCpInclusiveWay;
    static int shortestWaywardCpInclusive(const void* vap, const void* vbp) {
        const GraphWay* const wp = s_shortestWaywardCpInclusiveWay;
        const LogicMTask* const ap = *reinterpret_cast<const LogicMTask* const*>(vap);
        const LogicMTask* const bp = *reinterpret_cast<const LogicMTask* const*>(vbp);
        const uint32_t aCp = ap->critPathCost(*wp) + ap->stepCost();
        const uint32_t bCp = bp->critPathCost(*wp) + bp->stepCost();
        if (aCp < bCp) return -1;
        if (aCp > bCp) return 1;
        if (ap->id() < bp->id()) return -1;
        if (ap->id() > bp->id()) return 1;
        return 0;
    }

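    // Gather up to PART_SIBLING_EDGE_LIMIT of mtaskp's relatives in the
    // given direction, sort them by wayward critical path (shortest first),
    // and pair up adjacent entries as sibling merge candidates.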
    void siblingPairFromRelatives(GraphWay way, V3GraphVertex* mtaskp, bool exhaustive) {
        std::vector<LogicMTask*> shortestPrereqs;

        for (V3GraphEdge* edgep = mtaskp->beginp(way); edgep; edgep = edgep->nextp(way)) {
            LogicMTask* const prereqp = dynamic_cast<LogicMTask*>(edgep->furtherp(way));
            shortestPrereqs.push_back(prereqp);
            // Prevent nodes with huge numbers of edges from massively
            // slowing down the partitioner:
            if (shortestPrereqs.size() > PART_SIBLING_EDGE_LIMIT) break;
        }

        if (shortestPrereqs.empty()) return;

        // qsort_r would be nice here, but it isn't portable
        s_shortestWaywardCpInclusiveWay = &way;
        qsort(&shortestPrereqs[0], shortestPrereqs.size(), sizeof(LogicMTask*),
              &shortestWaywardCpInclusive);

        // Don't make all NxN/2 possible pairs of prereqs, that's a lot
        // to cart around. Just make a few pairs.
        auto it = shortestPrereqs.cbegin();
        for (unsigned i = 0; exhaustive || (i < 3); ++i) {
            if (it == shortestPrereqs.cend()) break;
            LogicMTask* const ap = *(it++);
            if (it == shortestPrereqs.cend()) break;
            LogicMTask* const bp = *(it++);
            makeSiblingMC(ap, bp);
        }
    }

    // SELF TESTS

    // This is a performance test, its intent is to demonstrate that the
    // partitioner doesn't run on this chain in N^2 time or worse. Overall
    // runtime should be N*log(N) for a chain-shaped graph.
    //
    static void selfTestChain() {
        const vluint64_t usecsSmall = partitionChainUsecs(5);
        const vluint64_t usecsLarge = partitionChainUsecs(500);
        // Large input is 100x bigger than small input.
        // Its runtime should be about 10x longer -- not about 10000x longer
        // or worse, which would suggest N^2 scaling or worse.
        UASSERT(usecsLarge < (usecsSmall * 1500),
                "selfTestChain() took longer than expected. Small input runtime = "
                    << usecsSmall << ", large input runtime = " << usecsLarge);
    }

    static vluint64_t partitionChainUsecs(unsigned chain_len) {
        // NOTE: To get a dot file run with --debugi-V3Partition 4 or more.
        vluint64_t startUsecs = V3Os::timeUsecs();
        V3Graph mtasks;
        LogicMTask* lastp = nullptr;
        for (unsigned i = 0; i < chain_len; ++i) {
            LogicMTask* const mtp = new LogicMTask(&mtasks, nullptr);
            mtp->setCost(1);
            if (lastp) new MTaskEdge(&mtasks, lastp, mtp, 1);
            lastp = mtp;
        }
        partInitCriticalPaths(&mtasks);

        // Since slowAsserts mode is *expected* to cause N^2 runtime, and the
        // intent of this test is to demonstrate better-than-N^2 runtime, disable
        // slowAsserts.
        PartContraction ec(&mtasks,
                           // Any CP limit >chain_len should work:
                           chain_len * 2, false /* slowAsserts */);
        ec.go();

        PartParallelismEst check(&mtasks);
        check.traverse();

        const vluint64_t endUsecs = V3Os::timeUsecs();
        const vluint64_t elapsedUsecs = endUsecs - startUsecs;

        if (debug() >= 6) {
            UINFO(0, "Chain self test stats:\n");
            check.debugReport();
            UINFO(0, "Elapsed usecs = " << elapsedUsecs << "\n");
        }

        // All vertices should merge into one
        UASSERT_SELFTEST(size_t, check.vertexCount(), 1);
        return elapsedUsecs;
    }

    // This test defends against a particular failure mode that the
    // partitioner exhibited during development:
    //
    // At one time, the partitioner consistently favored edge-merges over
    // equal-scoring sibling merges. Every edge and sibling merge in this
    // test starts out with an equal score. If you only do edge-merges, all
    // possible merges will continue to have equal score as the center node
    // grows and grows. Soon the critical path budget is exhausted by a
    // large center node, and we still have many small leaf nodes -- it's
    // literally the worst partition possible.
    //
    // Now, instead, the partitioner gives slight favoritism to sibling
    // merges in the event that scores are tied. This is better for the
    // test and also real designs.
    static void selfTestX() {
        // NOTE: To get a dot file run with --debugi-V3Partition 4 or more.
        V3Graph mtasks;
        LogicMTask* const centerp = new LogicMTask(&mtasks, nullptr);
        centerp->setCost(1);
        unsigned i;
        for (i = 0; i < 50; ++i) {
            LogicMTask* const mtp = new LogicMTask(&mtasks, nullptr);
            mtp->setCost(1);
            // Edge from every input -> centerp
            new MTaskEdge(&mtasks, mtp, centerp, 1);
        }
        for (i = 0; i < 50; ++i) {
            LogicMTask* const mtp = new LogicMTask(&mtasks, nullptr);
            mtp->setCost(1);
            // Edge from centerp -> every output
            new MTaskEdge(&mtasks, centerp, mtp, 1);
        }

        partInitCriticalPaths(&mtasks);
        PartContraction(&mtasks, 20, true).go();

        PartParallelismEst check(&mtasks);
        check.traverse();

        // Checking exact values here is maybe overly precise. What we're
        // mostly looking for is a healthy reduction in the number of
        // mtasks.
        if (debug() >= 5) {
            UINFO(0, "X self test stats:\n");
            check.debugReport();
        }
        UASSERT_SELFTEST(uint32_t, check.longestCritPathCost(), 19);
        UASSERT_SELFTEST(uint32_t, check.totalGraphCost(), 101);
        UASSERT_SELFTEST(uint32_t, check.vertexCount(), 14);
        UASSERT_SELFTEST(uint32_t, check.edgeCount(), 13);
    }

public:
    static void selfTest() {
        selfTestX();
        selfTestChain();
    }

private:
    VL_DEBUG_FUNC;  // Declare debug()
    VL_UNCOPYABLE(PartContraction);
};

const GraphWay* PartContraction::s_shortestWaywardCpInclusiveWay = nullptr;

//######################################################################
// DpiImportCallVisitor

// Scan node, indicate whether it contains a call to a DPI imported
// routine.
class DpiImportCallVisitor final : public AstNVisitor {
private:
    bool m_hasDpiHazard = false;  // Found a DPI import call.
    bool m_tracingCall = false;  // Iterating into a CCall to a CFunc
    // METHODS
    VL_DEBUG_FUNC;

    virtual void visit(AstCFunc* nodep) override {
        if (!m_tracingCall) return;
        m_tracingCall = false;
        if (nodep->dpiImportWrapper()) {
            if (nodep->pure() ? !v3Global.opt.threadsDpiPure()
                              : !v3Global.opt.threadsDpiUnpure()) {
                m_hasDpiHazard = true;
            }
        }
        iterateChildren(nodep);
    }
    virtual void visit(AstNodeCCall* nodep) override {
        iterateChildren(nodep);
        // Enter the function and trace it
        m_tracingCall = true;
        iterate(nodep->funcp());
    }
    virtual void visit(AstNode* nodep) override { iterateChildren(nodep); }

public:
    // CONSTRUCTORS
    explicit DpiImportCallVisitor(AstNode* nodep) { iterate(nodep); }
    bool hasDpiHazard() const { return m_hasDpiHazard; }
    virtual ~DpiImportCallVisitor() override = default;

private:
    VL_UNCOPYABLE(DpiImportCallVisitor);
};

//######################################################################
// PartFixDataHazards

// Fix data hazards in the partition graph.
//
// The fine-grained graph from V3Order may contain data hazards which are
// not a problem for serial mode, but which would be a problem in parallel
// mode.
//
// There are basically two classes: unordered pairs of writes, and
// unordered write-read pairs. We fix both here, with a combination of
// MTask-merges and new edges to ensure no such unordered pairs remain.
//
// ABOUT UNORDERED WRITE-WRITE PAIRS
//
//   The V3Order dependency graph treats these as unordered events:
//
//     a) sig[15:8] = stuff;
//        ...
//     b) sig[7:0] = other_stuff;
//
//   Seems OK, right? They are writes to disjoint bits of the same
//   signal. They can run in either order, in serial mode, and the result
//   will be the same.
//
//   However, the resulting C code for each of these isn't a pure write,
//   it's actually an R-M-W sequence:
//
//     a) sig = (sig & 0xff) | (0xff00 & (stuff << 8));
//        ...
//     b) sig = (sig & 0xff00) | (0xff & other_stuff);
//
//   In serial mode, order doesn't matter so long as these run serially.
//   In parallel mode, we must serialize these RMW's to avoid a race.
//
//   We don't actually check here if each write would involve an R-M-W, we
//   just assume that it would. If this routine ever causes a drastic
//   increase in critical path, it could be optimized to make a better
//   prediction (with all the risk that word implies!) about whether a
//   given write is likely to turn into an R-M-W.
//
// ABOUT UNORDERED WRITE-READ PAIRS
//
//   If we don't put unordered write-read pairs into some order at verilation
//   time, we risk a runtime race.
//
//   How do such unordered writer/reader pairs happen? Here's a partial list
//   of scenarios:
//
//   Case 1: Circular logic
//
//     If the design has circular logic, V3Order has by now generated some
//     dependency cycles, and also cut some of the edges to make it
//     acyclic.
//
//     For serial mode, that was fine. We can break logic circles at an
//     arbitrary point. At runtime, we'll repeat the _eval() until no
//     changes are detected, which papers over the discarded dependency.
//
//     For parallel mode, this situation can lead to unordered reads and
//     writes of the same variable, causing a data race. For example if the
//     original code is this:
//
//       assign b = b | a << 2;
//       assign out = b;
//
//     ... there's originally a dependency edge which records that 'b'
//     depends on the first assign. V3Order may cut this edge, making the
//     statements unordered. In serial mode that's fine, they can run in
//     either order. In parallel mode it's a reader/writer race.
//
//   Case 2: Race Condition in Verilog Sources
//
//     If the input has races, eg. blocking assignments in always blocks
//     that share variables, the graph at this point will contain unordered
//     writes and reads (or unordered write-write pairs) reflecting that.
//
//   Case 3: Interesting V3Order Behavior
//
//     There's code in V3Order that explicitly avoids making a dependency
//     edge from a clock-gater signal to the logic node that produces the
//     clock signal. This leads to unordered reader/writer pairs in
//     parallel mode.
//
class PartFixDataHazards final {
private:
    // TYPES
    using LogicMTaskSet = std::set<LogicMTask*, MTaskIdLessThan>;
    using TasksByRank = std::map<uint32_t /*rank*/, LogicMTaskSet>;
    using OvvSet = std::set<const OrderVarStdVertex*, OrderByPtrId&>;
    using Olv2MTaskMap = std::unordered_map<const OrderLogicVertex*, LogicMTask*>;

    // MEMBERS
    V3Graph* m_mtasksp;  // Mtask graph
    Olv2MTaskMap m_olv2mtask;  // Map OrderLogicVertex to the LogicMTask that wraps it
    unsigned m_mergesDone = 0;  // Number of MTasks merged. For stats only.

public:
    // CONSTRUCTORs
    explicit PartFixDataHazards(V3Graph* mtasksp)
        : m_mtasksp{mtasksp} {}
    // METHODS
private:
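    // Collect all mtasks that write or read the variable *ovvIt, grouped
    // by rank, into *tasksByRankp.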
    void findAdjacentTasks(OvvSet::iterator ovvIt, TasksByRank* tasksByRankp) {
        // Find all writer tasks for this variable, group by rank.
        for (V3GraphEdge* edgep = (*ovvIt)->inBeginp(); edgep; edgep = edgep->inNextp()) {
            OrderLogicVertex* const logicp = dynamic_cast<OrderLogicVertex*>(edgep->fromp());
            if (!logicp) continue;
            if (logicp->domainp()->hasInitial() || logicp->domainp()->hasSettle()) continue;
            LogicMTask* const writerMtaskp = m_olv2mtask.at(logicp);
            (*tasksByRankp)[writerMtaskp->rank()].insert(writerMtaskp);
        }
        // Find all reader tasks for this variable, group by rank.
        // (Out-edges of the variable vertex lead *to* the readers, so
        // follow top(), not fromp().)
        for (V3GraphEdge* edgep = (*ovvIt)->outBeginp(); edgep; edgep = edgep->outNextp()) {
            OrderLogicVertex* const logicp = dynamic_cast<OrderLogicVertex*>(edgep->top());
            if (!logicp) continue;
            if (logicp->domainp()->hasInitial() || logicp->domainp()->hasSettle()) continue;
            LogicMTask* const readerMtaskp = m_olv2mtask.at(logicp);
            (*tasksByRankp)[readerMtaskp->rank()].insert(readerMtaskp);
        }
    }
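    // Within each rank, merge all mtasks in that rank's set into a single
    // mtask, then chain the surviving per-rank mtasks together with new
    // edges so they execute in serial rank order.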
    void mergeSameRankTasks(TasksByRank* tasksByRankp) {
        LogicMTask* lastMergedp = nullptr;
        for (TasksByRank::iterator rankIt = tasksByRankp->begin(); rankIt != tasksByRankp->end();
             ++rankIt) {
            // Find the largest node at this rank, merge into it. (If we
            // happen to find a huge node, this saves time in
            // partMergeEdgesFrom() versus merging into an arbitrary node.)
            LogicMTask* mergedp = nullptr;
            for (LogicMTaskSet::iterator it = rankIt->second.begin(); it != rankIt->second.end();
                 ++it) {
                LogicMTask* const mtaskp = *it;
                if (mergedp) {
                    if (mergedp->cost() < mtaskp->cost()) mergedp = mtaskp;
                } else {
                    mergedp = mtaskp;
                }
            }
            rankIt->second.erase(mergedp);

            while (!rankIt->second.empty()) {
                const auto begin = rankIt->second.cbegin();
                LogicMTask* const donorp = *begin;
                UASSERT_OBJ(donorp != mergedp, donorp, "Donor can't be merged edge");
                rankIt->second.erase(begin);
                // Merge donorp into mergedp.
                // Fix up the map, so donor's OLVs map to mergedp
                for (LogicMTask::VxList::const_iterator tmvit = donorp->vertexListp()->begin();
                     tmvit != donorp->vertexListp()->end(); ++tmvit) {
                    MTaskMoveVertex* const tmvp = *tmvit;
                    OrderLogicVertex* const logicp = tmvp->logicp();
                    if (logicp) m_olv2mtask[logicp] = mergedp;
                }
                // Move all vertices from donorp to mergedp
                mergedp->moveAllVerticesFrom(donorp);
                // Move edges from donorp to mergedp
                partMergeEdgesFrom(m_mtasksp, mergedp, donorp, nullptr);
                // Remove donorp from the graph
                VL_DO_DANGLING(donorp->unlinkDelete(m_mtasksp), donorp);
                m_mergesDone++;
            }

            if (lastMergedp) {
                UASSERT_OBJ(lastMergedp->rank() < mergedp->rank(), mergedp,
                            "Merging must be on lower rank");
                if (!lastMergedp->hasRelative(GraphWay::FORWARD, mergedp)) {
                    new MTaskEdge(m_mtasksp, lastMergedp, mergedp, 1);
                }
            }
            lastMergedp = mergedp;
        }
    }
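    // Return true if any logic in this mtask calls a DPI-imported routine
    // that must be serialized per the threadsDpiPure()/threadsDpiUnpure()
    // settings.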
    bool hasDpiHazard(LogicMTask* mtaskp) {
        for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin();
             it != mtaskp->vertexListp()->end(); ++it) {
            if (!(*it)->logicp()) continue;
            AstNode* const nodep = (*it)->logicp()->nodep();
            // NOTE: We don't handle DPI exports. If testbench code calls a
            // DPI-exported function at any time during eval() we may have
            // a data hazard. (Likewise in non-threaded mode if an export
            // messes with an ordered variable we're broken.)

            // Find all calls to DPI-imported functions, we can put those
            // into a serial order at least. That should solve the most
            // likely DPI-related data hazards.
            if (DpiImportCallVisitor(nodep).hasDpiHazard()) {  //
                return true;
            }
        }
        return false;
    }

public:
    void go() {
        vluint64_t startUsecs = 0;
        if (debug() >= 3) startUsecs = V3Os::timeUsecs();

        // Build an OLV->mtask map and a set of OVVs
        OrderByPtrId ovvOrder;
        OvvSet ovvSet(ovvOrder);
        // OVV's which wrap systemC vars will be handled slightly specially
        OvvSet ovvSetSystemC(ovvOrder);

        for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
            LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(vxp);
            // Should be only one MTaskMoveVertex in each mtask at this
            // stage, but whatever, write it as a loop:
            for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin();
                 it != mtaskp->vertexListp()->end(); ++it) {
                MTaskMoveVertex* const tmvp = *it;
                if (OrderLogicVertex* const logicp = tmvp->logicp()) {
                    m_olv2mtask[logicp] = mtaskp;
                    // Look at downstream vars.
                    for (V3GraphEdge* edgep = logicp->outBeginp(); edgep;
                         edgep = edgep->outNextp()) {
                        // Only consider OrderVarStdVertex which reflects
                        // an actual lvalue assignment; the others do not.
                        OrderVarStdVertex* const ovvp
                            = dynamic_cast<OrderVarStdVertex*>(edgep->top());
                        if (!ovvp) continue;
                        if (ovvp->varScp()->varp()->isSc()) {
                            ovvSetSystemC.insert(ovvp);
                        } else {
                            ovvSet.insert(ovvp);
                        }
                    }
                }
            }
        }

        // Rank the graph.
        // DGS is faster than V3GraphAlg's recursive rank, in the worst
        // cases where the recursive rank must pass through the same node
        // many times. (We saw 22s for DGS vs. 500s for recursive rank on
        // one large design.)
        {
            GraphStreamUnordered serialize(m_mtasksp);
            const V3GraphVertex* vertexp;
            while ((vertexp = serialize.nextp())) {
                uint32_t rank = 0;
                for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) {
                    rank = std::max(edgep->fromp()->rank() + 1, rank);
                }
                const_cast<V3GraphVertex*>(vertexp)->rank(rank);
            }
        }

        // For each OrderVarVertex, look at its writer and reader mtasks.
        //
        // If there's a set of writers and readers at the same rank, we
        // know these are unordered with respect to one another, so merge
        // those mtasks all together.
        //
        // At this point, we have at most one merged mtask per rank (for a
        // given OVV.) Create edges across these remaining mtasks to ensure
        // they run in serial order (going along with the existing ranks.)
        //
        // NOTE: we don't update the CP's stored in the LogicMTasks to
        // reflect the changes we make to the graph. That's OK, as we
        // haven't yet initialized CPs when we call this routine.
        for (OvvSet::iterator ovvit = ovvSet.begin(); ovvit != ovvSet.end(); ++ovvit) {
            // Build a set of mtasks, per rank, which access this var.
            // Within a rank, sort by MTaskID to avoid nondeterminism.
            TasksByRank tasksByRank;

            // Find all reader and writer tasks for this variable, add to
            // tasksByRank.
            findAdjacentTasks(ovvit, &tasksByRank);

            // Merge all writer and reader tasks from same rank together.
            //
            // NOTE: Strictly speaking, we don't need to merge all the
            // readers together. That may lead to extra serialization. The
            // least amount of ordering we could impose here would be to
            // merge all writers at a given rank together; then make edges
            // from the merged writer node to each reader node at the same
            // rank; and then from each reader node to the merged writer at
            // the next rank.
            //
            // Whereas, merging all readers and writers at the same rank
            // together is "the simplest thing that could possibly work"
            // and it seems to. It also creates fairly few edges. We don't
            // want to create tons of edges here, doing so is not nice to
            // the main edge contraction pass.
            mergeSameRankTasks(&tasksByRank);
        }

        // Handle SystemC vars just a little differently. Instead of
        // treating each var as an independent entity, and serializing
        // writes to that one var, we treat ALL systemC vars as a single
        // entity and serialize writes (and, conservatively, reads) across
        // all of them.
        //
        // Reasoning: writing a systemC var actually turns into a call to a
        // var.write() method, which under the hood is accessing some data
        // structure that's shared by many SC vars. It's not thread safe.
        //
        // Hopefully we only have a few SC vars -- top level ports, probably.
        {
            TasksByRank tasksByRank;
            for (OvvSet::iterator ovvit = ovvSetSystemC.begin(); ovvit != ovvSetSystemC.end();
                 ++ovvit) {
                findAdjacentTasks(ovvit, &tasksByRank);
            }
            mergeSameRankTasks(&tasksByRank);
        }

        // Handle nodes containing DPI calls, we want to serialize those
        // by default unless user gave --threads-dpi-concurrent.
        // Same basic strategy as above to serialize access to SC vars.
        if (!v3Global.opt.threadsDpiPure() || !v3Global.opt.threadsDpiUnpure()) {
            TasksByRank tasksByRank;
            for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp;
                 vxp = vxp->verticesNextp()) {
                LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(vxp);
                if (hasDpiHazard(mtaskp)) tasksByRank[vxp->rank()].insert(mtaskp);
            }
            mergeSameRankTasks(&tasksByRank);
        }

        UINFO(4, "PartFixDataHazards() merged " << m_mergesDone << " pairs of nodes in "
                                                << (V3Os::timeUsecs() - startUsecs)
                                                << " usecs.\n");
    }

private:
    VL_UNCOPYABLE(PartFixDataHazards);
    VL_DEBUG_FUNC;
};

//######################################################################
// ThreadSchedule

class PartPackMTasks;

// The thread schedule, containing all information needed later. Note that this is a simple
// aggregate data type; the only way to get hold of an instance of it is via
// PartPackMTasks::pack, which returns it by move as a const value, meaning we can only acquire
// a const reference to it, so no further modifications are allowed; hence all members are
// public (attributes).
class ThreadSchedule final {
public:
    // CONSTANTS
    static constexpr uint32_t UNASSIGNED = 0xffffffff;

    // TYPES
    struct MTaskState {
        uint32_t completionTime = 0;  // Estimated time this mtask will complete
        uint32_t threadId = UNASSIGNED;  // Thread id this MTask is assigned to
        const ExecMTask* nextp = nullptr;  // Next MTask on same thread after this
    };

    // MEMBERS
    // Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to
    // the sequence of MTasks to be executed by that thread.
    std::vector<std::vector<const ExecMTask*>> threads;

    // State for each mtask.
    std::unordered_map<const ExecMTask*, MTaskState> mtaskState;

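    // Return the thread id this mtask is assigned to, or UNASSIGNED if it
    // has not been scheduled yet.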
    uint32_t threadId(const ExecMTask* mtaskp) const {
        const auto& it = mtaskState.find(mtaskp);
        if (it != mtaskState.end()) {
            return it->second.threadId;
        } else {
            return UNASSIGNED;
        }
    }

private:
    friend class PartPackMTasks;

    explicit ThreadSchedule(uint32_t nThreads)
        : threads{nThreads} {}
    VL_UNCOPYABLE(ThreadSchedule);  // But movable
    ThreadSchedule(ThreadSchedule&&) = default;
    ThreadSchedule& operator=(ThreadSchedule&&) = default;

    // Debugging
    void dumpDotFile(const string& filename) const;
    void dumpDotFilePrefixedAlways(const string& nameComment) const;

public:
    // Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
    // test whether its dependencies are ready before starting, and therefore may need to block.
    uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const {
        const uint32_t thisThreadId = threadId(mtaskp);
        uint32_t result = 0;
        for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
            const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
            if (threadId(prevp) != thisThreadId) ++result;
        }
        return result;
    }

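    // Estimated start and end times of the given mtask within the schedule
    // (derived from its recorded completion time and cost).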
    uint32_t startTime(const ExecMTask* mtaskp) const {
        return mtaskState.at(mtaskp).completionTime - mtaskp->cost();
    }
    uint32_t endTime(const ExecMTask* mtaskp) const {
        return mtaskState.at(mtaskp).completionTime;
    }
};

//! Variant of dumpDotFilePrefixed without --dump option check
void ThreadSchedule::dumpDotFilePrefixedAlways(const string& nameComment) const {
    dumpDotFile(v3Global.debugFilename(nameComment) + ".dot");
}

void ThreadSchedule::dumpDotFile(const string& filename) const {
    // This generates a file used by graphviz, https://www.graphviz.org
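    // The emitted file looks roughly like this (node names and coordinates
    // are illustrative; they come from ExecMTask::name() and the schedule):
    //   digraph v3graph {
    //     t0 [label="Thread 0" ...]
    //     <mtask> [label="<mtask> (start:end)" width=... pos="x,y!"]
    //     <mtask> -> <mtask>
    //   }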
const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
|
2021-07-06 13:06:00 +02:00
|
|
|
if (logp->fail()) v3fatal("Can't write " << filename);
|
|
|
|
|
auto* depGraph = v3Global.rootp()->execGraphp()->depGraphp();
|
|
|
|
|
|
|
|
|
|
// Header
|
|
|
|
|
*logp << "digraph v3graph {\n";
|
|
|
|
|
*logp << " graph[layout=\"neato\" labelloc=t labeljust=l label=\"" << filename << "\"]\n";
|
|
|
|
|
*logp << " node[shape=\"rect\" ratio=\"fill\" fixedsize=true]\n";
|
|
|
|
|
|
|
|
|
|
// Thread labels
|
|
|
|
|
*logp << "\n // Threads\n";
|
|
|
|
|
const int threadBoxWidth = 2;
|
|
|
|
|
for (int i = 0; i < v3Global.opt.threads(); i++) {
|
|
|
|
|
*logp << " t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth
|
|
|
|
|
<< " pos=\"" << (-threadBoxWidth / 2) << "," << -i
|
|
|
|
|
<< "!\" style=\"filled\" fillcolor=\"grey\"] \n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MTask nodes
|
|
|
|
|
*logp << "\n // MTasks\n";
|
|
|
|
|
|
|
|
|
|
// Find minimum cost MTask for scaling MTask node widths
|
|
|
|
|
uint32_t minCost = UINT32_MAX;
|
|
|
|
|
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
2021-11-03 22:49:19 +01:00
|
|
|
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
|
2021-07-06 13:06:00 +02:00
|
|
|
minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
const double minWidth = 2.0;
|
|
|
|
|
auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
|
|
|
|
|
const double startPosX = (minWidth * startTime(mtaskp)) / minCost;
|
|
|
|
|
return nodeWidth / minWidth + startPosX;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto emitMTask = [&](const ExecMTask* mtaskp) {
|
|
|
|
|
const int thread = threadId(mtaskp);
|
|
|
|
|
const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost);
|
|
|
|
|
const double x = mtaskXPos(mtaskp, nodeWidth);
|
|
|
|
|
const int y = -thread;
|
|
|
|
|
string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp)) + ":"
|
|
|
|
|
+ std::to_string(endTime(mtaskp)) + ")" + "\"";
|
|
|
|
|
*logp << " " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\""
|
|
|
|
|
<< x << "," << y << "!\"]\n";
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Emit MTasks
|
|
|
|
|
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
2021-11-03 22:49:19 +01:00
|
|
|
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) emitMTask(mtaskp);
|
2021-07-06 13:06:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit MTask dependency edges
|
|
|
|
|
*logp << "\n // MTask dependencies\n";
|
|
|
|
|
for (const V3GraphVertex* vxp = depGraph->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
2021-11-03 22:49:19 +01:00
|
|
|
if (const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp)) {
|
2021-07-06 13:06:00 +02:00
|
|
|
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
2021-11-03 22:49:19 +01:00
|
|
|
const V3GraphVertex* const top = edgep->top();
|
2021-07-06 13:06:00 +02:00
|
|
|
*logp << " " << vxp->name() << " -> " << top->name() << "\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Trailer
|
|
|
|
|
*logp << "}\n";
|
|
|
|
|
logp->close();
|
|
|
|
|
}
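// Note: the graph written above pins nodes with "layout=neato" and
// "pos=...!" coordinates, so it is best rendered with a position-honoring
// engine; assuming Graphviz is installed, something like
// "neato -Tpdf schedule.dot -O" should work.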

//######################################################################
// PartPackMTasks

// Statically pack tasks into threads.
//
// The simplest thing that could possibly work would be to assume that our
// predictions of task runtimes are precise, and that every thread will
// make progress at an equal rate. Simulate a single "clock", pack the
// highest priority ready task into whatever thread becomes ready earliest,
// repeating until no tasks remain.
//
// That doesn't work well, as our predictions of task runtimes have wide
// error bars (+/- 60% is typical.)
//
// So be a little more clever: let each task have a different end time,
// depending on which thread is looking. Be a little bit pessimistic when
// thread A checks the end time of an mtask running on thread B. This extra
// "padding" avoids tight "layovers" at cross-thread dependencies.
class PartPackMTasks final {
    // TYPES
    struct MTaskCmp {
        bool operator()(const ExecMTask* ap, const ExecMTask* bp) const {
            return ap->id() < bp->id();
        }
    };

    // MEMBERS
    const uint32_t m_nThreads;  // Number of threads
    const uint32_t m_sandbagNumerator;  // Numerator padding for est runtime
    const uint32_t m_sandbagDenom;  // Denominator padding for est runtime

public:
    // CONSTRUCTORS
    explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(),
                            unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
        : m_nThreads{nThreads}
        , m_sandbagNumerator{sandbagNumerator}
        , m_sandbagDenom{sandbagDenom} {}
    ~PartPackMTasks() = default;

private:
    // METHODS
    uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp,
                            uint32_t threadId) {
        const ThreadSchedule::MTaskState& state = schedule.mtaskState.at(mtaskp);
        UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread");
        if (threadId == state.threadId) {
            // No overhead on same thread
            return state.completionTime;
        }

        // Add some padding to the estimated runtime when looking from
        // another thread
        uint32_t sandbaggedEndTime
            = state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom;

        // If task B is packed after task A on thread 0, don't let thread 1
        // think that A finishes earlier than thread 0 thinks that B
        // finishes, otherwise we get priority inversions and fail the self
        // test.
        if (state.nextp) {
            const uint32_t successorEndTime
                = completionTime(schedule, state.nextp, state.threadId);
            if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
                sandbaggedEndTime = successorEndTime - 1;
            }
        }

        UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << threadId << " = "
                                            << sandbaggedEndTime << endl);
        return sandbaggedEndTime;
    }

    bool isReady(ThreadSchedule& schedule, const ExecMTask* mtaskp) {
        for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) {
            const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
            if (schedule.threadId(prevp) == ThreadSchedule::UNASSIGNED) {
                // This predecessor is not assigned yet
                return false;
            }
        }
        return true;
    }

public:
    // Pack the MTasks from the given graph into m_nThreads threads; return the schedule.
    const ThreadSchedule pack(const V3Graph& mtaskGraph) {
        // The result
        ThreadSchedule schedule(m_nThreads);

        // Time each thread is occupied until
        std::vector<uint32_t> busyUntil(m_nThreads, 0);

        // MTasks ready to be assigned next. All their dependencies are already assigned.
        std::set<ExecMTask*, MTaskCmp> readyMTasks;

        // Build initial ready list
        for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
            ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
            if (isReady(schedule, mtaskp)) readyMTasks.insert(mtaskp);
        }

        while (!readyMTasks.empty()) {
            // For each task in the ready set, compute when it might start
            // on each thread (in that thread's local time frame.)
            uint32_t bestTime = 0xffffffff;
            uint32_t bestThreadId = 0;
            ExecMTask* bestMtaskp = nullptr;  // Todo: const ExecMTask*
            for (uint32_t threadId = 0; threadId < m_nThreads; ++threadId) {
                for (ExecMTask* const mtaskp : readyMTasks) {
                    uint32_t timeBegin = busyUntil[threadId];
                    if (timeBegin > bestTime) {
                        UINFO(6, "th " << threadId << " busy until " << timeBegin
                                       << ", later than bestTime " << bestTime
                                       << ", skipping thread.\n");
                        break;
                    }
                    for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep;
                         edgep = edgep->inNextp()) {
                        const ExecMTask* const priorp = dynamic_cast<ExecMTask*>(edgep->fromp());
                        const uint32_t priorEndTime = completionTime(schedule, priorp, threadId);
                        if (priorEndTime > timeBegin) timeBegin = priorEndTime;
                    }
                    UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin
                                     << " on thread " << threadId << endl);
                    if ((timeBegin < bestTime)
                        || ((timeBegin == bestTime)
                            && bestMtaskp  // Redundant, but appeases static analysis tools
                            && (mtaskp->priority() > bestMtaskp->priority()))) {
                        bestTime = timeBegin;
                        bestThreadId = threadId;
                        bestMtaskp = mtaskp;
                    }
                }
            }

            UASSERT(bestMtaskp, "Should have found some task");
            UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestThreadId
                                      << endl);

            // Reference to thread in schedule we are assigning this MTask to.
            std::vector<const ExecMTask*>& bestThread = schedule.threads[bestThreadId];

            // Update algorithm state
            bestMtaskp->predictStart(bestTime);  // Only for gantt reporting
            const uint32_t bestEndTime = bestTime + bestMtaskp->cost();
            schedule.mtaskState[bestMtaskp].completionTime = bestEndTime;
            schedule.mtaskState[bestMtaskp].threadId = bestThreadId;
            if (!bestThread.empty()) schedule.mtaskState[bestThread.back()].nextp = bestMtaskp;
            busyUntil[bestThreadId] = bestEndTime;

            // Add the MTask to the schedule
            bestThread.push_back(bestMtaskp);

            // Update the ready list
            const size_t erased = readyMTasks.erase(bestMtaskp);
            UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something");
            for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp;
                 edgeOutp = edgeOutp->outNextp()) {
                ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
                // Dependent MTask should not yet be assigned to a thread
                UASSERT(schedule.threadId(nextp) == ThreadSchedule::UNASSIGNED,
                        "Tasks after one being assigned should not be assigned yet");
                // Dependent MTask should not be ready yet, since dependency is just being assigned
                UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp,
                            "Tasks after one being assigned should not be ready");
                if (isReady(schedule, nextp)) {
                    readyMTasks.insert(nextp);
                    UINFO(6, "Inserted " << nextp->name() << " into ready\n");
                }
            }
        }

        if (debug() >= 4) schedule.dumpDotFilePrefixedAlways("schedule");

        return schedule;
    }

    // SELF TEST
    static void selfTest() {
        V3Graph graph;
        ExecMTask* const t0 = new ExecMTask(&graph, nullptr, 0);
        t0->cost(1000);
        t0->priority(1100);
        ExecMTask* const t1 = new ExecMTask(&graph, nullptr, 1);
        t1->cost(100);
        t1->priority(100);
        ExecMTask* const t2 = new ExecMTask(&graph, nullptr, 2);
        t2->cost(100);
        t2->priority(100);

        new V3GraphEdge(&graph, t0, t1, 1);
        new V3GraphEdge(&graph, t0, t2, 1);

        PartPackMTasks packer(2,  // Threads
                              3,  // Sandbag numerator
                              10);  // Sandbag denom
        const ThreadSchedule& schedule = packer.pack(graph);

        UASSERT_SELFTEST(size_t, schedule.threads.size(), 2);

        UASSERT_SELFTEST(size_t, schedule.threads[0].size(), 2);
        UASSERT_SELFTEST(size_t, schedule.threads[1].size(), 1);

        UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][0], t0);
        UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1);
        UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2);

        UASSERT_SELFTEST(size_t, schedule.mtaskState.size(), 3);

        UASSERT_SELFTEST(uint32_t, schedule.threadId(t0), 0);
        UASSERT_SELFTEST(uint32_t, schedule.threadId(t1), 0);
        UASSERT_SELFTEST(uint32_t, schedule.threadId(t2), 1);

        // On its native thread, we see the actual end time for t0:
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 0), 1000);
        // On the other thread, we see a sandbagged end time which does not
        // exceed the t1 end time:
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t0, 1), 1099);

        // Actual end time on native thread:
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 0), 1100);
        // Sandbagged end time seen on thread 1. Note it does not compound
        // with t0's sandbagged time; compounding caused trouble in
        // practice.
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t1, 1), 1130);
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 0), 1229);
        UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 1), 1199);
    }

private:
    VL_DEBUG_FUNC;  // Declare debug()
    VL_UNCOPYABLE(PartPackMTasks);
};

//######################################################################
// V3Partition implementation

void V3Partition::debugMTaskGraphStats(const V3Graph* graphp, const string& stage) {
    if (!debug()) return;

    UINFO(4, "\n");
    UINFO(4, " Stats for " << stage << endl);
    uint32_t mtaskCount = 0;
    uint32_t totalCost = 0;
    std::array<uint32_t, 32> mtaskCostHist;
    mtaskCostHist.fill(0);

    for (const V3GraphVertex* mtaskp = graphp->verticesBeginp(); mtaskp;
         mtaskp = mtaskp->verticesNextp()) {
        ++mtaskCount;
        uint32_t mtaskCost = dynamic_cast<const AbstractMTask*>(mtaskp)->cost();
        totalCost += mtaskCost;

        unsigned log2Cost = 0;
        while (mtaskCost >>= 1) ++log2Cost;
        UASSERT(log2Cost < 32, "log2Cost overflow in debugMTaskGraphStats");
        ++mtaskCostHist[log2Cost];
    }
    UINFO(4, " Total mtask cost = " << totalCost << "\n");
    UINFO(4, " Mtask count = " << mtaskCount << "\n");
    UINFO(4, " Avg cost / mtask = "
                 << ((mtaskCount > 0) ? cvtToStr(totalCost / mtaskCount) : "INF!") << "\n");
    UINFO(4, " Histogram of mtask costs:\n");
    for (unsigned i = 0; i < 32; ++i) {
        if (mtaskCostHist[i]) {
            UINFO(4, " 2^" << i << ": " << mtaskCostHist[i] << endl);
            V3Stats::addStat("MTask graph, " + stage + ", mtask cost 2^" + (i < 10 ? " " : "")
                                 + cvtToStr(i),
                             mtaskCostHist[i]);
        }
    }

    if (mtaskCount < 1000) {
        string filePrefix("ordermv_");
        filePrefix += stage;
        if (debug() >= 4) graphp->dumpDotFilePrefixedAlways(filePrefix);
    }

    // Look only at the cost of each mtask, neglect communication cost.
    // This will show us how much parallelism we expect, assuming cache-miss
    // costs are minor and the cost of running logic is the dominant cost.
    PartParallelismEst vertexParEst(graphp);
    vertexParEst.traverse();
    vertexParEst.statsReport(stage);
    if (debug() >= 4) {
        UINFO(0, "\n");
        UINFO(0, " Parallelism estimate based on mtask costs:\n");
        vertexParEst.debugReport();
    }
}

// Print a hash of the shape of graphp. If you are battling
// nondeterminism, this can help to pinpoint where in the pipeline it's
// creeping in.
void V3Partition::hashGraphDebug(const V3Graph* graphp, const char* debugName) {
    // Disabled when there are no nondeterminism issues in flight.
    if (!v3Global.opt.debugNondeterminism()) return;

    std::unordered_map<const V3GraphVertex*, uint32_t> vx2Id;
    unsigned id = 0;
    for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
        vx2Id[vxp] = id++;
    }
    unsigned hash = 0;
    for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
        for (const V3GraphEdge* edgep = vxp->outBeginp(); edgep; edgep = edgep->outNextp()) {
            const V3GraphVertex* const top = edgep->top();
            hash = vx2Id[top] + 31U * hash;  // The K&R hash function
        }
    }
    UINFO(0, "Hash of shape (not contents) of " << debugName << " = " << cvtToStr(hash) << endl);
}

void V3Partition::setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp) {
    // Look at each mtask
    for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) {
        LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp);
        const LogicMTask::VxList* vertexListp = mtaskp->vertexListp();

        // For each logic vertex in this mtask, create an mtask-to-mtask
        // edge based on the logic-to-logic edge.
        for (LogicMTask::VxList::const_iterator vit = vertexListp->begin();
             vit != vertexListp->end(); ++vit) {
            for (V3GraphEdge* outp = (*vit)->outBeginp(); outp; outp = outp->outNextp()) {
                UASSERT(outp->weight() > 0, "Mtask not assigned weight");
                const MTaskMoveVertex* const top = dynamic_cast<MTaskMoveVertex*>(outp->top());
                UASSERT(top, "MoveVertex not associated to mtask");
                const auto it = vlstd::as_const(vx2mtaskp)->find(top);
                UASSERT(it != vx2mtaskp->end(), "MTask map can't find id");
                LogicMTask* const otherMTaskp = it->second;
                UASSERT(otherMTaskp, "nullptr other Mtask");
                UASSERT_OBJ(otherMTaskp != mtaskp, mtaskp, "Would create a cycle edge");

                // Don't create redundant edges.
                if (mtaskp->hasRelative(GraphWay::FORWARD, otherMTaskp)) {  //
                    continue;
                }
                new MTaskEdge(mtasksp, mtaskp, otherMTaskp, 1);
            }
        }
    }
}

void V3Partition::go(V3Graph* mtasksp) {
    // Called by V3Order
    hashGraphDebug(m_fineDepsGraphp, "v3partition initial fine-grained deps");

    // Create the first MTasks. Initially, each MTask just wraps one
    // MTaskMoveVertex. Over time, we'll merge MTasks together and
    // eventually each MTask will wrap a large number of MTaskMoveVertices
    // (and the logic nodes therein.)
    uint32_t totalGraphCost = 0;
    {
        // The V3InstrCount within LogicMTask will set user5 on each AST
        // node, to assert that we never count any node twice.
        AstUser5InUse inUser5;
        Vx2MTaskMap vx2mtask;
        for (V3GraphVertex* vxp = m_fineDepsGraphp->verticesBeginp(); vxp;
             vxp = vxp->verticesNextp()) {
            MTaskMoveVertex* const mtmvVxp = dynamic_cast<MTaskMoveVertex*>(vxp);
            UASSERT_OBJ(mtmvVxp, vxp, "Every vertex here should be an MTaskMoveVertex");

            LogicMTask* const mtaskp = new LogicMTask(mtasksp, mtmvVxp);
            vx2mtask[mtmvVxp] = mtaskp;

            totalGraphCost += mtaskp->cost();
        }

        // Create the mtask->mtask dep edges based on vertex deps
        setupMTaskDeps(mtasksp, &vx2mtask);
    }

    V3Partition::debugMTaskGraphStats(mtasksp, "initial");

    // For debug: print out the longest critical path. This allows us to
    // verify that the costs look reasonable, that we aren't combining
    // nodes that should probably be split, etc.
    if (v3Global.opt.dumpTreeLevel(__FILE__) >= 3) {
        LogicMTask::dumpCpFilePrefixed(mtasksp, "cp");
    }

    // Merge nodes that could present data hazards; see comment within.
    {
        PartFixDataHazards(mtasksp).go();
        V3Partition::debugMTaskGraphStats(mtasksp, "hazards");
        hashGraphDebug(mtasksp, "mtasksp after fixDataHazards()");
    }

    // Set up the critical path into and out of each node.
    partInitCriticalPaths(mtasksp);
    hashGraphDebug(mtasksp, "after partInitCriticalPaths()");

    // Order the graph. We know it's already ranked from fixDataHazards()
    // so we don't need to rank it again.
    //
    // On at least some models, ordering the graph here seems to help
    // performance. (Why? Is it just triggering noise in a lucky direction?
    // Is it just as likely to harm results?)
    //
    // More diversity of models that can build with --threads will
    // eventually tell us. For now keep the order() so we don't forget
    // about it, in case it actually helps. TODO: get more data and maybe
    // remove this later if it doesn't really help.
    mtasksp->orderPreRanked();

    const int targetParFactor = v3Global.opt.threads();
    if (targetParFactor < 2) v3fatalSrc("We should not reach V3Partition when --threads <= 1");

    // Set cpLimit to roughly totalGraphCost / nThreads
    //
    // Actually set it a bit lower, by a hardcoded fudge factor. This
    // results in more, smaller mtasks, which helps reduce fragmentation
    // when scheduling them.
    const unsigned fudgeNumerator = 3;
    const unsigned fudgeDenominator = 5;
    uint32_t cpLimit = ((totalGraphCost * fudgeNumerator) / (targetParFactor * fudgeDenominator));
    UINFO(4, "V3Partition set cpLimit = " << cpLimit << endl);

    // Merge MTask nodes together, repeatedly, until the CP budget is
    // reached. Coarsens the graph, usually by several orders of
    // magnitude.
    //
    // Some tests disable this, hence the test on threadsCoarsen().
    // Coarsening is always enabled in production.
    if (v3Global.opt.threadsCoarsen()) {
        PartContraction(mtasksp, cpLimit,
                        // --debugPartition is used by tests
                        // to enable slow assertions.
                        v3Global.opt.debugPartition())
            .go();
        V3Partition::debugMTaskGraphStats(mtasksp, "contraction");
    }
    {
        mtasksp->removeTransitiveEdges();
        V3Partition::debugMTaskGraphStats(mtasksp, "transitive1");
    }

    // Reassign MTask IDs onto smaller numbers, which should be more stable
    // across small logic changes. Keep MTask IDs in the same relative
    // order though, otherwise we break CmpLogicMTask for still-existing
    // EdgeSets that haven't destructed yet.
    {
        using SortedMTaskSet = std::set<LogicMTask*, LogicMTask::CmpLogicMTask>;
        SortedMTaskSet sorted;
        for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) {
            LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp);
            sorted.insert(mtaskp);
        }
        uint32_t nextId = 1;
        for (auto it = sorted.begin(); it != sorted.end(); ++it) {
            // We shouldn't perturb the sort order of the set despite
            // changing the IDs; they should all just remain in the same
            // relative order. Confirm that:
            UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here");
            UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n");
            (*it)->id(nextId);
            ++nextId;
        }
    }

    // Set color to indicate an mtaskId on every underlying MTaskMoveVertex.
    for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) {
        LogicMTask* const mtaskp = dynamic_cast<LogicMTask*>(itp);
        for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin();
             it != mtaskp->vertexListp()->end(); ++it) {
            MTaskMoveVertex* const mvertexp = *it;
            mvertexp->color(mtaskp->id());
        }
    }
}

void add(std::unordered_map<int, vluint64_t>& cmap, int id, vluint64_t cost) { cmap[id] += cost; }

using EstimateAndProfiled = std::pair<uint64_t, vluint64_t>;  // cost est, cost profiled
using Costs = std::unordered_map<uint32_t, EstimateAndProfiled>;

static void normalizeCosts(Costs& costs) {
    const auto scaleCost = [](vluint64_t value, double multiplier) {
        double scaled = static_cast<double>(value) * multiplier;
        if (value && scaled < 1) scaled = 1;
        return static_cast<uint64_t>(scaled);
    };

    // For all costs with a profile, compute sum
    vluint64_t sumCostProfiled = 0;  // For data with estimate and profile
    vluint64_t sumCostEstimate = 0;  // For data with estimate and profile
    for (const auto& est : costs) {
        if (est.second.second) {
            sumCostEstimate += est.second.first;
            sumCostProfiled += est.second.second;
        }
    }

    if (sumCostEstimate) {
        // For entries where we don't have profiled data, compute how much to
        // scale the estimate up/down to put it on the same relative scale as
        // the profiled data. (Improves results if only a few profiles are missing.)
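        // For example, if the profiled mtasks sum to 4000 ticks against an
        // estimate sum of 40, estToProfile = 100, so an unprofiled estimate
        // of 20 is rescaled to 2000 (see selfTestNormalizeCosts below).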
        const double estToProfile
            = static_cast<double>(sumCostProfiled) / static_cast<double>(sumCostEstimate);
        UINFO(5, "Estimated data needs scaling by "
                     << estToProfile << ", sumCostProfiled=" << sumCostProfiled
                     << " sumCostEstimate=" << sumCostEstimate << endl);
        for (auto& est : costs) {
            uint64_t& costEstimate = est.second.first;
            costEstimate = scaleCost(costEstimate, estToProfile);
        }
    }

    // Costs can overflow a uint32_t. Using the maximum cost seen, scale all down
    vluint64_t maxCost = 0;
    for (auto& est : costs) {
        const uint64_t& costEstimate = est.second.first;
        const uint64_t& costProfiled = est.second.second;
        if (maxCost < costEstimate) maxCost = costEstimate;
        if (maxCost < costProfiled) maxCost = costProfiled;
        UINFO(9,
              "Post uint scale: ce = " << est.second.first << " cp=" << est.second.second << endl);
    }
    const vluint64_t scaleDownTo = 10000000;  // Extra room for future algorithms to add costs
    if (maxCost > scaleDownTo) {
        const double scaleDown = static_cast<double>(scaleDownTo) / static_cast<double>(maxCost);
        UINFO(5, "Scaling data to fit within 32 bits by multiplying by=" << scaleDown
                                                                         << ", maxCost=" << maxCost << endl);
        for (auto& est : costs) {
            est.second.first = scaleCost(est.second.first, scaleDown);
            est.second.second = scaleCost(est.second.second, scaleDown);
        }
    }
}

void V3Partition::selfTestNormalizeCosts() {
    {  // Test that omitted profile data correctly scales estimates
        Costs costs({// id  est  prof
                     {1, {10, 1000}},
                     {2, {20, 0}},  // Note no profile
                     {3, {30, 3000}}});
        normalizeCosts(costs);
        UASSERT_SELFTEST(uint64_t, costs[1].first, 1000);
        UASSERT_SELFTEST(uint64_t, costs[1].second, 1000);
        UASSERT_SELFTEST(uint64_t, costs[2].first, 2000);
        UASSERT_SELFTEST(uint64_t, costs[2].second, 0);
        UASSERT_SELFTEST(uint64_t, costs[3].first, 3000);
        UASSERT_SELFTEST(uint64_t, costs[3].second, 3000);
    }
    {  // Test that very large profile data properly scales
        Costs costs({// id  est  prof
                     {1, {10, 100000000000}},
                     {2, {20, 200000000000}},
                     {3, {30, 1}}});  // Make sure doesn't underflow
        normalizeCosts(costs);
        UASSERT_SELFTEST(uint64_t, costs[1].first, 2500000);
        UASSERT_SELFTEST(uint64_t, costs[1].second, 5000000);
        UASSERT_SELFTEST(uint64_t, costs[2].first, 5000000);
        UASSERT_SELFTEST(uint64_t, costs[2].second, 10000000);
        UASSERT_SELFTEST(uint64_t, costs[3].first, 7500000);
        UASSERT_SELFTEST(uint64_t, costs[3].second, 1);
    }
}

static void fillinCosts(V3Graph* execMTaskGraphp) {
    V3UniqueNames m_uniqueNames;  // For generating unique mtask profile hash names

    // Pass 1: See what profiling data applies
    Costs costs;  // For each mtask, costs

    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        // Compute name of mtask, for hash lookup
        mtp->hashName(m_uniqueNames.get(mtp->bodyp()));

        // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
        const vluint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false);
        const vluint64_t costProfiled
            = V3Config::getProfileData(v3Global.opt.prefix(), mtp->hashName());
        if (costProfiled) {
            UINFO(5, "Profile data for mtask " << mtp->id() << " " << mtp->hashName()
                                               << " cost override " << costProfiled << endl);
        }
        costs[mtp->id()] = std::make_pair(costEstimate, costProfiled);
    }

    normalizeCosts(costs /*ref*/);

    int totalEstimates = 0;
    int missingProfiles = 0;
    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        const uint32_t costEstimate = costs[mtp->id()].first;
        const uint64_t costProfiled = costs[mtp->id()].second;
        UINFO(9, "ce = " << costEstimate << " cp=" << costProfiled << endl);
        UASSERT(costEstimate <= (1UL << 31), "cost scaling math would overflow uint32");
        UASSERT(costProfiled <= (1UL << 31), "cost scaling math would overflow uint32");
        const uint64_t costProfiled32 = static_cast<uint32_t>(costProfiled);
        uint32_t costToUse = costProfiled32;
        if (!costProfiled32) {
            costToUse = costEstimate;
            if (costEstimate != 0) ++missingProfiles;
        }
        if (costEstimate != 0) ++totalEstimates;
        mtp->cost(costToUse);
        mtp->priority(costToUse);
    }

    if (missingProfiles) {
        if (FileLine* const fl = V3Config::getProfileDataFileLine()) {
            fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
                                          << missingProfiles << " of " << totalEstimates
                                          << " mtasks had no data");
        }
    }
}
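// For reference, the profile data consulted above comes from a Verilator
// configuration (.vlt) file produced by an earlier --prof-threads run; an
// entry looks roughly like the following (illustrative only, with a
// made-up hash name and cost):
//   profile_data -model "Vtop" -mtask "vthrg_0" -cost 64'd12345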

static void finalizeCosts(V3Graph* execMTaskGraphp) {
    GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
    while (const V3GraphVertex* vxp = ser.nextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        // "Priority" is the critical path from the start of the mtask, to
        // the end of the graph reachable from this mtask. Given the
        // choice among several ready mtasks, we'll want to start the
        // highest priority one first, so we're always working on the "long
        // pole".
        for (V3GraphEdge* edgep = mtp->outBeginp(); edgep; edgep = edgep->outNextp()) {
            ExecMTask* const followp = dynamic_cast<ExecMTask*>(edgep->top());
            if ((followp->priority() + mtp->cost()) > mtp->priority()) {
                mtp->priority(followp->priority() + mtp->cost());
            }
        }
    }

    // Some MTasks may now have zero cost, eliminate those.
    // (It's common for tasks to shrink to nothing when V3LifePost
    // removes dly assignments.)
    for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(vxp);
        vxp = vxp->verticesNextp();  // Advance before delete

        // Don't rely on checking mtp->cost() == 0 to detect an empty task.
        // Our cost-estimating logic is just an estimate. Instead, check
        // the MTaskBody to see if it's empty. That's the source of truth.
        AstMTaskBody* const bodyp = mtp->bodyp();
        if (!bodyp->stmtsp()) {  // Kill this empty mtask
            UINFO(6, "Removing zero-cost " << mtp->name() << endl);
            for (V3GraphEdge* inp = mtp->inBeginp(); inp; inp = inp->inNextp()) {
                for (V3GraphEdge* outp = mtp->outBeginp(); outp; outp = outp->outNextp()) {
                    new V3GraphEdge(execMTaskGraphp, inp->fromp(), outp->top(), 1);
                }
            }
            VL_DO_DANGLING(mtp->unlinkDelete(execMTaskGraphp), mtp);
            // Also remove and delete the AstMTaskBody, otherwise it would
            // keep a dangling pointer to the ExecMTask.
            VL_DO_DANGLING(bodyp->unlinkFrBack()->deleteTree(), bodyp);
        }
    }

    // Assign profiler IDs
    vluint64_t profilerId = 0;
    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
         vxp = vxp->verticesNextp()) {
        ExecMTask* const mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
        mtp->profilerId(profilerId++);
    }

    // Removing tasks may cause edges that were formerly non-transitive to
    // become transitive. Also we just created new edges around the removed
    // tasks, which could be transitive. Prune out all transitive edges.
    {
        execMTaskGraphp->removeTransitiveEdges();
        V3Partition::debugMTaskGraphStats(execMTaskGraphp, "transitive2");
    }

    // Record summary stats for the final mtask graph.
    // (More verbose stats are available with --debugi-V3Partition >= 3.)
    PartParallelismEst parEst(execMTaskGraphp);
    parEst.traverse();
    parEst.statsReport("final");
    if (debug() >= 3) {
        UINFO(0, " Final mtask parallelism report:\n");
        parEst.debugReport();
    }
}

static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
                               AstCFunc* funcp, const ExecMTask* mtaskp) {
    AstNodeModule* const modp = v3Global.rootp()->topModulep();
    FileLine* const fl = modp->fileline();

    // Helper function to make the code a bit more legible
    const auto addStrStmt = [=](const string& stmt) -> void {  //
        funcp->addStmtsp(new AstCStmt(fl, stmt));
    };

    if (const uint32_t nDependencies = schedule.crossThreadDependencies(mtaskp)) {
        // This mtask has dependencies executed on another thread, so it may block. Create the task
        // state variable and wait to be notified.
        const string name = "__Vm_mtaskstate_" + cvtToStr(mtaskp->id());
        AstBasicDType* const mtaskStateDtypep
            = v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
        AstVar* const varp = new AstVar(fl, AstVarType::MODULETEMP, name, mtaskStateDtypep);
        varp->valuep(new AstConst(fl, nDependencies));
        varp->protect(false);  // Do not protect as we still have references in AstText
        modp->addStmtp(varp);
        // For now, reference is still via text bashing
        addStrStmt("vlSelf->" + name + ".waitUntilUpstreamDone(even_cycle);\n");
    }

    string recName;
    if (v3Global.opt.profThreads()) {
        recName = "__Vprfthr_" + cvtToStr(mtaskp->id());
        addStrStmt("VlProfileRec* " + recName + " = nullptr;\n");
        // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
        addStrStmt("if (VL_UNLIKELY(vlSymsp->__Vm_profile_cycle_start)) {\n" +  //
                   recName + " = vlSymsp->__Vm_threadPoolp->profileAppend();\n" +  //
                   recName + "->startRecord(VL_RDTSC_Q()," +  //
                   " " + cvtToStr(mtaskp->id()) + "," +  //
                   " " + cvtToStr(mtaskp->predictStart()) + "," +  //
                   " " + cvtToStr(mtaskp->cost()) + ");\n" +  //
                   "}\n");
    }
    if (v3Global.opt.profThreads()) {
        // No lock around startCounter, as counter numbers are unique per thread
        addStrStmt("vlSymsp->_vm_profiler.startCounter(" + cvtToStr(mtaskp->profilerId())
                   + ");\n");
    }

    addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");

    // Move the actual body of calls to leaf functions into this function
    funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());

    if (v3Global.opt.profThreads()) {
        // No lock around stopCounter, as counter numbers are unique per thread
        addStrStmt("vlSymsp->_vm_profiler.stopCounter(" + cvtToStr(mtaskp->profilerId()) + ");\n");
    }
    if (v3Global.opt.profThreads()) {
        addStrStmt("if (VL_UNLIKELY(" + recName + ")) "  //
                   + recName + "->endRecord(VL_RDTSC_Q());\n");
    }

    // Flush message queue
    addStrStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");

    // For any dependent mtask that's on another thread, signal one dependency completion.
    for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
        const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgep->top());
        if (schedule.threadId(nextp) != threadId) {
            addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id())
                       + ".signalUpstreamDone(even_cycle);\n");
        }
    }
}

static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) {
    AstNodeModule* const modp = v3Global.rootp()->topModulep();
    FileLine* const fl = modp->fileline();

    std::vector<AstCFunc*> funcps;

    // For each thread, create a function representing its entry point
    for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
        if (thread.empty()) continue;
        const uint32_t threadId = schedule.threadId(thread.front());
        string name = "__Vthread_";
        name += cvtToStr(threadId);
        AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");
        modp->addStmtp(funcp);
        funcps.push_back(funcp);
        funcp->isStatic(true);  // Uses void self pointer, so static and hand rolled
        funcp->isLoose(true);
        funcp->entryPoint(true);
        funcp->argTypes("void* voidSelf, bool even_cycle");

        // Set up vlSelf and vlSymsp
        funcp->addStmtsp(new AstCStmt{fl, EmitCBaseVisitor::voidSelfAssign(modp)});
        funcp->addStmtsp(new AstCStmt{fl, EmitCBaseVisitor::symClassAssign()});

        // Invoke each mtask scheduled to this thread from the thread function
        for (const ExecMTask* const mtaskp : thread) {
            addMTaskToFunction(schedule, threadId, funcp, mtaskp);
        }

        // Unblock the fake "final" mtask when this thread is finished
        funcp->addStmtsp(
            new AstCStmt(fl, "vlSelf->__Vm_mtaskstate_final.signalUpstreamDone(even_cycle);\n"));
    }

    // Create the fake "final" mtask state variable
    AstBasicDType* const mtaskStateDtypep
        = v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
    AstVar* const varp
        = new AstVar(fl, AstVarType::MODULETEMP, "__Vm_mtaskstate_final", mtaskStateDtypep);
    varp->valuep(new AstConst(fl, funcps.size()));
    varp->protect(false);  // Do not protect as we still have references in AstText
    modp->addStmtp(varp);

    return funcps;
}
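// For orientation, the code generated for one thread function looks
// roughly like the following (illustrative sketch only; the exact names,
// ids, and profiling guards are produced by the addStrStmt calls above):
//
//   void Vtop___Vthread_1(void* voidSelf, bool even_cycle) {
//       // ... vlSelf/vlSymsp setup ...
//       vlSelf->__Vm_mtaskstate_12.waitUntilUpstreamDone(even_cycle);
//       Verilated::mtaskId(12);
//       // ... body of mtask 12 ...
//       Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);
//       vlSelf->__Vm_mtaskstate_17.signalUpstreamDone(even_cycle);
//       // ... further mtasks scheduled onto this thread ...
//       vlSelf->__Vm_mtaskstate_final.signalUpstreamDone(even_cycle);
//   }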

static void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
                                      const std::vector<AstCFunc*>& funcps) {
    // FileLine used for constructing nodes below
    FileLine* const fl = v3Global.rootp()->fileline();

    // Add thread function invocations to execGraph
    const auto addStrStmt = [=](const string& stmt) -> void {  //
        execGraphp->addStmtsp(new AstCStmt(fl, stmt));
    };
    const auto addTextStmt = [=](const string& text) -> void {
        execGraphp->addStmtsp(new AstText(fl, text, /* tracking: */ true));
    };

    addStrStmt("vlSymsp->__Vm_even_cycle = !vlSymsp->__Vm_even_cycle;\n");

    const uint32_t last = funcps.size() - 1;
    for (uint32_t i = 0; i <= last; ++i) {
        AstCFunc* const funcp = funcps.at(i);
        if (i != last) {
            // The first N-1 will run on the thread pool.
            addTextStmt("vlSymsp->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask(");
            execGraphp->addStmtsp(new AstAddrOfCFunc(fl, funcp));
            addTextStmt(", vlSelf, vlSymsp->__Vm_even_cycle);\n");
        } else {
            // The last will run on the main thread.
            AstCCall* const callp = new AstCCall(fl, funcp);
            callp->argTypes("vlSelf, vlSymsp->__Vm_even_cycle");
            execGraphp->addStmtsp(callp);
            addStrStmt("Verilated::mtaskId(0);\n");
        }
    }

    addStrStmt("vlSelf->__Vm_mtaskstate_final.waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle);\n");
}

static void implementExecGraph(AstExecGraph* const execGraphp) {
    // Nothing to be done if there are no MTasks in the graph at all.
    if (execGraphp->depGraphp()->empty()) return;

    // Schedule the mtasks: statically associate each mtask with a thread,
    // and determine the order in which each thread will run its mtasks.
    const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp());

    // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes from
    // the AstExecGraph into the AstCFuncs created.
    const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule);
    UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");

    // Start the thread functions at the point this AstExecGraph is located in the tree.
    addThreadStartToExecGraph(execGraphp, funcps);
}

void V3Partition::finalize() {
    // Called by Verilator top stage
    AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp();
    UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");

    // Back in V3Order, we partitioned mtasks using provisional cost
    // estimates. However, V3Order precedes some optimizations (notably
    // V3LifePost) that can change the cost of logic within each mtask.
    // Now that logic is final, recompute the cost and priority of each
    // ExecMTask.
    fillinCosts(execGraphp->mutableDepGraphp());
    finalizeCosts(execGraphp->mutableDepGraphp());

    // Replace the graph body with its multi-threaded implementation.
    implementExecGraph(execGraphp);
}

void V3Partition::selfTest() {
    PartPropagateCpSelfTest::selfTest();
    PartPackMTasks::selfTest();
    PartContraction::selfTest();
}