Factor out graph parallelism report into a generic algorithm (#4957)
This is a generic algorithm parametrised by a cost function, so implement it as such for easy reuse.
This commit is contained in:
parent
1481d34959
commit
a686e547cf
|
|
@ -26,6 +26,7 @@
|
||||||
#include "V3ThreadSafety.h"
|
#include "V3ThreadSafety.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
class FileLine;
|
class FileLine;
|
||||||
class V3Graph;
|
class V3Graph;
|
||||||
|
|
@ -182,6 +183,38 @@ public:
|
||||||
void dumpEdges(std::ostream& os, const V3GraphVertex* vertexp) const VL_MT_DISABLED;
|
void dumpEdges(std::ostream& os, const V3GraphVertex* vertexp) const VL_MT_DISABLED;
|
||||||
static void selfTest() VL_MT_DISABLED;
|
static void selfTest() VL_MT_DISABLED;
|
||||||
|
|
||||||
|
// Result of the graph parallelism analysis: total cost, critical path cost and
// vertex/edge counts. The default constructor is private, so a report can only
// be created by the friend GraphAlgParallelismReport; callers may copy it freely.
class ParallelismReport final {
    friend class GraphAlgParallelismReport;

    // Total cost of evaluating the whole graph. The ratio of m_totalGraphCost to
    // m_criticalPathCost gives us an estimate of the parallelizability of this graph which is
    // only as good as the guess returned by vertexCost.
    uint32_t m_totalGraphCost = 0;

    // Cost of the critical path, in abstract units (the same units returned by the vertexCost)
    uint32_t m_criticalPathCost = 0;

    size_t m_vertexCount = 0;  // Number of vertexes in the graph
    size_t m_edgeCount = 0;  // Number of edges in the graph

    ParallelismReport() = default;

public:
    ~ParallelismReport() = default;
    ParallelismReport(const ParallelismReport&) = default;
    ParallelismReport& operator=(const ParallelismReport&) = default;

    // Sum of the costs of all vertices
    uint32_t totalGraphCost() const { return m_totalGraphCost; }
    // Cost of the most expensive dependency chain
    uint32_t criticalPathCost() const { return m_criticalPathCost; }
    size_t vertexCount() const { return m_vertexCount; }
    size_t edgeCount() const { return m_edgeCount; }

    // Ratio of total cost to critical path cost. Returns 0.0 when the critical
    // path cost is zero (for example a default-initialized report), rather than
    // evaluating 0/0 and handing a NaN to the caller.
    double parallelismFactor() const {
        if (m_criticalPathCost == 0) return 0.0;
        return static_cast<double>(m_totalGraphCost) / m_criticalPathCost;
    }
};
|
||||||
|
|
||||||
|
ParallelismReport parallelismReport(
|
||||||
|
std::function<uint32_t(const V3GraphVertex*)> vertexCost) const VL_MT_DISABLED;
|
||||||
|
|
||||||
// CALLBACKS
|
// CALLBACKS
|
||||||
virtual void loopsMessageCb(V3GraphVertex* vertexp) VL_MT_DISABLED;
|
virtual void loopsMessageCb(V3GraphVertex* vertexp) VL_MT_DISABLED;
|
||||||
virtual void loopsVertexCb(V3GraphVertex* vertexp) VL_MT_DISABLED;
|
virtual void loopsVertexCb(V3GraphVertex* vertexp) VL_MT_DISABLED;
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,8 @@
|
||||||
|
|
||||||
#include "V3Global.h"
|
#include "V3Global.h"
|
||||||
#include "V3GraphPathChecker.h"
|
#include "V3GraphPathChecker.h"
|
||||||
|
#include "V3GraphStream.h"
|
||||||
|
#include "V3Stats.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|
@ -515,3 +517,59 @@ double V3Graph::orderDFSIterate(V3GraphVertex* vertexp) {
|
||||||
vertexp->user(2);
|
vertexp->user(2);
|
||||||
return vertexp->fanout();
|
return vertexp->fanout();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//######################################################################
|
||||||
|
//######################################################################
|
||||||
|
// Algorithms - parallelism report
|
||||||
|
|
||||||
|
class GraphAlgParallelismReport final {
|
||||||
|
// MEMBERS
|
||||||
|
const V3Graph& m_graph; // The graph
|
||||||
|
const std::function<uint32_t(const V3GraphVertex*)> m_vertexCost; // vertex cost function
|
||||||
|
V3Graph::ParallelismReport m_report; // The result report
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
explicit GraphAlgParallelismReport(const V3Graph& graph,
|
||||||
|
std::function<uint32_t(const V3GraphVertex*)> vertexCost)
|
||||||
|
: m_graph{graph}
|
||||||
|
, m_vertexCost{vertexCost} {
|
||||||
|
// For each node, record the critical path cost from the start
|
||||||
|
// of the graph through the end of the node.
|
||||||
|
std::unordered_map<const V3GraphVertex*, uint32_t> critPaths;
|
||||||
|
GraphStreamUnordered serialize{&m_graph};
|
||||||
|
for (const V3GraphVertex* vertexp; (vertexp = serialize.nextp());) {
|
||||||
|
++m_report.m_vertexCount;
|
||||||
|
uint32_t cpCostToHere = 0;
|
||||||
|
for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
||||||
|
++m_report.m_edgeCount;
|
||||||
|
// For each upstream item, add its critical path cost to
|
||||||
|
// the cost of this edge, to form a new candidate critical
|
||||||
|
// path cost to the current node. Whichever is largest is
|
||||||
|
// the critical path to reach the start of this node.
|
||||||
|
cpCostToHere = std::max(cpCostToHere, critPaths[edgep->fromp()]);
|
||||||
|
}
|
||||||
|
// Include the cost of the current vertex in the critical
|
||||||
|
// path, so it represents the critical path to the end of
|
||||||
|
// this vertex.
|
||||||
|
cpCostToHere += m_vertexCost(vertexp);
|
||||||
|
critPaths[vertexp] = cpCostToHere;
|
||||||
|
m_report.m_criticalPathCost = std::max(m_report.m_criticalPathCost, cpCostToHere);
|
||||||
|
// Tally the total cost contributed by vertices.
|
||||||
|
m_report.m_totalGraphCost += m_vertexCost(vertexp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
~GraphAlgParallelismReport() = default;
|
||||||
|
VL_UNCOPYABLE(GraphAlgParallelismReport);
|
||||||
|
VL_UNMOVABLE(GraphAlgParallelismReport);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static V3Graph::ParallelismReport
|
||||||
|
apply(const V3Graph& graph, std::function<uint32_t(const V3GraphVertex*)> vertexCost) {
|
||||||
|
return GraphAlgParallelismReport(graph, vertexCost).m_report;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build a parallelism report for this graph. The cost of each vertex is
// obtained from the supplied 'vertexCost' callback; the actual computation is
// delegated to GraphAlgParallelismReport.
V3Graph::ParallelismReport
V3Graph::parallelismReport(std::function<uint32_t(const V3GraphVertex*)> vertexCost) const {
    const V3Graph& self = *this;
    V3Graph::ParallelismReport result = GraphAlgParallelismReport::apply(self, vertexCost);
    return result;
}
|
||||||
|
|
|
||||||
|
|
@ -176,6 +176,7 @@ using EdgeHeap = PairingHeap<EdgeKey>;
|
||||||
// LogicMTask
|
// LogicMTask
|
||||||
|
|
||||||
class LogicMTask final : public AbstractLogicMTask {
|
class LogicMTask final : public AbstractLogicMTask {
|
||||||
|
VL_RTTI_IMPL(LogicMTask, AbstractLogicMTask)
|
||||||
template <GraphWay::en T_Way>
|
template <GraphWay::en T_Way>
|
||||||
friend class PartPropagateCp;
|
friend class PartPropagateCp;
|
||||||
|
|
||||||
|
|
@ -730,87 +731,6 @@ void MergeCandidate::rescore() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//######################################################################
|
|
||||||
// PartParallelismEst - Estimate parallelism of graph
|
|
||||||
|
|
||||||
class PartParallelismEst final {
|
|
||||||
// MEMBERS
|
|
||||||
const V3Graph* const m_graphp; // Mtask-containing graph
|
|
||||||
|
|
||||||
// Total cost of evaluating the whole graph.
|
|
||||||
// The ratio of m_totalGraphCost to longestCpCost gives us an estimate
|
|
||||||
// of the parallelizability of this graph which is only as good as the
|
|
||||||
// guess returned by LogicMTask::cost().
|
|
||||||
uint32_t m_totalGraphCost = 0;
|
|
||||||
|
|
||||||
// Cost of the longest critical path, in abstract units (the same units
|
|
||||||
// returned by the vertexCost)
|
|
||||||
uint32_t m_longestCpCost = 0;
|
|
||||||
|
|
||||||
size_t m_vertexCount = 0; // Number of vertexes calculated
|
|
||||||
size_t m_edgeCount = 0; // Number of edges calculated
|
|
||||||
|
|
||||||
public:
|
|
||||||
// CONSTRUCTORS
|
|
||||||
explicit PartParallelismEst(const V3Graph* graphp)
|
|
||||||
: m_graphp{graphp} {}
|
|
||||||
|
|
||||||
// METHODS
|
|
||||||
uint32_t totalGraphCost() const { return m_totalGraphCost; }
|
|
||||||
uint32_t longestCritPathCost() const { return m_longestCpCost; }
|
|
||||||
size_t vertexCount() const { return m_vertexCount; }
|
|
||||||
size_t edgeCount() const { return m_edgeCount; }
|
|
||||||
double parallelismFactor() const {
|
|
||||||
return (static_cast<double>(m_totalGraphCost) / m_longestCpCost);
|
|
||||||
}
|
|
||||||
void traverse() {
|
|
||||||
// For each node, record the critical path cost from the start
|
|
||||||
// of the graph through the end of the node.
|
|
||||||
std::unordered_map<const V3GraphVertex*, uint32_t> critPaths;
|
|
||||||
GraphStreamUnordered serialize{m_graphp};
|
|
||||||
for (const V3GraphVertex* vertexp; (vertexp = serialize.nextp());) {
|
|
||||||
++m_vertexCount;
|
|
||||||
uint32_t cpCostToHere = 0;
|
|
||||||
for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
|
||||||
++m_edgeCount;
|
|
||||||
// For each upstream item, add its critical path cost to
|
|
||||||
// the cost of this edge, to form a new candidate critical
|
|
||||||
// path cost to the current node. Whichever is largest is
|
|
||||||
// the critical path to reach the start of this node.
|
|
||||||
cpCostToHere = std::max(cpCostToHere, critPaths[edgep->fromp()]);
|
|
||||||
}
|
|
||||||
// Include the cost of the current vertex in the critical
|
|
||||||
// path, so it represents the critical path to the end of
|
|
||||||
// this vertex.
|
|
||||||
cpCostToHere += vertexCost(vertexp);
|
|
||||||
critPaths[vertexp] = cpCostToHere;
|
|
||||||
m_longestCpCost = std::max(m_longestCpCost, cpCostToHere);
|
|
||||||
// Tally the total cost contributed by vertices.
|
|
||||||
m_totalGraphCost += vertexCost(vertexp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void statsReport(const string& stage) const {
|
|
||||||
V3Stats::addStat("MTask graph, " + stage + ", critical path cost", m_longestCpCost);
|
|
||||||
V3Stats::addStat("MTask graph, " + stage + ", total graph cost", m_totalGraphCost);
|
|
||||||
V3Stats::addStat("MTask graph, " + stage + ", mtask count", m_vertexCount);
|
|
||||||
V3Stats::addStat("MTask graph, " + stage + ", edge count", m_edgeCount);
|
|
||||||
V3Stats::addStat("MTask graph, " + stage + ", parallelism factor", parallelismFactor());
|
|
||||||
}
|
|
||||||
void debugReport() const {
|
|
||||||
UINFO(0, " Critical path cost = " << m_longestCpCost << endl);
|
|
||||||
UINFO(0, " Total graph cost = " << m_totalGraphCost << endl);
|
|
||||||
UINFO(0, " MTask vertex count = " << m_vertexCount << endl);
|
|
||||||
UINFO(0, " Edge count = " << m_edgeCount << endl);
|
|
||||||
UINFO(0, " Parallelism factor = " << parallelismFactor() << endl);
|
|
||||||
}
|
|
||||||
static uint32_t vertexCost(const V3GraphVertex* vertexp) {
|
|
||||||
return vertexp->as<const AbstractMTask>()->cost();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
VL_UNCOPYABLE(PartParallelismEst);
|
|
||||||
};
|
|
||||||
|
|
||||||
//######################################################################
|
//######################################################################
|
||||||
|
|
||||||
// Look at vertex costs (in one way) to form critical paths for each
|
// Look at vertex costs (in one way) to form critical paths for each
|
||||||
|
|
@ -1758,20 +1678,13 @@ private:
|
||||||
chain_len * 2, nullptr, nullptr, false /* slowAsserts */};
|
chain_len * 2, nullptr, nullptr, false /* slowAsserts */};
|
||||||
ec.go();
|
ec.go();
|
||||||
|
|
||||||
PartParallelismEst check{&mtasks};
|
// All vertices should merge into one
|
||||||
check.traverse();
|
UASSERT_SELFTEST(
|
||||||
|
bool, mtasks.verticesBeginp() && !mtasks.verticesBeginp()->verticesNextp(), true);
|
||||||
|
|
||||||
const uint64_t endUsecs = V3Os::timeUsecs();
|
const uint64_t endUsecs = V3Os::timeUsecs();
|
||||||
const uint64_t elapsedUsecs = endUsecs - startUsecs;
|
const uint64_t elapsedUsecs = endUsecs - startUsecs;
|
||||||
|
|
||||||
if (debug() >= 6) {
|
|
||||||
UINFO(0, "Chain self test stats:\n");
|
|
||||||
check.debugReport();
|
|
||||||
UINFO(0, "Elapsed usecs = " << elapsedUsecs << "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// All vertices should merge into one
|
|
||||||
UASSERT_SELFTEST(size_t, check.vertexCount(), 1);
|
|
||||||
return elapsedUsecs;
|
return elapsedUsecs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1811,20 +1724,15 @@ private:
|
||||||
partInitCriticalPaths(&mtasks);
|
partInitCriticalPaths(&mtasks);
|
||||||
PartContraction{&mtasks, 20, nullptr, nullptr, true}.go();
|
PartContraction{&mtasks, 20, nullptr, nullptr, true}.go();
|
||||||
|
|
||||||
PartParallelismEst check{&mtasks};
|
const auto report = mtasks.parallelismReport(
|
||||||
check.traverse();
|
[](const V3GraphVertex* vtxp) { return vtxp->as<const LogicMTask>()->cost(); });
|
||||||
|
|
||||||
// Checking exact values here is maybe overly precise. What we're
|
// Checking exact values here is maybe overly precise. What we're
|
||||||
// mostly looking for is a healthy reduction in the number of
|
// mostly looking for is a healthy reduction in the number of mtasks.
|
||||||
// mtasks.
|
UASSERT_SELFTEST(uint32_t, report.criticalPathCost(), 19);
|
||||||
if (debug() >= 5) {
|
UASSERT_SELFTEST(uint32_t, report.totalGraphCost(), 101);
|
||||||
UINFO(0, "X self test stats:\n");
|
UASSERT_SELFTEST(uint32_t, report.vertexCount(), 14);
|
||||||
check.debugReport();
|
UASSERT_SELFTEST(uint32_t, report.edgeCount(), 13);
|
||||||
}
|
|
||||||
UASSERT_SELFTEST(uint32_t, check.longestCritPathCost(), 19);
|
|
||||||
UASSERT_SELFTEST(uint32_t, check.totalGraphCost(), 101);
|
|
||||||
UASSERT_SELFTEST(uint32_t, check.vertexCount(), 14);
|
|
||||||
UASSERT_SELFTEST(uint32_t, check.edgeCount(), 13);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
@ -2581,13 +2489,21 @@ void V3Partition::debugMTaskGraphStats(const V3Graph* graphp, const string& stag
|
||||||
// Look only at the cost of each mtask, neglect communication cost.
|
// Look only at the cost of each mtask, neglect communication cost.
|
||||||
// This will show us how much parallelism we expect, assuming cache-miss
|
// This will show us how much parallelism we expect, assuming cache-miss
|
||||||
// costs are minor and the cost of running logic is the dominant cost.
|
// costs are minor and the cost of running logic is the dominant cost.
|
||||||
PartParallelismEst vertexParEst{graphp};
|
const auto report = graphp->parallelismReport(
|
||||||
vertexParEst.traverse();
|
[](const V3GraphVertex* vtxp) { return vtxp->as<const AbstractMTask>()->cost(); });
|
||||||
vertexParEst.statsReport(stage);
|
V3Stats::addStat("MTask graph, " + stage + ", critical path cost", report.criticalPathCost());
|
||||||
|
V3Stats::addStat("MTask graph, " + stage + ", total graph cost", report.totalGraphCost());
|
||||||
|
V3Stats::addStat("MTask graph, " + stage + ", mtask count", report.vertexCount());
|
||||||
|
V3Stats::addStat("MTask graph, " + stage + ", edge count", report.edgeCount());
|
||||||
|
V3Stats::addStat("MTask graph, " + stage + ", parallelism factor", report.parallelismFactor());
|
||||||
if (debug() >= 4) {
|
if (debug() >= 4) {
|
||||||
UINFO(0, "\n");
|
UINFO(0, "\n");
|
||||||
UINFO(0, " Parallelism estimate for based on mtask costs:\n");
|
UINFO(0, " MTask Parallelism estimate based costs at stage" << stage << ":\n");
|
||||||
vertexParEst.debugReport();
|
UINFO(0, " Critical path cost = " << report.criticalPathCost() << "\n");
|
||||||
|
UINFO(0, " Total graph cost = " << report.totalGraphCost() << "\n");
|
||||||
|
UINFO(0, " MTask vertex count = " << report.vertexCount() << "\n");
|
||||||
|
UINFO(0, " Edge count = " << report.edgeCount() << "\n");
|
||||||
|
UINFO(0, " Parallelism factor = " << report.parallelismFactor() << "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3041,13 +2957,21 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record summary stats for final m_tasks graph.
|
// Record summary stats for final m_tasks graph.
|
||||||
// (More verbose stats are available with --debugi-V3Partition >= 3.)
|
const auto report = execMTaskGraphp->parallelismReport(
|
||||||
PartParallelismEst parEst{execMTaskGraphp};
|
[](const V3GraphVertex* vtxp) { return vtxp->as<const ExecMTask>()->cost(); });
|
||||||
parEst.traverse();
|
V3Stats::addStat("MTask graph, final, critical path cost", report.criticalPathCost());
|
||||||
parEst.statsReport("final");
|
V3Stats::addStat("MTask graph, final, total graph cost", report.totalGraphCost());
|
||||||
|
V3Stats::addStat("MTask graph, final, mtask count", report.vertexCount());
|
||||||
|
V3Stats::addStat("MTask graph, final, edge count", report.edgeCount());
|
||||||
|
V3Stats::addStat("MTask graph, final, parallelism factor", report.parallelismFactor());
|
||||||
if (debug() >= 3) {
|
if (debug() >= 3) {
|
||||||
|
UINFO(0, "\n");
|
||||||
UINFO(0, " Final mtask parallelism report:\n");
|
UINFO(0, " Final mtask parallelism report:\n");
|
||||||
parEst.debugReport();
|
UINFO(0, " Critical path cost = " << report.criticalPathCost() << "\n");
|
||||||
|
UINFO(0, " Total graph cost = " << report.totalGraphCost() << "\n");
|
||||||
|
UINFO(0, " MTask vertex count = " << report.vertexCount() << "\n");
|
||||||
|
UINFO(0, " Edge count = " << report.edgeCount() << "\n");
|
||||||
|
UINFO(0, " Parallelism factor = " << report.parallelismFactor() << "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue