diff --git a/src/V3Graph.h b/src/V3Graph.h index af0c965fe..ccc208e97 100644 --- a/src/V3Graph.h +++ b/src/V3Graph.h @@ -26,6 +26,7 @@ #include "V3ThreadSafety.h" #include +#include <functional> class FileLine; class V3Graph; @@ -182,6 +183,42 @@ public: void dumpEdges(std::ostream& os, const V3GraphVertex* vertexp) const VL_MT_DISABLED; static void selfTest() VL_MT_DISABLED; + // Summary of the parallelism available in a graph, as returned by parallelismReport() + class ParallelismReport final { + friend class GraphAlgParallelismReport; + // Total cost of evaluating the whole graph. The ratio of m_totalGraphCost to + // m_criticalPathCost gives us an estimate of the parallelizability of this graph which is + // only as good as the guess returned by vertexCost. + uint32_t m_totalGraphCost = 0; + + // Cost of the critical path, in abstract units (the same units returned by the vertexCost) + uint32_t m_criticalPathCost = 0; + + size_t m_vertexCount = 0; // Number of vertexes in the graph + size_t m_edgeCount = 0; // Number of edges in the graph + + ParallelismReport() = default; + + public: + ~ParallelismReport() = default; + ParallelismReport(const ParallelismReport&) = default; + ParallelismReport& operator=(const ParallelismReport&) = default; + + uint32_t totalGraphCost() const { return m_totalGraphCost; } + uint32_t criticalPathCost() const { return m_criticalPathCost; } + size_t vertexCount() const { return m_vertexCount; } + size_t edgeCount() const { return m_edgeCount; } + double parallelismFactor() const { + // A zero critical path cost (e.g. an empty graph) would divide by zero, report 0 + if (!m_criticalPathCost) return 0.0; + return static_cast<double>(m_totalGraphCost) / m_criticalPathCost; + } + }; + + // Compute the ParallelismReport of this graph, 'vertexCost' gives the cost of each vertex + ParallelismReport parallelismReport( + std::function<uint32_t(const V3GraphVertex*)> vertexCost) const VL_MT_DISABLED; + // CALLBACKS virtual void loopsMessageCb(V3GraphVertex* vertexp) VL_MT_DISABLED; virtual void loopsVertexCb(V3GraphVertex* vertexp) VL_MT_DISABLED; diff --git a/src/V3GraphAlg.cpp b/src/V3GraphAlg.cpp index f6bbfc92b..5cb058497 100644 --- a/src/V3GraphAlg.cpp +++ b/src/V3GraphAlg.cpp @@ -23,6 +23,8 @@ #include "V3Global.h" #include "V3GraphPathChecker.h" +#include "V3GraphStream.h" +#include 
"V3Stats.h" #include #include @@ -515,3 +517,61 @@ double V3Graph::orderDFSIterate(V3GraphVertex* vertexp) { vertexp->user(2); return vertexp->fanout(); } + +//###################################################################### +//###################################################################### +// Algorithms - parallelism report + +class GraphAlgParallelismReport final { + // MEMBERS + const V3Graph& m_graph; // The graph + const std::function<uint32_t(const V3GraphVertex*)> m_vertexCost; // vertex cost function + V3Graph::ParallelismReport m_report; // The result report + + // CONSTRUCTORS + explicit GraphAlgParallelismReport(const V3Graph& graph, + std::function<uint32_t(const V3GraphVertex*)> vertexCost) + : m_graph{graph} + , m_vertexCost{vertexCost} { + // For each node, record the critical path cost from the start + // of the graph through the end of the node. + std::unordered_map<const V3GraphVertex*, uint32_t> critPaths; + GraphStreamUnordered serialize{&m_graph}; + for (const V3GraphVertex* vertexp; (vertexp = serialize.nextp());) { + ++m_report.m_vertexCount; + uint32_t cpCostToHere = 0; + for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) { + ++m_report.m_edgeCount; + // Edges carry no cost here: each upstream vertex's critical path + // cost is a candidate critical path cost to the start of the + // current vertex. Whichever is largest is the critical path + // to reach the start of this vertex. + cpCostToHere = std::max(cpCostToHere, critPaths[edgep->fromp()]); + } + // Include the cost of the current vertex in the critical + // path, so it represents the critical path to the end of + // this vertex. + const uint32_t cost = m_vertexCost(vertexp); // Evaluate the callback once per vertex + cpCostToHere += cost; + critPaths[vertexp] = cpCostToHere; + m_report.m_criticalPathCost = std::max(m_report.m_criticalPathCost, cpCostToHere); + // Tally the total cost contributed by vertices. 
+ m_report.m_totalGraphCost += cost; + } + } + ~GraphAlgParallelismReport() = default; + VL_UNCOPYABLE(GraphAlgParallelismReport); + VL_UNMOVABLE(GraphAlgParallelismReport); + +public: + // Compute and return the parallelism report of 'graph', using 'vertexCost' as the cost of each vertex + static V3Graph::ParallelismReport + apply(const V3Graph& graph, std::function<uint32_t(const V3GraphVertex*)> vertexCost) { + return GraphAlgParallelismReport(graph, vertexCost).m_report; + } +}; + +V3Graph::ParallelismReport +V3Graph::parallelismReport(std::function<uint32_t(const V3GraphVertex*)> vertexCost) const { + return GraphAlgParallelismReport::apply(*this, vertexCost); +} diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 0cb8ac20d..effe0b509 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -176,6 +176,7 @@ using EdgeHeap = PairingHeap; // LogicMTask class LogicMTask final : public AbstractLogicMTask { + VL_RTTI_IMPL(LogicMTask, AbstractLogicMTask) template friend class PartPropagateCp; @@ -730,87 +731,6 @@ void MergeCandidate::rescore() { } } -//###################################################################### -// PartParallelismEst - Estimate parallelism of graph - -class PartParallelismEst final { - // MEMBERS - const V3Graph* const m_graphp; // Mtask-containing graph - - // Total cost of evaluating the whole graph. - // The ratio of m_totalGraphCost to longestCpCost gives us an estimate - // of the parallelizability of this graph which is only as good as the - // guess returned by LogicMTask::cost(). 
- uint32_t m_totalGraphCost = 0; - - // Cost of the longest critical path, in abstract units (the same units - // returned by the vertexCost) - uint32_t m_longestCpCost = 0; - - size_t m_vertexCount = 0; // Number of vertexes calculated - size_t m_edgeCount = 0; // Number of edges calculated - -public: - // CONSTRUCTORS - explicit PartParallelismEst(const V3Graph* graphp) - : m_graphp{graphp} {} - - // METHODS - uint32_t totalGraphCost() const { return m_totalGraphCost; } - uint32_t longestCritPathCost() const { return m_longestCpCost; } - size_t vertexCount() const { return m_vertexCount; } - size_t edgeCount() const { return m_edgeCount; } - double parallelismFactor() const { - return (static_cast(m_totalGraphCost) / m_longestCpCost); - } - void traverse() { - // For each node, record the critical path cost from the start - // of the graph through the end of the node. - std::unordered_map critPaths; - GraphStreamUnordered serialize{m_graphp}; - for (const V3GraphVertex* vertexp; (vertexp = serialize.nextp());) { - ++m_vertexCount; - uint32_t cpCostToHere = 0; - for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) { - ++m_edgeCount; - // For each upstream item, add its critical path cost to - // the cost of this edge, to form a new candidate critical - // path cost to the current node. Whichever is largest is - // the critical path to reach the start of this node. - cpCostToHere = std::max(cpCostToHere, critPaths[edgep->fromp()]); - } - // Include the cost of the current vertex in the critical - // path, so it represents the critical path to the end of - // this vertex. - cpCostToHere += vertexCost(vertexp); - critPaths[vertexp] = cpCostToHere; - m_longestCpCost = std::max(m_longestCpCost, cpCostToHere); - // Tally the total cost contributed by vertices. 
- m_totalGraphCost += vertexCost(vertexp); - } - } - void statsReport(const string& stage) const { - V3Stats::addStat("MTask graph, " + stage + ", critical path cost", m_longestCpCost); - V3Stats::addStat("MTask graph, " + stage + ", total graph cost", m_totalGraphCost); - V3Stats::addStat("MTask graph, " + stage + ", mtask count", m_vertexCount); - V3Stats::addStat("MTask graph, " + stage + ", edge count", m_edgeCount); - V3Stats::addStat("MTask graph, " + stage + ", parallelism factor", parallelismFactor()); - } - void debugReport() const { - UINFO(0, " Critical path cost = " << m_longestCpCost << endl); - UINFO(0, " Total graph cost = " << m_totalGraphCost << endl); - UINFO(0, " MTask vertex count = " << m_vertexCount << endl); - UINFO(0, " Edge count = " << m_edgeCount << endl); - UINFO(0, " Parallelism factor = " << parallelismFactor() << endl); - } - static uint32_t vertexCost(const V3GraphVertex* vertexp) { - return vertexp->as()->cost(); - } - -private: - VL_UNCOPYABLE(PartParallelismEst); -}; - //###################################################################### // Look at vertex costs (in one way) to form critical paths for each @@ -1758,20 +1678,13 @@ private: chain_len * 2, nullptr, nullptr, false /* slowAsserts */}; ec.go(); - PartParallelismEst check{&mtasks}; - check.traverse(); + // All vertices should merge into one + UASSERT_SELFTEST( + bool, mtasks.verticesBeginp() && !mtasks.verticesBeginp()->verticesNextp(), true); const uint64_t endUsecs = V3Os::timeUsecs(); const uint64_t elapsedUsecs = endUsecs - startUsecs; - if (debug() >= 6) { - UINFO(0, "Chain self test stats:\n"); - check.debugReport(); - UINFO(0, "Elapsed usecs = " << elapsedUsecs << "\n"); - } - - // All vertices should merge into one - UASSERT_SELFTEST(size_t, check.vertexCount(), 1); return elapsedUsecs; } @@ -1811,20 +1724,15 @@ private: partInitCriticalPaths(&mtasks); PartContraction{&mtasks, 20, nullptr, nullptr, true}.go(); - PartParallelismEst check{&mtasks}; - 
check.traverse(); + const auto report = mtasks.parallelismReport( + [](const V3GraphVertex* vtxp) { return vtxp->as()->cost(); }); // Checking exact values here is maybe overly precise. What we're - // mostly looking for is a healthy reduction in the number of - // mtasks. - if (debug() >= 5) { - UINFO(0, "X self test stats:\n"); - check.debugReport(); - } - UASSERT_SELFTEST(uint32_t, check.longestCritPathCost(), 19); - UASSERT_SELFTEST(uint32_t, check.totalGraphCost(), 101); - UASSERT_SELFTEST(uint32_t, check.vertexCount(), 14); - UASSERT_SELFTEST(uint32_t, check.edgeCount(), 13); + // mostly looking for is a healthy reduction in the number of mtasks. + UASSERT_SELFTEST(uint32_t, report.criticalPathCost(), 19); + UASSERT_SELFTEST(uint32_t, report.totalGraphCost(), 101); + UASSERT_SELFTEST(uint32_t, report.vertexCount(), 14); + UASSERT_SELFTEST(uint32_t, report.edgeCount(), 13); } public: @@ -2581,13 +2489,21 @@ void V3Partition::debugMTaskGraphStats(const V3Graph* graphp, const string& stag // Look only at the cost of each mtask, neglect communication cost. // This will show us how much parallelism we expect, assuming cache-miss // costs are minor and the cost of running logic is the dominant cost. 
- PartParallelismEst vertexParEst{graphp}; - vertexParEst.traverse(); - vertexParEst.statsReport(stage); + const auto report = graphp->parallelismReport( + [](const V3GraphVertex* vtxp) { return vtxp->as()->cost(); }); + V3Stats::addStat("MTask graph, " + stage + ", critical path cost", report.criticalPathCost()); + V3Stats::addStat("MTask graph, " + stage + ", total graph cost", report.totalGraphCost()); + V3Stats::addStat("MTask graph, " + stage + ", mtask count", report.vertexCount()); + V3Stats::addStat("MTask graph, " + stage + ", edge count", report.edgeCount()); + V3Stats::addStat("MTask graph, " + stage + ", parallelism factor", report.parallelismFactor()); if (debug() >= 4) { UINFO(0, "\n"); - UINFO(0, " Parallelism estimate for based on mtask costs:\n"); - vertexParEst.debugReport(); + UINFO(0, " MTask Parallelism estimate based on costs at stage " << stage << ":\n"); + UINFO(0, " Critical path cost = " << report.criticalPathCost() << "\n"); + UINFO(0, " Total graph cost = " << report.totalGraphCost() << "\n"); + UINFO(0, " MTask vertex count = " << report.vertexCount() << "\n"); + UINFO(0, " Edge count = " << report.edgeCount() << "\n"); + UINFO(0, " Parallelism factor = " << report.parallelismFactor() << "\n"); } } @@ -3041,13 +2957,21 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) { } // Record summary stats for final m_tasks graph. - // (More verbose stats are available with --debugi-V3Partition >= 3.) 
- PartParallelismEst parEst{execMTaskGraphp}; - parEst.traverse(); - parEst.statsReport("final"); + const auto report = execMTaskGraphp->parallelismReport( + [](const V3GraphVertex* vtxp) { return vtxp->as()->cost(); }); + V3Stats::addStat("MTask graph, final, critical path cost", report.criticalPathCost()); + V3Stats::addStat("MTask graph, final, total graph cost", report.totalGraphCost()); + V3Stats::addStat("MTask graph, final, mtask count", report.vertexCount()); + V3Stats::addStat("MTask graph, final, edge count", report.edgeCount()); + V3Stats::addStat("MTask graph, final, parallelism factor", report.parallelismFactor()); if (debug() >= 3) { - UINFO(0, " Final mtask parallelism report:\n"); - parEst.debugReport(); + UINFO(0, "\n"); + UINFO(0, " Final mtask parallelism report:\n"); + UINFO(0, " Critical path cost = " << report.criticalPathCost() << "\n"); + UINFO(0, " Total graph cost = " << report.totalGraphCost() << "\n"); + UINFO(0, " MTask vertex count = " << report.vertexCount() << "\n"); + UINFO(0, " Edge count = " << report.edgeCount() << "\n"); + UINFO(0, " Parallelism factor = " << report.parallelismFactor() << "\n"); } }