Internals: Add new graph algs for future partitioning.

2018-07-15 22:09:27 -04:00 · 2018-07-15 22:09:27 -04:00 · e37dce9d85
parent 43694ec87c
commit e37dce9d85
10 changed files with 1152 additions and 0 deletions
--- a/src/Makefile_obj.in
+++ b/src/Makefile_obj.in
@ -195,6 +195,7 @@ RAW_OBJS = \
 	V3GraphAlg.o \
 	V3GraphAcyc.o \
 	V3GraphDfa.o \
+	V3GraphPathChecker.o \
 	V3GraphTest.o \
 	V3Hashed.o \
 	V3Inline.o \
@ -220,6 +221,7 @@ RAW_OBJS = \
 	V3Premit.o \
 	V3Reloop.o \
 	V3Scope.o \
+	V3Scoreboard.o \
 	V3Slice.o \
 	V3Split.o \
 	V3SplitAs.o \
--- a/src/V3Graph.cpp
+++ b/src/V3Graph.cpp
@ -118,6 +118,23 @@ uint32_t V3GraphVertex::outHash() const {
    return hash;
 }

+V3GraphEdge* V3GraphVertex::findConnectingEdgep(GraphWay way,
+                                                const V3GraphVertex* waywardp) {
+    // Linear, O(edges) lookup.  Step through this vertex's 'way' edge list
+    // and waywardp's opposite-direction edge list one entry at a time, in
+    // lock step, returning the first edge on either list that joins the
+    // two vertices.  The walk ends as soon as either list runs out, so
+    // the cost is bounded by the shorter list; the lists are unlikely to
+    // _both_ be huge, even on fairly nasty graphs.
+    GraphWay inv = way.invert();
+    V3GraphEdge* nearEdgep = this->beginp(way);
+    V3GraphEdge* farEdgep = waywardp->beginp(inv);
+    for (; nearEdgep && farEdgep;
+         nearEdgep = nearEdgep->nextp(way), farEdgep = farEdgep->nextp(inv)) {
+        if (nearEdgep->furtherp(way) == waywardp) return nearEdgep;
+        if (farEdgep->furtherp(inv) == this) return farEdgep;
+    }
+    return NULL;
+}
+
 void V3GraphVertex::v3errorEnd(std::ostringstream& str) const {
    std::ostringstream nsstr;
    nsstr<<str.str();
--- a/src/V3Graph.h
+++ b/src/V3Graph.h
@ -156,6 +156,12 @@ public:
    /// Remove any redundant edges, weights become SUM of any other weight
    void removeRedundantEdgesSum(V3EdgeFuncP edgeFuncp);

+    /// Remove any transitive edges.  E.g. if have edges A->B, B->C, and A->C
+    /// then A->C is a "transitive" edge; it's implied by the first two
+    /// (assuming the DAG is a dependency graph.)
+    /// This algorithm can be expensive.
+    void removeTransitiveEdges();
+
    /// Call loopsVertexCb on any one loop starting where specified
    void reportLoops(V3EdgeFuncP edgeFuncp, V3GraphVertex* vertexp);

@ -254,6 +260,9 @@ public:
    void v3errorEndFatal(std::ostringstream& str) const;
    /// Edges are routed around this vertex to point from "from" directly to "to"
    void rerouteEdges(V3Graph* graphp);
+    /// Find an edge connecting this vertex and waywardp, where waywardp is
+    /// the vertex reached from this one by following an edge in direction
+    /// 'way'.
+    /// If edge is not found returns NULL. O(edges) performance.
+    V3GraphEdge* findConnectingEdgep(GraphWay way, const V3GraphVertex* waywardp);
 };

 std::ostream& operator<<(std::ostream& os, V3GraphVertex* vertexp);
--- a/src/V3GraphAlg.cpp
+++ b/src/V3GraphAlg.cpp
@ -30,6 +30,7 @@

 #include "V3Global.h"
 #include "V3GraphAlg.h"
+#include "V3GraphPathChecker.h"

 //######################################################################
 //######################################################################
@ -134,6 +135,46 @@ void V3Graph::removeRedundantEdgesSum(V3EdgeFuncP edgeFuncp) {
    GraphRemoveRedundant (this, edgeFuncp, true);
 }

+//######################################################################
+//######################################################################
+// Algorithms - remove transitive
+
+class GraphAlgRemoveTransitiveEdges : GraphAlg<> {
+public:
+    explicit GraphAlgRemoveTransitiveEdges(V3Graph* graphp)
+        : GraphAlg (graphp, NULL) {}
+    // Visit every out-edge of every vertex, deleting each edge that the
+    // path checker reports as transitive.
+    void go() {
+        GraphPathChecker checker(m_graphp);
+        for (V3GraphVertex* vertexp = m_graphp->verticesBeginp();
+             vertexp; vertexp = vertexp->verticesNextp()) {
+            V3GraphEdge* nextEdgep = NULL;
+            for (V3GraphEdge* edgep = vertexp->outBeginp();
+                 edgep; edgep = nextEdgep) {
+                // Fetch the successor up front; edgep may be deleted below.
+                nextEdgep = edgep->outNextp();
+                // Modifying the graph while the GraphPathChecker is alive
+                // should be safe: removing a transitive edge changes
+                // neither what can be reached from what, nor (it is
+                // expected) the rank or CP of any node.
+                if (!checker.isTransitiveEdge(edgep)) continue;
+                edgep->unlinkDelete();
+            }
+        }
+    }
+private:
+    VL_DEBUG_FUNC;  // Declare debug()
+    VL_UNCOPYABLE(GraphAlgRemoveTransitiveEdges);
+};
+
+void V3Graph::removeTransitiveEdges() {
+    // Hand the whole job to a one-shot algorithm object
+    GraphAlgRemoveTransitiveEdges remover(this);
+    remover.go();
+}
+
 //######################################################################
 //######################################################################
 // Algorithms - weakly connected components
--- a/src/V3GraphPathChecker.cpp
+++ b/src/V3GraphPathChecker.cpp
@ -0,0 +1,167 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: DAG Path Checking
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+#include "V3GraphStream.h"
+#include "V3Global.h"
+#include "V3GraphPathChecker.h"
+
+//######################################################################
+// GraphPCNode
+
+struct GraphPCNode {
+    // Per-vertex bookkeeping for GraphPathChecker, hung off each vertex's
+    // userp().
+    //
+    // Critical path (CP) recorded for each GraphWay, indexed by way.  As
+    // with the LogicMTasks's there is a before and an after CP; unlike the
+    // LogicMTasks's, the generic graph accepted by GraphPathChecker has no
+    // cost info, so each node is assumed to have unit cost.
+    vluint32_t m_cp[GraphWay::NUM_WAYS];
+
+    // Generation of the most recent search that visited this node.
+    // pathExistsInternal() compares it against the checker's current
+    // generation so the same node is not searched twice within one
+    // operation.
+    vluint64_t m_seenAtGeneration;
+
+    // CONSTRUCTORS
+    GraphPCNode() : m_seenAtGeneration(0) {
+        // Start with a zero CP in every direction
+        for (int way = 0; way < GraphWay::NUM_WAYS; ++way) {
+            m_cp[way] = 0;
+        }
+    }
+    ~GraphPCNode() { }
+};
+
+//######################################################################
+// GraphPathChecker implementation
+
+GraphPathChecker::GraphPathChecker(const V3Graph* graphp, V3EdgeFuncP edgeFuncp)
+    : GraphAlg(graphp, edgeFuncp)
+    , m_generation(0) {
+    // Hang a fresh tracking structure off every vertex.  Were a vertex to
+    // be deleted its structure would leak, but that is tolerable as the
+    // checker only accepts const V3Graph*'s.
+    V3GraphVertex* vertexp = graphp->verticesBeginp();
+    while (vertexp) {
+        vertexp->userp(new GraphPCNode);
+        vertexp = vertexp->verticesNextp();
+    }
+    // Fill in the critical paths kept in each vertex's userp(), one pass
+    // per direction
+    initHalfCriticalPaths(GraphWay::FORWARD, false);
+    initHalfCriticalPaths(GraphWay::REVERSE, false);
+}
+
+GraphPathChecker::~GraphPathChecker() {
+    // Release the GraphPCNode attached to each vertex, and clear the
+    // vertex's user pointer so it does not dangle
+    V3GraphVertex* vertexp = m_graphp->verticesBeginp();
+    while (vertexp) {
+        GraphPCNode* pcNodep = static_cast<GraphPCNode*>(vertexp->userp());
+        vertexp->userp(NULL);
+        delete pcNodep; VL_DANGLING(pcNodep);
+        vertexp = vertexp->verticesNextp();
+    }
+}
+
+void GraphPathChecker::initHalfCriticalPaths(GraphWay way, bool checkOnly) {
+    // Stream the vertices in 'way' dependency order, so the relatives on
+    // the 'rev' side of a vertex are handled before the vertex itself.
+    GraphStreamUnordered stream(m_graphp, way);
+    GraphWay rev = way.invert();
+    for (const V3GraphVertex* vertexp = stream.nextp();
+         vertexp; vertexp = stream.nextp()) {
+        // CP of this vertex: one more than the largest CP among the
+        // relatives reached over followed 'rev' edges, or zero if none.
+        unsigned longest = 0;
+        for (V3GraphEdge* edgep = vertexp->beginp(rev);
+             edgep; edgep = edgep->nextp(rev)) {
+            if (m_edgeFuncp(edgep)) {
+                GraphPCNode* relUserp
+                    = static_cast<GraphPCNode*>(edgep->furtherp(rev)->userp());
+                const unsigned viaRelative = relUserp->m_cp[way] + 1;
+                if (viaRelative > longest) longest = viaRelative;
+            }
+        }
+
+        GraphPCNode* ourUserp = static_cast<GraphPCNode*>(vertexp->userp());
+        if (!checkOnly) {
+            // Normal mode: record the freshly computed CP
+            ourUserp->m_cp[way] = longest;
+        } else if (ourUserp->m_cp[way] != longest) {
+            // Check mode: the stored CP must match the recomputed one
+            vertexp->v3fatalSrc("Validation of critical paths failed");
+        }
+    }
+}
+
+bool GraphPathChecker::pathExistsInternal(const V3GraphVertex* ap,
+                                          const V3GraphVertex* bp,
+                                          unsigned* costp) {
+    GraphPCNode* auserp = static_cast<GraphPCNode*>(ap->userp());
+    GraphPCNode* buserp = static_cast<GraphPCNode*>(bp->userp());
+
+    // A node already stamped with the current generation was searched
+    // earlier in this same operation.  As the search is still running,
+    // that earlier visit found no path, so don't recurse through it again.
+    const bool alreadySeen = (auserp->m_seenAtGeneration == m_generation);
+    // Search cost: nothing for a repeat visit, otherwise count 'a' itself
+    if (costp) *costp = alreadySeen ? 0 : 1;
+    if (alreadySeen) return false;
+    auserp->m_seenAtGeneration = m_generation;
+
+    if (ap == bp) return true;
+
+    // Cheap rejection using the CPs: an a->b path requires a's REVERSE CP
+    // to exceed b's, and b's FORWARD CP to exceed a's.
+    if (auserp->m_cp[GraphWay::REVERSE] < buserp->m_cp[GraphWay::REVERSE] + 1
+        || buserp->m_cp[GraphWay::FORWARD] < auserp->m_cp[GraphWay::FORWARD] + 1) {
+        return false;
+    }
+
+    // Slow path; recurse through each followed out-edge of 'a', stopping
+    // at the first child from which 'b' can be reached
+    for (V3GraphEdge* edgep = ap->outBeginp();
+         edgep; edgep = edgep->outNextp()) {
+        if (!m_edgeFuncp(edgep)) continue;
+
+        unsigned childCost;
+        const bool viaChild = pathExistsInternal(edgep->top(), bp, &childCost);
+        // The child's search cost counts whether or not it found a path
+        if (costp) *costp += childCost;
+        if (viaChild) return true;
+    }
+
+    return false;
+}
+
+bool GraphPathChecker::pathExistsFrom(const V3GraphVertex* fromp,
+                                      const V3GraphVertex* top) {
+    // Begin a new generation so marks left by earlier searches are ignored
+    incGeneration();
+    const bool reachable = pathExistsInternal(fromp, top, NULL);
+    return reachable;
+}
+
+bool GraphPathChecker::isTransitiveEdge(const V3GraphEdge* edgep) {
+    // Return true if edgep's destination can also be reached from edgep's
+    // source without using edgep itself, i.e. through some other out-edge
+    // of the source vertex.  A parallel edge to the same destination
+    // counts, as a vertex trivially reaches itself.
+    const V3GraphVertex* fromp = edgep->fromp();
+    const V3GraphVertex* top = edgep->top();
+    // One generation covers the whole query, so a node already searched
+    // through one sibling edge is not searched again through another.
+    incGeneration();
+    for (const V3GraphEdge* fromOutp = fromp->outBeginp();
+         fromOutp; fromOutp = fromOutp->outNextp()) {
+        if (fromOutp == edgep) continue;
+        // Only consider edges this checker was told to follow, matching
+        // the filtering done on every later hop by pathExistsInternal().
+        if (!m_edgeFuncp(fromOutp)) continue;
+        if (pathExistsInternal(fromOutp->top(), top)) {
+            return true;
+        }
+    }
+    return false;
+}
--- a/src/V3GraphPathChecker.h
+++ b/src/V3GraphPathChecker.h
@ -0,0 +1,68 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: DAG Path Checking
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#ifndef _V3GRAPHPATHCHECKER_H_
+#define _V3GRAPHPATHCHECKER_H_
+
+#include "V3Error.h"
+#include "V3Graph.h"
+#include "V3GraphAlg.h"
+
+//######################################################################
+
+/// Implement pathExistsFrom() with some caching to prune the search.
+/// Far more aggressive caching/pruning is possible; for now the use cases
+/// don't rely so heavily on this class that it's necessary.
+///
+/// The graph (or at least, the subset the algorithm sees through
+/// edgeFuncp) must not change during the lifetime of the checker.
+class GraphPathChecker : GraphAlg<const V3Graph> {
+    // Count "generations" which increases on operations that scan through
+    // the graph. Each node is marked with the last generation that scanned
+    // it, to enable asserting there are no cycles, and to avoid recursing
+    // through the same node twice while searching for a path.
+    vluint64_t m_generation;
+
+ public:
+    // CONSTRUCTORS
+    // Attaches a tracking structure to the userp() of every vertex in
+    // graphp (overwriting whatever was stored there) and computes the
+    // critical paths in both directions.  edgeFuncp selects which edges
+    // the checker follows; by default all of them.
+    GraphPathChecker(const V3Graph* graphp,
+                     V3EdgeFuncP edgeFuncp = V3GraphEdge::followAlwaysTrue);
+    // Frees the per-vertex tracking structures and resets each vertex's
+    // userp() to NULL.
+    ~GraphPathChecker();
+
+    // METHODS
+    // Return true if 'top' can be reached from 'fromp' by following
+    // out-edges accepted by edgeFuncp.  A vertex is considered to reach
+    // itself.
+    bool pathExistsFrom(const V3GraphVertex* fromp, const V3GraphVertex* top);
+
+    // If have edges A->B, B->C, and A->C then A->C is considered a
+    // "transitive" edge (implied by A->B and B->C) and it could be safely
+    // removed. Detect such an edge.
+    bool isTransitiveEdge(const V3GraphEdge* edgep);
+
+ private:
+    // Recursive worker for the public queries; the caller must have
+    // started a new generation first.  If costp is non-NULL it receives
+    // the number of nodes newly visited by this call.
+    bool pathExistsInternal(const V3GraphVertex* ap,
+                            const V3GraphVertex* bp,
+                            unsigned* costp = NULL);
+    // Compute the critical path of every vertex in direction w and store
+    // it, or when checkOnly is set, verify the stored value instead.
+    void initHalfCriticalPaths(GraphWay w, bool checkOnly);
+    // Start a new generation, invalidating all earlier "seen" marks.
+    void incGeneration() { ++m_generation; }
+
+    VL_DEBUG_FUNC;  // Declare debug()
+    VL_UNCOPYABLE(GraphPathChecker);
+};
+
+#endif  // Guard
--- a/src/V3GraphStream.h
+++ b/src/V3GraphStream.h
@ -0,0 +1,246 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Dependency graph iterator. Iterates over nodes
+//                         in any DAG, following dependency order.
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#ifndef _V3GRAPHSTREAM_H_
+#define _V3GRAPHSTREAM_H_
+
+#include "config_build.h"
+#include "verilatedos.h"
+#include <set>
+#include VL_INCLUDE_UNORDERED_MAP
+
+#include "V3Graph.h"
+
+//######################################################################
+// GraphStream
+//
+// Template 'T_Compare' is a tie-breaker for ordering nodes that the DAG
+// itself does not order. It must provide an operator() that does a logical
+// less-than on two V3GraphVertex*'s, with the same signature as
+// std::less<const V3GraphVertex*>::operator().  This does not default to
+// std::less<const V3GraphVertex*> because that is nondeterministic, and so
+// not generally safe. If you want a raw pointer compare, see
+// GraphStreamUnordered below.
+
+template <class T_Compare> class GraphStream {
+private:
+    // TYPES
+    // Bookkeeping for one vertex: its position in the original graph
+    // order, and how many edges still block it from being returned.
+    class VxHolder {
+    public:
+        // MEMBERS
+        const V3GraphVertex* m_vxp;  // [mtask] Vertex
+        uint32_t m_pos;  // Sort position
+        uint32_t m_numBlockingEdges;  // Number of blocking edges
+        // CONSTRUCTORS
+        VxHolder(const V3GraphVertex* vxp, uint32_t pos, uint32_t numBlockingEdges)
+            : m_vxp(vxp)
+            , m_pos(pos)
+            , m_numBlockingEdges(numBlockingEdges) {}
+        // METHODS
+        const V3GraphVertex* vertexp() const { return m_vxp; }
+        // Decrement blocking edges count, return true if the vertex is
+        // newly unblocked
+        bool unblock() {
+            if (m_numBlockingEdges <= 0) vertexp()->v3fatalSrc("Underflow of blocking edges");
+            m_numBlockingEdges--;
+            return (m_numBlockingEdges == 0);
+        }
+    };
+
+    // Orders VxHolders first by the user's T_Compare on their vertices,
+    // then by original graph position when T_Compare considers the two
+    // vertices equivalent.
+    class VxHolderCmp {
+    public:
+        // MEMBERS
+        T_Compare m_lessThan;  // Sorting functor
+        // CONSTRUCTORS
+        explicit VxHolderCmp(const T_Compare& lessThan)
+            : m_lessThan(lessThan) {}
+        // METHODS
+        bool operator() (const VxHolder& a, const VxHolder& b) const {
+            if (m_lessThan.operator()(a.vertexp(), b.vertexp())) return true;
+            if (m_lessThan.operator()(b.vertexp(), a.vertexp())) return false;
+            return a.m_pos < b.m_pos;
+        }
+    private:
+        VL_UNCOPYABLE(VxHolderCmp);
+    };
+
+    // The ready set holds its comparator by reference (VxHolderCmp is
+    // uncopyable), so m_vxHolderCmp must be declared, and hence
+    // constructed, before m_readyVertices.
+    typedef std::set<VxHolder, VxHolderCmp&> ReadyVertices;
+    typedef vl_unordered_map<const V3GraphVertex*, VxHolder> WaitingVertices;
+
+    // MEMBERS
+    VxHolderCmp m_vxHolderCmp;  // Vertex comparison functor
+    ReadyVertices m_readyVertices;  // List of ready vertices
+    WaitingVertices m_waitingVertices;  // List of waiting vertices
+    typename ReadyVertices::iterator m_last;  // Previously returned element
+    GraphWay m_way;  // FORWARD or REVERSE order of traversal
+
+public:
+    // CONSTRUCTORS
+    GraphStream(const V3Graph* graphp,
+                GraphWay way = GraphWay::FORWARD,
+                const T_Compare& lessThan = T_Compare())
+        // NOTE: Perhaps REVERSE way should also reverse the sense of the
+        // lessThan function? For now the only usage of REVERSE is not
+        // sensitive to its lessThan at all, so it doesn't matter.
+        : m_vxHolderCmp(lessThan)
+        , m_readyVertices(m_vxHolderCmp)
+        , m_last(m_readyVertices.end())
+        , m_way(way) {
+        uint32_t pos = 0;
+        for (const V3GraphVertex* vxp = graphp->verticesBeginp();
+             vxp; vxp=vxp->verticesNextp()) {
+            // Every vertex initially is waiting, or ready.
+            if (way == GraphWay::FORWARD) {
+                if (vxp->inEmpty()) {
+                    VxHolder newVx(vxp, pos++, 0);
+                    m_readyVertices.insert(newVx);
+                } else {
+                    // Blocked by each of its in-edges
+                    uint32_t depCount = 0;
+                    for (V3GraphEdge* depp = vxp->inBeginp();
+                         depp; depp = depp->inNextp()) {
+                        depCount++;
+                    }
+                    VxHolder newVx(vxp, pos++, depCount);
+                    m_waitingVertices.insert(std::make_pair(vxp, newVx));
+                }
+            } else {  // REVERSE
+                if (vxp->outEmpty()) {
+                    VxHolder newVx(vxp, pos++, 0);
+                    m_readyVertices.insert(newVx);
+                } else {
+                    // Blocked by each of its out-edges
+                    uint32_t depCount = 0;
+                    for (V3GraphEdge* depp = vxp->outBeginp();
+                         depp; depp = depp->outNextp()) {
+                        depCount++;
+                    }
+                    VxHolder newVx(vxp, pos++, depCount);
+                    m_waitingVertices.insert(std::make_pair(vxp, newVx));
+                }
+            }
+        }
+    }
+    ~GraphStream() {}
+
+    // METHODS
+
+    // Each call to nextp() returns a unique vertex in the graph, in
+    // dependency order.  Returns NULL once no ready vertex remains.
+    //
+    // Dependencies alone don't fully specify the order. Usually a graph
+    // has many "ready" vertices, any of which might return next.
+    //
+    // To decide among the "ready" vertices, GraphStream keeps an ordered
+    // list of ready vertices, sorted first by lessThan and second by
+    // original graph order.
+    //
+    // You might expect that nextp() would return the first item from this
+    // sorted list -- but that's not what it does!  What nextp() actually
+    // does is to return the next item in the list, following the position
+    // where the previously-returned item would have been.  This maximizes
+    // locality: given an appropriate lessThan, nextp() will stay on a
+    // given domain (or domscope, or mtask, or whatever) for as long as
+    // possible before an unmet dependency forces us to switch to another
+    // one.
+    //
+    // Within a group of vertices that lessThan considers equivalent,
+    // nextp() returns them in the original graph order (presumably also
+    // good locality.) V3Order.cpp relies on this to order the logic
+    // vertices within a given mtask without jumping over domains too much.
+    const V3GraphVertex* nextp() {
+        const V3GraphVertex* resultp = NULL;
+
+        typename ReadyVertices::iterator curIt;
+        if (m_last == m_readyVertices.end()) {
+            // First call to nextp()
+            curIt = m_readyVertices.begin();
+        } else {
+            // Subsequent call to nextp()
+            curIt = m_last;
+            ++curIt;
+            // Remove previously-returned element
+            m_readyVertices.erase(m_last);
+            // Wrap curIt. Expect to wrap, and make another pass, to find
+            // newly-ready elements that could have appeared ahead of the
+            // m_last iterator
+            if (curIt == m_readyVertices.end()) {
+                curIt = m_readyVertices.begin();
+            }
+        }
+
+        if (curIt != m_readyVertices.end()) {
+            resultp = curIt->vertexp();
+            unblockDeps(resultp);
+        } else {
+            // No ready vertices; waiting should be empty too, otherwise we
+            // were fed a graph with cycles (which is not supported.)
+            UASSERT(m_waitingVertices.empty(), "DGS fed non-DAG");
+        }
+
+        m_last = curIt;
+        return resultp;
+    }
+
+private:
+    // Account for vertexp having been returned: decrement the blocking
+    // count of each vertex on the far side of its edges (out-edges when
+    // FORWARD, in-edges when REVERSE), and move any vertex that becomes
+    // unblocked from the waiting map to the ready set.
+    void unblockDeps(const V3GraphVertex* vertexp) {
+        if (m_way == GraphWay::FORWARD) {
+            for (V3GraphEdge* edgep = vertexp->outBeginp();
+                 edgep; edgep=edgep->outNextp()) {
+                V3GraphVertex* toVertexp = edgep->top();
+
+                typename WaitingVertices::iterator it =
+                    m_waitingVertices.find(toVertexp);
+                if (it == m_waitingVertices.end()) {
+                    toVertexp->v3fatalSrc("Found edge into vertex not in waiting list.");
+                }
+                if (it->second.unblock()) {
+                    m_readyVertices.insert(it->second);
+                    m_waitingVertices.erase(it);
+                }
+            }
+        } else {
+            for (V3GraphEdge* edgep = vertexp->inBeginp();
+                 edgep; edgep=edgep->inNextp()) {
+                V3GraphVertex* fromVertexp = edgep->fromp();
+
+                typename WaitingVertices::iterator it =
+                    m_waitingVertices.find(fromVertexp);
+                if (it == m_waitingVertices.end()) {
+                    fromVertexp->v3fatalSrc("Found edge into vertex not in waiting list.");
+                }
+                if (it->second.unblock()) {
+                    m_readyVertices.insert(it->second);
+                    m_waitingVertices.erase(it);
+                }
+            }
+        }
+    }
+
+    VL_UNCOPYABLE(GraphStream);
+};
+
+//######################################################################
+
+// GraphStreamUnordered is GraphStream using a plain pointer compare to
+// break ties in the graph order. This WILL return nodes in
+// nondeterministic order.
+typedef GraphStream<std::less<const V3GraphVertex*> > GraphStreamUnordered;
+
+#endif  // Guard
--- a/src/V3Scoreboard.cpp
+++ b/src/V3Scoreboard.cpp
@ -0,0 +1,91 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Threading's element scoreboarding
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+#include "V3Scoreboard.h"
+
+// Minimal element type used only by V3ScoreboardBase::selfTest().
+class ScoreboardTestElem {
+public:
+    // MEMBERS
+    uint32_t m_score;  // Current score, as reported by scoreFn()
+    uint32_t m_id;  // Serial number used by operator<; 0 if default-constructed
+    // CONSTRUCTORS
+    // Create an element with the given score and the next serial number
+    // (serial numbers start at 1).
+    explicit ScoreboardTestElem(uint32_t score) : m_score(score) {
+        static uint32_t s_serial = 0;
+        m_id = ++s_serial;
+    }
+    // Zero both members so a default-constructed element never exposes
+    // indeterminate values through scoreFn() or operator<.
+    ScoreboardTestElem() : m_score(0), m_id(0) {}
+    // METHODS
+    // Scoring callback: returns the element's current m_score.
+    static uint32_t scoreFn(const ScoreboardTestElem* elp) { return elp->m_score; }
+
+    // Order elements by serial number, i.e. by order of construction.
+    bool operator< (const ScoreboardTestElem& other) const {
+        return m_id < other.m_id;
+    }
+};
+
+// Self-test of V3Scoreboard using three ScoreboardTestElem's scored 10, 20
+// and 30.  Each UASSERT message states the behavior being checked; the
+// steps depend on one another and must run in this order.
+void V3ScoreboardBase::selfTest() {
+    // NOTE(review): the meaning of the second constructor argument (true)
+    // is not visible here -- confirm against V3Scoreboard's constructor.
+    V3Scoreboard<ScoreboardTestElem, uint32_t> sb(ScoreboardTestElem::scoreFn, true);
+
+    UASSERT(!sb.needsRescore(), "SelfTest: Empty sb should not need rescore.");
+
+    ScoreboardTestElem e1(10);
+    ScoreboardTestElem e2(20);
+    ScoreboardTestElem e3(30);
+
+    sb.addElem(&e1);
+    sb.addElem(&e2);
+    sb.addElem(&e3);
+
+    // Newly added elements are unscored until rescore() is called, so
+    // none is eligible to be returned by bestp() yet.
+    UASSERT(sb.needsRescore(), "SelfTest: Newly filled sb should need a rescore.");
+    UASSERT(sb.needsRescore(&e1),
+            "SelfTest: Individual newly-added element should need rescore");
+    UASSERT(NULL == sb.bestp(),
+            "SelfTest: Newly filled sb should have nothing eligible for Bestp()");
+
+    sb.rescore();
+
+    // After rescoring, scores are cached and the lowest score (e1) is best.
+    UASSERT(!sb.needsRescore(), "SelfTest: Newly rescored sb should not need rescore");
+    UASSERT(!sb.needsRescore(&e1),
+            "SelfTest: Newly rescored sb should not need an element rescored");
+    UASSERT(e2.m_score == sb.cachedScore(&e2),
+            "SelfTest: Cached score should match current score");
+    UASSERT(&e1 == sb.bestp(),
+            "SelfTest: Should return element with lowest (best) score");
+
+    // Change one element's score
+    sb.hintScoreChanged(&e2);
+    e2.m_score = 21;
+    UASSERT(sb.needsRescore(&e2),
+            "SelfTest: Should need rescore on elem after hintScoreChanged");
+
+    // Remove an element
+    UASSERT(sb.contains(&e1), "SelfTest: e1 should be there");
+    sb.removeElem(&e1);
+    UASSERT(!sb.contains(&e1), "SelfTest: e1 should be gone");
+    UASSERT(sb.contains(&e2), "SelfTest: e2 should be there, despite needing rescore");
+
+    // Now e3 should be our best-scoring element, even though
+    // e2 has a better score, since e2 is pending rescore.
+    UASSERT(&e3 == sb.bestp(), "SelfTest: Expect e3 as best element with known score.");
+    sb.rescore();
+    // With e2 rescored (now 21) it beats e3 (30) again.
+    UASSERT(&e2 == sb.bestp(), "SelfTest: Expect e2 as best element again after Rescore");
+}
--- a/src/V3Scoreboard.h
+++ b/src/V3Scoreboard.h
@ -0,0 +1,509 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Scoreboards for thread partitioner
+//
+// Provides scoreboard classes:
+//
+//  * SortByValueMap
+//  * V3Scoreboard
+//
+// See details below
+//
+// Code available from: http://www.veripool.org/verilator
+//
+//*************************************************************************
+//
+// Copyright 2003-2018 by Wilson Snyder.  This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+//
+// Verilator is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//*************************************************************************
+
+#ifndef _V3SCOREBOARD_H_
+#define _V3SCOREBOARD_H_
+
+#include "config_build.h"
+#include "verilatedos.h"
+#include <cstddef>
+#include <functional>
+#include <iterator>
+#include <map>
+#include <set>
+
+#include "V3Error.h"
+#include VL_INCLUDE_UNORDERED_MAP
+#include VL_INCLUDE_UNORDERED_SET
+
+//######################################################################
+// SortByValueMap
+
+/// A generic key-value map, except it also supports iterating in
+/// value-sorted order.  Values need not be unique. Uses T_KeyCompare to
+/// break ties in the sort when values collide.
+///
+/// Two containers are kept in sync: an unordered key->value map (m_keys)
+/// and an ordered value->key-set map (m_vals).  Invariant: m_vals never
+/// holds an empty key set; a value's bucket is erased together with its
+/// last key.  The iterators below depend on that invariant.
+
+template <typename T_Key, typename T_Value, class T_KeyCompare = std::less<T_Key> >
+class SortByValueMap {
+    // TYPES
+private:
+    typedef vl_unordered_map<T_Key, T_Value> Key2Val;
+    typedef std::set<T_Key, T_KeyCompare> KeySet;
+    typedef std::map<T_Value, KeySet> Val2Keys;
+
+    // MEMBERS
+    Key2Val m_keys;  // Map each key to its value. Not sorted.
+    Val2Keys m_vals;  // Map each value to its keys. Sorted.
+
+public:
+    // CONSTRUCTORS
+    SortByValueMap() {}
+
+    /// Bidirectional iterator over the map in ascending value order; keys
+    /// sharing a value are visited in T_KeyCompare order.  Read the
+    /// current entry with key() and value().
+    class const_iterator {
+        // TYPES
+    public:
+        typedef const_iterator value_type;
+        typedef const_iterator reference;  // See comment on operator*()
+        typedef void pointer;
+        typedef std::ptrdiff_t difference_type;
+        typedef std::bidirectional_iterator_tag iterator_category;
+    protected:
+        friend class SortByValueMap;
+
+        // MEMBERS
+        typename KeySet::iterator m_keyIt;  // Position inside the current bucket
+        typename Val2Keys::iterator m_valIt;  // Current value bucket
+        SortByValueMap* m_sbvmp;  // Owning map
+        bool m_end;  // At the end(); m_keyIt and m_valIt are not meaningful
+
+        // CONSTRUCTORS
+        const_iterator(SortByValueMap* sbmvp)  // for end()
+            : m_sbvmp(sbmvp)
+            , m_end(true) {}
+        const_iterator(typename Val2Keys::iterator valIt,
+                       typename KeySet::iterator keyIt,
+                       SortByValueMap* sbvmp)
+            : m_keyIt(keyIt)
+            , m_valIt(valIt)
+            , m_sbvmp(sbvmp)
+            , m_end(false) {}
+
+        // METHODS
+        // Step forward one element, moving on to the next value bucket
+        // when the current bucket is exhausted.
+        void advanceUntilValid() {
+            // At end() the underlying iterators are not meaningful, so
+            // stepping them would be undefined; fail loudly instead.
+            UASSERT(!m_end, "Incremented iterator past end()");
+            ++m_keyIt;
+            if (m_keyIt != m_valIt->second.end()) {  // Valid iterator, done.
+                return;
+            }
+            // Try the next value?
+            ++m_valIt;
+            if (m_valIt == m_sbvmp->m_vals.end()) {  // No more values
+                m_end = true;
+                return;
+            }
+            // Should find a value here, as every value bucket is supposed
+            // to have at least one key, even after keys get removed.
+            m_keyIt = m_valIt->second.begin();
+            UASSERT(m_keyIt != m_valIt->second.end(), "Algorithm should have key left");
+        }
+        // Step backward one element.  From end() this lands on the last
+        // key of the last value bucket.
+        void reverseUntilValid() {
+            if (m_end) {
+                UASSERT(!m_sbvmp->m_vals.empty(), "Reverse iterator causes underflow");
+                m_valIt = m_sbvmp->m_vals.end();
+                --m_valIt;
+
+                UASSERT(!m_valIt->second.empty(), "Reverse iterator causes underflow");
+                m_keyIt = m_valIt->second.end();
+                --m_keyIt;
+
+                m_end = false;
+                return;
+            }
+            if (m_keyIt != m_valIt->second.begin()) {
+                // Valid iterator, we're done.
+                --m_keyIt;
+                return;
+            }
+            // Try the previous value?
+            if (m_valIt == m_sbvmp->m_vals.begin()) {
+                // No more values but it's not defined to decrement an
+                // iterator past the beginning.
+                v3fatalSrc("Decremented iterator past beginning");
+                return;
+            }
+            --m_valIt;
+            // Should find a value here, as every value bucket is supposed
+            // to have at least one key, even after keys get removed.
+            UASSERT(!m_valIt->second.empty(), "Value bucket should have key");
+            m_keyIt = m_valIt->second.end();
+            --m_keyIt;
+            UASSERT(m_keyIt != m_valIt->second.end(), "Value bucket should have key");
+        }
+    public:
+        const T_Key& key() const { return *m_keyIt; }
+        const T_Value& value() const { return m_valIt->first; }
+        const_iterator& operator++() {
+            advanceUntilValid();
+            return *this;
+        }
+        const_iterator& operator--() {
+            reverseUntilValid();
+            return *this;
+        }
+        bool operator==(const const_iterator& other) const {
+            // It's not legal to compare iterators from different
+            // sequences.  So check m_end before comparing m_valIt, and
+            // compare m_valIt's before comparing m_keyIt to ensure nothing
+            // here is undefined.
+            if (m_end || other.m_end) {
+                return m_end && other.m_end;
+            }
+            return ((m_valIt == other.m_valIt)
+                    && (m_keyIt == other.m_keyIt));
+        }
+        bool operator!=(const const_iterator& other) const {
+            return (!this->operator==(other));
+        }
+
+        // WARNING: Cleverness.
+        //
+        // The "reference" returned by *it must remain valid after 'it'
+        // gets destroyed. The reverse_iterator relies on this for its
+        // operator*(), so it's not just a theoretical requirement, it's a
+        // real requirement.
+        //
+        // To make that work, define the "reference" type to be the
+        // iterator itself. So clients can do (*it).key() and
+        // (*it).value(). This is the clever part.
+        //
+        // That's mostly useful for a reverse iterator, where *rit returns
+        // the forward iterator pointing to the same element, so
+        // (*rit).key() and (*rit).value() work where rit.key() and
+        // rit.value() cannot.
+        //
+        // It would be nice to support it->key() and it->value(), however
+        // uncertain what would be an appropriate 'pointer' type define
+        // that makes this work safely through a reverse iterator. So this
+        // class does not provide an operator->().
+        //
+        // Q) Why not make our value_type be a pair<T_Key, T_Value> like a
+        //    normal map, and return a reference to that?  This could
+        //    return a reference to one of the pairs inside m_keys, that
+        //    would satisfy the constraint above.
+        //
+        // A) It would take a lookup to find that pair within m_keys. This
+        //    iterator is designed to minimize the number of hashtable and
+        //    tree lookups. Increment, decrement, key(), value(), erase()
+        //    by iterator, begin(), end() -- none of these require a
+        //    container lookup. That's true for reverse_iterators too.
+        reference operator*() const {
+            UASSERT(!m_end, "Dereferencing iterator that is at end()");
+            return *this;
+        }
+    };
+
+    /// Mutable-map flavor of const_iterator; this is the type accepted
+    /// by erase().
+    class iterator : public const_iterator {
+    public:
+        // TYPES
+        typedef iterator value_type;
+        typedef iterator reference;
+        // pointer, difference_type, and iterator_category inherit from
+        // const_iterator
+
+        // CONSTRUCTORS
+        iterator(SortByValueMap* sbvmp)
+            : const_iterator(sbvmp) {}
+        iterator(typename Val2Keys::iterator valIt,
+                 typename KeySet::iterator keyIt,
+                 SortByValueMap* sbvmp)
+            : const_iterator(valIt, keyIt, sbvmp) {}
+
+        // METHODS
+        iterator& operator++() {
+            this->advanceUntilValid();
+            return *this;
+        }
+        iterator& operator--() {
+            this->reverseUntilValid();
+            return *this;
+        }
+        reference operator*() const {
+            UASSERT(!this->m_end, "Dereferencing iterator that is at end()");
+            return *this;
+        }
+    };
+
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+    // METHODS
+private:
+    // Remove 'k' from the bucket of 'oldVal', dropping the bucket if it
+    // becomes empty.  Called when the value of 'k' is about to change,
+    // or 'k' is about to be removed from the map.
+    void removeKeyFromOldVal(const T_Key& k, const T_Value& oldVal) {
+        // Use find() rather than operator[]: operator[] would create an
+        // empty bucket for a missing value, and erasing through the
+        // iterator avoids a second tree lookup.
+        typename Val2Keys::iterator valIt = m_vals.find(oldVal);
+        UASSERT(valIt != m_vals.end(), "removeKeyFromOldVal() old value not found");
+        size_t erased = valIt->second.erase(k);
+        UASSERT(erased == 1, "removeKeyFromOldVal() removal key not found");
+        if (valIt->second.empty()) {
+            // Don't keep empty sets in the value map.
+            m_vals.erase(valIt);
+        }
+    }
+    // Same as above, but the iterator already knows the bucket and the
+    // position within it, so no lookup is needed.
+    void removeKeyFromOldVal(iterator it) {
+        it.m_valIt->second.erase(it.m_keyIt);
+        if (it.m_valIt->second.empty()) {
+            m_vals.erase(it.m_valIt);
+        }
+    }
+
+public:
+    /// Iterator to the entry with the lowest value, or end() if empty.
+    iterator begin() {
+        typename Val2Keys::iterator valIt = m_vals.begin();
+        if (valIt == m_vals.end()) {
+            return end();
+        }
+        typename KeySet::iterator keyIt = valIt->second.begin();
+        return iterator(valIt, keyIt, this);
+    }
+    const_iterator begin() const {
+        // Safe to cast away const; the const_iterator will still enforce
+        // it.
+        SortByValueMap* mutp = const_cast<SortByValueMap*>(this);
+        typename Val2Keys::iterator valIt = mutp->m_vals.begin();
+        if (valIt == mutp->m_vals.end()) {
+            return end();
+        }
+        typename KeySet::iterator keyIt = valIt->second.begin();
+        return const_iterator(valIt, keyIt, mutp);
+    }
+    iterator end() {
+        return iterator(this);
+    }
+    const_iterator end() const {
+        // Safe to cast away const; the const_iterator will still enforce
+        // it. Same for the const begin() above.
+        return const_iterator(const_cast<SortByValueMap*>(this));
+    }
+    reverse_iterator rbegin() {
+        return reverse_iterator(end());
+    }
+    reverse_iterator rend() {
+        return reverse_iterator(begin());
+    }
+    const_reverse_iterator rbegin() const {
+        return const_reverse_iterator(end());
+    }
+    const_reverse_iterator rend() const {
+        return const_reverse_iterator(begin());
+    }
+
+    /// Iterator to the entry for key 'k', or end() if 'k' is not present.
+    iterator find(const T_Key& k) {
+        typename Key2Val::iterator kvit = m_keys.find(k);
+        if (kvit == m_keys.end()) return end();
+
+        typename Val2Keys::iterator valIt = m_vals.find(kvit->second);
+        typename KeySet::iterator keyIt = valIt->second.find(k);
+        return iterator(valIt, keyIt, this);
+    }
+    const_iterator find(const T_Key& k) const {
+        SortByValueMap* mutp = const_cast<SortByValueMap*>(this);
+        typename Key2Val::iterator kvit = mutp->m_keys.find(k);
+        if (kvit == mutp->m_keys.end()) return end();
+
+        typename Val2Keys::iterator valIt = mutp->m_vals.find(kvit->second);
+        typename KeySet::iterator keyIt = valIt->second.find(k);
+        return const_iterator(valIt, keyIt, mutp);
+    }
+    /// Insert key 'k' with value 'v', or change the value of an existing
+    /// key.  A no-op when 'k' already maps to 'v'.
+    void set(const T_Key& k, const T_Value& v) {
+        typename Key2Val::iterator kvit = m_keys.find(k);
+        if (kvit == m_keys.end()) {
+            // New key
+            m_keys[k] = v;
+            m_vals[v].insert(k);
+            return;
+        }
+        if (kvit->second == v) {
+            return;  // Same value already present; stop.
+        }
+        // Must remove element from m_vals[oldValue].  From here on use
+        // the key copy owned by m_keys rather than 'k': the caller may
+        // have passed it.key(), which refers into the bucket entry that
+        // is erased by the next call.
+        removeKeyFromOldVal(kvit->first, kvit->second);
+        kvit->second = v;
+        m_vals[v].insert(kvit->first);
+    }
+    /// Remove key 'k'.  Returns the number of entries removed (0 or 1).
+    size_t erase(const T_Key& k) {
+        typename Key2Val::iterator kvit = m_keys.find(k);
+        if (kvit == m_keys.end()) return 0;
+        // Pass the key copy owned by m_keys; 'k' may refer into the
+        // bucket entry being erased.
+        removeKeyFromOldVal(kvit->first, kvit->second);
+        m_keys.erase(kvit);
+        return 1;
+    }
+    /// Remove the entry 'it' points at; 'it' must not be end().
+    void erase(iterator it) {
+        UASSERT(!it.m_end, "Erasing iterator that is at end()");
+        m_keys.erase(it.key());
+        removeKeyFromOldVal(it);
+    }
+    void erase(reverse_iterator it) {
+        erase(*it);  // Dereferencing returns a copy of the forward iterator
+    }
+    bool has(const T_Key& k) const {
+        return (m_keys.find(k) != m_keys.end());
+    }
+    bool empty() const {
+        return m_keys.empty();
+    }
+    // Look up a value. Returns a reference for efficiency. Note this must
+    // be a const reference, otherwise the client could corrupt the sorted
+    // order of m_vals by reaching through and changing the value.
+    const T_Value& at(const T_Key& k) const {
+        typename Key2Val::const_iterator kvit = m_keys.find(k);
+        UASSERT(kvit != m_keys.end(), "at() lookup key not found");
+        return kvit->second;
+    }
+
+private:
+    VL_UNCOPYABLE(SortByValueMap);
+};
+
+//######################################################################
+
+/// V3Scoreboard takes a set of Elem*'s, each having some score.
+/// Scores are assigned by a user-supplied scoring function.
+///
+/// At any time, the V3Scoreboard can return the elem with the "best" score
+/// among those elements whose scores are known.
+///
+/// The best score is the _lowest_ score. This makes sense in contexts
+/// where scores represent costs.
+///
+/// The Scoreboard supports mutating element scores efficiently. The client
+/// must hint to the V3Scoreboard when an element's score may have
+/// changed. When it receives this hint, the V3Scoreboard will move the
+/// element into the set of elements whose scores are unknown. Later the
+/// client can tell V3Scoreboard to re-sort the list, which it does
+/// incrementally, by re-scoring all elements whose scores are unknown, and
+/// then moving these back into the score-sorted map. This is efficient
+/// when the subset of elements whose scores change is much smaller than
+/// the full set size.
+///
+/// Elements with equal scores are ordered by T_ElemCompare, applied to
+/// the pointed-to elements.
+
+template <typename T_Elem,
+          typename T_Score,
+          class T_ElemCompare = std::less<T_Elem> >
+class V3Scoreboard {
+private:
+    // TYPES
+    typedef vl_unordered_set<const T_Elem*> NeedRescoreSet;
+    // Orders element pointers by comparing the elements they point at.
+    class CmpElems {
+    public:
+        // Must be const-callable: this is the comparator of an ordered
+        // container, which may invoke it through a const reference.
+        bool operator() (const T_Elem* const& ap, const T_Elem* const& bp) const {
+            T_ElemCompare cmp;
+            return cmp(*ap, *bp);
+        }
+    };
+    typedef SortByValueMap<const T_Elem*, T_Score, CmpElems> SortedMap;
+    typedef T_Score (*UserScoreFnp)(const T_Elem*);
+
+    // MEMBERS
+    NeedRescoreSet m_unknown;  // Elements with unknown scores
+    SortedMap m_sorted;  // Set of elements with known scores
+    UserScoreFnp m_scoreFnp;  // Scoring function
+    bool m_slowAsserts;  // Do some asserts that require extra lookups
+
+public:
+    // CONSTRUCTORS
+    // scoreFnp computes the current score of an element.
+    // slowAsserts enables checks that cost extra container lookups.
+    explicit V3Scoreboard(UserScoreFnp scoreFnp, bool slowAsserts)
+        : m_scoreFnp(scoreFnp)
+        , m_slowAsserts(slowAsserts) {}
+    ~V3Scoreboard() {}
+
+    // METHODS
+
+    // Add an element to the scoreboard.
+    // Element begins in needs-rescore state; it won't be returned by
+    // bestp() until after the next rescore().
+    void addElem(const T_Elem* elp) {
+        if (m_slowAsserts) {
+            UASSERT(!contains(elp),
+                    "Adding element to scoreboard that was already in scoreboard");
+        }
+        m_unknown.insert(elp);
+    }
+
+    // Remove elp from scoreboard, whether or not its score is known.
+    void removeElem(const T_Elem* elp) {
+        if (0 == m_sorted.erase(elp)) {
+            // Not among the known scores, so it must be pending rescore.
+            // Do the erase outside of the assertion so the removal does
+            // not depend on how UASSERT evaluates its condition.
+            size_t erased = m_unknown.erase(elp);
+            UASSERT(erased == 1,
+                    "Could not find requested elem to remove from scoreboard");
+        }
+    }
+
+    // Returns true if elp is present in the scoreboard, false otherwise.
+    //
+    // Note: every other V3Scoreboard routine that takes an T_Elem* has
+    // undefined behavior if the element is not in the scoreboard.
+    bool contains(const T_Elem* elp) const {
+        if (m_unknown.find(elp) != m_unknown.end()) return true;
+        return (m_sorted.find(elp) != m_sorted.end());
+    }
+
+    // Get the best element, with the lowest score (lower is better), among
+    // elements whose scores are known. Returns NULL if no elements with
+    // known scores exist.
+    //
+    // Note: This does not automatically rescore. Client must call
+    // rescore() periodically to ensure all elems in the scoreboard are
+    // reflected in the result of bestp(). Otherwise, bestp() only
+    // considers elements that aren't pending rescore.
+    const T_Elem* bestp() const {
+        typename SortedMap::const_iterator result = m_sorted.begin();
+        if (VL_UNLIKELY(result == m_sorted.end())) return NULL;
+        return (*result).key();
+    }
+
+    // Tell the scoreboard that this element's score may have changed.
+    //
+    // At the time of this call, the element's score becomes "unknown"
+    // to the V3Scoreboard. Unknown elements won't be returned by bestp().
+    // The element's score will remain unknown until the next rescore().
+    //
+    // The client MUST call this for each element whose score has changed.
+    //
+    // The client MAY call this for elements whose score has not changed.
+    // Doing so incurs some compute cost (to re-sort the element back to
+    // its original location) and still makes it ineligible to be returned
+    // by bestp() until the next rescore().
+    void hintScoreChanged(const T_Elem* elp) {
+        m_unknown.insert(elp);
+        m_sorted.erase(elp);
+    }
+
+    // True if any element's score is unknown to V3Scoreboard.
+    bool needsRescore() const { return !m_unknown.empty(); }
+    // False if elp's score is known to V3Scoreboard,
+    // else true if elp's score is unknown until the next rescore().
+    bool needsRescore(const T_Elem* elp) const {
+        return (m_unknown.find(elp) != m_unknown.end());
+    }
+    // Retrieve the last known score for an element.
+    // Asserts if elp currently has no known score (e.g. pending rescore).
+    T_Score cachedScore(const T_Elem* elp) const {
+        typename SortedMap::const_iterator result = m_sorted.find(elp);
+        UASSERT(result != m_sorted.end(),
+                "V3Scoreboard::cachedScore() failed to find element");
+        return (*result).value();
+    }
+    // For each element whose score is unknown to V3Scoreboard,
+    // call the client's scoring function to get a new score,
+    // and sort all elements by their current score.
+    void rescore() {
+        for (typename NeedRescoreSet::iterator it = m_unknown.begin();
+             it != m_unknown.end(); ++it) {
+            const T_Elem* elp = *it;
+            T_Score sortScore = m_scoreFnp(elp);
+            m_sorted.set(elp, sortScore);
+        }
+        m_unknown.clear();
+    }
+
+private:
+    VL_UNCOPYABLE(V3Scoreboard);
+};
+
+//######################################################################
+
+// Non-template home for the scoreboard self-test; main() calls
+// V3ScoreboardBase::selfTest() during startup.
+namespace V3ScoreboardBase {
+    void selfTest();
+}  // namespace V3ScoreboardBase
+
+#endif  // Guard
--- a/src/Verilator.cpp
+++ b/src/Verilator.cpp
@ -77,6 +77,7 @@
 #include "V3Premit.h"
 #include "V3Reloop.h"
 #include "V3Scope.h"
+#include "V3Scoreboard.h"
 #include "V3Slice.h"
 #include "V3Split.h"
 #include "V3SplitAs.h"
@ -607,6 +608,7 @@ int main(int argc, char** argv, char** env) {
    AstBasicDTypeKwd::selfTest();
    V3Graph::selfTest();
    V3TSP::selfTest();
+    V3ScoreboardBase::selfTest();

    // Read first filename
    v3Global.readFiles();