diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ccb25ffef..2f8816345 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -236,6 +236,7 @@ set(COMMON_SOURCES V3DfgBreakCycles.cpp V3DfgCache.cpp V3DfgColorSCCs.cpp + V3DfgCse.cpp V3DfgDecomposition.cpp V3DfgDfgToAst.cpp V3DfgOptimizer.cpp diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in index 98d06b6f6..c71d7faf7 100644 --- a/src/Makefile_obj.in +++ b/src/Makefile_obj.in @@ -246,6 +246,7 @@ RAW_OBJS_PCH_ASTNOMT = \ V3DfgBreakCycles.o \ V3DfgCache.o \ V3DfgColorSCCs.o \ + V3DfgCse.o \ V3DfgDecomposition.o \ V3DfgDfgToAst.o \ V3DfgOptimizer.o \ diff --git a/src/V3Dfg.cpp b/src/V3Dfg.cpp index df8b0a570..6c86b95cc 100644 --- a/src/V3Dfg.cpp +++ b/src/V3Dfg.cpp @@ -555,63 +555,6 @@ DfgVertex::DfgVertex(DfgGraph& dfg, VDfgType type, FileLine* flp, AstNodeDType* dfg.addVertex(*this); } -bool DfgVertex::equals(const DfgVertex& that, EqualsCache& cache) const { - // If same vertex, then equal - if (this == &that) return true; - - // If different type, then not equal - if (this->type() != that.type()) return false; - - // If different data type, then not equal - if (this->dtypep() != that.dtypep()) return false; - - // If different number of inputs, then not equal - if (this->nInputs() != that.nInputs()) return false; - - // Check vertex specifics - if (!this->selfEquals(that)) return false; - - // Check sources - const auto key = (this < &that) ? 
EqualsCache::key_type{this, &that} // - : EqualsCache::key_type{&that, this}; - // Note: the recursive invocation can cause a re-hash but that will not invalidate references - uint8_t& result = cache[key]; - if (!result) { - const bool equal = [&]() { - for (size_t i = 0; i < nInputs(); ++i) { - const DfgVertex* const ap = this->inputp(i); - const DfgVertex* const bp = that.inputp(i); - if (!ap && !bp) continue; - if (!ap || !bp) return false; - if (!ap->equals(*bp, cache)) return false; - } - return true; - }(); - result = (static_cast(equal) << 1) | 1; - } - return result >> 1; -} - -V3Hash DfgVertex::hash(DfgUserMap& cache) { - V3Hash& result = cache[this]; - if (!result.value()) { - V3Hash hash{selfHash()}; - // Variables are defined by themselves, so there is no need to hash them further - // (especially the sources). This enables sound hashing of graphs circular only through - // variables, which we rely on. - if (!is()) { - hash += m_type; - hash += size(); - foreachSource([&](DfgVertex& vtx) { - hash += vtx.hash(cache); - return false; - }); - } - result = hash; - } - return result; -} - uint32_t DfgVertex::fanout() const { uint32_t result = 0; foreachSink([&](const DfgVertex&) { diff --git a/src/V3Dfg.h b/src/V3Dfg.h index 8ec383941..b9eeb681a 100644 --- a/src/V3Dfg.h +++ b/src/V3Dfg.h @@ -67,17 +67,6 @@ class DfgVisitor; template ()> class DfgUserMap; -// Specialization of std::hash for a std::pair for use below -template <> -struct std::hash> final { - size_t operator()(const std::pair& item) const { - const size_t a = reinterpret_cast(item.first); - const size_t b = reinterpret_cast(item.second); - constexpr size_t halfWidth = 8 * sizeof(b) / 2; - return a ^ ((b << halfWidth) | (b >> halfWidth)); - } -}; - namespace V3Dfg { //----------------------------------------------------------------------- // Functions for compatibility tests @@ -226,12 +215,6 @@ class DfgVertex VL_NOT_FINAL { // Visitor accept method virtual void accept(DfgVisitor& v) = 0; - // 
Part of Vertex equality only dependent on this vertex - virtual bool selfEquals(const DfgVertex& that) const = 0; - - // Part of Vertex hash only dependent on this vertex - virtual V3Hash selfHash() const = 0; - // Acessor for type List V3ListLinks& links() { return m_links; } @@ -283,26 +266,6 @@ public: return VN_AS(dtypep(), UnpackArrayDType)->elementsConst(); } - // Cache type for 'equals' below - using EqualsCache = std::unordered_map, uint8_t>; - - // Vertex equality (based on this vertex and all upstream vertices feeding into this vertex). - // Returns true, if the vertices can be substituted for each other without changing the - // semantics of the logic. The 'cache' argument is used to store results to avoid repeat - // evaluations, but it requires that the upstream sources of the compared vertices do not - // change between invocations. - bool equals(const DfgVertex& that, EqualsCache& cache) const VL_MT_DISABLED; - - // Uncached version of 'equals' - bool equals(const DfgVertex& that) const { - EqualsCache cache; // Still cache recursive calls within this invocation - return equals(that, cache); - } - - // Hash of vertex (depends on this vertex and all upstream vertices feeding into this vertex). - // Uses the given DfgUserMap for caching hashes - V3Hash hash(DfgUserMap& cache) VL_MT_DISABLED; - // Predicate: has 1 or more sinks bool hasSinks() const { return !m_sinks.empty(); } diff --git a/src/V3DfgCse.cpp b/src/V3DfgCse.cpp new file mode 100644 index 000000000..76c7e53b5 --- /dev/null +++ b/src/V3DfgCse.cpp @@ -0,0 +1,336 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// DESCRIPTION: Verilator: DfgGraph common sub-expression elimination (CSE) +// +// Code available from: https://verilator.org +// +//************************************************************************* +// +// Copyright 2003-2025 by Wilson Snyder. 
This program is free software; you
+// can redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//*************************************************************************
+
+#include "V3PchAstNoMT.h"  // VL_MT_DISABLED_CODE_UNIT
+
+#include "V3Dfg.h"
+#include "V3DfgPasses.h"
+
+VL_DEFINE_DEBUG_FUNCTIONS;
+
+class V3DfgCse final {
+    // TYPES
+    using VertexPair = std::pair<const DfgVertex*, const DfgVertex*>;
+    struct VertexPairHash final {
+        size_t operator()(const VertexPair& pair) const {
+            V3Hash hash;
+            hash += pair.first;
+            hash += pair.second;
+            return hash.value();
+        }
+    };
+
+    // STATE
+    // The graph being processed
+    DfgGraph& m_dfg;
+    // Cache for vertex hashes, a zero hash value is treated as 'not yet computed'
+    DfgUserMap<V3Hash> m_hashCache = m_dfg.makeUserMap<V3Hash>();
+    // Cache for vertex equivalence: 0 = not computed, else bit 0 is set, bit 1 is the result
+    std::unordered_map<VertexPair, uint8_t, VertexPairHash> m_equivalentCache;
+
+    // METHODS
+    // Returns hash of vertex dependent on information internal to the vertex (not its inputs)
+    static V3Hash vertexSelfHash(const DfgVertex& vtx) {
+        switch (vtx.type()) {
+        // Unhandled vertices
+        case VDfgType::Logic:
+        case VDfgType::Unresolved: vtx.v3fatalSrc("Should not have reached CSE");
+
+        // Special vertices, their hashes are set up front by the constructor
+        case VDfgType::Const:
+        case VDfgType::VarArray:
+        case VDfgType::VarPacked: vtx.v3fatalSrc("Hash should have been pre-computed");
+
+        // Vertices with internal information
+        case VDfgType::Sel: return V3Hash{vtx.as<DfgSel>()->lsb()};
+
+        case VDfgType::SpliceArray:
+        case VDfgType::SplicePacked: {
+            V3Hash hash;
+            vtx.as<DfgVertexSplice>()->foreachDriver([&](const DfgVertex&, uint32_t lo) {
+                hash += lo;
+                return false;
+            });
+            return hash;
+        }
+
+        // Vertices with no internal information
+        case VDfgType::Mux:
+        case VDfgType::UnitArray: return V3Hash{};
+
+        // Generated classes - none of them have internal information
+        case VDfgType::Add:
+        case VDfgType::And:
+        case VDfgType::ArraySel:
+        case VDfgType::BufIf1:
+        case VDfgType::Concat:
+        case VDfgType::Cond:
+        case VDfgType::Div:
+        case VDfgType::DivS:
+        case VDfgType::Eq:
+        case VDfgType::EqCase:
+        case VDfgType::EqWild:
+        case VDfgType::Extend:
+        case VDfgType::ExtendS:
+        case VDfgType::Gt:
+        case VDfgType::GtS:
+        case VDfgType::Gte:
+        case VDfgType::GteS:
+        case VDfgType::LogAnd:
+        case VDfgType::LogEq:
+        case VDfgType::LogIf:
+        case VDfgType::LogNot:
+        case VDfgType::LogOr:
+        case VDfgType::Lt:
+        case VDfgType::LtS:
+        case VDfgType::Lte:
+        case VDfgType::LteS:
+        case VDfgType::ModDiv:
+        case VDfgType::ModDivS:
+        case VDfgType::Mul:
+        case VDfgType::MulS:
+        case VDfgType::Negate:
+        case VDfgType::Neq:
+        case VDfgType::NeqCase:
+        case VDfgType::NeqWild:
+        case VDfgType::Not:
+        case VDfgType::Or:
+        case VDfgType::Pow:
+        case VDfgType::PowSS:
+        case VDfgType::PowSU:
+        case VDfgType::PowUS:
+        case VDfgType::RedAnd:
+        case VDfgType::RedOr:
+        case VDfgType::RedXor:
+        case VDfgType::Replicate:
+        case VDfgType::ShiftL:
+        case VDfgType::ShiftR:
+        case VDfgType::ShiftRS:
+        case VDfgType::StreamL:
+        case VDfgType::StreamR:
+        case VDfgType::Sub:
+        case VDfgType::Xor: return V3Hash{};
+        }
+        VL_UNREACHABLE;
+    }
+
+    // Returns hash of vertex dependent on the vertex itself and all its inputs (memoized)
+    V3Hash vertexHash(DfgVertex& vtx) {
+        V3Hash& result = m_hashCache[vtx];
+        if (!result.value()) {
+            V3Hash hash{vertexSelfHash(vtx)};
+            // Variables are defined by themselves, so there is no need to hash them further
+            // (especially the sources). This enables sound hashing of graphs circular only through
+            // variables, which we rely on.
+            if (!vtx.is<DfgVertexVar>()) {
+                hash += vtx.type();
+                hash += vtx.size();
+                vtx.foreachSource([&](DfgVertex& src) {
+                    hash += vertexHash(src);
+                    return false;
+                });
+            }
+            result = hash;
+        }
+        return result;
+    }
+
+    // Compare 'a' and 'b' for equivalence based on their internal information only
+    static bool vertexSelfEquivalent(const DfgVertex& a, const DfgVertex& b) {
+        // Note: 'a' and 'b' are of the same Vertex type, data type, and have
+        // the same number of inputs with matching types. This is established
+        // by 'vertexEquivalent'.
+        switch (a.type()) {
+        // Unhandled vertices
+        case VDfgType::Logic:
+        case VDfgType::Unresolved: a.v3fatalSrc("Should not have reached CSE");
+
+        // Special vertices
+        case VDfgType::Const: return a.as<DfgConst>()->num().isCaseEq(b.as<DfgConst>()->num());
+
+        case VDfgType::VarArray:
+        case VDfgType::VarPacked:
+            return false;  // CSE does not combine variables
+
+        // Vertices with internal information
+        case VDfgType::Sel: return a.as<DfgSel>()->lsb() == b.as<DfgSel>()->lsb();
+
+        case VDfgType::SpliceArray:
+        case VDfgType::SplicePacked: {
+            const DfgVertexSplice* const ap = a.as<DfgVertexSplice>();
+            // Gather indices of drivers of 'a'
+            std::vector<uint32_t> aLo;
+            aLo.reserve(ap->nInputs());
+            ap->foreachDriver([&](const DfgVertex&, uint32_t lo) {
+                aLo.push_back(lo);
+                return false;
+            });
+            // Compare indices of drivers of 'b' in order, relies on equal input counts (see Note)
+            const uint32_t* aLop = aLo.data();
+            return !b.as<DfgVertexSplice>()->foreachDriver(
+                [&](const DfgVertex&, uint32_t lo) { return *aLop++ != lo; });
+        }
+
+        // Vertices with no internal information
+        case VDfgType::Mux:
+        case VDfgType::UnitArray: return true;
+
+        // Generated classes - none of them have internal information
+        case VDfgType::Add:
+        case VDfgType::And:
+        case VDfgType::ArraySel:
+        case VDfgType::BufIf1:
+        case VDfgType::Concat:
+        case VDfgType::Cond:
+        case VDfgType::Div:
+        case VDfgType::DivS:
+        case VDfgType::Eq:
+        case VDfgType::EqCase:
+        case VDfgType::EqWild:
+        case VDfgType::Extend:
+        case VDfgType::ExtendS:
+        case VDfgType::Gt:
+        case VDfgType::GtS:
+        case VDfgType::Gte:
+        case VDfgType::GteS:
+        case VDfgType::LogAnd:
+        case VDfgType::LogEq:
+        case VDfgType::LogIf:
+        case VDfgType::LogNot:
+        case VDfgType::LogOr:
+        case VDfgType::Lt:
+        case VDfgType::LtS:
+        case VDfgType::Lte:
+        case VDfgType::LteS:
+        case VDfgType::ModDiv:
+        case VDfgType::ModDivS:
+        case VDfgType::Mul:
+        case VDfgType::MulS:
+        case VDfgType::Negate:
+        case VDfgType::Neq:
+        case VDfgType::NeqCase:
+        case VDfgType::NeqWild:
+        case VDfgType::Not:
+        case VDfgType::Or:
+        case VDfgType::Pow:
+        case VDfgType::PowSS:
+        case VDfgType::PowSU:
+        case VDfgType::PowUS:
+        case VDfgType::RedAnd:
+        case VDfgType::RedOr:
+        case VDfgType::RedXor:
+        case VDfgType::Replicate:
+        case VDfgType::ShiftL:
+        case VDfgType::ShiftR:
+        case VDfgType::ShiftRS:
+        case VDfgType::StreamL:
+        case VDfgType::StreamR:
+        case VDfgType::Sub:
+        case VDfgType::Xor: return true;
+        }
+        VL_UNREACHABLE;
+    }
+
+    // Compares 'a' and 'b' for equivalence, including (recursively) all of their inputs
+    bool vertexEquivalent(const DfgVertex& a, const DfgVertex& b) {
+        // If same vertex, then equal
+        if (&a == &b) return true;
+
+        // If different type, then not equal
+        if (a.type() != b.type()) return false;
+
+        // If different data type, then not equal
+        if (a.dtypep() != b.dtypep()) return false;
+
+        // If different number of inputs, then not equal
+        if (a.nInputs() != b.nInputs()) return false;
+
+        // Check vertex specifics
+        if (!vertexSelfEquivalent(a, b)) return false;
+
+        // Check sources, the cache key is ordered by address so (a, b) and (b, a) share an entry
+        const VertexPair key = (&a < &b) ? std::make_pair(&a, &b) : std::make_pair(&b, &a);
+        // The recursive invocation can cause a re-hash but that will not invalidate references
+        uint8_t& result = m_equivalentCache[key];
+        if (!result) {
+            const bool equal = [&]() {
+                for (size_t i = 0; i < a.nInputs(); ++i) {
+                    const DfgVertex* const ap = a.inputp(i);
+                    const DfgVertex* const bp = b.inputp(i);
+                    if (!ap && !bp) continue;
+                    if (!ap || !bp) return false;
+                    if (!vertexEquivalent(*ap, *bp)) return false;
+                }
+                return true;
+            }();
+            result = (static_cast<uint8_t>(equal) << 1) | 1;
+        }
+        return result >> 1;
+    }
+
+    V3DfgCse(DfgGraph& dfg, V3DfgCseContext& ctx)
+        : m_dfg{dfg} {
+        std::unordered_map<V3Hash, std::vector<DfgVertex*>> verticesWithEqualHashes;
+        verticesWithEqualHashes.reserve(dfg.size());
+
+        // Pre-hash variables, these are all unique, so just set their hash to a unique value
+        uint32_t varHash = 0;
+        for (const DfgVertexVar& vtx : dfg.varVertices()) m_hashCache[vtx] = V3Hash{++varHash};
+
+        // Similarly pre-hash constants for speed. While we don't combine constants, we do want
+        // expressions using the same constants to be combined, so we do need to hash equal
+        // constants to equal values.
+        for (DfgConst* const vtxp : dfg.constVertices().unlinkable()) {
+            // Delete unused constants while we are at it.
+            if (!vtxp->hasSinks()) {
+                VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp);
+                continue;
+            }
+            m_hashCache[vtxp] = vtxp->num().toHash() + varHash;
+        }
+
+        // Combine operation vertices, keeping the first vertex seen of each equivalence class
+        for (DfgVertex* const vtxp : dfg.opVertices().unlinkable()) {
+            // Delete unused nodes while we are at it.
+            if (!vtxp->hasSinks()) {
+                VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp);
+                continue;
+            }
+            std::vector<DfgVertex*>& vec = verticesWithEqualHashes[vertexHash(*vtxp)];
+            bool replaced = false;
+            for (DfgVertex* const candidatep : vec) {
+                if (vertexEquivalent(*candidatep, *vtxp)) {
+                    ++ctx.m_eliminated;
+                    vtxp->replaceWith(candidatep);
+                    VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp);
+                    replaced = true;
+                    break;
+                }
+            }
+            if (replaced) continue;
+            vec.push_back(vtxp);
+        }
+    }
+
+public:
+    static void apply(DfgGraph& dfg, V3DfgCseContext& ctx) {
+        V3DfgCse{dfg, ctx};
+        // Prune unused nodes
+        V3DfgPasses::removeUnused(dfg);
+    }
+};
+
+void V3DfgPasses::cse(DfgGraph& dfg, V3DfgCseContext& ctx) { V3DfgCse::apply(dfg, ctx); }
diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp
index 76579c131..bbdfe6468 100644
--- a/src/V3DfgPasses.cpp
+++ b/src/V3DfgPasses.cpp
@@ -25,61 +25,6 @@

 VL_DEFINE_DEBUG_FUNCTIONS;

-// Common sub-expression elimination
-void V3DfgPasses::cse(DfgGraph& dfg, V3DfgCseContext& ctx) {
-    // Remove common sub-expressions
-    {
-        // Used by DfgVertex::hash
-        DfgUserMap hashCache = dfg.makeUserMap();
-
-        DfgVertex::EqualsCache equalsCache;
-        std::unordered_map> verticesWithEqualHashes;
-        verticesWithEqualHashes.reserve(dfg.size());
-
-        // Pre-hash variables, these are all unique, so just set their hash to a unique value
-        uint32_t varHash = 0;
-        for (const DfgVertexVar& vtx : dfg.varVertices()) hashCache[vtx] =
V3Hash{++varHash}; - - // Similarly pre-hash constants for speed. While we don't combine constants, we do want - // expressions using the same constants to be combined, so we do need to hash equal - // constants to equal values. - for (DfgConst* const vtxp : dfg.constVertices().unlinkable()) { - // Delete unused constants while we are at it. - if (!vtxp->hasSinks()) { - VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp); - continue; - } - hashCache[vtxp] = vtxp->num().toHash() + varHash; - } - - // Combine operation vertices - for (DfgVertex* const vtxp : dfg.opVertices().unlinkable()) { - // Delete unused nodes while we are at it. - if (!vtxp->hasSinks()) { - vtxp->unlinkDelete(dfg); - continue; - } - const V3Hash hash = vtxp->hash(hashCache); - std::vector& vec = verticesWithEqualHashes[hash]; - bool replaced = false; - for (DfgVertex* const candidatep : vec) { - if (candidatep->equals(*vtxp, equalsCache)) { - ++ctx.m_eliminated; - vtxp->replaceWith(candidatep); - VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp); - replaced = true; - break; - } - } - if (replaced) continue; - vec.push_back(vtxp); - } - } - - // Prune unused nodes - removeUnused(dfg); -} - void V3DfgPasses::inlineVars(DfgGraph& dfg) { for (DfgVertexVar& vtx : dfg.varVertices()) { if (DfgVarPacked* const varp = vtx.cast()) { diff --git a/src/V3DfgVertices.h b/src/V3DfgVertices.h index b445ac0aa..bac94fd11 100644 --- a/src/V3DfgVertices.h +++ b/src/V3DfgVertices.h @@ -57,19 +57,6 @@ class DfgVertexVar VL_NOT_FINAL : public DfgVertex { // associated input Var/VarScope. 
AstNode* m_tmpForp = nullptr; - bool selfEquals(const DfgVertex& that) const final { - UASSERT_OBJ(nodep() != that.as()->nodep(), this, - "There should only be one DfgVertexVar for a given AstVar/AstVarScope"); - return false; - } - - V3Hash selfHash() const final { - V3Hash hash; - hash += nodep()->name(); - hash += varp()->varType(); - return hash; - } - DfgVertexVar(DfgGraph& dfg, VDfgType type, AstVar* varp, AstVarScope* vscp) : DfgVertex{dfg, type, varp->fileline(), V3Dfg::toDfgDType(varp->dtypep())} , m_varp{varp} @@ -219,11 +206,6 @@ class DfgConst final : public DfgVertexNullary { V3Number m_num; // Constant value - bool selfEquals(const DfgVertex& that) const override { - return num().isCaseEq(that.as()->num()); - } - V3Hash selfHash() const override { return num().toHash(); } - public: DfgConst(DfgGraph& dfg, FileLine* flp, const V3Number& num) : DfgVertexNullary{dfg, dfgType(), flp, V3Dfg::dtypePacked(num.width())} @@ -272,11 +254,6 @@ class DfgSel final : public DfgVertexUnary { // 'DfgMux` for the non-constant 'lsbp'. uint32_t m_lsb = 0; // The LSB index - bool selfEquals(const DfgVertex& that) const override { - return lsb() == that.as()->lsb(); - } - V3Hash selfHash() const override { return V3Hash{lsb()}; } - public: DfgSel(DfgGraph& dfg, FileLine* flp, AstNodeDType* dtypep) : DfgVertexUnary{dfg, dfgType(), flp, dtypep} {} @@ -291,9 +268,6 @@ public: class DfgUnitArray final : public DfgVertexUnary { // This is a type adapter for modeling arrays. It's a single element array, // with the value of the single element being the source operand. - bool selfEquals(const DfgVertex&) const final { return true; } - V3Hash selfHash() const final { return V3Hash{}; } - public: DfgUnitArray(DfgGraph& dfg, FileLine* flp, AstNodeDType* dtypep) : DfgVertexUnary{dfg, dfgType(), flp, dtypep} { @@ -322,9 +296,6 @@ class DfgMux final : public DfgVertexBinary { // AstSel is binary, but 'lsbp' is very often constant. 
As AstSel is fairly // common, we special case as a DfgSel for the constant 'lsbp', and as // 'DfgMux` for the non-constant 'lsbp'. - bool selfEquals(const DfgVertex&) const override { return true; } - V3Hash selfHash() const override { return V3Hash{}; } - public: DfgMux(DfgGraph& dfg, FileLine* flp, AstNodeDType* dtypep) : DfgVertexBinary{dfg, dfgType(), flp, dtypep} {} @@ -379,20 +350,6 @@ class DfgVertexSplice VL_NOT_FINAL : public DfgVertexVariadic { }; std::vector m_driverData; // Additional data associated with each driver - bool selfEquals(const DfgVertex& that) const override final { - const DfgVertexSplice* const thatp = that.as(); - for (size_t i = 0; i < nInputs(); ++i) { - if (m_driverData[i].m_lo != thatp->m_driverData[i].m_lo) return false; - } - return true; - } - V3Hash selfHash() const override final { - V3Hash hash; - const size_t n = nInputs(); - for (size_t i = 0; i < n; ++i) hash += m_driverData[i].m_lo; - return hash; - } - protected: DfgVertexSplice(DfgGraph& dfg, VDfgType type, FileLine* flp, AstNodeDType* dtypep) : DfgVertexVariadic{dfg, type, flp, dtypep} {} @@ -512,10 +469,6 @@ class DfgLogic final : public DfgVertexVariadic { // Vertices this logic was synthesized into. 
Excluding variables std::vector m_synth; - // Used very early, should never be needed - bool selfEquals(const DfgVertex&) const final { V3ERROR_NA_RETURN(false); } - V3Hash selfHash() const final { V3ERROR_NA_RETURN(V3Hash{}); } - public: DfgLogic(DfgGraph& dfg, AstAssignW* nodep, AstScope* scopep) : DfgVertexVariadic{dfg, dfgType(), nodep->fileline(), nullptr} @@ -547,10 +500,6 @@ public: class DfgUnresolved final : public DfgVertexVariadic { // Represents a collection of unresolved variable drivers before synthesis - - bool selfEquals(const DfgVertex&) const final { return true; } - V3Hash selfHash() const final { return V3Hash{}; } - public: DfgUnresolved(DfgGraph& dfg, const DfgVertexVar* vtxp) : DfgVertexVariadic{dfg, dfgType(), vtxp->fileline(), vtxp->dtypep()} {} diff --git a/src/astgen b/src/astgen index 8f965d20d..9d08af903 100755 --- a/src/astgen +++ b/src/astgen @@ -1220,8 +1220,6 @@ def write_dfg_auto_classes(filename): emitBlock('''\ class Dfg{t} final : public Dfg{s} {{ - bool selfEquals(const DfgVertex&) const final {{ return true; }} - V3Hash selfHash() const final {{ return V3Hash{{}}; }} public: Dfg{t}(DfgGraph& dfg, FileLine* flp, AstNodeDType* dtypep) : Dfg{s}{{dfg, dfgType(), flp, dtypep}} {{}}