From e0edb596eac2fb632e69bd2b14770b6a5c175312 Mon Sep 17 00:00:00 2001 From: Varun Koyyalagunta Date: Wed, 20 Feb 2013 20:14:15 -0500 Subject: [PATCH] Add duplicate clock gate optimization, msg980. Experimental and disabled unless -OD or -O3 used (for now), Please try it as may get some significant speedups. Signed-off-by: Wilson Snyder --- Changes | 4 + src/V3Ast.h | 2 + src/V3Gate.cpp | 297 ++++++++++++++++++++++++++- src/V3Options.cpp | 2 + src/V3Options.h | 2 + test_regress/driver.pl | 1 + test_regress/t/t_cdc_async_bad.pl | 1 - test_regress/t/t_dedupe_clk_gate.pl | 19 ++ test_regress/t/t_dedupe_clk_gate.v | 61 ++++++ test_regress/t/t_dedupe_seq_logic.pl | 19 ++ test_regress/t/t_dedupe_seq_logic.v | 123 +++++++++++ 11 files changed, 520 insertions(+), 11 deletions(-) create mode 100755 test_regress/t/t_dedupe_clk_gate.pl create mode 100644 test_regress/t/t_dedupe_clk_gate.v create mode 100755 test_regress/t/t_dedupe_seq_logic.pl create mode 100644 test_regress/t/t_dedupe_seq_logic.v diff --git a/Changes b/Changes index c7eb413b2..069109135 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,10 @@ indicates the contributor was also the author of the fix; Thanks! * Verilator 3.846-devel +*** Add duplicate clock gate optimization, msg980. [Varun Koyyalagunta] + Disabled unless -OD or -O3 used, please try it as may get some + significant speedups. + **** Support pattern assignment features, bug616, bug617, bug618. [Ed Lander] **** Support bind in $unit, bug602. [Ed Lander] diff --git a/src/V3Ast.h b/src/V3Ast.h index fc7095c5d..f27a7725b 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1136,6 +1136,7 @@ public: virtual bool isPure() const { return true; } // Else a $display, etc, that must be ordered with other displays virtual bool isBrancher() const { return false; } // Changes control flow, disable some optimizations virtual bool isGateOptimizable() const { return true; } // Else a AstTime etc that can't be pushed out + virtual bool isGateDedupable() const { return isGateOptimizable(); } // GateDedupable is a slightly larger superset of GateOptimzable (eg, AstNodeIf) virtual bool isSubstOptimizable() const { return true; } // Else a AstTime etc that can't be substituted out virtual bool isPredictOptimizable() const { return true; } // Else a AstTime etc which output can't be predicted from input virtual bool isOutputter() const { return false; } // Else creates output or exits, etc, not unconsumed @@ -1389,6 +1390,7 @@ public: void addIfsp(AstNode* newp) { addOp2p(newp); } void addElsesp(AstNode* newp) { addOp3p(newp); } virtual bool isGateOptimizable() const { return false; } + virtual bool isGateDedupable() const { return true; } virtual int instrCount() const { return instrCountBranch(); } virtual V3Hash sameHash() const { return V3Hash(); } virtual bool same(AstNode* samep) const { return true; } diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index 119bf3c4e..2db2b3c14 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -41,6 +41,7 @@ #include "V3Graph.h" #include "V3Const.h" #include "V3Stats.h" +#include "V3Hashed.h" typedef list GateVarRefList; @@ -55,21 +56,33 @@ public: } }; +//###################################################################### + +class GateLogicVertex; +class GateVarVertex; +class GateGraphBaseVisitor { +public: + virtual AstNUser* visit(GateLogicVertex* vertexp, AstNUser* vup=NULL) =0; + virtual AstNUser* visit(GateVarVertex* vertexp, AstNUser* vup=NULL) =0; +}; + //###################################################################### // Support classes class GateEitherVertex : public V3GraphVertex { AstScope* m_scopep; bool m_reducible; // True if this node should be able to be eliminated + bool m_dedupable; // True if this node should be able to be deduped bool m_consumed; // Output goes to something meaningful public: GateEitherVertex(V3Graph* graphp, AstScope* scopep) - : V3GraphVertex(graphp), m_scopep(scopep), m_reducible(true), m_consumed(false) {} + : V3GraphVertex(graphp), m_scopep(scopep), m_reducible(true), m_dedupable(true), m_consumed(false) {} virtual ~GateEitherVertex() {} // ACCESSORS virtual string dotStyle() const { return m_consumed?"":"dotted"; } AstScope* scopep() const { return m_scopep; } bool reducible() const { return m_reducible; } + bool dedupable() const { return m_dedupable; } void setConsumed(const char* consumedReason) { m_consumed = true; //UINFO(0,"\t\tSetConsumed "<inNextp()) { + retp = dynamic_cast(edgep->fromp())->accept(v, vup); + } + return retp; + } }; class GateVarVertex : public GateEitherVertex { @@ -113,6 +143,7 @@ public: setIsClock(); } } + AstNUser* accept(GateGraphBaseVisitor& v, AstNUser* vup=NULL) { return v.visit(this,vup); } }; class GateLogicVertex : public GateEitherVertex { @@ -129,6 +160,7 @@ public: AstNode* nodep() const { return m_nodep; } AstActive* activep() const { return m_activep; } bool slow() const { return m_slow; } + AstNUser* accept(GateGraphBaseVisitor& v, AstNUser* vup=NULL) { return v.visit(this,vup); } }; //###################################################################### @@ -143,6 +175,7 @@ private: // STATE bool m_buffersOnly; // Set when we only allow simple buffering, no equations (for clocks) AstNodeVarRef* m_lhsVarRef; // VarRef on lhs of assignment (what we're replacing) + bool m_dedupe; // Set when we use isGateDedupable instead of isGateOptimizable // METHODS void clearSimple(const char* because) { @@ -202,7 +235,7 @@ private: virtual void visit(AstNode* nodep, AstNUser*) { // *** Special iterator if (!m_isSimple) return; // Fastpath - if (!nodep->isGateOptimizable() + if (!(m_dedupe ? nodep->isGateDedupable() : nodep->isGateOptimizable()) || !nodep->isPure() || nodep->isBrancher()) { UINFO(5, "Non optimizable type: "<accept(*this); // Check results @@ -267,6 +301,7 @@ private: bool m_inSlow; // Inside a slow structure V3Double0 m_statSigs; // Statistic tracking V3Double0 m_statRefs; // Statistic tracking + V3Double0 m_statDedupLogic; // Statistic tracking // METHODS void iterateNewStmt(AstNode* nodep, const char* nonReducibleReason, const char* consumeReason) { @@ -274,9 +309,10 @@ private: UINFO(4," STMT "<clearReducible(nonReducibleReason); + m_logicVertexp->clearReducibleAndDedupable(nonReducibleReason); + } else if (!m_activeReducible) { + m_logicVertexp->clearReducible("Block Unreducible"); // Sequential logic is dedupable } if (consumeReason) m_logicVertexp->setConsumed(consumeReason); if (nodep->castSenItem()) m_logicVertexp->setConsumed("senItem"); @@ -293,13 +329,13 @@ private: varscp->user1p(vertexp); if (varscp->varp()->isSigPublic()) { // Public signals shouldn't be changed, pli code might be messing with them - vertexp->clearReducible("SigPublic"); + vertexp->clearReducibleAndDedupable("SigPublic"); vertexp->setConsumed("SigPublic"); } if (varscp->varp()->isIO() && varscp->scopep()->isTop()) { // We may need to convert to/from sysc/reg sigs vertexp->setIsTop(); - vertexp->clearReducible("isTop"); + vertexp->clearReducibleAndDedupable("isTop"); vertexp->setConsumed("isTop"); } if (varscp->varp()->isUsedClock()) vertexp->setConsumed("clock"); @@ -314,6 +350,7 @@ private: void consumedMarkRecurse(GateEitherVertex* vertexp); void consumedMove(); void replaceAssigns(); + void dedupe(); // VISITORS virtual void visit(AstNetlist* nodep, AstNUser*) { @@ -328,6 +365,8 @@ private: optimizeSignals(false); // Then propagate more complicated equations optimizeSignals(true); + // Remove redundant logic + if (v3Global.opt.oDedupe()) dedupe(); // Warn warnSignals(); consumedMark(); @@ -452,6 +491,7 @@ private: public: // CONSTUCTORS GateVisitor(AstNode* nodep) { + AstNode::user1ClearTree(); m_logicVertexp = NULL; m_scopep = NULL; m_modp = NULL; @@ -464,6 +504,7 @@ public: virtual ~GateVisitor() { V3Stats::addStat("Optimizations, Gate sigs deleted", m_statSigs); V3Stats::addStat("Optimizations, Gate inputs replaced", m_statRefs); + V3Stats::addStat("Optimizations, Gate sigs deduped", m_statDedupLogic); } }; @@ -473,7 +514,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) { for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) { if (GateVarVertex* vvertexp = dynamic_cast(itp)) { if (vvertexp->inEmpty()) { - vvertexp->clearReducible("inEmpty"); // Can't deal with no sources + vvertexp->clearReducibleAndDedupable("inEmpty"); // Can't deal with no sources if (!vvertexp->isTop() // Ok if top inputs are driverless && !vvertexp->varScp()->varp()->valuep() && !vvertexp->varScp()->varp()->isSigPublic()) { @@ -489,7 +530,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) { } } else if (!vvertexp->inSize1()) { - vvertexp->clearReducible("size!1"); // Can't deal with more than one src + vvertexp->clearReducibleAndDedupable("size!1"); // Can't deal with more than one src } // Reduce it? if (!vvertexp->reducible()) { @@ -502,7 +543,7 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) { AstNode* logicp = logicVertexp->nodep(); if (logicVertexp->reducible()) { // Can we eliminate? - GateOkVisitor okVisitor(logicp, vvertexp->isClock()); + GateOkVisitor okVisitor(logicp, vvertexp->isClock(), false); bool multiInputs = okVisitor.rhsVarRefs().size() > 1; // Was it ok? bool doit = okVisitor.isSimple(); @@ -730,6 +771,8 @@ private: // However a VARREF should point to the original as it's otherwise confusing // to throw warnings that point to a PIN rather than where the pin us used. if (substp->castVarRef()) substp->fileline(nodep->fileline()); + // Make the substp an rvalue like nodep. This facilitate the hashing in dedupe. + if (AstNodeVarRef* varrefp = substp->castNodeVarRef()) varrefp->lvalue(false); nodep->replaceWith(substp); nodep->deleteTree(); nodep=NULL; } @@ -761,6 +804,240 @@ void GateVisitor::optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* } } +//###################################################################### +// Auxiliary hash class for GateDedupeVarVisitor + +class GateDedupeHash : public V3HashedUserCheck { +private: + // NODE STATE + // Ast*::user2p -> parent AstNodeAssign* for this rhsp + // Ast*::user3p -> AstNode* checked in test for duplicate + // Ast*::user5p -> AstNode* checked in test for duplicate + // AstUser2InUse m_inuser2; (Allocated for use in GateVisitor) + AstUser3InUse m_inuser3; + AstUser5InUse m_inuser5; + V3Hashed m_hashed; // Hash, contains rhs of assigns + + void hash(AstNode* nodep) { + // !NULL && the object is hashable + if (nodep && !nodep->sameHash().isIllegal()) { + m_hashed.hash(nodep); + } + } + bool sameHash(AstNode* node1p, AstNode* node2p) { + return (node1p && node2p + && !node1p->sameHash().isIllegal() + && !node2p->sameHash().isIllegal() + && m_hashed.sameNodes(node1p,node2p)); + } + bool same(AstNUser* node1p, AstNUser* node2p) { + return node1p == node2p || sameHash((AstNode*)node1p,(AstNode*)node2p); + } +public: + bool check(AstNode* node1p,AstNode* node2p) { + return same(node1p->user3p(),node2p->user3p()) && same(node1p->user5p(),node2p->user5p()) + && node1p->user2p()->castNode()->type() == node2p->user2p()->castNode()->type() + ; + } + + AstNodeAssign* hashAndFindDupe(AstNodeAssign* assignp, AstNode* extra1p, AstNode* extra2p) { + AstNode *rhsp = assignp->rhsp(); + rhsp->user2p(assignp); + rhsp->user3p(extra1p); + rhsp->user5p(extra2p); + + hash(extra1p); + hash(extra2p); + + V3Hashed::iterator inserted = m_hashed.hashAndInsert(rhsp); + V3Hashed::iterator dupit = m_hashed.findDuplicate(rhsp, this); + // Even though rhsp was just inserted, V3Hashed::findDuplicate doesn't + // return anything in the hash that has the same pointer (V3Hashed.cpp::findDuplicate) + // So dupit is either a different, duplicate rhsp, or the end of the hash. + if (dupit != m_hashed.end()) { + m_hashed.erase(inserted); + return m_hashed.iteratorNodep(dupit)->user2p()->castNode()->castNodeAssign(); + } + return NULL; + } +}; + +//###################################################################### +// Have we seen the rhs of this assign before? + +class GateDedupeVarVisitor : public GateBaseVisitor { + // A node passed to findDupe() is visited in this order + // (otherwise dupe not found) + // 1. AstNodeAssign + // 2. AstAlways -> AstNodeAssign + // 3. AstAlways -> AstNodeIf -> AstNodeAssign +private: + // RETURN STATE + AstNodeVarRef* m_dupLhsVarRefp; // Duplicate lhs varref that was found + // STATE + GateDedupeHash m_hash; // Hash used to find dupes + AstVarScope* m_consumerVarScopep; // VarScope on lhs of assignment (what we're replacing) + AstActive* m_activep; // AstActive that assign is under + AstNode* m_ifCondp; // IF condition that assign is under + bool m_always; // Assign is under an always + + // VISITORS + virtual void visit(AstNodeAssign* assignp, AstNUser*) { + AstNode* lhsp = assignp->lhsp(); + // Possible todo, handle more complex lhs expressions + if (AstNodeVarRef* lhsVarRefp = lhsp->castNodeVarRef()) { + if (lhsVarRefp->varScopep() != m_consumerVarScopep) m_consumerVarScopep->v3fatalSrc("Consumer doesn't match lhs of assign"); + if (AstNodeAssign* dup = m_hash.hashAndFindDupe(assignp,m_activep,m_ifCondp)) { + m_dupLhsVarRefp = dup->lhsp()->castNodeVarRef(); + } + } + } + virtual void visit(AstAlways* alwaysp, AstNUser*) { + // I think we could safely dedupe an always block with multiple non-blocking statements, + // but erring on side of caution here + if (!m_always && alwaysp->isJustOneBodyStmt()) { + m_always = true; + alwaysp->bodysp()->accept(*this); + } + } + // Ugly support for latches of the specific form - + // always @(...) + // if (...) + // foo = ...; // or foo <= ...; + virtual void visit(AstNodeIf* ifp, AstNUser*) { + if (m_always && !ifp->elsesp()) { //we're under an always and there's no else + AstNode* ifsp = ifp->ifsp(); + if (!ifsp->nextp()) { //only one stmt under if + m_ifCondp = ifp->condp(); + ifsp->accept(*this); + } + } + } + //-------------------- + // Default + virtual void visit(AstNode* nodep, AstNUser*) {} + +public: + // CONSTUCTORS + GateDedupeVarVisitor() {} + // PUBLIC METHODS + AstNodeVarRef* findDupe(AstNode* nodep, AstVarScope* consumerVarScopep, AstActive* activep) { + m_consumerVarScopep = consumerVarScopep; + m_activep = activep; + m_always = false; + m_ifCondp = NULL; + m_dupLhsVarRefp = NULL; + nodep->accept(*this); + return m_dupLhsVarRefp; + } +}; + +//###################################################################### +// Recurse through the graph, looking for duplicate expressions on the rhs of an assign + +class GateDedupeGraphVisitor : public GateGraphBaseVisitor { +private: + // NODE STATE + // AstVarScope::user2p -> bool: already visited + // AstUser2InUse m_inuser2; (Allocated for use in GateVisitor) + V3Double0 m_numDeduped; // Statistic tracking + GateDedupeVarVisitor m_varVisitor; // Looks for a dupe of the logic + + virtual AstNUser* visit(GateVarVertex *vvertexp, AstNUser*) { + // Check that we haven't been here before + if (vvertexp->varScp()->user2()) return NULL; + vvertexp->varScp()->user2(true); + + AstNodeVarRef* dupVarRefp = (AstNodeVarRef*) vvertexp->iterateInEdges(*this, (AstNUser*) vvertexp); + + if (dupVarRefp && vvertexp->inSize1()) { + V3GraphEdge* edgep = vvertexp->inBeginp(); + GateLogicVertex* lvertexp = (GateLogicVertex*)edgep->fromp(); + if (!vvertexp->dedupable()) vvertexp->varScp()->v3fatalSrc("GateLogicVertex* visit should have returned NULL if consumer var vertex is not dedupable."); + GateOkVisitor okVisitor(lvertexp->nodep(), false, true); + if (okVisitor.isSimple()) { + AstVarScope* dupVarScopep = dupVarRefp->varScopep(); + GateVarVertex* dupVvertexp = (GateVarVertex*) (dupVarScopep->user1p()); + UINFO(4,"replacing " << vvertexp << " with " << dupVvertexp << endl); + m_numDeduped++; + // Replace all of this varvertex's consumers with dupVarRefp + for (V3GraphEdge* outedgep = vvertexp->outBeginp();outedgep;) { + GateLogicVertex* consumeVertexp = dynamic_cast(outedgep->top()); + AstNode* consumerp = consumeVertexp->nodep(); + GateElimVisitor elimVisitor(consumerp,vvertexp->varScp(),dupVarRefp); + outedgep = outedgep->relinkFromp(dupVvertexp); + } + + // Propogate attributes + dupVvertexp->propagateAttrClocksFrom(vvertexp); + + // Remove inputs links + while (V3GraphEdge* inedgep = vvertexp->inBeginp()) { + inedgep->unlinkDelete(); inedgep=NULL; + } + // replaceAssigns() does the deleteTree on lvertexNodep in a later step + AstNode* lvertexNodep = lvertexp->nodep(); + lvertexNodep->unlinkFrBack(); + vvertexp->varScp()->valuep(lvertexNodep); + lvertexNodep = NULL; + vvertexp->user(true); + lvertexp->user(true); + } + } + + return NULL; + } + + // Returns a varref that has the same logic input + virtual AstNUser* visit(GateLogicVertex* lvertexp, AstNUser* vup) { + lvertexp->iterateInEdges(*this); + + GateVarVertex* consumerVvertexpp = (GateVarVertex*) vup; + if (lvertexp->dedupable() && consumerVvertexpp->dedupable()) { + AstNode* nodep = lvertexp->nodep(); + AstVarScope* consumerVarScopep = consumerVvertexpp->varScp(); + // TODO: Doing a simple pointer comparison of activep won't work + // optimally for statements under generated clocks. Statements under + // different generated clocks will never compare as equal, even if the + // generated clocks are deduped into one clock. + AstActive* activep = lvertexp->activep(); + return (AstNUser*) m_varVisitor.findDupe(nodep, consumerVarScopep, activep); + } + return NULL; + } + +public: + GateDedupeGraphVisitor() {} + void dedupeTree(GateVarVertex* vvertexp) { + vvertexp->accept(*this); + } + V3Double0 numDeduped() { return m_numDeduped; } +}; + +//---------------------------------------------------------------------- + +void GateVisitor::dedupe() { + AstNode::user2ClearTree(); + GateDedupeGraphVisitor deduper; + // Traverse starting from each of the clocks + for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) { + if (GateVarVertex* vvertexp = dynamic_cast(itp)) { + if (vvertexp->isClock()) { + deduper.dedupeTree(vvertexp); + } + } + } + // Traverse starting from each of the outputs + for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) { + if (GateVarVertex* vvertexp = dynamic_cast(itp)) { + if (vvertexp->isTop() && vvertexp->varScp()->varp()->isOutput()) { + deduper.dedupeTree(vvertexp); + } + } + } + m_statDedupLogic += deduper.numDeduped(); +} + //###################################################################### // Convert VARSCOPE(ASSIGN(default, VARREF)) to just VARSCOPE(default) diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 225cada90..40ef9af83 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -765,6 +765,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char case 'a': m_oTable = flag; break; case 'b': m_oCombine = flag; break; case 'c': m_oConst = flag; break; + case 'd': m_oDedupe = flag; break; case 'e': m_oCase = flag; break; case 'f': m_oFlopGater = flag; break; case 'g': m_oGate = flag; break; @@ -1301,6 +1302,7 @@ void V3Options::optimize(int level) { m_oSubst = flag; m_oSubstConst = flag; m_oTable = flag; + m_oDedupe = level >= 3; // And set specific optimization levels if (level >= 3) { m_inlineMult = -1; // Maximum inlining diff --git a/src/V3Options.h b/src/V3Options.h index 637c55ad9..5f6cd2939 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -131,6 +131,7 @@ class V3Options { bool m_oCase; // main switch: -Oe: case tree conversion bool m_oCombine; // main switch: -Ob: common icode packing bool m_oConst; // main switch: -Oc: constant folding + bool m_oDedupe; // main switch: -Od: logic deduplication bool m_oExpand; // main switch: -Ox: expansion of C macros bool m_oFlopGater; // main switch: -Of: flop gater detection bool m_oGate; // main switch: -Og: gate wire elimination @@ -266,6 +267,7 @@ class V3Options { bool oCase() const { return m_oCase; } bool oCombine() const { return m_oCombine; } bool oConst() const { return m_oConst; } + bool oDedupe() const { return m_oDedupe; } bool oExpand() const { return m_oExpand; } bool oFlopGater() const { return m_oFlopGater; } bool oGate() const { return m_oGate; } diff --git a/test_regress/driver.pl b/test_regress/driver.pl index 4ad026b8a..9a82af554 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -365,6 +365,7 @@ sub new { 'v3' => 0, verilator_flags => ["-cc", "-Mdir $self->{obj_dir}", + "-OD", # As currently disabled unless -O3 "--debug-check"], verilator_flags2 => [], verilator_make_gcc => 1, diff --git a/test_regress/t/t_cdc_async_bad.pl b/test_regress/t/t_cdc_async_bad.pl index 02e1d5efc..6da31872a 100755 --- a/test_regress/t/t_cdc_async_bad.pl +++ b/test_regress/t/t_cdc_async_bad.pl @@ -17,7 +17,6 @@ compile ( %Warning-CDCRSTLOGIC: See details in obj_dir/t_cdc_async_bad/Vt_cdc_async_bad__cdc.txt %Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst6a_bad_n %Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst6b_bad_n -%Warning-CDCRSTLOGIC: t/t_cdc_async_bad.v:\d+: Logic in path that feeds async reset, via signal: v.rst3_bad_n %Error: Exiting due to.*', ); diff --git a/test_regress/t/t_dedupe_clk_gate.pl b/test_regress/t/t_dedupe_clk_gate.pl new file mode 100755 index 000000000..6cd30aa41 --- /dev/null +++ b/test_regress/t/t_dedupe_clk_gate.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you can +# redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. + +compile ( + verilator_flags2 => ["--stats"], + ); + +if ($Self->{vlt}) { + file_grep ($Self->{stats}, qr/Optimizations, Gate sigs deduped\s+(\d+)/i, 4); +} + +ok(1); +1; diff --git a/test_regress/t/t_dedupe_clk_gate.v b/test_regress/t/t_dedupe_clk_gate.v new file mode 100644 index 000000000..fda3b8dc0 --- /dev/null +++ b/test_regress/t/t_dedupe_clk_gate.v @@ -0,0 +1,61 @@ +// DESCRIPTION: Verilator: Dedupe optimization test. +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty. + +// Contributed 2012 by Varun Koyyalagunta, Centaur Technology. + +module t(res,d,clk,en); + output res; + input d,en,clk; + wire q0,q1,q2,q3; + + flop_gated_latch f0(q0,d,clk,en); + flop_gated_latch f1(q1,d,clk,en); + flop_gated_flop f2(q2,d,clk,en); + flop_gated_flop f3(q3,d,clk,en); + assign res = (q0 + q1) * (q2 - q3); +endmodule + +module flop_gated_latch(q,d,clk,en); + input d, clk, en; + output q; + wire gated_clock; + clock_gate_latch clock_gate(gated_clock, clk, en); + always @(posedge gated_clock) begin + q <= d; + end +endmodule + +module flop_gated_flop(q,d,clk,en); + input d, clk, en; + output q; + wire gated_clock; + clock_gate_flop clock_gate(gated_clock, clk, en); + always @(posedge gated_clock) begin + q <= d; + end +endmodule + +module clock_gate_latch (gated_clk, clk, clken); + output gated_clk; + input clk, clken; + reg clken_latched /*verilator clock_enable*/; + assign gated_clk = clk & clken_latched ; + + wire clkb = ~clk; + always @(clkb or clken) + if(clkb) clken_latched = clken; + +endmodule + +module clock_gate_flop (gated_clk, clk, clken); + output gated_clk; + input clk, clken; + reg clken_r /*verilator clock_enable*/; + assign gated_clk = clk & clken_r ; + + always @(negedge clk) + clken_r <= clken; + +endmodule diff --git a/test_regress/t/t_dedupe_seq_logic.pl b/test_regress/t/t_dedupe_seq_logic.pl new file mode 100755 index 000000000..eb80a4317 --- /dev/null +++ b/test_regress/t/t_dedupe_seq_logic.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you can +# redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. + +compile ( + verilator_flags2 => ["--stats"], + ); + +if ($Self->{vlt}) { + file_grep ($Self->{stats}, qr/Optimizations, Gate sigs deduped\s+(\d+)/i, 6); +} + +ok(1); +1; diff --git a/test_regress/t/t_dedupe_seq_logic.v b/test_regress/t/t_dedupe_seq_logic.v new file mode 100644 index 000000000..332214f14 --- /dev/null +++ b/test_regress/t/t_dedupe_seq_logic.v @@ -0,0 +1,123 @@ +// DESCRIPTION: Verilator: Dedupe optimization test. +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty. + +// Contributed 2012 by Varun Koyyalagunta, Centaur Technology. +// +// Test consists of the follow logic tree, which has many obvious +// places for dedupe: +/* + output + + + --------------/ \-------------- + / \ + + + + ----/ \----- ----/ \---- + / + / + + + / \ + / \ + -/ \- a b -/ \- a b + / \ / \ + + + + + + / \ / \ / \ / \ + a b c d a b c d +*/ + +module t(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + wire left,right; + add add(sum,left,right,clk); + l l(left,a,b,c,d,clk); + r r(right,a,b,c,d,clk); +endmodule + +module l(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + wire left, right; + add add(sum,left,right,clk); + ll ll(left,a,b,c,d,clk); + lr lr(right,a,b,c,d,clk); +endmodule + +module ll(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + wire left, right; + add add(sum,left,right,clk); + lll lll(left,a,b,c,d,clk); + llr llr(right,a,b,c,d,clk); +endmodule + +module lll(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add add(sum,a,b,clk); +endmodule + +module llr(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add add(sum,c,d,clk); +endmodule + +module lr(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add add(sum,a,b,clk); +endmodule + +module r(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + wire left, right; + add add(sum,left,right,clk); + rl rl(left,a,b,c,d,clk); + rr rr(right,a,b,c,d,clk); +endmodule + +module rr(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add add(sum,a,b,clk); +endmodule + +module rl(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + wire left, right; + add add(sum,left,right,clk); + rll rll(left,a,b,c,d,clk); + rlr rlr(right,a,b,c,d,clk); +endmodule + +module rll(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add2 add(sum,a,b,clk); +endmodule + +module rlr(sum,a,b,c,d,clk); + output sum; + input a,b,c,d,clk; + add2 add(sum,c,d,clk); +endmodule + +module add(sum,x,y,clk); + output sum; + input x,y,clk; + reg t1,t2; + always @(posedge clk) begin + sum <= x + y; + end +endmodule + +module add2(sum,x,y,clk); + output sum; + input x,y,clk; + reg t1,t2; + always @(posedge clk) begin + sum <= x + y; + end +endmodule