diff --git a/Changes b/Changes
index 98b397e28..fb84803db 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,8 @@ indicates the contributor was also the author of the fix; Thanks!
 
 * Verilator 3.865 devel
 
+*** Add optimization of wires from arrayed cells, msg1447. [Jie Xu]
+
 *** Add optimization of operators between concats, msg1447. [Jie Xu]
 
 **** Fix generate unrolling with function call, bug830. [Steven Slatter]
diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp
index 9096158f6..ebc7cb8fe 100644
--- a/src/V3Gate.cpp
+++ b/src/V3Gate.cpp
@@ -303,6 +303,7 @@ private:
     V3Double0 m_statSigs; // Statistic tracking
     V3Double0 m_statRefs; // Statistic tracking
     V3Double0 m_statDedupLogic; // Statistic tracking
+    V3Double0 m_statAssignMerged; // Statistic tracking
 
     // METHODS
     void iterateNewStmt(AstNode* nodep, const char* nonReducibleReason, const char* consumeReason) {
@@ -352,6 +353,7 @@ private:
     void consumedMove();
     void replaceAssigns();
     void dedupe();
+    void mergeAssigns();
 
     // VISITORS
     virtual void visit(AstNetlist* nodep, AstNUser*) {
@@ -368,6 +370,7 @@ private:
         optimizeSignals(true);
         // Remove redundant logic
         if (v3Global.opt.oDedupe()) dedupe();
+        if (v3Global.opt.oAssemble()) mergeAssigns();
         // Warn
         warnSignals();
         consumedMark();
@@ -506,6 +509,7 @@ public:
         V3Stats::addStat("Optimizations, Gate sigs deleted", m_statSigs);
         V3Stats::addStat("Optimizations, Gate inputs replaced", m_statRefs);
         V3Stats::addStat("Optimizations, Gate sigs deduped", m_statDedupLogic);
+        V3Stats::addStat("Optimizations, Gate assign merged", m_statAssignMerged);
     }
 };
 
@@ -1009,7 +1013,6 @@ private:
                 lvertexp->user(true);
             }
         }
-
         return NULL;
     }
 
@@ -1063,6 +1066,141 @@ void GateVisitor::dedupe() {
     m_statDedupLogic += deduper.numDeduped();
 }
 
+
+//######################################################################
+// Recurse through the graph, try to merge assigns
+
+// Graph visitor that looks at the logic vertices driving a variable vertex
+// and folds assignments to adjacent constant bit-selects of one variable
+// into a single assignment whose RHS is a concatenation, e.g.
+//     x[3] = a;  x[2] = b;   becomes   x[3:2] = {a, b};
+// The "previous candidate" state lives in members and is never reset, so it
+// carries over from one visited variable vertex to the next.
+class GateMergeAssignsGraphVisitor : public GateGraphBaseVisitor {
+private:
+    // NODE STATE
+    AstNodeAssign* m_assignp; // Previous candidate assignment (merge target)
+    AstActive* m_activep; // Active block of the previous candidate
+    GateLogicVertex* m_logicvp; // Logic vertex of the previous candidate
+    V3Graph* m_graphp; // Graph in which re-routed edges are created
+    V3Double0 m_numMergedAssigns; // Statistic tracking
+
+    // Assemble two Sels into one if possible.
+    // Returns a newly allocated Sel spanning both ranges when "pre" and "cur"
+    // select from the same variable with constant lsb/width and "cur" occupies
+    // the bits immediately below "pre"; returns NULL otherwise.
+    AstSel* merge(AstSel* pre, AstSel* cur) {
+        AstVarRef* preVarRefp = pre->fromp()->castVarRef();
+        AstVarRef* curVarRefp = cur->fromp()->castVarRef();
+        if (!preVarRefp || !curVarRefp || !curVarRefp->same(preVarRefp)) return NULL; // not the same var
+        if (!pre->lsbp()->castConst() || !pre->widthp()->castConst()
+            || !cur->lsbp()->castConst() || !cur->widthp()->castConst()) return NULL; // too complicated
+        // "cur" must end exactly where "pre" begins
+        if (cur->lsbConst()+cur->widthConst() != pre->lsbConst()) return NULL;
+        return new AstSel(curVarRefp->fileline(), curVarRefp->cloneTree(false),
+                          cur->lsbConst(), pre->widthConst()+cur->widthConst());
+    }
+
+    // Try to merge each Sel-assignment driving vvertexp into the previously
+    // remembered assignment, updating both the AST and the graph edges.
+    virtual AstNUser* visit(GateVarVertex *vvertexp, AstNUser*) {
+        for (V3GraphEdge* edgep = vvertexp->inBeginp(); edgep; ) {
+            V3GraphEdge* oldedgep = edgep;
+            edgep = edgep->inNextp(); // step first since oldedgep may be deleted below
+            GateLogicVertex* lvertexp = dynamic_cast<GateLogicVertex*>(oldedgep->fromp());
+            if (!lvertexp) continue;
+            AstNodeAssign* assignp = lvertexp->nodep()->castNodeAssign();
+            if (!assignp) continue;
+            // Candidates assign to a Sel and their logic vertex passes outSize1()
+            if (!assignp->lhsp()->castSel() || !lvertexp->outSize1()) continue;
+            UINFO(9, "assing to the nodep["<<assignp->lhsp()->castSel()->lsbConst()<<"]"<<endl);
+            // first candidate seen: remember it
+            if (!m_activep) m_activep = lvertexp->activep();
+            if (!m_logicvp) m_logicvp = lvertexp;
+            if (!m_assignp) m_assignp = assignp;
+
+            // not under the same active: restart from this assignment
+            if (m_activep != lvertexp->activep()) {
+                m_activep = lvertexp->activep();
+                m_logicvp = lvertexp;
+                m_assignp = assignp;
+                continue;
+            }
+
+            AstSel* preselp = m_assignp->lhsp()->castSel();
+            AstSel* curselp = assignp->lhsp()->castSel();
+            if (!preselp || !curselp) continue;
+
+            AstSel* newselp = merge(preselp, curselp);
+            if (!newselp) {
+                // cannot merge: this assignment becomes the new candidate
+                m_assignp = assignp;
+                m_logicvp = lvertexp;
+                continue;
+            }
+            UINFO(5, "assemble to new sel: "<<newselp<<endl);
+            // replace preSel with newSel
+            preselp->replaceWith(newselp); preselp->deleteTree(); preselp = NULL;
+            // create new rhs for pre assignment
+            AstNode* newrhsp = new AstConcat(m_assignp->rhsp()->fileline(), m_assignp->rhsp()->cloneTree(false), assignp->rhsp()->cloneTree(false));
+            AstNode* oldrhsp = m_assignp->rhsp();
+            oldrhsp->replaceWith(newrhsp); oldrhsp->deleteTree(); oldrhsp = NULL;
+            m_assignp->dtypeChgWidthSigned(m_assignp->width()+assignp->width(), m_assignp->width()+assignp->width(), AstNumeric::fromBool(true));
+            // assignp is deliberately left in the tree: don't need to delete, will be handled
+
+            // update the graph
+            // move all inedges of lvertexp over to m_logicvp
+            if (!lvertexp->inEmpty()) {
+                for (V3GraphEdge* ledgep = lvertexp->inBeginp(); ledgep; ) {
+                    V3GraphEdge* oedgep = ledgep;
+                    ledgep = ledgep->inNextp();
+                    GateEitherVertex* fromvp = dynamic_cast<GateEitherVertex*>(oedgep->fromp());
+                    new V3GraphEdge(m_graphp, fromvp, m_logicvp, 1);
+                    oedgep->unlinkDelete(); oedgep = NULL;
+                }
+            }
+            // delete the outedge of lvertexp to this variable, only one
+            oldedgep->unlinkDelete(); oldedgep = NULL;
+            ++m_numMergedAssigns;
+        }
+        return NULL;
+    }
+
+    // Logic vertices need no work in this pass
+    virtual AstNUser* visit(GateLogicVertex*, AstNUser*) {
+        return NULL;
+    }
+
+public:
+    // CONSTRUCTORS
+    GateMergeAssignsGraphVisitor(V3Graph* graphp)
+        : m_assignp(NULL), m_activep(NULL), m_logicvp(NULL), m_graphp(graphp) {
+        m_numMergedAssigns = 0;
+    }
+    // Process one variable vertex
+    void mergeAssignsTree(GateVarVertex* vvertexp) {
+        vvertexp->accept(*this);
+    }
+    // Number of merges performed so far
+    V3Double0 numMergedAssigns() const { return m_numMergedAssigns; }
+};
+
+
+//----------------------------------------------------------------------
+
+// Run the assign merger over every variable vertex of the gate graph and
+// add its merge count to m_statAssignMerged.
+void GateVisitor::mergeAssigns() {
+    GateMergeAssignsGraphVisitor merger(&m_graph);
+    for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp=itp->verticesNextp()) {
+        if (GateVarVertex* vvertexp = dynamic_cast<GateVarVertex*>(itp)) {
+            merger.mergeAssignsTree(vvertexp);
+        }
+    }
+    m_statAssignMerged += merger.numMergedAssigns();
+}
+
+
 //######################################################################
 // Convert VARSCOPE(ASSIGN(default, VARREF)) to just VARSCOPE(default)
 
diff --git a/test_regress/t/t_cellarray.pl b/test_regress/t/t_cellarray.pl
new file mode 100755
index 000000000..b39a34d2a
--- /dev/null
+++ b/test_regress/t/t_cellarray.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you can
+# redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+
+compile (
+    v_flags2 => ["--stats"],  # presumably produces the stats file read below - confirm
+    );
+
+execute (
+    check_finished=>1,
+    );
+
+if ($Self->{vlt}) {
+    file_grep ($Self->{stats}, qr/Optimizations, Gate assign merged\s+(\d+)/i, 28);
+};
+
+ok(1);
+1;
diff --git a/test_regress/t/t_cellarray.v b/test_regress/t/t_cellarray.v
new file mode 100644
index 000000000..0d2a62dfb
--- /dev/null
+++ b/test_regress/t/t_cellarray.v
@@ -0,0 +1,100 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed into the Public Domain, for any use,
+// without warranty, 2014 by Jie Xu.
+//
+
+// change these two parameters to see the speed differences
+`define DATA_WIDTH 8
+`define REP_COUNT4 `DATA_WIDTH/4
+`define REP_COUNT2 `DATA_WIDTH/2
+// (REP_COUNT4 / REP_COUNT2 = number of 4-bit / 2-bit values that fill DATA_WIDTH bits)
+// Test bench: feeds four arrays of DATA_WIDTH cells and compares every result bus with the AND of its inputs
+module t (/*AUTOARG*/
+   // Inputs
+   clk
+   );
+   input clk;
+   reg [3:0] count4 = 0;   // 4-bit counter, replicated into a and b
+   reg [1:0] count2 = 0;   // 2-bit counter, replicated into c and d
+
+   reg [`DATA_WIDTH-1:0] a = {`REP_COUNT4{4'b0000}};   // stimulus buses
+   reg [`DATA_WIDTH-1:0] b = {`REP_COUNT4{4'b1111}};
+   reg [`DATA_WIDTH-1:0] c = {`REP_COUNT4{4'b1111}};
+   reg [`DATA_WIDTH-1:0] d = {`REP_COUNT4{4'b1111}};
+   reg [`DATA_WIDTH-1:0] res1;   // connected to datao of t_drv1
+   reg [`DATA_WIDTH-1:0] res2;   // connected to datao of t_drv2
+   reg [`DATA_WIDTH-1:0] res3;   // connected to datao of t_drv3
+   reg [`DATA_WIDTH-1:0] res4;   // connected to datao of t_drv4
+   // Arrays of DATA_WIDTH instances; each drvN has scalar ports, the buses are connected across the array
+   drv1 t_drv1 [`DATA_WIDTH-1:0] (.colSelA(a), .datao(res1));
+   drv2 t_drv2 [`DATA_WIDTH-1:0] (.colSelA(a), .colSelB(b), .datao(res2));
+   drv3 t_drv3 [`DATA_WIDTH-1:0] (.colSelA(a), .colSelB(b), .colSelC(c), .datao(res3));
+   drv4 t_drv4 [`DATA_WIDTH-1:0] (.colSelA(a), .colSelB(b), .colSelC(c), .colSelD(d), .datao(res4));
+
+   always@(posedge clk)
+     begin
+        count2 <= count2 + 1;
+        count4 <= count4 + 1;
+        a <= {`REP_COUNT4{count4}};
+        b <= {`REP_COUNT4{count4}};
+        c <= {`REP_COUNT2{count2}};
+        d <= {`REP_COUNT2{count2}};
+        // A result bus that differs from the AND of its input buses stops the simulation
+        if (res1 != (a)) begin
+           $stop;
+        end
+        if (res2 != (a&b)) begin
+           $stop;
+        end
+        if (res3 != (a&b&c)) begin
+           $stop;
+        end
+        if (res4 != (a&b&c&d)) begin
+           $stop;
+        end
+        // Finish once count4 exceeds 10
+        if (count4 > 10) begin
+           $write("*-* All Finished *-*\n");
+           $finish;
+        end
+     end
+endmodule
+
+// Cells instantiated as arrays by module t: datao is the AND of all inputs
+module drv1
+  (input colSelA,
+   output datao
+   );
+   assign datao = colSelA;   // single input: pass-through
+endmodule
+
+module drv2
+  (input colSelA,
+   input colSelB,
+   output datao
+   );
+   assign datao = colSelB & colSelA;
+endmodule
+
+module drv3
+  (input colSelA,
+   input colSelB,
+   input colSelC,
+   output datao
+   );
+   assign datao = colSelB & colSelA & colSelC;
+
+endmodule
+
+module drv4
+  (input colSelA,
+   input colSelB,
+   input colSelC,
+   input colSelD,
+   output datao
+   );
+   assign datao = colSelB & colSelA & colSelC & colSelD;
+
+endmodule
+