diff --git a/Changes b/Changes
index 700bc035d..a19916036 100644
--- a/Changes
+++ b/Changes
@@ -30,7 +30,7 @@ The contributors that suggested a given feature are shown in []. Thanks!
 
 **** Fix wide array indices causing compile error.
 
-**** Better optimize Shift-And constructs.
+**** Better optimize Shift-And and replication constructs.
 
 
 * Verilator 3.910 2017-09-07
diff --git a/src/V3Const.cpp b/src/V3Const.cpp
index dc5862205..a59573b9e 100644
--- a/src/V3Const.cpp
+++ b/src/V3Const.cpp
@@ -1393,22 +1393,71 @@ private:
         }
         nodep->deleteTree(); VL_DANGLING(nodep);
     }
-
-    void replaceSelReplicate(AstSel* nodep) {
-        // SEL(REPLICATE(a,b),1,bit) => SEL(a,1,bit)
+    bool operandSelReplicate(AstSel* nodep) {
+        // SEL(REPLICATE(from,rep),lsb,width) => SEL(from,lsb%width(from),width)
+        // Only done when the selected bits lie within a single copy of 'from',
+        // i.e. the first and last selected bit fall into the same repetition.
+        // Returns true if nodep was replaced (and deleted), false if left untouched.
         AstReplicate* repp = nodep->fromp()->castReplicate();
-        AstNode* fromp = repp->lhsp()->unlinkFrBack();
-        AstConst* lsbp = nodep->lsbp()->castConst();
-        AstNode* widthp = nodep->widthp()->unlinkFrBack();
+        AstNode* fromp = repp->lhsp();
+        AstConst* lsbp = nodep->lsbp()->castConst(); if (!lsbp) return false;
+        AstNode* widthp = nodep->widthp(); if (!widthp->castConst()) return false;
         if (!fromp->width()) nodep->v3fatalSrc("Not widthed");
+        if ((lsbp->toUInt() / fromp->width())
+            != ((lsbp->toUInt()+nodep->width()-1) / fromp->width())) return false;
+        // Unlink only after every check has passed, so returning false never alters the tree
+        fromp->unlinkFrBack();
+        widthp->unlinkFrBack();
         AstSel* newp = new AstSel(nodep->fileline(),
                                   fromp,
                                   new AstConst(lsbp->fileline(), lsbp->toUInt() % fromp->width()),
                                   widthp);
         newp->dtypeFrom(nodep);
         nodep->replaceWith(newp); nodep->deleteTree(); VL_DANGLING(nodep);
+        return true;
+    }
+    bool operandRepRep(AstReplicate* nodep) {
+        // REPLICATE(REPLICATE2(from2,cnt2),cnt1) => REPLICATE(from2,(cnt1*cnt2))
+        // Returns true if nodep was replaced (and deleted), false if left untouched.
+        AstReplicate* rep2p = nodep->lhsp()->castReplicate();
+        AstNode* from2p = rep2p->lhsp();
+        AstConst* cnt1p = nodep->rhsp()->castConst(); if (!cnt1p) return false;
+        AstConst* cnt2p = rep2p->rhsp()->castConst(); if (!cnt2p) return false;
+        // Only from2p is reused.  cnt1p/cnt2p are just read, so leave them linked;
+        // nodep->deleteTree() below then releases them instead of leaking them.
+        from2p->unlinkFrBack();
+        AstReplicate* newp = new AstReplicate(nodep->fileline(),
+                                              from2p, cnt1p->toUInt()*cnt2p->toUInt());
+        newp->dtypeFrom(nodep);
+        nodep->replaceWith(newp); nodep->deleteTree(); VL_DANGLING(nodep);
+        return true;
+    }
+    bool operandConcatSame(AstConcat* nodep) {
+        // CONCAT(fromp,fromp) -> REPLICATE(fromp,1+1)
+        // CONCAT(REP(fromp,cnt1),fromp) -> REPLICATE(fromp,cnt1+1)
+        // CONCAT(fromp,REP(fromp,cnt1)) -> REPLICATE(fromp,1+cnt1)
+        // CONCAT(REP(fromp,cnt1),REP(fromp,cnt2)) -> REPLICATE(fromp,cnt1+cnt2)
+        // Returns true if nodep was replaced (and deleted), false if left untouched.
+        AstNode* from1p = nodep->lhsp(); uint32_t cnt1 = 1;
+        AstNode* from2p = nodep->rhsp(); uint32_t cnt2 = 1;
+        if (from1p->castReplicate()) {
+            AstConst* cnt1p = from1p->castReplicate()->rhsp()->castConst(); if (!cnt1p) return false;
+            from1p = from1p->castReplicate()->lhsp();
+            cnt1 = cnt1p->toUInt();
+        }
+        if (from2p->castReplicate()) {
+            AstConst* cnt2p = from2p->castReplicate()->rhsp()->castConst(); if (!cnt2p) return false;
+            from2p = from2p->castReplicate()->lhsp();
+            cnt2 = cnt2p->toUInt();
+        }
+        if (!operandsSame(from1p,from2p)) return false;
+        // Reuse the left-hand copy; everything else stays linked under nodep and goes away with it
+        from1p->unlinkFrBack();
+        AstReplicate* newp = new AstReplicate(nodep->fileline(), from1p, cnt1+cnt2);
+        newp->dtypeFrom(nodep);
+        nodep->replaceWith(newp); nodep->deleteTree(); VL_DANGLING(nodep);
+        return true;
     }
-
     void replaceSelIntoBiop(AstSel* nodep) {
         // SEL(BUFIF1(a,b),1,bit) => BUFIF1(SEL(a,1,bit),SEL(b,1,bit))
         AstNodeBiop* fromp = nodep->fromp()->unlinkFrBack()->castNodeBiop();
@@ -2268,6 +2317,8 @@ private:
     TREEOPV("AstExtendS{$lhsp.castExtendS}", "replaceExtend(nodep, nodep->lhsp()->castExtendS()->lhsp())");
     TREEOPV("AstReplicate{$lhsp, $rhsp.isOne, $lhsp->width()==nodep->width()}", "replaceWLhs(nodep)"); // {1{lhs}}->lhs
     TREEOPV("AstReplicateN{$lhsp, $rhsp.isOne, $lhsp->width()==nodep->width()}", "replaceWLhs(nodep)"); // {1{lhs}}->lhs
+    TREEOPV("AstReplicate{$lhsp.castReplicate, operandRepRep(nodep)}", "DONE"); // {2{3{lhs}}}->{6{lhs}}
+    TREEOPV("AstConcat{operandConcatSame(nodep)}", "DONE"); // {a,a}->{2{a}}, {a,{2{a}}}->{3{a}}, etc
     // Next rule because AUTOINST puts the width of bits in
     // to pins, even when the widths are exactly the same across the hierarchy.
     TREEOPV("AstSel{operandSelExtend(nodep)}", "DONE");
@@ -2282,7 +2333,7 @@ private:
     TREEOPV("AstSel{$fromp.castShiftR, operandSelShiftLower(nodep)}", "DONE");
     TREEOPC("AstSel{$fromp.castConst, $lsbp.castConst, $widthp.castConst, }", "replaceConst(nodep)");
     TREEOPV("AstSel{$fromp.castConcat, $lsbp.castConst, $widthp.castConst, }", "replaceSelConcat(nodep)");
-    TREEOPV("AstSel{$fromp.castReplicate, $lsbp.castConst, $widthp.isOne, }", "replaceSelReplicate(nodep)");
+    TREEOPV("AstSel{$fromp.castReplicate, $lsbp.castConst, $widthp.castConst, operandSelReplicate(nodep) }", "DONE");
     // V3Tristate requires selects below BufIf1.
     // Also do additional operators that are bit-independent, but only definite
     // win if bit select is a constant (otherwise we may need to compute bit index several times)
diff --git a/test_regress/t/t_math_repl.v b/test_regress/t/t_math_repl.v
index 1cecfe26d..b93d64729 100644
--- a/test_regress/t/t_math_repl.v
+++ b/test_regress/t/t_math_repl.v
@@ -14,13 +14,13 @@ module t (/*AUTOARG*/
    reg [63:0] rf;
    reg [63:0] rf2;
    reg [63:0] biu;
-   reg okidoki;
+   reg b;
 
    always @* begin
-      rf[63:32] = biu[63:32] & {32{okidoki}};
-      rf[31:0] = {32{okidoki}};
+      rf[63:32] = biu[63:32] & {32{b}};
+      rf[31:0] = {32{b}};
       rf2 = rf;
-      rf2[31:0] = ~{32{okidoki}};
+      rf2[31:0] = ~{32{b}};
    end
 
    reg [31:0] src1, src0, sr, mask;
@@ -49,15 +49,18 @@ module t (/*AUTOARG*/
                           sl_mask[27], sl_mask[28], sl_mask[29],
                           sl_mask[30], sl_mask[31]};
 
+   wire [95:0] widerep = {2{({2{({2{ {b,b}, {b,{2{b}}}, {{2{b}},b}, {2{({2{b}})}} }})}})}};
+   wire [1:0] w = {2{b}};
+
    always @ (posedge clk) begin
       if (cyc!=0) begin
          cyc <= cyc + 1;
 `ifdef TEST_VERBOSE
-         $write("%x %x %x %x %x\n", rf, rf2, dualasr, sl_mask, sr_mask);
+         $write("cyc=%0d d=%x %x %x %x %x %x\n", cyc, b, rf, rf2, dualasr, sl_mask, sr_mask);
 `endif
          if (cyc==1) begin
             biu <= 64'h12451282_abadee00;
-            okidoki <= 1'b0;
+            b <= 1'b0;
             src1 <= 32'h00000001;
             src0 <= 32'h9a4f1235;
             sr <= 32'h0f19f567;
@@ -65,7 +68,7 @@ module t (/*AUTOARG*/
          end
          if (cyc==2) begin
             biu <= 64'h12453382_abad8801;
-            okidoki <= 1'b1;
+            b <= 1'b1;
             if (rf != 64'h0) $stop;
             if (rf2 != 64'h00000000ffffffff) $stop;
             src1 <= 32'h0010000f;
@@ -75,10 +78,11 @@ module t (/*AUTOARG*/
             if (dualasr != 32'h8f1f7060) $stop;
             if (sl_mask != 32'hfffffffe) $stop;
             if (sr_mask != 32'h7fffffff) $stop;
+            if (widerep != '0) $stop;
          end
          if (cyc==3) begin
             biu <= 64'h12422382_77ad8802;
-            okidoki <= 1'b1;
+            b <= 1'b1;
             if (rf != 64'h12453382ffffffff) $stop;
             if (rf2 != 64'h1245338200000000) $stop;
             src1 <= 32'h0000000f;
@@ -88,6 +92,7 @@ module t (/*AUTOARG*/
             if (dualasr != 32'h0000ffff) $stop;
             if (sl_mask != 32'hffff8000) $stop;
             if (sr_mask != 32'h0001ffff) $stop;
+            if (widerep != '1) $stop;
          end
          if (cyc==4) begin
             if (rf != 64'h12422382ffffffff) $stop;
@@ -96,6 +101,7 @@ module t (/*AUTOARG*/
             if (sl_mask != 32'hffff8000) $stop;
             if (sr_mask != 32'h0001ffff) $stop;
+            if (widerep != '1) $stop;
             $write("*-* All Finished *-*\n");
             $finish;
          end
       end