Optimize concat/replicate+replicate.

This commit is contained in:
Wilson Snyder 2017-09-21 21:05:42 -04:00
parent 48d3ce46d8
commit 47e13cfdf4
3 changed files with 70 additions and 17 deletions

View File

@ -30,7 +30,7 @@ The contributors that suggested a given feature are shown in []. Thanks!
**** Fix wide array indices causing compile error.
**** Better optimize Shift-And constructs.
**** Better optimize Shift-And, and replication constructs.
* Verilator 3.910 2017-09-07

View File

@ -1393,22 +1393,67 @@ private:
}
nodep->deleteTree(); VL_DANGLING(nodep);
}
void replaceSelReplicate(AstSel* nodep) {
// SEL(REPLICATE(a,b),1,bit) => SEL(a,1,bit)
bool operandSelReplicate(AstSel* nodep) {
// SEL(REPLICATE(from,rep),lsb,width) => SEL(from,lsb % from.width,width)
// Only applied when the first and last selected bits land in the same copy of from,
// i.e. lsb/from.width equals (lsb+sel.width-1)/from.width.
// Returns true once nodep has been replaced and deleted.
AstReplicate* repp = nodep->fromp()->castReplicate();
// NOTE(review): the next three declarations reuse names that are declared again just below;
// they look like leftovers of the removed replaceSelReplicate() - confirm against the real file.
AstNode* fromp = repp->lhsp()->unlinkFrBack();
AstConst* lsbp = nodep->lsbp()->castConst();
AstNode* widthp = nodep->widthp()->unlinkFrBack();
AstNode* fromp = repp->lhsp();
AstConst* lsbp = nodep->lsbp()->castConst(); if (!lsbp) return false;
AstNode* widthp = nodep->widthp(); if (!widthp->castConst()) return false;
// from's width is used as a divisor below, so it must be non-zero
if (!fromp->width()) nodep->v3fatalSrc("Not widthed");
// Bail out if the selection straddles two copies of from
if ((lsbp->toUInt() / fromp->width())
!= ((lsbp->toUInt()+nodep->width()-1) / fromp->width())) return false;
//
fromp->unlinkFrBack();
widthp->unlinkFrBack();
// lsbp stays linked under nodep; only its value is read to build the new, wrapped lsb constant
AstSel* newp = new AstSel(nodep->fileline(),
fromp,
new AstConst(lsbp->fileline(), lsbp->toUInt() % fromp->width()),
widthp);
newp->dtypeFrom(nodep);
nodep->replaceWith(newp); nodep->deleteTree(); VL_DANGLING(nodep);
return true;
}
bool operandRepRep(AstReplicate* nodep) {
    // REPLICATE(REPLICATE2(from2,cnt2),cnt1) => REPLICATE(from2,(cnt1*cnt2))
    // Precondition: nodep->lhsp() is an AstReplicate (rep2p is dereferenced unchecked).
    // Returns false, leaving the tree untouched, unless both replication counts are constants.
    // Returns true once nodep has been replaced by the merged replicate and deleted.
    AstReplicate* rep2p = nodep->lhsp()->castReplicate();
    AstNode* from2p = rep2p->lhsp();
    AstConst* cnt1p = nodep->rhsp()->castConst(); if (!cnt1p) return false;
    AstConst* cnt2p = rep2p->rhsp()->castConst(); if (!cnt2p) return false;
    // Only the count values are needed.  Leave both constants linked under nodep so that
    // deleteTree() below frees them; unlinking them without re-use would leak the nodes.
    uint32_t newCnt = cnt1p->toUInt() * cnt2p->toUInt();
    //
    from2p->unlinkFrBack();
    AstReplicate* newp = new AstReplicate(nodep->fileline(), from2p, newCnt);
    newp->dtypeFrom(nodep);
    nodep->replaceWith(newp); nodep->deleteTree(); VL_DANGLING(nodep);
    return true;
}
bool operandConcatSame(AstConcat* nodep) {
    // Merge a concatenation whose two sides repeat the same expression into one replicate.
    // A side that is not itself a REPLICATE counts as a single copy:
    //   CONCAT(a,a)                     -> REPLICATE(a,1+1)
    //   CONCAT(REP(a,n),a)              -> REPLICATE(a,n+1)
    //   CONCAT(a,REP(a,m))              -> REPLICATE(a,1+m)
    //   CONCAT(REP(a,n),REP(a,m))       -> REPLICATE(a,n+m)
    // Returns false without touching the tree when a replicate count is not constant
    // or when operandsSame() rejects the two base expressions.
    AstNode* lhsBasep = nodep->lhsp();
    AstNode* rhsBasep = nodep->rhsp();
    uint32_t lhsCopies = 1;
    uint32_t rhsCopies = 1;
    if (AstReplicate* lhsRepp = lhsBasep->castReplicate()) {
        AstConst* countp = lhsRepp->rhsp()->castConst();
        if (!countp) return false;
        lhsBasep = lhsRepp->lhsp();
        lhsCopies = countp->toUInt();
    }
    if (AstReplicate* rhsRepp = rhsBasep->castReplicate()) {
        AstConst* countp = rhsRepp->rhsp()->castConst();
        if (!countp) return false;
        rhsBasep = rhsRepp->lhsp();
        rhsCopies = countp->toUInt();
    }
    if (!operandsSame(lhsBasep, rhsBasep)) return false;
    // Reuse the left-hand base expression; the right-hand one is freed with nodep
    lhsBasep->unlinkFrBack();
    AstReplicate* mergedp = new AstReplicate(nodep->fileline(), lhsBasep, lhsCopies + rhsCopies);
    mergedp->dtypeFrom(nodep);
    nodep->replaceWith(mergedp);
    nodep->deleteTree(); VL_DANGLING(nodep);
    return true;
}
void replaceSelIntoBiop(AstSel* nodep) {
// SEL(BUFIF1(a,b),1,bit) => BUFIF1(SEL(a,1,bit),SEL(b,1,bit))
AstNodeBiop* fromp = nodep->fromp()->unlinkFrBack()->castNodeBiop();
@ -2268,6 +2313,8 @@ private:
TREEOPV("AstExtendS{$lhsp.castExtendS}", "replaceExtend(nodep, nodep->lhsp()->castExtendS()->lhsp())");
TREEOPV("AstReplicate{$lhsp, $rhsp.isOne, $lhsp->width()==nodep->width()}", "replaceWLhs(nodep)"); // {1{lhs}}->lhs
TREEOPV("AstReplicateN{$lhsp, $rhsp.isOne, $lhsp->width()==nodep->width()}", "replaceWLhs(nodep)"); // {1{lhs}}->lhs
TREEOPV("AstReplicate{$lhsp.castReplicate, operandRepRep(nodep)}", "DONE"); // {2{3{lhs}}}->{6{lhs}}
TREEOPV("AstConcat{operandConcatSame(nodep)}", "DONE"); // {a,a}->{2{a}}, {a,{2{a}}}->{3{a}}, etc
// Next rule because AUTOINST puts the width of bits in
// to pins, even when the widths are exactly the same across the hierarchy.
TREEOPV("AstSel{operandSelExtend(nodep)}", "DONE");
@ -2282,7 +2329,7 @@ private:
TREEOPV("AstSel{$fromp.castShiftR, operandSelShiftLower(nodep)}", "DONE");
TREEOPC("AstSel{$fromp.castConst, $lsbp.castConst, $widthp.castConst, }", "replaceConst(nodep)");
TREEOPV("AstSel{$fromp.castConcat, $lsbp.castConst, $widthp.castConst, }", "replaceSelConcat(nodep)");
TREEOPV("AstSel{$fromp.castReplicate, $lsbp.castConst, $widthp.isOne, }", "replaceSelReplicate(nodep)");
TREEOPV("AstSel{$fromp.castReplicate, $lsbp.castConst, $widthp.castConst, operandSelReplicate(nodep) }", "DONE");
// V3Tristate requires selects below BufIf1.
// Also do additional operators that are bit-independent, but only definite
// win if bit select is a constant (otherwise we may need to compute bit index several times)

View File

@ -14,13 +14,13 @@ module t (/*AUTOARG*/
reg [63:0] rf;
reg [63:0] rf2;
reg [63:0] biu;
reg okidoki;
reg b;
always @* begin
rf[63:32] = biu[63:32] & {32{okidoki}};
rf[31:0] = {32{okidoki}};
rf[63:32] = biu[63:32] & {32{b}};
rf[31:0] = {32{b}};
rf2 = rf;
rf2[31:0] = ~{32{okidoki}};
rf2[31:0] = ~{32{b}};
end
reg [31:0] src1, src0, sr, mask;
@ -49,15 +49,18 @@ module t (/*AUTOARG*/
sl_mask[27], sl_mask[28], sl_mask[29],
sl_mask[30], sl_mask[31]};
wire [95:0] widerep = {2{({2{({2{ {b,b}, {b,{2{b}}}, {{2{b}},b}, {2{({2{b}})}} }})}})}};
wire [1:0] w = {2{b}};
always @ (posedge clk) begin
if (cyc!=0) begin
cyc <= cyc + 1;
`ifdef TEST_VERBOSE
$write("%x %x %x %x %x\n", rf, rf2, dualasr, sl_mask, sr_mask);
$write("cyc=%0d d=%x %x %x %x %x %x\n", cyc, b, rf, rf2, dualasr, sl_mask, sr_mask);
`endif
if (cyc==1) begin
biu <= 64'h12451282_abadee00;
okidoki <= 1'b0;
b <= 1'b0;
src1 <= 32'h00000001;
src0 <= 32'h9a4f1235;
sr <= 32'h0f19f567;
@ -65,7 +68,7 @@ module t (/*AUTOARG*/
end
if (cyc==2) begin
biu <= 64'h12453382_abad8801;
okidoki <= 1'b1;
b <= 1'b1;
if (rf != 64'h0) $stop;
if (rf2 != 64'h00000000ffffffff) $stop;
src1 <= 32'h0010000f;
@ -75,10 +78,11 @@ module t (/*AUTOARG*/
if (dualasr != 32'h8f1f7060) $stop;
if (sl_mask != 32'hfffffffe) $stop;
if (sr_mask != 32'h7fffffff) $stop;
if (widerep != '0) $stop;
end
if (cyc==3) begin
biu <= 64'h12422382_77ad8802;
okidoki <= 1'b1;
b <= 1'b1;
if (rf != 64'h12453382ffffffff) $stop;
if (rf2 != 64'h1245338200000000) $stop;
src1 <= 32'h0000000f;
@ -88,6 +92,7 @@ module t (/*AUTOARG*/
if (dualasr != 32'h0000ffff) $stop;
if (sl_mask != 32'hffff8000) $stop;
if (sr_mask != 32'h0001ffff) $stop;
if (widerep != '1) $stop;
end
if (cyc==4) begin
if (rf != 64'h12422382ffffffff) $stop;
@ -96,6 +101,7 @@ module t (/*AUTOARG*/
if (sl_mask != 32'hffff8000) $stop;
if (sr_mask != 32'h0001ffff) $stop;
$write("*-* All Finished *-*\n");
if (widerep != '1) $stop;
$finish;
end
end