From b37c84109dc362c59ff308c62795b4f1889e1900 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 22 Jun 2026 10:06:26 +0100 Subject: [PATCH] Optimize staticly known oversize shifts (#7806) The non *Ovr flavours of AstShift* have better downstream constant folding, so keep using those if proven safe. Fold overshifts explicitly instead of introducing *Ovr shifts. --- src/V3Premit.cpp | 70 +++++++++++++++++++------------ test_regress/t/t_opt_const.py | 2 +- test_regress/t/t_opt_const_dfg.py | 2 +- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/V3Premit.cpp b/src/V3Premit.cpp index 9fcd89ddf..c5a9fa9d1 100644 --- a/src/V3Premit.cpp +++ b/src/V3Premit.cpp @@ -139,34 +139,52 @@ class PremitVisitor final : public VNVisitor { } void visitShift(AstNodeBiop* nodep) { - // Shifts of > 32/64 bits in C++ will wrap-around and generate non-0s UINFO(4, " ShiftFix " << nodep); - const AstConst* const shiftp = VN_CAST(nodep->rhsp(), Const); - if (shiftp && shiftp->num().mostSetBitP1() > 32) { - shiftp->v3warn( - E_UNSUPPORTED, - "Unsupported: Shifting of by over 32-bit number isn't supported." - << " (This isn't a shift of 32 bits, but a shift of 2^32, or 4 billion!)\n"); - } - if (nodep->widthMin() <= 64 // Else we'll use large operators which work right - // C operator's width must be < maximum shift which is - // based on Verilog width - && nodep->width() < (1LL << nodep->rhsp()->widthMin())) { - AstNode* newp; - if (VN_IS(nodep, ShiftL)) { - newp = new AstShiftLOvr{nodep->fileline(), nodep->lhsp()->unlinkFrBack(), - nodep->rhsp()->unlinkFrBack()}; - } else if (VN_IS(nodep, ShiftR)) { - newp = new AstShiftROvr{nodep->fileline(), nodep->lhsp()->unlinkFrBack(), - nodep->rhsp()->unlinkFrBack()}; - } else { - UASSERT_OBJ(VN_IS(nodep, ShiftRS), nodep, "Bad case"); - newp = new AstShiftRSOvr{nodep->fileline(), nodep->lhsp()->unlinkFrBack(), - nodep->rhsp()->unlinkFrBack()}; + UASSERT_OBJ(VN_IS(nodep, ShiftL) || VN_IS(nodep, ShiftR) || VN_IS(nodep, ShiftRS), nodep, + "Bad case"); + // Shift larger than the width of the type (overshift) is undefined behavour in C++ + // (in practice will shift by the wrapped shift amount). These are requierd to go to + // zero/msbs, so replacing them here. + FileLine* const flp = nodep->fileline(); + if (const AstConst* const shiftp = VN_CAST(nodep->rhsp(), Const)) { + // Shift amount known to be constant. If oversized shift, replace with zero/msbs. + // Otherwise we can leave the original shifts which have better constant folding + // than the *Ovr versions. + const bool isOversized = shiftp->num().mostSetBitP1() > 32 // + || (shiftp->num().toSQuad() >= nodep->width()); + if (isOversized) { + AstNodeExpr* newp = nullptr; + if (VN_IS(nodep, ShiftRS)) { + AstNodeExpr* const lhsp = nodep->lhsp()->unlinkFrBack(); + AstNodeExpr* const msbp = new AstSel{flp, lhsp, nodep->width() - 1, 1}; + newp = new AstExtendS{flp, msbp, nodep->width()}; + } else { + newp = new AstConst{flp, AstConst::DTyped{}, nodep->dtypep()}; + } + nodep->replaceWithKeepDType(newp); + VL_DO_DANGLING(pushDeletep(nodep), nodep); + return; + } + } else { + // Shift amount not known at compile time. Convert to *Ovr version. Don't need to do + // if it would use a wide operation which works correctly at runtime, of if the max + // value of the shift amount is less than the with of the shifted value. + if (nodep->widthMin() <= VL_QUADSIZE + && (nodep->width() < (1LL << nodep->rhsp()->widthMin()))) { + AstNodeExpr* const lhsp = nodep->lhsp()->unlinkFrBack(); + AstNodeExpr* const rhsp = nodep->rhsp()->unlinkFrBack(); + AstNodeExpr* newp = nullptr; + if (VN_IS(nodep, ShiftL)) { + newp = new AstShiftLOvr{flp, lhsp, rhsp}; + } else if (VN_IS(nodep, ShiftR)) { + newp = new AstShiftROvr{flp, lhsp, rhsp}; + } else { + newp = new AstShiftRSOvr{flp, lhsp, rhsp}; + } + nodep->replaceWithKeepDType(newp); + VL_DO_DANGLING(pushDeletep(nodep), nodep); + return; } - nodep->replaceWithKeepDType(newp); - VL_DO_DANGLING(pushDeletep(nodep), nodep); - return; } iterateChildren(nodep); checkNode(nodep); diff --git a/test_regress/t/t_opt_const.py b/test_regress/t/t_opt_const.py index 9a49ab61d..e4d50d945 100755 --- a/test_regress/t/t_opt_const.py +++ b/test_regress/t/t_opt_const.py @@ -16,7 +16,7 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "-fno-dfg", "--stats", test. test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 48) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 50) test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 1) test.passes() diff --git a/test_regress/t/t_opt_const_dfg.py b/test_regress/t/t_opt_const_dfg.py index 6ad2d3549..41f6a2976 100755 --- a/test_regress/t/t_opt_const_dfg.py +++ b/test_regress/t/t_opt_const_dfg.py @@ -18,7 +18,7 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats", test.pli_filename test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 38) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 40) test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 1) test.passes()