From 22fcd616aa919f5b8def25d671feaf1090c59e0c Mon Sep 17 00:00:00 2001
From: Geza Lore <gezalore@gmail.com>
Date: Wed, 5 Oct 2022 22:54:36 +0100
Subject: [PATCH] DfgPeephole: Further restrict PUSH_REDUCTION_THROUGH_CONCAT

Only apply the pattern when one operand of the concatenation is a
constant, so that subsequent constant folding is guaranteed to
eliminate part of the expression. Otherwise the pattern sometimes
interferes with the simplification of concatenations and harms
overall performance.
---
 src/V3DfgPeephole.cpp           |  9 +++++----
 test_regress/t/t_dfg_peephole.v | 12 ++++++------
 2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp
index ca8d5b95e..79c7cd0c6 100644
--- a/src/V3DfgPeephole.cpp
+++ b/src/V3DfgPeephole.cpp
@@ -415,12 +415,12 @@ class V3DfgPeephole final : public DfgVisitor {
         }
 
         if (DfgConcat* const concatp = srcp->cast<DfgConcat>()) {
-            if (!concatp->hasMultipleSinks()) {
+            if (concatp->lhsp()->is<DfgConst>() || concatp->rhsp()->is<DfgConst>()) {
                 APPLYING(PUSH_REDUCTION_THROUGH_CONCAT) {
                     // Reduce the parts of the concatenation
-                    Reduction* const lRedp = new Reduction{m_dfg, srcp->fileline(), m_bitDType};
+                    Reduction* const lRedp = new Reduction{m_dfg, concatp->fileline(), m_bitDType};
                     lRedp->srcp(concatp->lhsp());
-                    Reduction* const rRedp = new Reduction{m_dfg, srcp->fileline(), m_bitDType};
+                    Reduction* const rRedp = new Reduction{m_dfg, concatp->fileline(), m_bitDType};
                     rRedp->srcp(concatp->rhsp());
 
                     // Bitwise reduce the results
@@ -429,9 +429,10 @@ class V3DfgPeephole final : public DfgVisitor {
                     replacementp->rhsp(rRedp);
                     vtxp->replaceWith(replacementp);
 
-                    // Optimize the new reductions
+                    // Optimize the new terms
                     optimizeReduction(lRedp);
                     optimizeReduction(rRedp);
+                    iterate(replacementp);
                     return;
                 }
             }
diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v
index bf7a1a721..7f9acc62b 100644
--- a/test_regress/t/t_dfg_peephole.v
+++ b/test_regress/t/t_dfg_peephole.v
@@ -149,12 +149,12 @@ module t (
    `signal(PUSH_BITWISE_THROUGH_REDUCTION_AND, (&(rand_a + 64'd105)) & (&(rand_b + 64'd108)));
    `signal(PUSH_BITWISE_THROUGH_REDUCTION_OR,  (|(rand_a + 64'd106)) | (|(rand_b + 64'd109)));
    `signal(PUSH_BITWISE_THROUGH_REDUCTION_XOR, (^(rand_a + 64'd107)) ^ (^(rand_b + 64'd110)));
-   `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &{(rand_a + 64'd102), rand_b});
-   `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR,  |{(rand_a + 64'd103), rand_b});
-   `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^{(rand_a + 64'd104), rand_b});
-   `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &{randbit_a ^ rand_a[0], rand_b});
-   `signal(REMOVE_WIDTH_ONE_REDUCTION_OR,  |{randbit_a ^ rand_a[1], rand_b});
-   `signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^{randbit_a ^ rand_a[2], rand_b});
+   `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &{1'd1, rand_b});
+   `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR,  |{1'd1, rand_b});
+   `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^{1'd1, rand_b});
+   `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &rand_a[0]);
+   `signal(REMOVE_WIDTH_ONE_REDUCTION_OR,  |rand_a[0]);
+   `signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^rand_a[0]);
    `signal(REMOVE_XOR_WITH_ZERO, 64'd0 ^ rand_a);
    `signal(REMOVE_XOR_WITH_ONES, -64'd1 ^ rand_a);
    `signal(REPLACE_COND_DEC, randbit_a ? rand_b - 64'b1 : rand_b);