diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in
index ce9ccead6..0cd331087 100644
--- a/src/Makefile_obj.in
+++ b/src/Makefile_obj.in
@@ -217,6 +217,7 @@ RAW_OBJS = \
 	V3LinkParse.o \
 	V3LinkResolve.o \
 	V3Localize.o \
+	V3MergeCond.o \
 	V3Name.o \
 	V3Number.o \
 	V3Options.o \
diff --git a/src/V3MergeCond.cpp b/src/V3MergeCond.cpp
new file mode 100644
index 000000000..2a5fd9556
--- /dev/null
+++ b/src/V3MergeCond.cpp
@@ -0,0 +1,371 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Merge branches/ternary ?:
+//
+// Code available from: https://verilator.org
+//
+//*************************************************************************
+//
+// Copyright 2003-2020 by Wilson Snyder. This program is free software; you
+// can redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//*************************************************************************
+// V3MergeCond's Transformations:
+//
+// Look for sequences of assignments with ternary conditional on the right
+// hand side with the same condition:
+//   lhs0 = cond ? then0 : else0;
+//   lhs1 = cond ? then1 : else1;
+//   lhs2 = cond ? then2 : else2;
+//
+// This seems to be a common pattern and can make the C compiler take a
+// long time when compiling it with optimization. For us it's easy and fast
+// to convert this to 'if' statements because we know the pattern is common:
+//   if (cond) {
+//       lhs0 = then0;
+//       lhs1 = then1;
+//       lhs2 = then2;
+//   } else {
+//       lhs0 = else0;
+//       lhs1 = else1;
+//       lhs2 = else2;
+//   }
+//
+// For 1-bit signals, we consider strength reduced forms to be conditionals,
+// but only if we already encountered a true conditional we can merge with.
+// If we did, then act as if:
+//   'lhs = cond & value' is actually 'lhs = cond ? value : 1'd0'
+//   'lhs = cond' is actually 'lhs = cond ? 1'd1 : 1'd0'.
+//
+// An assignment that writes a variable read by its own condition, for
+// example 'c = c ? a : b', never starts a merge list: the statements
+// following it must observe the updated condition.
+//
+//*************************************************************************
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+#include "V3Global.h"
+#include "V3MergeCond.h"
+#include "V3Stats.h"
+#include "V3Ast.h"
+
+//######################################################################
+
+// Decides whether an assignment may take part in a merge. Expects
+// AstVar::user1 to be set on every variable referenced by the condition.
+class CheckMergeableVisitor : public AstNVisitor {
+private:
+    // STATE
+    bool m_mergeable;  // State tracking whether tree being processed is a mergeable condition
+
+    // METHODS
+    VL_DEBUG_FUNC;  // Declare debug()
+
+    // Record that the tree is not mergeable and log why
+    void clearMergeable(const AstNode* nodep, const char* reason) {
+        UASSERT_OBJ(m_mergeable, nodep, "Should have short-circuited traversal");
+        m_mergeable = false;
+        UINFO(9, "Clearing mergeable on " << nodep << " due to " << reason << endl);
+    }
+
+    // VISITORS
+    virtual void visit(AstNode* nodep) VL_OVERRIDE {
+        if (!m_mergeable) return;
+        // Clear if node is impure
+        if (!nodep->isPure()) {
+            clearMergeable(nodep, "impure");
+            return;
+        }
+        iterateChildrenConst(nodep);
+    }
+    virtual void visit(AstVarRef* nodep) VL_OVERRIDE {
+        if (!m_mergeable) return;
+        // Clear if it's an LValue referencing a marked variable
+        if (nodep->lvalue() && nodep->varp()->user1()) {
+            clearMergeable(nodep, "might modify condition");
+        }
+    }
+
+public:
+    CheckMergeableVisitor()
+        : m_mergeable(false) {}
+
+    // Return false if this node should not be merged at all because:
+    //  - It contains an impure expression
+    //  - It contains an LValue referencing the condition
+    bool operator()(const AstNodeAssign* node) {
+        m_mergeable = true;
+        // The visitors above only read the tree; the cast merely drops
+        // 'const' for the iterate call.
+        iterateChildrenConst(const_cast<AstNodeAssign*>(node));
+        return m_mergeable;
+    }
+};
+
+// Flags, via AstVar::user1, every variable referenced in a tree
+class MarkVarsVisitor : public AstNVisitor {
+private:
+    // METHODS
+    VL_DEBUG_FUNC;  // Declare debug()
+
+    // VISITORS
+    virtual void visit(AstVarRef* nodep) VL_OVERRIDE { nodep->varp()->user1(1); }
+    virtual void visit(AstNode* nodep) VL_OVERRIDE { iterateChildrenConst(nodep); }
+
+public:
+    // Set user1 on all referenced AstVar
+    void operator()(AstNode* node) {
+        AstNode::user1ClearTree();
+        iterate(node);
+    }
+};
+
+// Collects runs of adjacent assignments that are conditional on the same
+// expression and rewrites each run as a single AstIf
+class MergeCondVisitor : public AstNVisitor {
+private:
+    // NODE STATE
+    // AstVar::user1        -> Flag set for variables referenced by m_mgCondp
+    AstUser1InUse m_user1InUse;
+
+    // STATE
+    VDouble0 m_statMerges;  // Statistic tracking
+    VDouble0 m_statMergedItems;  // Statistic tracking
+    VDouble0 m_statLongestList;  // Statistic tracking
+
+    AstNode* m_mgFirstp;  // First node in merged sequence
+    AstNode* m_mgCondp;  // The condition of the first node
+    AstNode* m_mgLastp;  // Last node in merged sequence
+    const AstNode* m_mgNextp;  // Next node in list being examined
+    uint32_t m_listLength;  // Length of current list
+
+    CheckMergeableVisitor m_checkMergeable;  // Sub visitor for encapsulation & speed
+    MarkVarsVisitor m_markVars;  // Sub visitor for encapsulation & speed
+
+    // METHODS
+    VL_DEBUG_FUNC;  // Declare debug()
+
+    // This function extracts the Cond node from the RHS, if there is one and
+    // it is in a supported position, which are:
+    // - RHS is the Cond
+    // - RHS is And(Const, Cond). This And is inserted often by V3Clean.
+    AstNodeCond* extractCond(AstNode* rhsp) {
+        if (AstNodeCond* const condp = VN_CAST(rhsp, NodeCond)) {
+            return condp;
+        } else if (AstAnd* const andp = VN_CAST(rhsp, And)) {
+            if (AstNodeCond* const condp = VN_CAST(andp->rhsp(), NodeCond)) {
+                if (VN_IS(andp->lhsp(), Const)) { return condp; }
+            }
+        }
+        return NULL;
+    }
+
+    // Apply (_ & 1'b1), iff node is wider than 1 bit. This is necessary
+    // because this pass is after V3Clean, and sometimes we have an AstAnd with
+    // a 1-bit condition on one side, but a more than 1-bit value on the other
+    // side, so we need to keep only the LSB.
+    AstNode* maskLsb(AstNode* nodep) {
+        if (nodep->width1()) {
+            return nodep;
+        } else {
+            AstNode* const maskp = new AstConst(nodep->fileline(), AstConst::LogicTrue());
+            return new AstAnd(nodep->fileline(), nodep, maskp);
+        }
+    }
+
+    // Fold the RHS expression assuming the given condition state. Unlink bits
+    // from the RHS which is only used once, and can be reused. What remains
+    // of the RHS is expected to be deleted by the caller.
+    // mergeEnd calls this twice on the same RHS, first with condTrue set and
+    // then cleared, so operands unlinked by the first call are gone by the second.
+    AstNode* foldAndUnlink(AstNode* rhsp, bool condTrue) {
+        if (rhsp->sameTree(m_mgCondp)) {
+            return condTrue ? new AstConst(rhsp->fileline(), AstConst::LogicTrue())
+                            : new AstConst(rhsp->fileline(), AstConst::LogicFalse());
+        } else if (AstNodeCond* const condp = extractCond(rhsp)) {
+            AstNode* const resp
+                = condTrue ? condp->expr1p()->unlinkFrBack() : condp->expr2p()->unlinkFrBack();
+            if (condp == rhsp) { return resp; }
+            if (AstAnd* const andp = VN_CAST(rhsp, And)) {
+                UASSERT_OBJ(andp->rhsp() == condp, rhsp, "Should not try to fold this");
+                return new AstAnd(andp->fileline(), andp->lhsp()->cloneTree(false), resp);
+            }
+        } else if (AstAnd* const andp = VN_CAST(rhsp, And)) {
+            // For 'value & cond' the condTrue call unlinks the LHS, so it can
+            // be missing here; check before dereferencing it.
+            AstNode* const lhsp = andp->lhsp();
+            if (lhsp && lhsp->sameTree(m_mgCondp)) {
+                return condTrue ? maskLsb(andp->rhsp()->unlinkFrBack())
+                                : new AstConst(rhsp->fileline(), AstConst::LogicFalse());
+            } else {
+                UASSERT_OBJ(andp->rhsp()->sameTree(m_mgCondp), rhsp,
+                            "AstAnd doesn't hold condition expression");
+                return condTrue ? maskLsb(andp->lhsp()->unlinkFrBack())
+                                : new AstConst(rhsp->fileline(), AstConst::LogicFalse());
+            }
+        }
+        rhsp->v3fatal("Don't know how to fold expression");
+    }
+
+    // Close the current list. A list of more than one node is replaced by an
+    // equivalent AstIf; the list tracking state is reset either way.
+    void mergeEnd() {
+        UASSERT(m_mgFirstp, "mergeEnd without list");
+        // Merge if list is longer than one node
+        if (m_mgFirstp != m_mgLastp) {
+            UINFO(6, "MergeCond - First: " << m_mgFirstp << " Last: " << m_mgLastp << endl);
+            ++m_statMerges;
+            if (m_listLength > m_statLongestList) m_statLongestList = m_listLength;
+
+            // Create equivalent 'if' statement and insert it before the first node
+            AstIf* const ifp
+                = new AstIf(m_mgCondp->fileline(), m_mgCondp->unlinkFrBack(), NULL, NULL);
+            m_mgFirstp->replaceWith(ifp);
+            ifp->addNextHere(m_mgFirstp);
+            // Unzip the list and insert under branches
+            AstNode* nextp = m_mgFirstp;
+            do {
+                // Grab next pointer and unlink
+                AstNode* const currp = nextp;
+                nextp = currp != m_mgLastp ? currp->nextp() : NULL;
+                currp->unlinkFrBack();
+                // Skip over comments
+                if (VN_IS(currp, Comment)) {
+                    VL_DO_DANGLING(currp->deleteTree(), currp);
+                    continue;
+                }
+                // Count
+                ++m_statMergedItems;
+                // Unlink RHS and clone to get the 2 assignments (reusing currp)
+                AstNodeAssign* const thenp = VN_CAST(currp, NodeAssign);
+                AstNode* const rhsp = thenp->rhsp()->unlinkFrBack();
+                AstNodeAssign* const elsep = thenp->cloneTree(false);
+                // Construct the new RHSs and add to branches
+                thenp->rhsp(foldAndUnlink(rhsp, true));
+                elsep->rhsp(foldAndUnlink(rhsp, false));
+                ifp->addIfsp(thenp);
+                ifp->addElsesp(elsep);
+                // Cleanup
+                VL_DO_DANGLING(rhsp->deleteTree(), rhsp);
+            } while (nextp);
+        }
+        // Reset state
+        m_mgFirstp = NULL;
+        m_mgCondp = NULL;
+        m_mgLastp = NULL;
+        m_mgNextp = NULL;
+    }
+
+    // Append nodep to the current list. Without a current list, nodep becomes
+    // the head of a new one (condp is then required), unless it modifies its
+    // own condition, in which case no list is started.
+    void addToList(AstNodeAssign* nodep, AstNode* condp) {
+        // Set up head of new list if node is first in list
+        if (!m_mgFirstp) {
+            UASSERT_OBJ(condp, nodep, "Cannot start new list without condition");
+            m_markVars(condp);
+            // Check again now that the marks describe this node's own
+            // condition. The caller's check ran against earlier marks, so it
+            // cannot catch e.g. 'c = c ? a : b', which must not head a list
+            // because following statements need the updated 'c'.
+            if (!m_checkMergeable(nodep)) {
+                // Drop the marks, they do not belong to any list
+                AstNode::user1ClearTree();
+                return;
+            }
+            m_mgFirstp = nodep;
+            m_mgCondp = condp;
+            m_listLength = 0;
+        }
+        // Add node
+        ++m_listLength;
+        // Track end of list
+        m_mgLastp = nodep;
+        // Set up expected next node in list. Skip over any comments, (inserted
+        // by V3Order before always blocks)
+        m_mgNextp = nodep->nextp();
+        while (m_mgNextp && VN_IS(m_mgNextp, Comment)) { m_mgNextp = m_mgNextp->nextp(); }
+        // If last under parent, done with current list
+        if (!m_mgNextp) mergeEnd();
+    }
+
+    // VISITORS
+    virtual void visit(AstNodeAssign* nodep) VL_OVERRIDE {
+        AstNode* const rhsp = nodep->rhsp();
+        if (AstNodeCond* const condp = extractCond(rhsp)) {
+            if (!m_checkMergeable(nodep)) {
+                // Node not mergeable.
+                // Finish current list if any, do not start a new one.
+                if (m_mgFirstp) mergeEnd();
+                return;
+            }
+            if (m_mgFirstp && (m_mgNextp != nodep || !condp->condp()->sameTree(m_mgCondp))) {
+                // Node in different list, or has different condition.
+                // Finish current list, addToList will start a new one.
+                mergeEnd();
+            }
+            // Add current node
+            addToList(nodep, condp->condp());
+        } else if (m_mgFirstp) {
+            // RHS is not a conditional, but we already started a list.
+            // If it's a 1-bit signal, and a mergeable assignment, try reduced forms
+            if (rhsp->widthMin() == 1 && m_checkMergeable(nodep)) {
+                // Is it a 'lhs = cond & value' or 'lhs = value & cond'?
+                if (AstAnd* const andp = VN_CAST(rhsp, And)) {
+                    if (andp->lhsp()->sameTree(m_mgCondp) || andp->rhsp()->sameTree(m_mgCondp)) {
+                        addToList(nodep, NULL);
+                        return;
+                    }
+                }
+                // Is it simply 'lhs = cond'?
+                if (rhsp->sameTree(m_mgCondp)) {
+                    addToList(nodep, NULL);
+                    return;
+                }
+            }
+            // Not added to list, so we are done with the current list
+            mergeEnd();
+        }
+    }
+    virtual void visit(AstComment*) VL_OVERRIDE {}  // Skip over comments
+    // For speed, only iterate what is necessary.
+    virtual void visit(AstNetlist* nodep) VL_OVERRIDE { iterateAndNextNull(nodep->modulesp()); }
+    virtual void visit(AstNodeModule* nodep) VL_OVERRIDE { iterateAndNextNull(nodep->stmtsp()); }
+    virtual void visit(AstCFunc* nodep) VL_OVERRIDE {
+        iterateChildren(nodep);
+        // Close list, if there is one at the end of the function
+        if (m_mgFirstp) mergeEnd();
+    }
+    virtual void visit(AstNodeStmt* nodep) VL_OVERRIDE { iterateChildren(nodep); }
+    virtual void visit(AstNode* nodep) VL_OVERRIDE {}
+
+public:
+    // CONSTRUCTORS
+    explicit MergeCondVisitor(AstNetlist* nodep) {
+        m_mgFirstp = NULL;
+        m_mgCondp = NULL;
+        m_mgLastp = NULL;
+        m_mgNextp = NULL;
+        m_listLength = 0;
+        iterate(nodep);
+    }
+    virtual ~MergeCondVisitor() {
+        V3Stats::addStat("Optimizations, MergeCond merges", m_statMerges);
+        V3Stats::addStat("Optimizations, MergeCond merged items", m_statMergedItems);
+        V3Stats::addStat("Optimizations, MergeCond longest merge", m_statLongestList);
+    }
+};
+
+//######################################################################
+// MergeConditionals class functions
+
+void V3MergeCond::mergeAll(AstNetlist* nodep) {
+    UINFO(2, __FUNCTION__ << ": " << endl);
+    { MergeCondVisitor visitor(nodep); }
+    V3Global::dumpCheckGlobalTree("merge_cond", 0, v3Global.opt.dumpTreeLevel(__FILE__) >= 6);
+}
diff --git a/src/V3MergeCond.h b/src/V3MergeCond.h
new file mode 100644
index 000000000..52232bd0b
--- /dev/null
+++ b/src/V3MergeCond.h
@@ -0,0 +1,34 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Merge branches/ternary ?:
+//
+// Code available from: https://verilator.org
+//
+//*************************************************************************
+//
+// Copyright 2003-2020 by Wilson Snyder. This program is free software; you
+// can redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//*************************************************************************
+
+#ifndef _V3MERGECOND_H_
+#define _V3MERGECOND_H_ 1
+
+#include "config_build.h"
+#include "verilatedos.h"
+
+#include "V3Error.h"
+#include "V3Ast.h"
+
+//============================================================================
+
+// Entry point of the conditional merging pass, see V3MergeCond.cpp
+class V3MergeCond {
+public:
+    static void mergeAll(AstNetlist* nodep);
+};
+
+#endif  // Guard
diff --git a/src/V3Options.cpp b/src/V3Options.cpp
index 775f3e79e..364917adc 100644
--- a/src/V3Options.cpp
+++ b/src/V3Options.cpp
@@ -917,12 +917,12 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
                     case 'b': m_oCombine = flag; break;
                     case 'c': m_oConst = flag; break;
                     case 'd': m_oDedupe = flag; break;
-                    case 'm': m_oAssemble = flag; break;
                     case 'e': m_oCase = flag; break;
                     case 'g': m_oGate = flag; break;
                     case 'i': m_oInline = flag; break;
                     case 'k': m_oSubstConst = flag; break;
                     case 'l': m_oLife = flag; break;
+                    case 'm': m_oAssemble = flag; break;
                     case 'p':
                         m_public = !flag;
                         break;  // With -Op so flag=0, we want public on so few optimizations done
@@ -931,6 +931,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
                     case 't': m_oLifePost = flag; break;
                     case 'u': m_oSubst = flag; break;
                     case 'v': m_oReloop = flag; break;
+                    case 'w': m_oMergeCond = flag; break;
                     case 'x': m_oExpand = flag; break;
                     case 'y': m_oAcycSimp = flag; break;
                     case 'z': m_oLocalize = flag; break;
@@ -1706,23 +1707,24 @@ void V3Options::optimize(int level) {
     // Set all optimizations to on/off
     bool flag = level > 0;
     m_oAcycSimp = flag;
+    m_oAssemble = flag;
     m_oCase = flag;
     m_oCombine = flag;
     m_oConst = flag;
+    m_oDedupe = flag;
     m_oExpand = flag;
     m_oGate = flag;
     m_oInline = flag;
     m_oLife = flag;
     m_oLifePost = flag;
     m_oLocalize = flag;
+    m_oMergeCond = flag;
     m_oReloop = flag;
     m_oReorder = flag;
     m_oSplit = flag;
     m_oSubst = flag;
     m_oSubstConst = flag;
     m_oTable = flag;
-    m_oDedupe = flag;
-    m_oAssemble = flag;
     // And set specific optimization levels
     if (level >= 3) {
         m_inlineMult = -1;  // Maximum inlining
diff --git a/src/V3Options.h b/src/V3Options.h
index c52f6883f..368070cff 100644
--- a/src/V3Options.h
+++ b/src/V3Options.h
@@ -329,17 +329,18 @@ private:
     // MEMBERS (optimizations)
     //                          // main switch: -Op: --public
     bool m_oAcycSimp;  // main switch: -Oy: acyclic pre-optimizations
+    bool m_oAssemble;  // main switch: -Om: assign assemble
     bool m_oCase;  // main switch: -Oe: case tree conversion
     bool m_oCombine;  // main switch: -Ob: common icode packing
     bool m_oConst;  // main switch: -Oc: constant folding
     bool m_oDedupe;  // main switch: -Od: logic deduplication
-    bool m_oAssemble;  // main switch: -Om: assign assemble
     bool m_oExpand;  // main switch: -Ox: expansion of C macros
     bool m_oGate;  // main switch: -Og: gate wire elimination
+    bool m_oInline;  // main switch: -Oi: module inlining
     bool m_oLife;  // main switch: -Ol: variable lifetime
     bool m_oLifePost;  // main switch: -Ot: delayed assignment elimination
     bool m_oLocalize;  // main switch: -Oz: convert temps to local variables
-    bool m_oInline;  // main switch: -Oi: module inlining
+    bool m_oMergeCond;  // main switch: -Ow: merge conditionals
     bool m_oReloop;  // main switch: -Ov: reform loops
     bool m_oReorder;  // main switch: -Or: reorder assignments in blocks
     bool m_oSplit;  // main switch: -Os: always assignment splitting
@@ -548,18 +549,18 @@ public:
 
     // ACCESSORS (optimization options)
     bool oAcycSimp() const { return m_oAcycSimp; }
+    bool oAssemble() const { return m_oAssemble; }
     bool oCase() const { return m_oCase; }
     bool oCombine() const { return m_oCombine; }
     bool oConst() const { return m_oConst; }
     bool oDedupe() const { return m_oDedupe; }
-    bool oAssemble() const { return m_oAssemble; }
     bool oExpand() const { return m_oExpand; }
     bool oGate() const { return m_oGate; }
-    bool oDup() const { return oLife(); }
+    bool oInline() const { return m_oInline; }
     bool oLife() const { return m_oLife; }
     bool oLifePost() const { return m_oLifePost; }
     bool oLocalize() const { return m_oLocalize; }
-    bool oInline() const { return m_oInline; }
+    bool oMergeCond() const { return m_oMergeCond; }
     bool oReloop() const { return m_oReloop; }
     bool oReorder() const { return m_oReorder; }
     bool oSplit() const { return m_oSplit; }
diff --git a/src/Verilator.cpp b/src/Verilator.cpp
index ed2826ffb..f793f4388 100644
--- a/src/Verilator.cpp
+++ b/src/Verilator.cpp
@@ -65,6 +65,7 @@
 #include "V3LinkParse.h"
 #include "V3LinkResolve.h"
 #include "V3Localize.h"
+#include "V3MergeCond.h"
 #include "V3Name.h"
 #include "V3Order.h"
 #include "V3Os.h"
@@ -423,13 +424,18 @@ static void process() {
         V3Dead::deadifyAll(v3Global.rootp());
     }
 
-    if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && v3Global.opt.oReloop()) {
-        // Reform loops to reduce code size
-        // Must be after all Sel/array index based optimizations
-        V3Reloop::reloopAll(v3Global.rootp());
-    }
-
     if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly()) {
+        if (v3Global.opt.oMergeCond()) {
+            // Merge assignments conditional on the same expression into 'if' statements
+            V3MergeCond::mergeAll(v3Global.rootp());
+        }
+
+        if (v3Global.opt.oReloop()) {
+            // Reform loops to reduce code size
+            // Must be after all Sel/array index based optimizations
+            V3Reloop::reloopAll(v3Global.rootp());
+        }
+
         // Fix very deep expressions
         // Mark evaluation functions as member functions, if needed.
         V3Depth::depthAll(v3Global.rootp());
diff --git a/test_regress/t/t_merge_cond.pl b/test_regress/t/t_merge_cond.pl
new file mode 100755
index 000000000..529b23ff4
--- /dev/null
+++ b/test_regress/t/t_merge_cond.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt_all => 1);
+
+compile(
+    verilator_flags2 => ["-unroll-count 64", "--stats"],
+    );
+
+execute(
+    check_finished => 1,
+    );
+
+if ($Self->{vlt}) {
+    # Note, with vltmt this might be split differently, so only checking vlt.
+    file_grep($Self->{stats}, qr/Optimizations, MergeCond merges\s+(\d+)/i,
+              10);
+    file_grep($Self->{stats}, qr/Optimizations, MergeCond merged items\s+(\d+)/i,
+              640);
+    file_grep($Self->{stats}, qr/Optimizations, MergeCond longest merge\s+(\d+)/i,
+              64);
+}
+
+ok(1);
+1;
diff --git a/test_regress/t/t_merge_cond.v b/test_regress/t/t_merge_cond.v
new file mode 100644
index 000000000..10a12bb29
--- /dev/null
+++ b/test_regress/t/t_merge_cond.v
@@ -0,0 +1,193 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2020 by Geza Lore.
+// SPDX-License-Identifier: CC0-1.0
+
+`define check(got ,exp) do if ((got) !== (exp)) begin $write("%%Error: %s:%0d: cyc=%0d got='h%x exp='h%x\n", `__FILE__,`__LINE__, cyc, (got), (exp)); $stop; end while(0)
+
+module t (/*AUTOARG*/
+   // Inputs
+   clk
+   );
+   input clk;
+
+   integer cyc=0;
+   reg [63:0] crc= 64'h5aef0c8d_d70a4497;
+   reg [63:0] prev_crc;
+
+   always @ (posedge clk) begin
+      cyc <= cyc + 1;
+      crc <= {crc[62:0], crc[63]^crc[2]^crc[0]};
+
+      prev_crc <= crc;
+      if (cyc==99) begin
+         $write("*-* All Finished *-*\n");
+         $finish;
+      end
+   end
+
+   wire cond2 = &crc[1:0];
+   wire cond3 = &crc[2:0];
+
+   reg shuf_q [63:0];
+
+   always @(posedge clk) begin
+      reg bits [63:0];
+      reg shuf_a [63:0];
+      reg shuf_b [63:0];
+      reg shuf_c [63:0];
+      reg shuf_d [63:0];
+      reg shuf_e [63:0];
+
+      // Unpack these to test core algorithm
+      for (int i = 0; i < 64; i = i + 1) begin
+         bits[i] = crc[i];
+      end
+
+      for (int i = 0; i < 64; i = i + 1) begin
+         shuf_a[i] = cyc[0] ? bits[i] : bits[63-i];
+      end
+
+      if (cyc[1]) begin
+         for (int i = 0; i < 64; i = i + 1) begin
+            shuf_b[i] = cyc[0] ? bits[i] : bits[63-i];
+         end
+      end else begin
+         for (int i = 0; i < 64; i = i + 1) begin
+            shuf_b[i] = cyc[0] ? bits[63-i] : bits[i];
+         end
+      end
+
+      // Also test merge under clean/bit extract
+      for (int i = 0; i < 64; i = i + 1) begin
+         shuf_c[i] = cyc[0] ? crc[i] : crc[63-i];
+      end
+
+      // Merge with 'cond & value', 'value & cond', or 'cond'
+      shuf_d[0] = cond2 ? bits[0] : bits[63];
+      for (int i = 1; i < 32; i = i + 2) begin
+         shuf_d[i] = cond2 & bits[i];
+      end
+      for (int i = 2; i < 32; i = i + 2) begin
+         shuf_d[i] = bits[i] & cond2;
+      end
+      for (int i = 32; i < 64; i = i + 1) begin
+         shuf_d[i] = cond2;
+      end
+
+      // Merge with an '&' also used for masking of LSB.
+      shuf_e[0] = cond3 ? bits[0] : bits[63];
+      for (int i = 1; i < 64; i = i + 1) begin
+         shuf_e[i] = cond3 & crc[0];
+      end
+
+      // Also delayed..
+      for (int i = 0; i < 64; i = i + 1) begin
+         shuf_q[i] <= cyc[0] ? crc[i] : crc[63-i];
+      end
+
+      // Check results
+
+      if (cyc[0]) begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_a[i], crc[i]);
+      end else begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_a[i], crc[63-i]);
+      end
+
+      if (cyc[0] ~^ cyc[1]) begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_b[i], crc[i]);
+      end else begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_b[i], crc[63-i]);
+      end
+
+      if (cyc[0]) begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_c[i], crc[i]);
+      end else begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_c[i], crc[63-i]);
+      end
+
+      if (cond2) begin
+         `check(shuf_d[0], crc[0]);
+         for (int i = 1; i < 32; i = i + 1) `check(shuf_d[i], crc[i]);
+         for (int i = 32; i < 63; i = i + 1) `check(shuf_d[i], 1'd1); // NOTE(review): stops at 62, shuf_d[63] is never checked - confirm intended
+      end else begin
+         `check(shuf_d[0], crc[63]);
+         for (int i = 1; i < 32; i = i + 1) `check(shuf_d[i], 1'b0);
+         for (int i = 32; i < 63; i = i + 1) `check(shuf_d[i], 1'd0); // NOTE(review): stops at 62, shuf_d[63] is never checked - confirm intended
+      end
+
+      if (cond3) begin
+         `check(shuf_e[0], crc[0]);
+         for (int i = 1; i < 63; i = i + 1) `check(shuf_e[i], crc[0]); // NOTE(review): stops at 62, shuf_e[63] is never checked - confirm intended
+      end else begin
+         `check(shuf_e[0], crc[63]);
+         for (int i = 1; i < 63; i = i + 1) `check(shuf_e[i], 1'b0); // NOTE(review): stops at 62, shuf_e[63] is never checked - confirm intended
+      end
+
+      if (cyc > 0) begin
+         if (~cyc[0]) begin
+            for (int i = 0; i < 64; i = i + 1) `check(shuf_q[i], prev_crc[i]);
+         end else begin
+            for (int i = 0; i < 64; i = i + 1) `check(shuf_q[i], prev_crc[63-i]);
+         end
+
+         if (((cyc - 1) >> 1) % 2 == 1) begin
+            for (int i = 0; i < 64; i = i + 1) `check(shuf_g[i], prev_crc[i]);
+         end else begin
+            for (int i = 0; i < 64; i = i + 1) `check(shuf_g[i], prev_crc[63-i]);
+         end
+      end
+
+      if (cyc[2]) begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_w[i], crc[i]);
+      end else begin
+         for (int i = 0; i < 64; i = i + 1) `check(shuf_w[i], crc[63-i]);
+      end
+   end
+
+   // Generated always
+   reg shuf_g [63:0]; // Checked in the always block above
+   generate for (genvar i = 0 ; i < 64; i = i + 1)
+      always @(posedge clk) begin
+         shuf_g[i] <= cyc[1] ? crc[i] : crc[63-i];
+      end
+   endgenerate
+
+   // Generated assign
+   wire shuf_w [63:0]; // Checked in the always block above
+   generate for (genvar i = 0 ; i < 64; i = i + 1)
+      assign shuf_w[i] = cyc[2] ? crc[i] : crc[63-i];
+   endgenerate
+
+   // Things not to merge
+   always @(posedge clk) begin
+      reg x;
+      reg y;
+      reg z;
+      reg w;
+
+      // Do not merge if condition appears in an LVALUE
+      x = crc[0];
+      y = x ? crc[2] : crc[1];
+      x = x ? crc[3] : crc[4];
+      x = x ? crc[5] : crc[6];
+
+      `check(x, (crc[0] ? crc[3] : crc[4]) ? crc[5] : crc[6]);
+      `check(y, crc[0] ? crc[2] : crc[1]);
+
+      // Do not merge if condition is not a pure expression
+      $c("int _cnt = 0;"); // Counter incremented by each of the four conditions below
+      x = $c("_cnt++") ? crc[0] : crc[1];
+      y = $c("_cnt++") ? crc[2] : crc[3];
+      z = $c("_cnt++") ? crc[4] : crc[5];
+      w = $c("_cnt++") ? crc[6] : crc[7];
+      $c("if (_cnt != 4) abort();"); // Aborts unless the counter reached 4
+
+      `check(x, crc[1]);
+      `check(y, crc[2]);
+      `check(z, crc[4]);
+      `check(w, crc[6]);
+   end
+
+endmodule