Add MergeCond pass to combine assignments with ?: on rhs (#2376)
This provides a minor simulation performance benefit, but can provide a large C++ compilation time improvement, notably with Clang (4x). This patch implements #2366.
This commit is contained in:
parent
18870f8b62
commit
fe306a36b8
|
|
@ -217,6 +217,7 @@ RAW_OBJS = \
|
|||
V3LinkParse.o \
|
||||
V3LinkResolve.o \
|
||||
V3Localize.o \
|
||||
V3MergeCond.o \
|
||||
V3Name.o \
|
||||
V3Number.o \
|
||||
V3Options.o \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,340 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Merge branches/ternary ?:
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2020 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
// V3MergeCond's Transformations:
|
||||
//
|
||||
// Look for sequences of assignments with ternary conditional on the right
|
||||
// hand side with the same condition:
|
||||
// lhs0 = cond ? then0 : else0;
|
||||
// lhs1 = cond ? then1 : else1;
|
||||
// lhs2 = cond ? then2 : else2;
|
||||
//
|
||||
// This seems to be a common pattern and can make the C compiler take a
|
||||
// long time when compiling it with optimization. For us it's easy and fast
|
||||
// to convert this to 'if' statements because we know the pattern is common:
|
||||
// if (cond) {
|
||||
// lhs0 = then0;
|
||||
// lhs1 = then1;
|
||||
// lhs2 = then2;
|
||||
// } else {
|
||||
// lhs0 = else0;
|
||||
// lhs1 = else1;
|
||||
// lhs2 = else2;
|
||||
// }
|
||||
//
|
||||
// For 1-bit signals, we consider strength reduced forms to be conditionals,
|
||||
// but only if we already encountered a true conditional we can merge with.
|
||||
// If we did, then act as if:
|
||||
// 'lhs = cond & value' is actually 'lhs = cond ? value : 1'd0'
|
||||
// 'lhs = cond' is actually 'lhs = cond ? 1'd1 : 1'd0'.
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
#include "V3Global.h"
|
||||
#include "V3MergeCond.h"
|
||||
#include "V3Stats.h"
|
||||
#include "V3Ast.h"
|
||||
|
||||
//######################################################################
|
||||
|
||||
class CheckMergeableVisitor : public AstNVisitor {
|
||||
private:
|
||||
// STATE
|
||||
bool m_mergeable; // State tracking whether tree being processed is a mergeable condition
|
||||
|
||||
// METHODS
|
||||
VL_DEBUG_FUNC; // Declare debug()
|
||||
|
||||
void clearMergeable(const AstNode* nodep, const char* reason) {
|
||||
UASSERT_OBJ(m_mergeable, nodep, "Should have short-circuited traversal");
|
||||
m_mergeable = false;
|
||||
UINFO(9, "Clearing mergeable on " << nodep << " due to " << reason << endl);
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstNode* nodep) VL_OVERRIDE {
|
||||
if (!m_mergeable) return;
|
||||
// Clear if node is impure
|
||||
if (!nodep->isPure()) {
|
||||
clearMergeable(nodep, "impure");
|
||||
return;
|
||||
}
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
virtual void visit(AstVarRef* nodep) VL_OVERRIDE {
|
||||
if (!m_mergeable) return;
|
||||
// Clear if it's an LValue referencing a marked variable
|
||||
if (nodep->lvalue() && nodep->varp()->user1()) {
|
||||
clearMergeable(nodep, "might modify condition");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
CheckMergeableVisitor()
|
||||
: m_mergeable(false) {}
|
||||
|
||||
// Return false if this node should not be merged at all because:
|
||||
// - It contains an impure expression
|
||||
// - It contains an LValue referencing the condition
|
||||
bool operator()(const AstNodeAssign* node) {
|
||||
m_mergeable = true;
|
||||
iterateChildrenConst(const_cast<AstNodeAssign*>(node));
|
||||
return m_mergeable;
|
||||
}
|
||||
};
|
||||
|
||||
class MarkVarsVisitor : public AstNVisitor {
|
||||
private:
|
||||
// METHODS
|
||||
VL_DEBUG_FUNC; // Declare debug()
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstVarRef* nodep) VL_OVERRIDE { nodep->varp()->user1(1); }
|
||||
virtual void visit(AstNode* nodep) VL_OVERRIDE { iterateChildrenConst(nodep); }
|
||||
|
||||
public:
|
||||
// Set user1 on all referenced AstVar
|
||||
void operator()(AstNode* node) {
|
||||
AstNode::user1ClearTree();
|
||||
iterate(node);
|
||||
}
|
||||
};
|
||||
|
||||
class MergeCondVisitor : public AstNVisitor {
|
||||
private:
|
||||
// NODE STATE
|
||||
// AstVar::user1 -> Flag set for variables referenced by m_mgCondp
|
||||
AstUser1InUse m_user1InUse;
|
||||
|
||||
// STATE
|
||||
VDouble0 m_statMerges; // Statistic tracking
|
||||
VDouble0 m_statMergedItems; // Statistic tracking
|
||||
VDouble0 m_statLongestList; // Statistic tracking
|
||||
|
||||
AstNode* m_mgFirstp; // First node in merged sequence
|
||||
AstNode* m_mgCondp; // The condition of the first node
|
||||
AstNode* m_mgLastp; // Last node in merged sequence
|
||||
const AstNode* m_mgNextp; // Next node in list being examined
|
||||
uint32_t m_listLenght; // Length of current list
|
||||
|
||||
CheckMergeableVisitor m_checkMergeable; // Sub visitor for encapsulation & speed
|
||||
MarkVarsVisitor m_markVars; // Sub visitor for encapsulation & speed
|
||||
|
||||
// METHODS
|
||||
VL_DEBUG_FUNC; // Declare debug()
|
||||
|
||||
// This function extracts the Cond node from the RHS, if there is one and
|
||||
// it is in a supported position, which are:
|
||||
// - RHS is the Cond
|
||||
// - RHS is And(Const, Cond). This And is inserted often by V3Clean.
|
||||
AstNodeCond* extractCond(AstNode* rhsp) {
|
||||
if (AstNodeCond* const condp = VN_CAST(rhsp, NodeCond)) {
|
||||
return condp;
|
||||
} else if (AstAnd* const andp = VN_CAST(rhsp, And)) {
|
||||
if (AstNodeCond* const condp = VN_CAST(andp->rhsp(), NodeCond)) {
|
||||
if (VN_IS(andp->lhsp(), Const)) { return condp; }
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Apply (_ & 1'b1), iff node is wider than 1 bit. This is necessary
|
||||
// because this pass is after V3Clean, and sometimes we have an AstAnd with
|
||||
// a 1-bit condition on one side, but a more than 1-bit value on the other
|
||||
// side, so we need to keep only the LSB.
|
||||
AstNode* maskLsb(AstNode* nodep) {
|
||||
if (nodep->width1()) {
|
||||
return nodep;
|
||||
} else {
|
||||
AstNode* const maskp = new AstConst(nodep->fileline(), AstConst::LogicTrue());
|
||||
return new AstAnd(nodep->fileline(), nodep, maskp);
|
||||
}
|
||||
}
|
||||
|
||||
// Fold the RHS expression assuming the given condition state. Unlink bits
|
||||
// from the RHS which is only used once, and can be reused. What remains
|
||||
// of the RHS is expected to be deleted by the caller.
|
||||
AstNode* foldAndUnlink(AstNode* rhsp, bool condTrue) {
|
||||
if (rhsp->sameTree(m_mgCondp)) {
|
||||
return condTrue ? new AstConst(rhsp->fileline(), AstConst::LogicTrue())
|
||||
: new AstConst(rhsp->fileline(), AstConst::LogicFalse());
|
||||
} else if (AstNodeCond* const condp = extractCond(rhsp)) {
|
||||
AstNode* const resp
|
||||
= condTrue ? condp->expr1p()->unlinkFrBack() : condp->expr2p()->unlinkFrBack();
|
||||
if (condp == rhsp) { return resp; }
|
||||
if (AstAnd* const andp = VN_CAST(rhsp, And)) {
|
||||
UASSERT_OBJ(andp->rhsp() == condp, rhsp, "Should not try to fold this");
|
||||
return new AstAnd(andp->fileline(), andp->lhsp()->cloneTree(false), resp);
|
||||
}
|
||||
} else if (AstAnd* const andp = VN_CAST(rhsp, And)) {
|
||||
if (andp->lhsp()->sameTree(m_mgCondp)) {
|
||||
return condTrue ? maskLsb(andp->rhsp()->unlinkFrBack())
|
||||
: new AstConst(rhsp->fileline(), AstConst::LogicFalse());
|
||||
} else {
|
||||
UASSERT_OBJ(andp->rhsp()->sameTree(m_mgCondp), rhsp,
|
||||
"AstAnd doesn't hold condition expression");
|
||||
return condTrue ? maskLsb(andp->lhsp()->unlinkFrBack())
|
||||
: new AstConst(rhsp->fileline(), AstConst::LogicFalse());
|
||||
}
|
||||
}
|
||||
rhsp->v3fatal("Don't know how to fold expression");
|
||||
}
|
||||
|
||||
void mergeEnd() {
|
||||
UASSERT(m_mgFirstp, "mergeEnd without list");
|
||||
// Merge if list is longer than one node
|
||||
if (m_mgFirstp != m_mgLastp) {
|
||||
UINFO(6, "MergeCond - First: " << m_mgFirstp << " Last: " << m_mgLastp << endl);
|
||||
++m_statMerges;
|
||||
if (m_listLenght > m_statLongestList) m_statLongestList = m_listLenght;
|
||||
|
||||
// Create equivalent 'if' statement and insert it before the first node
|
||||
AstIf* const ifp
|
||||
= new AstIf(m_mgCondp->fileline(), m_mgCondp->unlinkFrBack(), NULL, NULL);
|
||||
m_mgFirstp->replaceWith(ifp);
|
||||
ifp->addNextHere(m_mgFirstp);
|
||||
// Unzip the list and insert under branches
|
||||
AstNode* nextp = m_mgFirstp;
|
||||
do {
|
||||
// Grab next pointer and unlink
|
||||
AstNode* const currp = nextp;
|
||||
nextp = currp != m_mgLastp ? currp->nextp() : NULL;
|
||||
currp->unlinkFrBack();
|
||||
// Skip over comments
|
||||
if (VN_IS(currp, Comment)) {
|
||||
VL_DO_DANGLING(currp->deleteTree(), currp);
|
||||
continue;
|
||||
}
|
||||
// Count
|
||||
++m_statMergedItems;
|
||||
// Unlink RHS and clone to get the 2 assignments (reusing currp)
|
||||
AstNodeAssign* const thenp = VN_CAST(currp, NodeAssign);
|
||||
AstNode* const rhsp = thenp->rhsp()->unlinkFrBack();
|
||||
AstNodeAssign* const elsep = thenp->cloneTree(false);
|
||||
// Construct the new RHSs and add to branches
|
||||
thenp->rhsp(foldAndUnlink(rhsp, true));
|
||||
elsep->rhsp(foldAndUnlink(rhsp, false));
|
||||
ifp->addIfsp(thenp);
|
||||
ifp->addElsesp(elsep);
|
||||
// Cleanup
|
||||
VL_DO_DANGLING(rhsp->deleteTree(), rhsp);
|
||||
} while (nextp);
|
||||
}
|
||||
// Reset state
|
||||
m_mgFirstp = NULL;
|
||||
m_mgCondp = NULL;
|
||||
m_mgLastp = NULL;
|
||||
m_mgNextp = NULL;
|
||||
}
|
||||
|
||||
void addToList(AstNode* nodep, AstNode* condp) {
|
||||
// Set up head of new list if node is first in list
|
||||
if (!m_mgFirstp) {
|
||||
UASSERT_OBJ(condp, nodep, "Cannot start new list without condition");
|
||||
m_mgFirstp = nodep;
|
||||
m_mgCondp = condp;
|
||||
m_listLenght = 0;
|
||||
m_markVars(condp);
|
||||
}
|
||||
// Add node
|
||||
++m_listLenght;
|
||||
// Track end of list
|
||||
m_mgLastp = nodep;
|
||||
// Set up expected next node in list. Skip over any comments, (inserted
|
||||
// by V3Order before always blocks)
|
||||
m_mgNextp = nodep->nextp();
|
||||
while (m_mgNextp && VN_IS(m_mgNextp, Comment)) { m_mgNextp = m_mgNextp->nextp(); }
|
||||
// If last under parent, done with current list
|
||||
if (!m_mgNextp) mergeEnd();
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
virtual void visit(AstNodeAssign* nodep) VL_OVERRIDE {
|
||||
AstNode* const rhsp = nodep->rhsp();
|
||||
if (AstNodeCond* const condp = extractCond(rhsp)) {
|
||||
if (!m_checkMergeable(nodep)) {
|
||||
// Node not mergeable.
|
||||
// Finish current list if any, do not start a new one.
|
||||
if (m_mgFirstp) mergeEnd();
|
||||
return;
|
||||
}
|
||||
if (m_mgFirstp && (m_mgNextp != nodep || !condp->condp()->sameTree(m_mgCondp))) {
|
||||
// Node in different list, or has different condition.
|
||||
// Finish current list, addToList will start a new one.
|
||||
mergeEnd();
|
||||
}
|
||||
// Add current node
|
||||
addToList(nodep, condp->condp());
|
||||
} else if (m_mgFirstp) {
|
||||
// RHS is not a conditional, but we already started a list.
|
||||
// If it's a 1-bit signal, and a mergeable assignment, try reduced forms
|
||||
if (rhsp->widthMin() == 1 && m_checkMergeable(nodep)) {
|
||||
// Is it a 'lhs = cond & value' or 'lhs = value & cond'?
|
||||
if (AstAnd* const andp = VN_CAST(rhsp, And)) {
|
||||
if (andp->lhsp()->sameTree(m_mgCondp) || andp->rhsp()->sameTree(m_mgCondp)) {
|
||||
addToList(nodep, NULL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Is it simply 'lhs = cond'?
|
||||
if (rhsp->sameTree(m_mgCondp)) {
|
||||
addToList(nodep, NULL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Not added to list, so we are done with the current list
|
||||
mergeEnd();
|
||||
}
|
||||
}
|
||||
virtual void visit(AstComment*) VL_OVERRIDE {} // Skip over comments
|
||||
// For speed, only iterate what is necessary.
|
||||
virtual void visit(AstNetlist* nodep) VL_OVERRIDE { iterateAndNextNull(nodep->modulesp()); }
|
||||
virtual void visit(AstNodeModule* nodep) VL_OVERRIDE { iterateAndNextNull(nodep->stmtsp()); }
|
||||
virtual void visit(AstCFunc* nodep) VL_OVERRIDE {
|
||||
iterateChildren(nodep);
|
||||
// Close list, if there is one at the end of the function
|
||||
if (m_mgFirstp) mergeEnd();
|
||||
}
|
||||
virtual void visit(AstNodeStmt* nodep) VL_OVERRIDE { iterateChildren(nodep); }
|
||||
virtual void visit(AstNode* nodep) VL_OVERRIDE {}
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit MergeCondVisitor(AstNetlist* nodep) {
|
||||
m_mgFirstp = NULL;
|
||||
m_mgCondp = NULL;
|
||||
m_mgLastp = NULL;
|
||||
m_mgNextp = NULL;
|
||||
m_listLenght = 0;
|
||||
iterate(nodep);
|
||||
}
|
||||
virtual ~MergeCondVisitor() {
|
||||
V3Stats::addStat("Optimizations, MergeCond merges", m_statMerges);
|
||||
V3Stats::addStat("Optimizations, MergeCond merged items", m_statMergedItems);
|
||||
V3Stats::addStat("Optimizations, MergeCond longest merge", m_statLongestList);
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// MergeConditionals class functions
|
||||
|
||||
// Run the MergeCond pass over the netlist, then dump/check the resulting tree
void V3MergeCond::mergeAll(AstNetlist* nodep) {
    UINFO(2, __FUNCTION__ << ": " << endl);
    {
        // All work happens in the constructor. The scope ensures the visitor
        // is destroyed (adding its statistics) before the tree is dumped.
        MergeCondVisitor merger(nodep);
    }
    const bool doDump = v3Global.opt.dumpTreeLevel(__FILE__) >= 6;
    V3Global::dumpCheckGlobalTree("merge_cond", 0, doDump);
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Merge branches/ternary ?:
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2020 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#ifndef _V3MERGECOND_H_
|
||||
#define _V3MERGECOND_H_ 1
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
#include "V3Error.h"
|
||||
#include "V3Ast.h"
|
||||
|
||||
//============================================================================
|
||||
|
||||
class V3MergeCond {
|
||||
public:
|
||||
static void mergeAll(AstNetlist* nodep);
|
||||
};
|
||||
|
||||
#endif // Guard
|
||||
|
|
@ -917,12 +917,12 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
|
|||
case 'b': m_oCombine = flag; break;
|
||||
case 'c': m_oConst = flag; break;
|
||||
case 'd': m_oDedupe = flag; break;
|
||||
case 'm': m_oAssemble = flag; break;
|
||||
case 'e': m_oCase = flag; break;
|
||||
case 'g': m_oGate = flag; break;
|
||||
case 'i': m_oInline = flag; break;
|
||||
case 'k': m_oSubstConst = flag; break;
|
||||
case 'l': m_oLife = flag; break;
|
||||
case 'm': m_oAssemble = flag; break;
|
||||
case 'p':
|
||||
m_public = !flag;
|
||||
break; // With -Op so flag=0, we want public on so few optimizations done
|
||||
|
|
@ -931,6 +931,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
|
|||
case 't': m_oLifePost = flag; break;
|
||||
case 'u': m_oSubst = flag; break;
|
||||
case 'v': m_oReloop = flag; break;
|
||||
case 'w': m_oMergeCond = flag; break;
|
||||
case 'x': m_oExpand = flag; break;
|
||||
case 'y': m_oAcycSimp = flag; break;
|
||||
case 'z': m_oLocalize = flag; break;
|
||||
|
|
@ -1706,23 +1707,24 @@ void V3Options::optimize(int level) {
|
|||
// Set all optimizations to on/off
|
||||
bool flag = level > 0;
|
||||
m_oAcycSimp = flag;
|
||||
m_oAssemble = flag;
|
||||
m_oCase = flag;
|
||||
m_oCombine = flag;
|
||||
m_oConst = flag;
|
||||
m_oDedupe = flag;
|
||||
m_oExpand = flag;
|
||||
m_oGate = flag;
|
||||
m_oInline = flag;
|
||||
m_oLife = flag;
|
||||
m_oLifePost = flag;
|
||||
m_oLocalize = flag;
|
||||
m_oMergeCond = flag;
|
||||
m_oReloop = flag;
|
||||
m_oReorder = flag;
|
||||
m_oSplit = flag;
|
||||
m_oSubst = flag;
|
||||
m_oSubstConst = flag;
|
||||
m_oTable = flag;
|
||||
m_oDedupe = flag;
|
||||
m_oAssemble = flag;
|
||||
// And set specific optimization levels
|
||||
if (level >= 3) {
|
||||
m_inlineMult = -1; // Maximum inlining
|
||||
|
|
|
|||
|
|
@ -329,17 +329,18 @@ private:
|
|||
// MEMBERS (optimizations)
|
||||
// // main switch: -Op: --public
|
||||
bool m_oAcycSimp; // main switch: -Oy: acyclic pre-optimizations
|
||||
bool m_oAssemble; // main switch: -Om: assign assemble
|
||||
bool m_oCase; // main switch: -Oe: case tree conversion
|
||||
bool m_oCombine; // main switch: -Ob: common icode packing
|
||||
bool m_oConst; // main switch: -Oc: constant folding
|
||||
bool m_oDedupe; // main switch: -Od: logic deduplication
|
||||
bool m_oAssemble; // main switch: -Om: assign assemble
|
||||
bool m_oExpand; // main switch: -Ox: expansion of C macros
|
||||
bool m_oGate; // main switch: -Og: gate wire elimination
|
||||
bool m_oInline; // main switch: -Oi: module inlining
|
||||
bool m_oLife; // main switch: -Ol: variable lifetime
|
||||
bool m_oLifePost; // main switch: -Ot: delayed assignment elimination
|
||||
bool m_oLocalize; // main switch: -Oz: convert temps to local variables
|
||||
bool m_oInline; // main switch: -Oi: module inlining
|
||||
bool m_oMergeCond; // main switch: -Ow: merge conditionals
|
||||
bool m_oReloop; // main switch: -Ov: reform loops
|
||||
bool m_oReorder; // main switch: -Or: reorder assignments in blocks
|
||||
bool m_oSplit; // main switch: -Os: always assignment splitting
|
||||
|
|
@ -548,18 +549,18 @@ public:
|
|||
|
||||
// ACCESSORS (optimization options)
|
||||
bool oAcycSimp() const { return m_oAcycSimp; }
|
||||
bool oAssemble() const { return m_oAssemble; }
|
||||
bool oCase() const { return m_oCase; }
|
||||
bool oCombine() const { return m_oCombine; }
|
||||
bool oConst() const { return m_oConst; }
|
||||
bool oDedupe() const { return m_oDedupe; }
|
||||
bool oAssemble() const { return m_oAssemble; }
|
||||
bool oExpand() const { return m_oExpand; }
|
||||
bool oGate() const { return m_oGate; }
|
||||
bool oDup() const { return oLife(); }
|
||||
bool oInline() const { return m_oInline; }
|
||||
bool oLife() const { return m_oLife; }
|
||||
bool oLifePost() const { return m_oLifePost; }
|
||||
bool oLocalize() const { return m_oLocalize; }
|
||||
bool oInline() const { return m_oInline; }
|
||||
bool oMergeCond() const { return m_oMergeCond; }
|
||||
bool oReloop() const { return m_oReloop; }
|
||||
bool oReorder() const { return m_oReorder; }
|
||||
bool oSplit() const { return m_oSplit; }
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@
|
|||
#include "V3LinkParse.h"
|
||||
#include "V3LinkResolve.h"
|
||||
#include "V3Localize.h"
|
||||
#include "V3MergeCond.h"
|
||||
#include "V3Name.h"
|
||||
#include "V3Order.h"
|
||||
#include "V3Os.h"
|
||||
|
|
@ -423,13 +424,18 @@ static void process() {
|
|||
V3Dead::deadifyAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && v3Global.opt.oReloop()) {
|
||||
// Reform loops to reduce code size
|
||||
// Must be after all Sel/array index based optimizations
|
||||
V3Reloop::reloopAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly()) {
|
||||
if (v3Global.opt.oMergeCond()) {
|
||||
// Merge conditionals
|
||||
V3MergeCond::mergeAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
if (v3Global.opt.oReloop()) {
|
||||
// Reform loops to reduce code size
|
||||
// Must be after all Sel/array index based optimizations
|
||||
V3Reloop::reloopAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
// Fix very deep expressions
|
||||
// Mark evaluation functions as member functions, if needed.
|
||||
V3Depth::depthAll(v3Global.rootp());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env perl
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2003 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0

scenarios(vlt_all => 1);

# Build with statistics enabled so the MergeCond counters can be inspected
compile(verilator_flags2 => ["-unroll-count 64", "--stats"]);

execute(check_finished => 1);

if ($Self->{vlt}) {
    # Note, with vltmt this might be split differently, so only checking vlt
    # Each entry is: [pattern capturing the statistic, expected value]
    my @expectations = (
        [qr/Optimizations, MergeCond merges\s+(\d+)/i, 10],
        [qr/Optimizations, MergeCond merged items\s+(\d+)/i, 640],
        [qr/Optimizations, MergeCond longest merge\s+(\d+)/i, 64],
    );
    foreach my $expectation (@expectations) {
        my ($pattern, $wanted) = @$expectation;
        file_grep($Self->{stats}, $pattern, $wanted);
    }
}

ok(1);
1;
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2020 by Geza Lore.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
`define check(got ,exp) do if ((got) !== (exp)) begin $write("%%Error: %s:%0d: cyc=%0d got='h%x exp='h%x\n", `__FILE__,`__LINE__, cyc, (got), (exp)); $stop; end while(0)
|
||||
|
||||
// Test module for the MergeCond optimization: builds many adjacent
// assignments with a ?: (or a 1-bit reduced form) on the right hand side,
// and checks every assigned element against the value computed directly.
module t (/*AUTOARG*/
   // Inputs
   clk
   );
   input clk;

   integer cyc=0;
   reg [63:0] crc= 64'h5aef0c8d_d70a4497;
   reg [63:0] prev_crc;

   // Cycle counter and pseudo random stimulus
   always @ (posedge clk) begin
      cyc <= cyc + 1;
      crc <= {crc[62:0], crc[63]^crc[2]^crc[0]};

      prev_crc <= crc;
      if (cyc==99) begin
         $write("*-* All Finished *-*\n");
         $finish;
      end
   end

   wire cond2 = &crc[1:0];
   wire cond3 = &crc[2:0];

   reg shuf_q [63:0];

   always @(posedge clk) begin
      reg bits [63:0];
      reg shuf_a [63:0];
      reg shuf_b [63:0];
      reg shuf_c [63:0];
      reg shuf_d [63:0];
      reg shuf_e [63:0];

      // Unpack these to test core algorithm
      for (int i = 0; i < 64; i = i + 1) begin
         bits[i] = crc[i];
      end

      for (int i = 0; i < 64; i = i + 1) begin
         shuf_a[i] = cyc[0] ? bits[i] : bits[63-i];
      end

      if (cyc[1]) begin
         for (int i = 0; i < 64; i = i + 1) begin
            shuf_b[i] = cyc[0] ? bits[i] : bits[63-i];
         end
      end else begin
         for (int i = 0; i < 64; i = i + 1) begin
            shuf_b[i] = cyc[0] ? bits[63-i] : bits[i];
         end
      end

      // Also test merge under clean/bit extract
      for (int i = 0; i < 64; i = i + 1) begin
         shuf_c[i] = cyc[0] ? crc[i] : crc[63-i];
      end

      // Merge with 'cond & value', 'value & cond', or 'cond'
      shuf_d[0] = cond2 ? bits[0] : bits[63];
      for (int i = 1; i < 32; i = i + 2) begin
         shuf_d[i] = cond2 & bits[i];
      end
      for (int i = 2; i < 32; i = i + 2) begin
         shuf_d[i] = bits[i] & cond2;
      end
      for (int i = 32; i < 64; i = i + 1) begin
         shuf_d[i] = cond2;
      end

      // Merge with an '&' also used for masking of LSB.
      shuf_e[0] = cond3 ? bits[0] : bits[63];
      for (int i = 1; i < 64; i = i + 1) begin
         shuf_e[i] = cond3 & crc[0];
      end

      // Also delayed..
      for (int i = 0; i < 64; i = i + 1) begin
         shuf_q[i] <= cyc[0] ? crc[i] : crc[63-i];
      end

      // Check results

      if (cyc[0]) begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_a[i], crc[i]);
      end else begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_a[i], crc[63-i]);
      end

      if (cyc[0] ~^ cyc[1]) begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_b[i], crc[i]);
      end else begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_b[i], crc[63-i]);
      end

      if (cyc[0]) begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_c[i], crc[i]);
      end else begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_c[i], crc[63-i]);
      end

      // The upper loops run to 64 so that the last assigned element (index 63)
      // of shuf_d and shuf_e is verified as well
      if (cond2) begin
         `check(shuf_d[0], crc[0]);
         for (int i = 1; i < 32; i = i + 1) `check(shuf_d[i], crc[i]);
         for (int i = 32; i < 64; i = i + 1) `check(shuf_d[i], 1'd1);
      end else begin
         `check(shuf_d[0], crc[63]);
         for (int i = 1; i < 32; i = i + 1) `check(shuf_d[i], 1'b0);
         for (int i = 32; i < 64; i = i + 1) `check(shuf_d[i], 1'd0);
      end

      if (cond3) begin
         `check(shuf_e[0], crc[0]);
         for (int i = 1; i < 64; i = i + 1) `check(shuf_e[i], crc[0]);
      end else begin
         `check(shuf_e[0], crc[63]);
         for (int i = 1; i < 64; i = i + 1) `check(shuf_e[i], 1'b0);
      end

      // Delayed assignments hold values computed from the previous cycle's state
      if (cyc > 0) begin
         if (~cyc[0]) begin
            for (int i = 0; i < 64; i = i + 1) `check(shuf_q[i], prev_crc[i]);
         end else begin
            for (int i = 0; i < 64; i = i + 1) `check(shuf_q[i], prev_crc[63-i]);
         end

         if (((cyc - 1) >> 1) % 2 == 1) begin
            for (int i = 0; i < 64; i = i + 1) `check(shuf_g[i], prev_crc[i]);
         end else begin
            for (int i = 0; i < 64; i = i + 1) `check(shuf_g[i], prev_crc[63-i]);
         end
      end

      if (cyc[2]) begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_w[i], crc[i]);
      end else begin
         for (int i = 0; i < 64; i = i + 1) `check(shuf_w[i], crc[63-i]);
      end
   end

   // Generated always
   reg shuf_g [63:0];
   generate for (genvar i = 0 ; i < 64; i = i + 1)
     always @(posedge clk) begin
        shuf_g[i] <= cyc[1] ? crc[i] : crc[63-i];
     end
   endgenerate

   // Generated assign
   wire shuf_w [63:0];
   generate for (genvar i = 0 ; i < 64; i = i + 1)
     assign shuf_w[i] = cyc[2] ? crc[i] : crc[63-i];
   endgenerate

   // Things not to merge
   always @(posedge clk) begin
      reg x;
      reg y;
      reg z;
      reg w;

      // Do not merge if condition appears in an LVALUE
      x = crc[0];
      y = x ? crc[2] : crc[1];
      x = x ? crc[3] : crc[4];
      x = x ? crc[5] : crc[6];

      `check(x, (crc[0] ? crc[3] : crc[4]) ? crc[5] : crc[6]);
      `check(y, crc[0] ? crc[2] : crc[1]);

      // Do not merge if condition is not a pure expression
      $c("int _cnt = 0;");
      x = $c("_cnt++") ? crc[0] : crc[1];
      y = $c("_cnt++") ? crc[2] : crc[3];
      z = $c("_cnt++") ? crc[4] : crc[5];
      w = $c("_cnt++") ? crc[6] : crc[7];
      $c("if (_cnt != 4) abort();");

      `check(x, crc[1]);
      `check(y, crc[2]);
      `check(z, crc[4]);
      `check(w, crc[6]);
   end

endmodule
|
||||
Loading…
Reference in New Issue