Improve trace buffer memory utilization and performance.

Convert trace buffer to 32-bit entries, rather than a union containing a
pointer type. Also tweaked trace entry layouts for a bit more
performance. This gains another 10% on SweRV EH1 CoreMark.
This commit is contained in:
Geza Lore 2020-04-25 22:38:25 +01:00
parent b79ef672e1
commit dd967f7769
5 changed files with 147 additions and 140 deletions

View File

@ -81,36 +81,26 @@ public:
} }
}; };
// Commands used by thread tracing. Note that the bottom 8 bits of all these // Commands used by thread tracing. Anonymous enum in class, as we want
// values are empty and are used to store small amounts of additional command // it scoped, but we also want the automatic conversion to integer types.
// parameters. Anonymous enum in class, as we want it scoped, but we also
// want the automatic conversion to integer types.
class VerilatedTraceCommand { class VerilatedTraceCommand {
public: public:
// These must all fit in 4 bit at the moment, as the tracing routines
// pack parameters in the top bits.
enum { enum {
CHG_BIT = 0x0000, CHG_BIT_0 = 0x0,
CHG_BUS = 0x0100, CHG_BIT_1 = 0x1,
CHG_QUAD = 0x0200, CHG_BUS = 0x2,
CHG_ARRAY = 0x0300, CHG_QUAD = 0x3,
CHG_FLOAT = 0x0400, CHG_ARRAY = 0x4,
CHG_DOUBLE = 0x0500, CHG_FLOAT = 0x5,
TIME_CHANGE = 0x8000, CHG_DOUBLE = 0x6,
END = 0xf000, // End of buffer // TODO: full..
SHUTDOWN = 0xf200 // Shutdown worker thread, also marks end of buffer TIME_CHANGE = 0xd,
END = 0xe, // End of buffer
SHUTDOWN = 0xf // Shutdown worker thread, also marks end of buffer
}; };
}; };
typedef union {
vluint32_t cmd; // Command code + params in bottom 8 bits
vluint32_t* oldp; // Pointer to previous value buffer to consult/update
// Note: These are 64-bit wide, as this union already has a pointer type in it.
vluint64_t params; // Command parameter
// New signal value in various forms
vluint64_t newBits;
float newFloat;
double newDouble;
vluint64_t timeui;
} VerilatedTraceEntry;
#endif #endif
class VerilatedTraceCallInfo; class VerilatedTraceCallInfo;
@ -149,18 +139,18 @@ private:
size_t m_traceBufferSize; size_t m_traceBufferSize;
// Buffers handed to worker for processing // Buffers handed to worker for processing
VerilatedThreadQueue<VerilatedTraceEntry*> m_buffersToWorker; VerilatedThreadQueue<vluint32_t*> m_buffersToWorker;
// Buffers returned from worker after processing // Buffers returned from worker after processing
VerilatedThreadQueue<VerilatedTraceEntry*> m_buffersFromWorker; VerilatedThreadQueue<vluint32_t*> m_buffersFromWorker;
// Get a new trace buffer that can be populated. May block if none available // Get a new trace buffer that can be populated. May block if none available
VerilatedTraceEntry* getTraceBuffer(); vluint32_t* getTraceBuffer();
// Write pointer into current buffer // Write pointer into current buffer
VerilatedTraceEntry* m_traceBufferWritep; vluint32_t* m_traceBufferWritep;
// End of trace buffer // End of trace buffer
VerilatedTraceEntry* m_traceBufferEndp; vluint32_t* m_traceBufferEndp;
// The worker thread itself // The worker thread itself
std::unique_ptr<std::thread> m_workerThread; std::unique_ptr<std::thread> m_workerThread;
@ -169,7 +159,7 @@ private:
void workerThreadMain(); void workerThreadMain();
// Wait until given buffer is placed in m_buffersFromWorker // Wait until given buffer is placed in m_buffersFromWorker
void waitForBuffer(const VerilatedTraceEntry* bufferp); void waitForBuffer(const vluint32_t* bufferp);
// Shut down and join worker, if it's running, otherwise do nothing // Shut down and join worker, if it's running, otherwise do nothing
void shutdownWorker(); void shutdownWorker();
@ -280,48 +270,47 @@ public:
#ifdef VL_TRACE_THREADED #ifdef VL_TRACE_THREADED
// Threaded tracing. Just dump everything in the trace buffer // Threaded tracing. Just dump everything in the trace buffer
inline void chgBit(vluint32_t* oldp, vluint32_t newval) { inline void chgBit(vluint32_t code, vluint32_t newval) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BIT | newval; m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_BIT_0 | newval;
m_traceBufferWritep[1].oldp = oldp; m_traceBufferWritep[1] = code;
m_traceBufferWritep += 2; m_traceBufferWritep += 2;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
} }
inline void chgBus(vluint32_t* oldp, vluint32_t newval, int bits) { inline void chgBus(vluint32_t code, vluint32_t newval, int bits) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | bits; m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_BUS;
m_traceBufferWritep[1].oldp = oldp; m_traceBufferWritep[1] = code;
m_traceBufferWritep[2].newBits = newval; m_traceBufferWritep[2] = newval;
m_traceBufferWritep += 3; m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
} }
inline void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) { inline void chgQuad(vluint32_t code, vluint64_t newval, int bits) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_QUAD | bits; m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_QUAD;
m_traceBufferWritep[1].oldp = oldp; m_traceBufferWritep[1] = code;
m_traceBufferWritep[2].newBits = newval; *reinterpret_cast<vluint64_t*>(m_traceBufferWritep + 2) = newval;
m_traceBufferWritep += 4;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgArray(vluint32_t code, const vluint32_t* newvalp, int bits) {
m_traceBufferWritep[0] = (bits << 4) | VerilatedTraceCommand::CHG_ARRAY;
m_traceBufferWritep[1] = code;
m_traceBufferWritep += 2;
for (int i = 0; i < (bits + 31) / 32; ++i) { *m_traceBufferWritep++ = newvalp[i]; }
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgFloat(vluint32_t code, float newval) {
m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_FLOAT;
m_traceBufferWritep[1] = code;
// cppcheck-suppress invalidPointerCast
*reinterpret_cast<float*>(m_traceBufferWritep + 2) = newval;
m_traceBufferWritep += 3; m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
} }
inline void chgArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits) { inline void chgDouble(vluint32_t code, double newval) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_ARRAY; m_traceBufferWritep[0] = VerilatedTraceCommand::CHG_DOUBLE;
m_traceBufferWritep[1].oldp = oldp; m_traceBufferWritep[1] = code;
m_traceBufferWritep[2].params = bits; // cppcheck-suppress invalidPointerCast
m_traceBufferWritep += 3; *reinterpret_cast<double*>(m_traceBufferWritep + 2) = newval;
vluint32_t* const wp = reinterpret_cast<vluint32_t*>(m_traceBufferWritep); m_traceBufferWritep += 4;
for (int i = 0; i < (bits + 31) / 32; ++i) { wp[i] = newvalp[i]; }
m_traceBufferWritep += (bits + 63) / 64;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgFloat(vluint32_t* oldp, float newval) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_FLOAT;
m_traceBufferWritep[1].oldp = oldp;
m_traceBufferWritep[2].newFloat = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
}
inline void chgDouble(vluint32_t* oldp, double newval) {
m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_DOUBLE;
m_traceBufferWritep[1].oldp = oldp;
m_traceBufferWritep[2].newDouble = newval;
m_traceBufferWritep += 3;
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp););
} }

View File

@ -101,8 +101,8 @@ public: // This is in .cpp file so is not widely visible
//========================================================================= //=========================================================================
// Buffer management // Buffer management
template <> VerilatedTraceEntry* VerilatedTrace<VL_DERIVED_T>::getTraceBuffer() { template <> vluint32_t* VerilatedTrace<VL_DERIVED_T>::getTraceBuffer() {
VerilatedTraceEntry* bufferp; vluint32_t* bufferp;
// Some jitter is expected, so some number of alternative trace buffers are // Some jitter is expected, so some number of alternative trace buffers are
// required, but don't allocate more than 8 buffers. // required, but don't allocate more than 8 buffers.
if (m_numTraceBuffers < 8) { if (m_numTraceBuffers < 8) {
@ -111,7 +111,7 @@ template <> VerilatedTraceEntry* VerilatedTrace<VL_DERIVED_T>::getTraceBuffer()
++m_numTraceBuffers; ++m_numTraceBuffers;
// Note: over allocate a bit so pointer comparison is well defined // Note: over allocate a bit so pointer comparison is well defined
// if we overflow only by a small amount // if we overflow only by a small amount
bufferp = new VerilatedTraceEntry[m_traceBufferSize + 16]; bufferp = new vluint32_t[m_traceBufferSize + 16];
} }
} else { } else {
// Block until a buffer becomes available // Block until a buffer becomes available
@ -120,10 +120,10 @@ template <> VerilatedTraceEntry* VerilatedTrace<VL_DERIVED_T>::getTraceBuffer()
return bufferp; return bufferp;
} }
template <> void VerilatedTrace<VL_DERIVED_T>::waitForBuffer(const VerilatedTraceEntry* buffp) { template <> void VerilatedTrace<VL_DERIVED_T>::waitForBuffer(const vluint32_t* buffp) {
// Slow path code only called on flush/shutdown, so use a simple algorithm. // Slow path code only called on flush/shutdown, so use a simple algorithm.
// Collect buffers from worker and stash them until we get the one we want. // Collect buffers from worker and stash them until we get the one we want.
std::deque<VerilatedTraceEntry*> stash; std::deque<vluint32_t*> stash;
do { stash.push_back(m_buffersFromWorker.get()); } while (stash.back() != buffp); do { stash.push_back(m_buffersFromWorker.get()); } while (stash.back() != buffp);
// Now put them back in the queue, in the original order. // Now put them back in the queue, in the original order.
while (!stash.empty()) { while (!stash.empty()) {
@ -136,65 +136,71 @@ template <> void VerilatedTrace<VL_DERIVED_T>::waitForBuffer(const VerilatedTrac
// Worker thread // Worker thread
template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() { template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() {
while (true) { bool shutdown = false;
VerilatedTraceEntry* const bufferp = m_buffersToWorker.get();
do {
vluint32_t* const bufferp = m_buffersToWorker.get();
VL_TRACE_THREAD_DEBUG(""); VL_TRACE_THREAD_DEBUG("");
VL_TRACE_THREAD_DEBUG("Got buffer"); VL_TRACE_THREAD_DEBUG("Got buffer: " << bufferp);
const VerilatedTraceEntry* readp = bufferp; const vluint32_t* readp = bufferp;
vluint32_t cmd;
unsigned bits;
vluint32_t* oldp;
vluint64_t newBits;
while (true) { while (true) {
cmd = (readp++)->cmd; const vluint32_t cmd = readp[0];
const vluint32_t top = cmd >> 4;
// Always set this up, as it is almost always needed
vluint32_t* const oldp = m_sigs_oldvalp + readp[1];
// Note this increment needs to be undone on commands which do not
// actually contain a code, but those are the rare cases.
readp += 2;
switch (cmd & ~0xFFU) { switch (cmd & 0xF) {
//=== //===
// CHG_* commands // CHG_* commands
case VerilatedTraceCommand::CHG_BIT: case VerilatedTraceCommand::CHG_BIT_0:
VL_TRACE_THREAD_DEBUG("Command CHG_BIT"); VL_TRACE_THREAD_DEBUG("Command CHG_BIT_0 " << top);
chgBitImpl((readp++)->oldp, cmd & 1); chgBitImpl(oldp, 0);
continue;
case VerilatedTraceCommand::CHG_BIT_1:
VL_TRACE_THREAD_DEBUG("Command CHG_BIT_1 " << top);
chgBitImpl(oldp, 1);
continue; continue;
case VerilatedTraceCommand::CHG_BUS: case VerilatedTraceCommand::CHG_BUS:
VL_TRACE_THREAD_DEBUG("Command CHG_BUS"); VL_TRACE_THREAD_DEBUG("Command CHG_BUS " << top);
// Bits stored in bottom byte of command // Bits stored in bottom byte of command
chgBusImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL); chgBusImpl(oldp, *readp, top);
readp += 2; readp += 1;
VL_TRACE_THREAD_DEBUG("Command CHG_BUS DONE");
continue; continue;
case VerilatedTraceCommand::CHG_QUAD: case VerilatedTraceCommand::CHG_QUAD:
VL_TRACE_THREAD_DEBUG("Command CHG_QUAD"); VL_TRACE_THREAD_DEBUG("Command CHG_QUAD " << top);
// Bits stored in bottom byte of command // Bits stored in bottom byte of command
chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFFULL); chgQuadImpl(oldp, *reinterpret_cast<const vluint64_t*>(readp), top);
readp += 2; readp += 2;
continue; continue;
case VerilatedTraceCommand::CHG_ARRAY: case VerilatedTraceCommand::CHG_ARRAY:
VL_TRACE_THREAD_DEBUG("Command CHG_CHG_ARRAY"); VL_TRACE_THREAD_DEBUG("Command CHG_ARRAY " << top);
oldp = (readp++)->oldp; chgArrayImpl(oldp, readp, top);
bits = (readp++)->params; readp += (top + 31) / 32;
chgArrayImpl(oldp, reinterpret_cast<const vluint32_t*>(readp), bits);
readp += (bits + 63) / 64;
continue; continue;
case VerilatedTraceCommand::CHG_FLOAT: case VerilatedTraceCommand::CHG_FLOAT:
VL_TRACE_THREAD_DEBUG("Command CHG_FLOAT"); VL_TRACE_THREAD_DEBUG("Command CHG_FLOAT " << top);
chgFloatImpl(readp[0].oldp, readp[1].newFloat); chgFloatImpl(oldp, *reinterpret_cast<const float*>(readp));
readp += 2; readp += 1;
continue; continue;
case VerilatedTraceCommand::CHG_DOUBLE: case VerilatedTraceCommand::CHG_DOUBLE:
VL_TRACE_THREAD_DEBUG("Command CHG_DOUBLE"); VL_TRACE_THREAD_DEBUG("Command CHG_DOUBLE " << top);
chgDoubleImpl(readp[0].oldp, readp[1].newDouble); chgDoubleImpl(oldp, *reinterpret_cast<const double*>(readp));
readp += 2; readp += 2;
continue; continue;
//=== //===
// Rare commands // Rare commands
case VerilatedTraceCommand::TIME_CHANGE: case VerilatedTraceCommand::TIME_CHANGE:
VL_TRACE_THREAD_DEBUG("Command TIME_CHANGE"); VL_TRACE_THREAD_DEBUG("Command TIME_CHANGE " << top);
emitTimeChange((readp++)->timeui); readp -= 1; // No code in this command, undo increment
emitTimeChange(*reinterpret_cast<const vluint64_t*>(readp));
readp += 2;
continue; continue;
//=== //===
@ -202,11 +208,13 @@ template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() {
case VerilatedTraceCommand::END: VL_TRACE_THREAD_DEBUG("Command END"); break; case VerilatedTraceCommand::END: VL_TRACE_THREAD_DEBUG("Command END"); break;
case VerilatedTraceCommand::SHUTDOWN: case VerilatedTraceCommand::SHUTDOWN:
VL_TRACE_THREAD_DEBUG("Command SHUTDOWN"); VL_TRACE_THREAD_DEBUG("Command SHUTDOWN");
shutdown = true;
break; break;
//=== //===
// Unknown command // Unknown command
default: default:
VL_TRACE_THREAD_DEBUG("Command UNKNOWN");
VL_PRINTF_MT("Trace command: 0x%08x\n", cmd); VL_PRINTF_MT("Trace command: 0x%08x\n", cmd);
VL_FATAL_MT(__FILE__, __LINE__, "", "Unknown trace command"); VL_FATAL_MT(__FILE__, __LINE__, "", "Unknown trace command");
break; break;
@ -221,10 +229,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::workerThreadMain() {
// Return buffer // Return buffer
m_buffersFromWorker.put(bufferp); m_buffersFromWorker.put(bufferp);
} while (VL_LIKELY(!shutdown));
// Shut down if required
if (VL_UNLIKELY(cmd == VerilatedTraceCommand::SHUTDOWN)) return;
}
} }
template <> void VerilatedTrace<VL_DERIVED_T>::shutdownWorker() { template <> void VerilatedTrace<VL_DERIVED_T>::shutdownWorker() {
@ -232,8 +237,8 @@ template <> void VerilatedTrace<VL_DERIVED_T>::shutdownWorker() {
if (!m_workerThread) return; if (!m_workerThread) return;
// Hand an buffer with a shutdown command to the worker thread // Hand an buffer with a shutdown command to the worker thread
VerilatedTraceEntry* const bufferp = getTraceBuffer(); vluint32_t* const bufferp = getTraceBuffer();
bufferp[0].cmd = VerilatedTraceCommand::SHUTDOWN; bufferp[0] = VerilatedTraceCommand::SHUTDOWN;
m_buffersToWorker.put(bufferp); m_buffersToWorker.put(bufferp);
// Wait for it to return // Wait for it to return
waitForBuffer(bufferp); waitForBuffer(bufferp);
@ -260,8 +265,8 @@ template <> void VerilatedTrace<VL_DERIVED_T>::close() {
template <> void VerilatedTrace<VL_DERIVED_T>::flush() { template <> void VerilatedTrace<VL_DERIVED_T>::flush() {
#ifdef VL_TRACE_THREADED #ifdef VL_TRACE_THREADED
// Hand an empty buffer to the worker thread // Hand an empty buffer to the worker thread
VerilatedTraceEntry* const bufferp = getTraceBuffer(); vluint32_t* const bufferp = getTraceBuffer();
bufferp[0].cmd = VerilatedTraceCommand::END; *bufferp = VerilatedTraceCommand::END;
m_buffersToWorker.put(bufferp); m_buffersToWorker.put(bufferp);
// Wait for it to be returned. As the processing is in-order, // Wait for it to be returned. As the processing is in-order,
// this ensures all previous buffers have been processed. // this ensures all previous buffers have been processed.
@ -333,10 +338,10 @@ template <> void VerilatedTrace<VL_DERIVED_T>::traceInit() VL_MT_UNSAFE {
#ifdef VL_TRACE_THREADED #ifdef VL_TRACE_THREADED
// Compute trace buffer size. we need to be able to store a new value for // Compute trace buffer size. we need to be able to store a new value for
// each signal, which is 'nextCode()' entries after the init callbacks // each signal, which is 'nextCode()' entries after the init callbacks
// above have been run, plus up to 3 more words of metadata per signal, // above have been run, plus up to 2 more words of metadata per signal,
// plus fixed overhead of 1 for a termination flag and 2 for a time stamp // plus fixed overhead of 1 for a termination flag and 3 for a time stamp
// update. // update.
m_traceBufferSize = nextCode() + numSignals() * 3 + 3; m_traceBufferSize = nextCode() + numSignals() * 2 + 4;
// Start the worker thread // Start the worker thread
m_workerThread.reset(new std::thread(&VerilatedTrace<VL_DERIVED_T>::workerThreadMain, this)); m_workerThread.reset(new std::thread(&VerilatedTrace<VL_DERIVED_T>::workerThreadMain, this));
@ -406,18 +411,21 @@ template <> void VerilatedTrace<VL_DERIVED_T>::dump(vluint64_t timeui) {
} }
#ifdef VL_TRACE_THREADED #ifdef VL_TRACE_THREADED
// Get the trace buffer we are about to fill
VerilatedTraceEntry* const bufferp = getTraceBuffer();
m_traceBufferWritep = bufferp;
m_traceBufferEndp = bufferp + m_traceBufferSize;
// Currently only incremental dumps run on the worker thread // Currently only incremental dumps run on the worker thread
vluint32_t* bufferp = nullptr;
if (VL_LIKELY(!m_fullDump)) { if (VL_LIKELY(!m_fullDump)) {
// Get the trace buffer we are about to fill
bufferp = getTraceBuffer();
m_traceBufferWritep = bufferp;
m_traceBufferEndp = bufferp + m_traceBufferSize;
// Tell worker to update time point // Tell worker to update time point
(m_traceBufferWritep++)->cmd = VerilatedTraceCommand::TIME_CHANGE; m_traceBufferWritep[0] = VerilatedTraceCommand::TIME_CHANGE;
(m_traceBufferWritep++)->timeui = timeui; *reinterpret_cast<vluint64_t*>(m_traceBufferWritep + 1) = timeui;
m_traceBufferWritep += 3;
} else { } else {
// Update time point // Update time point
flush();
emitTimeChange(timeui); emitTimeChange(timeui);
} }
#else #else
@ -440,14 +448,16 @@ template <> void VerilatedTrace<VL_DERIVED_T>::dump(vluint64_t timeui) {
} }
#ifdef VL_TRACE_THREADED #ifdef VL_TRACE_THREADED
// Mark end of the trace buffer we just filled if (VL_LIKELY(bufferp)) {
(m_traceBufferWritep++)->cmd = VerilatedTraceCommand::END; // Mark end of the trace buffer we just filled
*m_traceBufferWritep++ = VerilatedTraceCommand::END;
// Assert no buffer overflow // Assert no buffer overflow
assert(m_traceBufferWritep - bufferp <= m_traceBufferSize); assert(m_traceBufferWritep - bufferp <= m_traceBufferSize);
// Pass it to the worker thread // Pass it to the worker thread
m_buffersToWorker.put(bufferp); m_buffersToWorker.put(bufferp);
}
#endif #endif
} }

View File

@ -3547,29 +3547,30 @@ class EmitCTrace : EmitCStmts {
void emitTraceChangeOne(AstTraceInc* nodep, int arrayindex) { void emitTraceChangeOne(AstTraceInc* nodep, int arrayindex) {
iterateAndNextNull(nodep->precondsp()); iterateAndNextNull(nodep->precondsp());
string full = ((m_funcp->funcType() == AstCFuncType::TRACE_FULL const bool full = (m_funcp->funcType() == AstCFuncType::TRACE_FULL
|| m_funcp->funcType() == AstCFuncType::TRACE_FULL_SUB) || m_funcp->funcType() == AstCFuncType::TRACE_FULL_SUB);
? "full" const string func = full ? "full" : "chg";
: "chg");
bool emitWidth = false; bool emitWidth = false;
if (nodep->dtypep()->basicp()->isDouble()) { if (nodep->dtypep()->basicp()->isDouble()) {
puts("vcdp->" + full + "Double"); puts("vcdp->" + func + "Double");
} else if (nodep->isWide() || emitTraceIsScBv(nodep) || emitTraceIsScBigUint(nodep)) { } else if (nodep->isWide() || emitTraceIsScBv(nodep) || emitTraceIsScBigUint(nodep)) {
puts("vcdp->" + full + "Array"); puts("vcdp->" + func + "Array");
emitWidth = true; emitWidth = true;
} else if (nodep->isQuad()) { } else if (nodep->isQuad()) {
puts("vcdp->" + full + "Quad"); puts("vcdp->" + func + "Quad");
emitWidth = true; emitWidth = true;
} else if (nodep->declp()->widthMin() > 1) { } else if (nodep->declp()->widthMin() > 1) {
puts("vcdp->" + full + "Bus"); puts("vcdp->" + func + "Bus");
emitWidth = true; emitWidth = true;
} else { } else {
puts("vcdp->" + full + "Bit"); puts("vcdp->" + func + "Bit");
} }
const uint32_t offset = (arrayindex < 0) ? 0 : (arrayindex * nodep->declp()->widthWords()); const uint32_t offset = (arrayindex < 0) ? 0 : (arrayindex * nodep->declp()->widthWords());
const uint32_t code = nodep->declp()->code() + offset; const uint32_t code = nodep->declp()->code() + offset;
puts("(oldp+" + cvtToStr(code - m_baseCode) + ","); puts(v3Global.opt.trueTraceThreads() && !full ? "(base+" : "(oldp+");
puts(cvtToStr(code - m_baseCode));
puts(",");
emitTraceValue(nodep, arrayindex); emitTraceValue(nodep, arrayindex);
if (emitWidth) puts("," + cvtToStr(nodep->declp()->widthMin())); if (emitWidth) puts("," + cvtToStr(nodep->declp()->widthMin()));
puts(");\n"); puts(");\n");
@ -3648,8 +3649,14 @@ class EmitCTrace : EmitCStmts {
nodep->stmtsp()->v3fatalSrc("Trace sub function should contain AstTraceInc"); nodep->stmtsp()->v3fatalSrc("Trace sub function should contain AstTraceInc");
} }
m_baseCode = stmtp->declp()->code(); m_baseCode = stmtp->declp()->code();
puts("vluint32_t* oldp = vcdp->oldp(code+" + cvtToStr(m_baseCode) + ");\n"); if (v3Global.opt.trueTraceThreads()
puts("if (false && vcdp && oldp) {} // Prevent unused\n"); && nodep->funcType() == AstCFuncType::TRACE_CHANGE_SUB) {
puts("vluint32_t base = code+" + cvtToStr(m_baseCode) + ";\n");
puts("if (false && vcdp && base) {} // Prevent unused\n");
} else {
puts("vluint32_t* oldp = vcdp->oldp(code+" + cvtToStr(m_baseCode) + ");\n");
puts("if (false && vcdp && oldp) {} // Prevent unused\n");
}
} else if (nodep->funcType() == AstCFuncType::TRACE_INIT_SUB) { } else if (nodep->funcType() == AstCFuncType::TRACE_INIT_SUB) {
puts("int c = code;\n"); puts("int c = code;\n");
puts("if (false && vcdp && c) {} // Prevent unused\n"); puts("if (false && vcdp && c) {} // Prevent unused\n");

View File

@ -69,9 +69,7 @@ public:
of.puts("\n"); of.puts("\n");
of.puts("# Tracing threaded output mode? 0/1/N threads (from --trace-thread)\n"); of.puts("# Tracing threaded output mode? 0/1/N threads (from --trace-thread)\n");
of.puts("VM_TRACE_THREADS = "); of.puts("VM_TRACE_THREADS = ");
of.puts(!v3Global.opt.traceThreads() of.puts(cvtToStr(v3Global.opt.trueTraceThreads()));
? "0"
: cvtToStr(v3Global.opt.traceThreads() - v3Global.opt.traceFormat().fst()));
of.puts("\n"); of.puts("\n");
of.puts("# Separate FST writer thread? 0/1 (from --trace-fst with --trace-thread > 0)\n"); of.puts("# Separate FST writer thread? 0/1 (from --trace-fst with --trace-thread > 0)\n");
of.puts("VM_TRACE_FST_WRITER_THREAD = "); of.puts("VM_TRACE_FST_WRITER_THREAD = ");

View File

@ -490,6 +490,9 @@ public:
int traceMaxArray() const { return m_traceMaxArray; } int traceMaxArray() const { return m_traceMaxArray; }
int traceMaxWidth() const { return m_traceMaxWidth; } int traceMaxWidth() const { return m_traceMaxWidth; }
int traceThreads() const { return m_traceThreads; } int traceThreads() const { return m_traceThreads; }
bool trueTraceThreads() const {
return traceThreads() == 0 ? 0 : traceThreads() - traceFormat().fst();
}
int unrollCount() const { return m_unrollCount; } int unrollCount() const { return m_unrollCount; }
int unrollStmts() const { return m_unrollStmts; } int unrollStmts() const { return m_unrollStmts; }