diff --git a/configure.in b/configure.in index 4497f033c..58022f816 100644 --- a/configure.in +++ b/configure.in @@ -74,6 +74,7 @@ AC_CHECK_HEADERS(readline/readline.h readline/history.h sys/resource.h) case "${host}" in *linux*) AC_DEFINE([LINUX], [1], [Host operating system is Linux.]) ;; esac # vpi uses these +AC_CHECK_LIB(pthread, pthread_create) AC_CHECK_LIB(z, gzwrite) AC_CHECK_LIB(z, gzwrite, HAVE_LIBZ=yes, HAVE_LIBZ=no) AC_SUBST(HAVE_LIBZ) diff --git a/vpi/sys_lxt2.c b/vpi/sys_lxt2.c index ebc9792cb..91fb76cb4 100644 --- a/vpi/sys_lxt2.c +++ b/vpi/sys_lxt2.c @@ -32,17 +32,19 @@ # include # include # include -# include # include #ifdef HAVE_MALLOC_H # include #endif # include "stringheap.h" +# include static char *dump_path = NULL; static struct lxt2_wr_trace *dump_file = NULL; +static void* lxt2_thread(void*arg); + struct vcd_info { vpiHandle item; vpiHandle cb; @@ -172,15 +174,12 @@ static void show_this_item(struct vcd_info*info) if (vpi_get(vpiType, info->item) == vpiRealVar) { value.format = vpiRealVal; vpi_get_value(info->item, &value); - lxt2_wr_emit_value_double(dump_file, info->sym, 0, - value.value.real); + vcd_work_emit_double(info->sym, value.value.real); } else { value.format = vpiBinStrVal; vpi_get_value(info->item, &value); - lxt2_wr_emit_value_bit_string(dump_file, info->sym, - 0 /* array row */, - value.value.str); + vcd_work_emit_bits(info->sym, value.value.str); } } @@ -190,18 +189,11 @@ static void show_this_item_x(struct vcd_info*info) if (vpi_get(vpiType,info->item) == vpiRealVar) { /* Should write a NaN here? 
*/ } else { - lxt2_wr_emit_value_bit_string(dump_file, info->sym, 0, "x"); + vcd_work_emit_bits(info->sym, "x"); } } -/* - * managed qsorted list of scope names for duplicates bsearching - */ - -struct vcd_names_list_s lxt_tab; - - static int dumpvars_status = 0; /* 0:fresh 1:cb installed, 2:callback done */ static PLI_UINT64 dumpvars_time; __inline__ static int dump_header_pending(void) @@ -235,7 +227,7 @@ static PLI_INT32 variable_cb_2(p_cb_data cause) PLI_UINT64 now = timerec_to_time64(cause->time); if (now != vcd_cur_time) { - lxt2_wr_set_time64(dump_file, now); + vcd_work_set_time(now); vcd_cur_time = now; } @@ -290,7 +282,7 @@ static PLI_INT32 dumpvars_cb(p_cb_data cause) vcd_cur_time = dumpvars_time; if (!dump_is_off) { - lxt2_wr_set_time64(dump_file, dumpvars_time); + vcd_work_set_time(dumpvars_time); vcd_checkpoint(); } @@ -307,16 +299,17 @@ static PLI_INT32 finish_cb(p_cb_data cause) dumpvars_time = timerec_to_time64(cause->time); if (!dump_is_off && !dump_is_full && dumpvars_time != vcd_cur_time) { - lxt2_wr_set_time64(dump_file, dumpvars_time); + vcd_work_set_time(dumpvars_time); } + vcd_work_terminate(); for (cur = vcd_list ; cur ; cur = next) { next = cur->next; free(cur); } vcd_list = 0; - vcd_names_delete(&lxt_tab); + vcd_scope_names_delete(); nexus_ident_delete(); free(dump_path); dump_path = 0; @@ -373,11 +366,11 @@ static PLI_INT32 sys_dumpoff_calltf(PLI_BYTE8*name) now64 = timerec_to_time64(&now); if (now64 > vcd_cur_time) { - lxt2_wr_set_time(dump_file, now64); + vcd_work_set_time(now64); vcd_cur_time = now64; } - lxt2_wr_set_dumpoff(dump_file); + vcd_work_dumpoff(); vcd_checkpoint_x(); return 0; @@ -400,11 +393,11 @@ static PLI_INT32 sys_dumpon_calltf(PLI_BYTE8*name) now64 = timerec_to_time64(&now); if (now64 > vcd_cur_time) { - lxt2_wr_set_time64(dump_file, now64); + vcd_work_set_time(now64); vcd_cur_time = now64; } - lxt2_wr_set_dumpon(dump_file); + vcd_work_dumpon(); vcd_checkpoint(); return 0; @@ -424,7 +417,7 @@ static PLI_INT32 
sys_dumpall_calltf(PLI_BYTE8*name) now64 = timerec_to_time64(&now); if (now64 > vcd_cur_time) { - lxt2_wr_set_time64(dump_file, now64); + vcd_work_set_time(now64); vcd_cur_time = now64; } @@ -435,6 +428,7 @@ static PLI_INT32 sys_dumpall_calltf(PLI_BYTE8*name) static void *close_dumpfile(void) { + vcd_work_terminate(); lxt2_wr_close(dump_file); dump_file = NULL; return NULL; @@ -480,6 +474,7 @@ static void open_dumpfile(vpiHandle callh) lxt2_wr_set_partial_on(dump_file, 1); lxt2_wr_set_break_size(dump_file, use_file_size_limit); + vcd_work_start(lxt2_thread, 0); atexit((void(*)(void))close_dumpfile); } } @@ -527,7 +522,7 @@ static PLI_INT32 sys_dumpfile_calltf(PLI_BYTE8*name) */ static PLI_INT32 sys_dumpflush_calltf(PLI_BYTE8*name) { - if (dump_file) lxt2_wr_flush(dump_file); + if (dump_file) vcd_work_flush(); return 0; } @@ -697,10 +692,10 @@ static void scan_item(unsigned depth, vpiHandle item, int skip) vpi_printf("LXT2 info: scanning scope %s, %u levels\n", fullname, depth); #endif - nskip = 0 != vcd_names_search(&lxt_tab, fullname); + nskip = vcd_scope_names_test(fullname); if (!nskip) - vcd_names_add(&lxt_tab, fullname); + vcd_scope_names_add(fullname); else vpi_printf("LXT2 warning: ignoring signals in " "previously scanned scope %s\n", fullname); @@ -784,7 +779,6 @@ static PLI_INT32 sys_dumpvars_calltf(PLI_BYTE8*name) int dep = draw_scope(item); - vcd_names_sort(&lxt_tab); scan_item(depth, item, 0); while (dep--) pop_scope(); @@ -799,6 +793,51 @@ static PLI_INT32 sys_dumpvars_calltf(PLI_BYTE8*name) return 0; } +static void* lxt2_thread(void*arg) +{ + /* Keep track of the current time, and only call the set_time + function when the time changes. 
*/ + uint64_t cur_time = 0; + int run_flag = 1; + while (run_flag) { + struct vcd_work_item_s*cell = vcd_work_thread_peek(); + + if (cell->time != cur_time) { + cur_time = cell->time; + lxt2_wr_set_time64(dump_file, cur_time); + } + + switch (cell->type) { + case WT_NONE: + break; + case WT_FLUSH: + lxt2_wr_flush(dump_file); + break; + case WT_DUMPON: + lxt2_wr_set_dumpon(dump_file); + break; + case WT_DUMPOFF: + lxt2_wr_set_dumpoff(dump_file); + break; + case WT_EMIT_DOUBLE: + lxt2_wr_emit_value_double(dump_file, cell->sym_.lxt2, + 0, cell->op_.val_double); + break; + case WT_EMIT_BITS: + lxt2_wr_emit_value_bit_string(dump_file, cell->sym_.lxt2, + 0, cell->op_.val_char); + break; + case WT_TERMINATE: + run_flag = 0; + break; + } + + vcd_work_thread_pop(); + } + + return 0; +} + void sys_lxt2_register() { int idx; diff --git a/vpi/vcd_priv.h b/vpi/vcd_priv.h index a263a571b..672cf602c 100644 --- a/vpi/vcd_priv.h +++ b/vpi/vcd_priv.h @@ -47,11 +47,85 @@ EXTERN void vcd_names_sort(struct vcd_names_list_s*tab); EXTERN void vcd_names_delete(); +/* + * Keep a map of nexus idents to help with alias detection. + */ EXTERN const char*find_nexus_ident(int nex); EXTERN void set_nexus_ident(int nex, const char *id); EXTERN void nexus_ident_delete(); +/* + * Keep a set of scope names to help with duplicate detection. + */ +EXTERN void vcd_scope_names_add(const char*name); +EXTERN int vcd_scope_names_test(const char*name); +EXTERN void vcd_scope_names_delete(void); + +/* + * Implement a work queue that can be used to send commands to a + * dumper thread. 
+ */ + +typedef enum vcd_work_item_type_e { + WT_NONE, + WT_EMIT_BITS, + WT_EMIT_DOUBLE, + WT_DUMPON, + WT_DUMPOFF, + WT_FLUSH, + WT_TERMINATE +} vcd_work_item_type_t; + +struct lxt2_wr_symbol; + +struct vcd_work_item_s { + vcd_work_item_type_t type; + uint64_t time; + union { + struct lxt2_wr_symbol*lxt2; + } sym_; + + union { + double val_double; + char*val_char; + } op_; +}; + +/* + * The thread_peek and thread_pop functions work as pairs. The work + * thread processing work items uses vcd_work_thread_peek to look at + * the first item in the work queue. The work thread can be assured + * that the work item is stable. When it is done with the work item, + * it calls vcd_work_thread_pop to cause it to be popped from the work + * queue. + */ +EXTERN struct vcd_work_item_s* vcd_work_thread_peek(void); +EXTERN void vcd_work_thread_pop(void); + +/* + * Create the work thread with the vcd_work_start function, and terminate + * it (gracefully) with the vcd_work_terminate function. That call + * also joins the thread, so it should only be made once per start. + * Use vcd_work_sync to block until the work thread has finished all + * the work queued so far. + */ +EXTERN void vcd_work_start( void* (*fun) (void*arg), void*arg); +EXTERN void vcd_work_terminate(void); + +EXTERN void vcd_work_sync(void); + +/* + * The remaining vcd_work_* functions send messages to the work thread + * causing it to perform various VCD-related tasks. + */ +EXTERN void vcd_work_flush(void); /* Drain output caches. */ +EXTERN void vcd_work_set_time(uint64_t val); +EXTERN void vcd_work_dumpon(void); +EXTERN void vcd_work_dumpoff(void); +EXTERN void vcd_work_emit_double(struct lxt2_wr_symbol*sym, double val); +EXTERN void vcd_work_emit_bits(struct lxt2_wr_symbol*sym, const char*bits); + /* The compiletf routines are common for the VCD, LXT and LXT2 dumpers. 
*/ EXTERN PLI_INT32 sys_dumpvars_compiletf(PLI_BYTE8 *name); diff --git a/vpi/vcd_priv2.cc b/vpi/vcd_priv2.cc index a811c710b..20213adbf 100644 --- a/vpi/vcd_priv2.cc +++ b/vpi/vcd_priv2.cc @@ -19,6 +19,9 @@ # include "vcd_priv.h" # include +# include +# include +# include /* Nexus Id cache @@ -55,3 +58,225 @@ extern "C" void nexus_ident_delete() { nexus_ident_map.clear(); } + + +static std::set vcd_scope_names_set; + +extern "C" void vcd_scope_names_add(const char*name) +{ + vcd_scope_names_set .insert(name); +} + +extern "C" int vcd_scope_names_test(const char*name) +{ + if (vcd_scope_names_set.find(name) == vcd_scope_names_set.end()) + return 0; + else + return 1; +} + +extern "C" void vcd_scope_names_delete(void) +{ + vcd_scope_names_set.clear(); +} + +static pthread_t work_thread; + +static const unsigned WORK_QUEUE_SIZE = 128*1024; +static const unsigned WORK_QUEUE_BATCH_MIN = 4*1024; +static const unsigned WORK_QUEUE_BATCH_MAX = 32*1024; + +static struct vcd_work_item_s work_queue[WORK_QUEUE_SIZE]; +static volatile unsigned work_queue_next = 0; +static volatile unsigned work_queue_fill = 0; + +static pthread_mutex_t work_queue_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t work_queue_is_empty_sig = PTHREAD_COND_INITIALIZER; +static pthread_cond_t work_queue_notempty_sig = PTHREAD_COND_INITIALIZER; +static pthread_cond_t work_queue_minfree_sig = PTHREAD_COND_INITIALIZER; + + +struct vcd_work_item_s* vcd_work_thread_peek(void) +{ + // There must always only be 1 vcd work thread, and only the + // work thread decreases the fill, so if the work_queue_fill + // is non-zero, I can reliably assume that there is at least + // one item that I can peek at. I only need to lock if I must + // wait for the work_queue_fill to become non-zero. 
+ if (work_queue_fill == 0) { + pthread_mutex_lock(&work_queue_mutex); + while (work_queue_fill == 0) + pthread_cond_wait(&work_queue_notempty_sig, &work_queue_mutex); + pthread_mutex_unlock(&work_queue_mutex); + } + + return work_queue + work_queue_next; +} + +void vcd_work_thread_pop(void) +{ + pthread_mutex_lock(&work_queue_mutex); + + unsigned use_fill = work_queue_fill - 1; + work_queue_fill = use_fill; + + unsigned use_next = work_queue_next; + + struct vcd_work_item_s*cell = work_queue + use_next; + if (cell->type == WT_EMIT_BITS) { + free(cell->op_.val_char); + } + + use_next += 1; + if (use_next >= WORK_QUEUE_SIZE) + use_next = 0; + work_queue_next = use_next; + + if (use_fill == WORK_QUEUE_SIZE-WORK_QUEUE_BATCH_MIN) + pthread_cond_signal(&work_queue_minfree_sig); + else if (use_fill == 0) + pthread_cond_signal(&work_queue_is_empty_sig); + + pthread_mutex_unlock(&work_queue_mutex); +} + +/* + * Work queue items are created in batches to reduce thread + * bouncing. When the producer gets a free work item, it actually + * locks the queue in order to produce a batch. The queue stays locked + * until the batch is complete. Then it releases the whole lot to the + * consumer. 
+ */ +static uint64_t work_queue_next_time = 0; +static unsigned current_batch_cnt = 0; +static unsigned current_batch_alloc = 0; +static unsigned current_batch_base = 0; + +void vcd_work_start( void* (*fun) (void*), void*arg ) +{ + pthread_create(&work_thread, 0, fun, arg); +} + +static struct vcd_work_item_s* grab_item(void) +{ + if (current_batch_alloc == 0) { + pthread_mutex_lock(&work_queue_mutex); + while ((WORK_QUEUE_SIZE-work_queue_fill) < WORK_QUEUE_BATCH_MIN) + pthread_cond_wait(&work_queue_minfree_sig, &work_queue_mutex); + + current_batch_base = work_queue_next + work_queue_fill; + current_batch_alloc = WORK_QUEUE_SIZE - work_queue_fill; + + pthread_mutex_unlock(&work_queue_mutex); + + if (current_batch_base >= WORK_QUEUE_SIZE) + current_batch_base -= WORK_QUEUE_SIZE; + if (current_batch_alloc > WORK_QUEUE_BATCH_MAX) + current_batch_alloc = WORK_QUEUE_BATCH_MAX; + current_batch_cnt = 0; + } + + assert(current_batch_cnt < current_batch_alloc); + + unsigned cur = current_batch_base + current_batch_cnt; + if (cur >= WORK_QUEUE_SIZE) + cur -= WORK_QUEUE_SIZE; + + // Stamp the work item with the time last given to vcd_work_set_time. 
+ struct vcd_work_item_s*cell = work_queue + cur; + cell->time = work_queue_next_time; + return cell; +} + +static void end_batch(void) +{ + pthread_mutex_lock(&work_queue_mutex); + + unsigned use_fill = work_queue_fill; + bool was_empty_flag = (use_fill==0) && (current_batch_cnt > 0); + + use_fill += current_batch_cnt; + work_queue_fill = use_fill; + + current_batch_alloc = 0; + current_batch_cnt = 0; + + if (was_empty_flag) + pthread_cond_signal(&work_queue_notempty_sig); + + pthread_mutex_unlock(&work_queue_mutex); +} + +static inline void unlock_item(bool flush_batch =false) +{ + current_batch_cnt += 1; + if (current_batch_cnt == current_batch_alloc || flush_batch) + end_batch(); +} + +void vcd_work_sync(void) +{ + if (current_batch_alloc > 0) + end_batch(); + + if (work_queue_fill > 0) { + pthread_mutex_lock(&work_queue_mutex); + while (work_queue_fill > 0) + pthread_cond_wait(&work_queue_is_empty_sig, &work_queue_mutex); + pthread_mutex_unlock(&work_queue_mutex); + } +} + +void vcd_work_flush(void) +{ + struct vcd_work_item_s*cell = grab_item(); + cell->type = WT_FLUSH; + unlock_item(true); +} + +void vcd_work_dumpon(void) +{ + struct vcd_work_item_s*cell = grab_item(); + cell->type = WT_DUMPON; + unlock_item(); +} + +void vcd_work_dumpoff(void) +{ + struct vcd_work_item_s*cell = grab_item(); + cell->type = WT_DUMPOFF; + unlock_item(); +} + +void vcd_work_set_time(uint64_t val) +{ + work_queue_next_time = val; +} + +void vcd_work_emit_double(struct lxt2_wr_symbol*sym, double val) +{ + struct vcd_work_item_s*cell = grab_item(); + cell->type = WT_EMIT_DOUBLE; + cell->sym_.lxt2 = sym; + cell->op_.val_double = val; + unlock_item(); +} + +void vcd_work_emit_bits(struct lxt2_wr_symbol*sym, const char* val) +{ + + struct vcd_work_item_s*cell = grab_item(); + cell->type = WT_EMIT_BITS; + cell->sym_.lxt2 = sym; + cell->op_.val_char = strdup(val); + + unlock_item(); +} + +void vcd_work_terminate(void) +{ + struct vcd_work_item_s*cell = grab_item(); + cell->type = 
WT_TERMINATE; + unlock_item(true); + pthread_join(work_thread, 0); +}