Merge branch 'vcd-thread'

2010-01-09 10:39:12 -08:00 · 2010-01-09 10:39:12 -08:00 · ffeeeaf0af
parent 2638cd9f6e 7fc6b02e96
commit ffeeeaf0af
4 changed files with 366 additions and 27 deletions
--- a/configure.in
+++ b/configure.in
@ -74,6 +74,7 @@ AC_CHECK_HEADERS(readline/readline.h readline/history.h sys/resource.h)
 case "${host}" in *linux*) AC_DEFINE([LINUX], [1], [Host operating system is Linux.]) ;; esac

 # vpi uses these
+AC_CHECK_LIB(pthread, pthread_create)
 AC_CHECK_LIB(z, gzwrite)
 AC_CHECK_LIB(z, gzwrite, HAVE_LIBZ=yes, HAVE_LIBZ=no)
 AC_SUBST(HAVE_LIBZ)
--- a/vpi/sys_lxt2.c
+++ b/vpi/sys_lxt2.c
@ -32,17 +32,19 @@
 # include  <stdio.h>
 # include  <stdlib.h>
 # include  <string.h>
-# include  <assert.h>
 # include  <time.h>
 #ifdef HAVE_MALLOC_H
 # include  <malloc.h>
 #endif
 # include  "stringheap.h"
+# include  <assert.h>


 static char *dump_path = NULL;
 static struct lxt2_wr_trace *dump_file = NULL;

+static void* lxt2_thread(void*arg);
+
 struct vcd_info {
      vpiHandle item;
      vpiHandle cb;
@ -172,15 +174,12 @@ static void show_this_item(struct vcd_info*info)
      if (vpi_get(vpiType, info->item) == vpiRealVar) {
 	    value.format = vpiRealVal;
 	    vpi_get_value(info->item, &value);
-	    lxt2_wr_emit_value_double(dump_file, info->sym, 0,
-				      value.value.real);
+	    vcd_work_emit_double(info->sym, value.value.real);

      } else {
 	    value.format = vpiBinStrVal;
 	    vpi_get_value(info->item, &value);
-	    lxt2_wr_emit_value_bit_string(dump_file, info->sym,
-	                                  0 /* array row */,
-	                                  value.value.str);
+	    vcd_work_emit_bits(info->sym, value.value.str);
      }
 }

@ -190,18 +189,11 @@ static void show_this_item_x(struct vcd_info*info)
      if (vpi_get(vpiType,info->item) == vpiRealVar) {
 	      /* Should write a NaN here? */
      } else {
-	    lxt2_wr_emit_value_bit_string(dump_file, info->sym, 0, "x");
+	    vcd_work_emit_bits(info->sym, "x");
      }
 }


-/*
- * managed qsorted list of scope names for duplicates bsearching
- */
-
-struct vcd_names_list_s lxt_tab;
-
-
 static int dumpvars_status = 0; /* 0:fresh 1:cb installed, 2:callback done */
 static PLI_UINT64 dumpvars_time;
 __inline__ static int dump_header_pending(void)
@ -235,7 +227,7 @@ static PLI_INT32 variable_cb_2(p_cb_data cause)
      PLI_UINT64 now = timerec_to_time64(cause->time);

      if (now != vcd_cur_time) {
-            lxt2_wr_set_time64(dump_file, now);
+	    vcd_work_set_time(now);
 	    vcd_cur_time = now;
      }

@ -290,7 +282,7 @@ static PLI_INT32 dumpvars_cb(p_cb_data cause)
      vcd_cur_time = dumpvars_time;

      if (!dump_is_off) {
-            lxt2_wr_set_time64(dump_file, dumpvars_time);
+	    vcd_work_set_time(dumpvars_time);
 	    vcd_checkpoint();
      }

@ -307,16 +299,17 @@ static PLI_INT32 finish_cb(p_cb_data cause)

      dumpvars_time = timerec_to_time64(cause->time);
      if (!dump_is_off && !dump_is_full && dumpvars_time != vcd_cur_time) {
-            lxt2_wr_set_time64(dump_file, dumpvars_time);
+	    vcd_work_set_time(dumpvars_time);
      }

+      vcd_work_terminate();
      for (cur = vcd_list ;  cur ;  cur = next) {
 	    next = cur->next;
 	    free(cur);
      }
      vcd_list = 0;

-      vcd_names_delete(&lxt_tab);
+      vcd_scope_names_delete();
      nexus_ident_delete();
      free(dump_path);
      dump_path = 0;
@ -373,11 +366,11 @@ static PLI_INT32 sys_dumpoff_calltf(PLI_BYTE8*name)
      now64 = timerec_to_time64(&now);

      if (now64 > vcd_cur_time) {
-	    lxt2_wr_set_time(dump_file, now64);
+	    vcd_work_set_time(now64);
 	    vcd_cur_time = now64;
      }

-      lxt2_wr_set_dumpoff(dump_file);
+      vcd_work_dumpoff();
      vcd_checkpoint_x();

      return 0;
@ -400,11 +393,11 @@ static PLI_INT32 sys_dumpon_calltf(PLI_BYTE8*name)
      now64 = timerec_to_time64(&now);

      if (now64 > vcd_cur_time) {
-	    lxt2_wr_set_time64(dump_file, now64);
+	    vcd_work_set_time(now64);
 	    vcd_cur_time = now64;
      }

-      lxt2_wr_set_dumpon(dump_file);
+      vcd_work_dumpon();
      vcd_checkpoint();

      return 0;
@ -424,7 +417,7 @@ static PLI_INT32 sys_dumpall_calltf(PLI_BYTE8*name)
      now64 = timerec_to_time64(&now);

      if (now64 > vcd_cur_time) {
-	    lxt2_wr_set_time64(dump_file, now64);
+	    vcd_work_set_time(now64);
 	    vcd_cur_time = now64;
      }

@ -435,6 +428,7 @@ static PLI_INT32 sys_dumpall_calltf(PLI_BYTE8*name)

 static void *close_dumpfile(void)
 {
+      vcd_work_terminate();
      lxt2_wr_close(dump_file);
      dump_file = NULL;
      return NULL;
@ -480,6 +474,7 @@ static void open_dumpfile(vpiHandle callh)
 	    lxt2_wr_set_partial_on(dump_file, 1);
 	    lxt2_wr_set_break_size(dump_file, use_file_size_limit);

+	    vcd_work_start(lxt2_thread, 0);
            atexit((void(*)(void))close_dumpfile);
      }
 }
@ -527,7 +522,7 @@ static PLI_INT32 sys_dumpfile_calltf(PLI_BYTE8*name)
 */
 static PLI_INT32 sys_dumpflush_calltf(PLI_BYTE8*name)
 {
-      if (dump_file) lxt2_wr_flush(dump_file);
+      if (dump_file) vcd_work_flush();

      return 0;
 }
@ -697,10 +692,10 @@ static void scan_item(unsigned depth, vpiHandle item, int skip)
 		  vpi_printf("LXT2 info: scanning scope %s, %u levels\n",
 		             fullname, depth);
 #endif
-		  nskip = 0 != vcd_names_search(&lxt_tab, fullname);
+		  nskip = vcd_scope_names_test(fullname);

 		  if (!nskip)
-			vcd_names_add(&lxt_tab, fullname);
+			vcd_scope_names_add(fullname);
 		  else
 		    vpi_printf("LXT2 warning: ignoring signals in "
 		               "previously scanned scope %s\n", fullname);
@ -784,7 +779,6 @@ static PLI_INT32 sys_dumpvars_calltf(PLI_BYTE8*name)

 	    int dep = draw_scope(item);

-	    vcd_names_sort(&lxt_tab);
 	    scan_item(depth, item, 0);

 	    while (dep--) pop_scope();
@ -799,6 +793,51 @@ static PLI_INT32 sys_dumpvars_calltf(PLI_BYTE8*name)
      return 0;
 }

+static void* lxt2_thread(void*arg)
+{
+	/* Keep track of the current time, and only call the set_time
+	   function when the time changes. */
+      uint64_t cur_time = 0;
+      int run_flag = 1;
+      while (run_flag) {
+	    struct vcd_work_item_s*cell = vcd_work_thread_peek();
+
+	    if (cell->time != cur_time) {
+		  cur_time = cell->time;
+		  lxt2_wr_set_time64(dump_file, cur_time);
+	    }
+
+	    switch (cell->type) {
+		case WT_NONE:
+		  break;
+		case WT_FLUSH:
+		  lxt2_wr_flush(dump_file);
+		  break;
+		case WT_DUMPON:
+		  lxt2_wr_set_dumpon(dump_file);
+		  break;
+		case WT_DUMPOFF:
+		  lxt2_wr_set_dumpoff(dump_file);
+		  break;
+		case WT_EMIT_DOUBLE:
+		  lxt2_wr_emit_value_double(dump_file, cell->sym_.lxt2,
+					    0, cell->op_.val_double);
+		  break;
+		case WT_EMIT_BITS:
+		  lxt2_wr_emit_value_bit_string(dump_file, cell->sym_.lxt2,
+						0, cell->op_.val_char);
+		  break;
+		case WT_TERMINATE:
+		  run_flag = 0;
+		  break;
+	    }
+
+	    vcd_work_thread_pop();
+      }
+
+      return 0;
+}
+
 void sys_lxt2_register()
 {
      int idx;
--- a/vpi/vcd_priv.h
+++ b/vpi/vcd_priv.h
@ -47,11 +47,85 @@ EXTERN void vcd_names_sort(struct vcd_names_list_s*tab);

 EXTERN void vcd_names_delete();

+/*
+ * Keep a map of nexus idents to help with alias detection.
+ */
 EXTERN const char*find_nexus_ident(int nex);
 EXTERN void       set_nexus_ident(int nex, const char *id);

 EXTERN void nexus_ident_delete();

+/*
+ * Keep a set of scope names to help with duplicate detection.
+ */
+EXTERN void vcd_scope_names_add(const char*name);
+EXTERN int  vcd_scope_names_test(const char*name);
+EXTERN void vcd_scope_names_delete(void);
+
+/*
+ * Implement a work queue that can be used to send commands to a
+ * dumper thread.
+ */
+
+typedef enum vcd_work_item_type_e {
+      WT_NONE,
+      WT_EMIT_BITS,
+      WT_EMIT_DOUBLE,
+      WT_DUMPON,
+      WT_DUMPOFF,
+      WT_FLUSH,
+      WT_TERMINATE
+} vcd_work_item_type_t;
+
+struct lxt2_wr_symbol;
+
+struct vcd_work_item_s {
+      vcd_work_item_type_t type;
+      uint64_t time;
+      union {
+	    struct lxt2_wr_symbol*lxt2;
+      } sym_;
+
+      union {
+	    double val_double;
+	    char*val_char;
+      } op_;
+};
+
+/*
+ * The thread_peek and thread_pop functions work as pairs. The work
+ * thread processing work items uses vcd_work_thread_peek to look at
+ * the first item in the work queue. The work thread can be assured
+ * that the work item is stable. When it is done with the work item,
+ * it calls vcd_work_thread_pop to cause it to be popped from the work
+ * queue.
+ */
+EXTERN struct vcd_work_item_s* vcd_work_thread_peek(void);
+EXTERN void vcd_work_thread_pop(void);
+
+/*
+ * Create work threads with the vcd_work_start function, and terminate
+ * the work thread (gracefully) with the vcd_work_terminate
+ * function. Synchronize with the work thread with the vcd_work_sync
+ * function. This blocks until the work thread has finished all the
+ * work queued so far.
+ */
+EXTERN void vcd_work_start( void* (*fun) (void*arg), void*arg);
+EXTERN void vcd_work_terminate(void);
+
+EXTERN void vcd_work_sync(void);
+
+/*
+ * The remaining vcd_work_* functions send messages to the work thread
+ * causing it to perform various VCD-related tasks.
+ */
+EXTERN void vcd_work_flush(void); /* Drain output caches. */
+EXTERN void vcd_work_set_time(uint64_t val);
+EXTERN void vcd_work_dumpon(void);
+EXTERN void vcd_work_dumpoff(void);
+EXTERN void vcd_work_emit_double(struct lxt2_wr_symbol*sym, double val);
+EXTERN void vcd_work_emit_bits(struct lxt2_wr_symbol*sym, const char*bits);
+
 /* The compiletf routines are common for the VCD, LXT and LXT2 dumpers. */
 EXTERN PLI_INT32 sys_dumpvars_compiletf(PLI_BYTE8 *name);

--- a/vpi/vcd_priv2.cc
+++ b/vpi/vcd_priv2.cc
@ -19,6 +19,9 @@

 # include  "vcd_priv.h"
 # include  <map>
+# include  <set>
+# include  <string>
+# include  <assert.h>

 /*
   Nexus Id cache
@ -55,3 +58,225 @@ extern "C" void nexus_ident_delete()
 {
      nexus_ident_map.clear();
 }
+
+
+static std::set<std::string> vcd_scope_names_set;
+
+extern "C" void vcd_scope_names_add(const char*name)
+{
+      vcd_scope_names_set .insert(name);
+}
+
+extern "C" int vcd_scope_names_test(const char*name)
+{
+      if (vcd_scope_names_set.find(name) == vcd_scope_names_set.end())
+	    return 0;
+      else
+	    return 1;
+}
+
+extern "C" void vcd_scope_names_delete(void)
+{
+      vcd_scope_names_set.clear();
+}
+
+static pthread_t work_thread;
+
+static const unsigned WORK_QUEUE_SIZE = 128*1024;
+static const unsigned WORK_QUEUE_BATCH_MIN = 4*1024;
+static const unsigned WORK_QUEUE_BATCH_MAX = 32*1024;
+
+static struct vcd_work_item_s work_queue[WORK_QUEUE_SIZE];
+static volatile unsigned work_queue_next = 0;
+static volatile unsigned work_queue_fill = 0;
+
+static pthread_mutex_t work_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t  work_queue_is_empty_sig = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t  work_queue_notempty_sig = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t  work_queue_minfree_sig = PTHREAD_COND_INITIALIZER;
+
+
+struct vcd_work_item_s* vcd_work_thread_peek(void)
+{
+	// There must always only be 1 vcd work thread, and only the
+	// work thread decreases the fill, so if the work_queue_fill
+	// is non-zero, I can reliably assume that there is at least
+	// one item that I can peek at. I only need to lock if I must
+	// wait for the work_queue_fill to become non-zero.
+      if (work_queue_fill == 0) {
+	    pthread_mutex_lock(&work_queue_mutex);
+	    while (work_queue_fill == 0)
+		  pthread_cond_wait(&work_queue_notempty_sig, &work_queue_mutex);
+	    pthread_mutex_unlock(&work_queue_mutex);
+      }
+
+      return work_queue + work_queue_next;
+}
+
+void vcd_work_thread_pop(void)
+{
+      pthread_mutex_lock(&work_queue_mutex);
+
+      unsigned use_fill = work_queue_fill - 1;
+      work_queue_fill = use_fill;
+
+      unsigned use_next = work_queue_next;
+
+      struct vcd_work_item_s*cell = work_queue + use_next;
+      if (cell->type == WT_EMIT_BITS) {
+	    free(cell->op_.val_char);
+      }
+
+      use_next += 1;
+      if (use_next >= WORK_QUEUE_SIZE)
+	    use_next = 0;
+      work_queue_next = use_next;
+
+      if (use_fill == WORK_QUEUE_SIZE-WORK_QUEUE_BATCH_MIN)
+	    pthread_cond_signal(&work_queue_minfree_sig);
+      else if (use_fill == 0)
+	    pthread_cond_signal(&work_queue_is_empty_sig);
+
+      pthread_mutex_unlock(&work_queue_mutex);
+}
+
+/*
+ * Work queue items are created in batches to reduce thread
+ * bouncing. When the producer gets a free work item, it actually
+ * locks the queue in order to produce a batch. The queue stays locked
+ * until the batch is complete. Then the producer releases the whole
+ * lot to the consumer.
+ */
+static uint64_t work_queue_next_time = 0;
+static unsigned current_batch_cnt = 0;
+static unsigned current_batch_alloc = 0;
+static unsigned current_batch_base = 0;
+
+void vcd_work_start( void* (*fun) (void*), void*arg )
+{
+      pthread_create(&work_thread, 0, fun, arg);
+}
+
+static struct vcd_work_item_s* grab_item(void)
+{
+      if (current_batch_alloc == 0) {
+	     pthread_mutex_lock(&work_queue_mutex);
+	     while ((WORK_QUEUE_SIZE-work_queue_fill) < WORK_QUEUE_BATCH_MIN)
+		  pthread_cond_wait(&work_queue_minfree_sig, &work_queue_mutex);
+
+	     current_batch_base = work_queue_next + work_queue_fill;
+	     current_batch_alloc = WORK_QUEUE_SIZE - work_queue_fill;
+
+	     pthread_mutex_unlock(&work_queue_mutex);
+
+	     if (current_batch_base >= WORK_QUEUE_SIZE)
+		   current_batch_base -= WORK_QUEUE_SIZE;
+	     if (current_batch_alloc > WORK_QUEUE_BATCH_MAX)
+		   current_batch_alloc = WORK_QUEUE_BATCH_MAX;
+	     current_batch_cnt = 0;
+      }
+
+      assert(current_batch_cnt < current_batch_alloc);
+
+      unsigned cur = current_batch_base + current_batch_cnt;
+      if (cur >= WORK_QUEUE_SIZE)
+	    cur -= WORK_QUEUE_SIZE;
+
+	// Write the new timestamp into the work item.
+      struct vcd_work_item_s*cell = work_queue + cur;
+      cell->time = work_queue_next_time;
+      return cell;
+}
+
+static void end_batch(void)
+{
+      pthread_mutex_lock(&work_queue_mutex);
+
+      unsigned use_fill = work_queue_fill;
+      bool was_empty_flag = (use_fill==0) && (current_batch_cnt > 0);
+
+      use_fill += current_batch_cnt;
+      work_queue_fill = use_fill;
+
+      current_batch_alloc = 0;
+      current_batch_cnt = 0;
+
+      if (was_empty_flag)
+	    pthread_cond_signal(&work_queue_notempty_sig);
+
+      pthread_mutex_unlock(&work_queue_mutex);
+}
+
+static inline void unlock_item(bool flush_batch =false)
+{
+      current_batch_cnt += 1;
+      if (current_batch_cnt == current_batch_alloc || flush_batch)
+	    end_batch();
+}
+
+void vcd_work_sync(void)
+{
+      if (current_batch_alloc > 0)
+	    end_batch();
+
+      if (work_queue_fill > 0) {
+	    pthread_mutex_lock(&work_queue_mutex);
+	    while (work_queue_fill > 0)
+		  pthread_cond_wait(&work_queue_is_empty_sig, &work_queue_mutex);
+	    pthread_mutex_unlock(&work_queue_mutex);
+      }
+}
+
+void vcd_work_flush(void)
+{
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_FLUSH;
+      unlock_item(true);
+}
+
+void vcd_work_dumpon(void)
+{
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_DUMPON;
+      unlock_item();
+}
+
+void vcd_work_dumpoff(void)
+{
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_DUMPOFF;
+      unlock_item();
+}
+
+void vcd_work_set_time(uint64_t val)
+{
+      work_queue_next_time = val;
+}
+
+void vcd_work_emit_double(struct lxt2_wr_symbol*sym, double val)
+{
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_EMIT_DOUBLE;
+      cell->sym_.lxt2 = sym;
+      cell->op_.val_double = val;
+      unlock_item();
+}
+
+void vcd_work_emit_bits(struct lxt2_wr_symbol*sym, const char* val)
+{
+
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_EMIT_BITS;
+      cell->sym_.lxt2 = sym;
+      cell->op_.val_char = strdup(val);
+
+      unlock_item();
+}
+
+void vcd_work_terminate(void)
+{
+      struct vcd_work_item_s*cell = grab_item();
+      cell->type = WT_TERMINATE;
+      unlock_item(true);
+      pthread_join(work_thread, 0);
+}