From 23ba0bc0195cdd4a5886691cbde732c310564ed2 Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Sat, 25 Jan 2014 19:25:21 -0800
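Subject: [PATCH] various vec4 fixes.

- tgt-vvp/eval_vec4.c: in draw_binary_vec4_le, emit a %pad after
  evaluating an operand that is narrower than the other operand, so
  both comparison operands have the same width.
- tgt-vvp/vvp.c: turn the flag allocation mask into an array of 32-bit
  words covering FLAGS_COUNT (256) flags.
- vvp/vthread.cc: raise the per-thread FLAGS_COUNT from 16 to 256.
- vvp/vthread.cc: of_ASSIGN_VEC4_OFF_D now trims the value when the
  offset is negative or the value runs past the end of the signal,
  instead of asserting; of_ASSIGN_VEC4_OFF_D and of_ASSIGN_VEC4_OFF_E
  compare a negative offset against the value width rather than the
  signal size.
- vvp/vthread.cc: of_EVENT passes thr->wt_context to vvp_send_vec4.
- vvp/vthread.cc: of_POW no longer asserts that both operands have
  the same size.
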
---
 tgt-vvp/eval_vec4.c |  6 ++++++
 tgt-vvp/vvp.c       | 23 ++++++++++++++---------
 vvp/vthread.cc      | 20 +++++++++++++-------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c
index 355505311..6c92610cb 100644
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@@ -270,8 +270,14 @@ static void draw_binary_vec4_le(ivl_expr_t expr, int stuff_ok_flag)
 	    break;
       }
 
+	/* NOTE: I think I would rather the elaborator handle the
+	   operand widths. When that happens, take this code out. */
       draw_eval_vec4(le, stuff_ok_flag);
+      if (ivl_expr_width(le) < ivl_expr_width(re))
+	    fprintf(vvp_out, "    %%pad/%c %u;\n", s_flag, ivl_expr_width(re));
       draw_eval_vec4(re, stuff_ok_flag);
+      if (ivl_expr_width(re) < ivl_expr_width(le))
+	    fprintf(vvp_out, "    %%pad/%c %u;\n", s_flag, ivl_expr_width(le));
 
       switch (use_opcode) {
 	  case 'L':
diff --git a/tgt-vvp/vvp.c b/tgt-vvp/vvp.c
index fd32dd9a5..3c09b1801 100644
--- a/tgt-vvp/vvp.c
+++ b/tgt-vvp/vvp.c
@@ -48,7 +48,10 @@ FILE*vvp_out = 0;
 int vvp_errors = 0;
 unsigned show_file_line = 0;
 
-static uint32_t allocate_flag_mask = 0x00ff;
+# define FLAGS_COUNT 256
+
+static uint32_t allocate_flag_mask[FLAGS_COUNT / 32] = { 0x000000ff, 0 };
+
 
 __inline__ static void draw_execute_header(ivl_design_t des)
 {
@@ -90,12 +93,13 @@ __inline__ static void draw_module_declarations(ivl_design_t des)
 int allocate_flag(void)
 {
       int idx;
-      for (idx = 0 ; idx < 8*sizeof(allocate_flag_mask) ; idx += 1) {
-	    uint32_t mask = 1 << idx;
-	    if (allocate_flag_mask & mask)
+      for (idx = 0 ; idx < FLAGS_COUNT ; idx += 1) { /* first clear bit wins */
+	    int word = idx / 32; /* 32 flags per mask word */
+	    uint32_t mask = (uint32_t)1 << (idx%32); /* int 1<<31 would be UB */
+	    if (allocate_flag_mask[word] & mask)
 		  continue;
 
-	    allocate_flag_mask |= mask;
+	    allocate_flag_mask[word] |= mask; /* mark the flag as in use */
 	    return idx;
       }
 
@@ -104,12 +108,13 @@ int allocate_flag(void)
 
 void clr_flag(int idx)
 {
-      assert(idx < 8*sizeof(allocate_flag_mask));
-      uint32_t mask = 1 << idx;
+      assert(idx >= 0 && idx < FLAGS_COUNT); /* int compare: reject idx<0 too */
+      int word = idx / 32; /* 32 flags per mask word */
+      uint32_t mask = (uint32_t)1 << (idx%32); /* int 1<<31 would be UB */
 
-      assert(allocate_flag_mask & mask);
+      assert(allocate_flag_mask[word] & mask); /* flag must be allocated */
 
-      allocate_flag_mask &= ~mask;
+      allocate_flag_mask[word] &= ~mask; /* release the flag */
 }
 
 int target_design(ivl_design_t des)
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index e8a1f8a44..a80a22599 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -103,7 +103,7 @@ struct vthread_s {
       vvp_code_t pc;
 	/* These hold the private thread bits. */
 	//vvp_vector4_t bits4;
-      enum { FLAGS_COUNT = 16, WORDS_COUNT = 16 };
+      enum { FLAGS_COUNT = 256, WORDS_COUNT = 16 };
       vvp_bit4_t flags[FLAGS_COUNT];
 
 	/* These are the word registers. */
@@ -1363,12 +1363,19 @@ bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
       if (off >= (long)sig->value_size())
 	    return true;
       if (off < 0) {
-	    if ((unsigned)-off >= sig->value_size())
+	    if ((unsigned)-off >= wid)
 		  return true;
-	    assert(0); // XXXX Not implemented yet.
+
+	    int use_off = -off;
+	    assert(wid > use_off);
+	    unsigned use_wid = wid - use_off;
+	    val = val.subvalue(use_off, use_wid);
+	    off = 0;
+	    wid = use_wid;
       }
       if (off+wid > sig->value_size()) {
-	    assert(0); // XXXX Not implemented yet.
+	    val = val.subvalue(0, sig->value_size()-off);
+	    wid = val.size();
       }
 
       schedule_assign_vector(ptr, off, sig->value_size(), val, del);
@@ -1398,7 +1405,7 @@ bool of_ASSIGN_VEC4_OFF_E(vthread_t thr, vvp_code_t cp)
       if (off >= (long)sig->value_size())
 	    return true;
       if (off < 0) {
-	    if ((unsigned)-off >= sig->value_size())
+	    if ((unsigned)-off >= wid)
 		  return true;
 
 	    int use_off = -off;
@@ -3014,7 +3021,7 @@ bool of_EVENT(vthread_t thr, vvp_code_t cp)
 {
       vvp_net_ptr_t ptr (cp->net, 0);
       vvp_vector4_t tmp (1, BIT4_X);
-      vvp_send_vec4(ptr, tmp, 0);
+      vvp_send_vec4(ptr, tmp, thr->wt_context);
       return true;
 }
 
@@ -5036,7 +5043,6 @@ bool of_POW(vthread_t thr, vvp_code_t)
       vvp_vector4_t valb = thr->pop_vec4();
       vvp_vector4_t vala = thr->pop_vec4();
 
-      assert(vala.size()==valb.size());
       unsigned wid = vala.size();
 
       vvp_vector2_t xv2 = vvp_vector2_t(vala);