diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c
index 7781625b4..634919489 100644
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@@ -321,6 +321,32 @@ static void draw_binary_vec4(ivl_expr_t expr, int stuff_ok_flag)
       }
 }
 
+static void draw_concat_vec4(ivl_expr_t expr, int stuff_ok_flag)
+{
+	/* Emit vvp code for a (possibly replicated) concatenation.
+	   The group is repeated this many times via %replicate. */
+      unsigned repeat = ivl_expr_repeat(expr);
+	/* Number of operand expressions in the concatenation; the
+	   assert below requires at least one. */
+      unsigned num_sube = ivl_expr_parms(expr);
+      unsigned sub_idx;
+
+      assert(num_sube > 0);
+
+	/* Operand 0 is emitted first, so it ends up in the MSBs. */
+      draw_eval_vec4(ivl_expr_parm(expr, 0), stuff_ok_flag);
+
+      for (sub_idx = 1 ; sub_idx < num_sube ; sub_idx += 1) {
+	      /* Each later operand becomes the new LSBs. */
+	    draw_eval_vec4(ivl_expr_parm(expr, sub_idx), stuff_ok_flag);
+	    fprintf(vvp_out, "    %%concat/vec4;\n");
+      }
+
+      if (repeat > 1) {
+	    fprintf(vvp_out, "    %%replicate %u;\n", repeat);
+      }
+}
+
 static void draw_number_vec4(ivl_expr_t expr)
 {
       unsigned long val0 = 0;
@@ -466,6 +492,10 @@ void draw_eval_vec4(ivl_expr_t expr, int stuff_ok_flag)
 	    draw_binary_vec4(expr, stuff_ok_flag);
 	    return;
 
+	  case IVL_EX_CONCAT:
+	    draw_concat_vec4(expr, stuff_ok_flag);
+	    return;
+
 	  case IVL_EX_NUMBER:
 	    draw_number_vec4(expr);
 	    return;
diff --git a/tgt-vvp/vvp_process.c b/tgt-vvp/vvp_process.c
index f4808f669..12d751d56 100644
--- a/tgt-vvp/vvp_process.c
+++ b/tgt-vvp/vvp_process.c
@@ -628,9 +628,11 @@ static int show_stmt_assign_nb(ivl_statement_t net)
 	      if (bit_limit > ivl_lval_width(lval))
 		    bit_limit = ivl_lval_width(lval);
 
-		/* XXXX For now, don't know how to actually split
-		   vectors */
-	      assert(lidx == 0);
+		/* If there are more lvals after this, split off from
+		   the top of the vec4 stack only the bits (lsb) that
+		   we need for the current lval. */
+	      if (lidx+1 < ivl_stmt_lvals(net))
+		    fprintf(vvp_out, "    %%split/vec4 %u;\n", bit_limit);
 	      assign_to_lvector(lval, delay, del, nevents);
 
 	      cur_rbit += bit_limit;
diff --git a/vvp/codes.h b/vvp/codes.h
index 4921b3056..7c7dde565 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -78,6 +78,7 @@ extern bool of_CMPX(vthread_t thr, vvp_code_t code);
 extern bool of_CMPZ(vthread_t thr, vvp_code_t code);
 extern bool of_CONCAT_STR(vthread_t thr, vvp_code_t code);
 extern bool of_CONCATI_STR(vthread_t thr, vvp_code_t code);
+extern bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t code);
 extern bool of_CVT_RS(vthread_t thr, vvp_code_t code);
 extern bool of_CVT_RU(vthread_t thr, vvp_code_t code);
 extern bool of_CVT_RV(vthread_t thr, vvp_code_t code);
@@ -202,6 +203,7 @@ extern bool of_SET_X0_X(vthread_t thr, vvp_code_t code);
 extern bool of_SHIFTL(vthread_t thr, vvp_code_t code);
 extern bool of_SHIFTR(vthread_t thr, vvp_code_t code);
 extern bool of_SHIFTR_S(vthread_t thr, vvp_code_t code);
+extern bool of_SPLIT_VEC4(vthread_t thr, vvp_code_t code);
 extern bool of_STORE_DAR_R(vthread_t thr, vvp_code_t code);
 extern bool of_STORE_DAR_STR(vthread_t thr, vvp_code_t code);
 extern bool of_STORE_OBJ(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index 85b551656..0eee42f11 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -127,7 +127,8 @@ static const struct opcode_table_s opcode_table[] = {
       { "%cmp/z",  of_CMPZ,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%cmpi/s", of_CMPIS,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%cmpi/u", of_CMPIU,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
-      { "%concat/str",of_CONCAT_STR,0,{OA_NONE, OA_NONE,     OA_NONE} },
+      { "%concat/str", of_CONCAT_STR, 0,{OA_NONE,  OA_NONE,  OA_NONE} },
+      { "%concat/vec4",of_CONCAT_VEC4,0,{OA_NONE,  OA_NONE,  OA_NONE} },
       { "%concati/str",of_CONCATI_STR,1,{OA_STRING,OA_NONE,  OA_NONE} },
       { "%cvt/rs", of_CVT_RS, 1,  {OA_BIT1,     OA_NONE,     OA_NONE} },
       { "%cvt/ru", of_CVT_RU, 1,  {OA_BIT1,     OA_NONE,     OA_NONE} },
@@ -249,6 +250,7 @@ static const struct opcode_table_s opcode_table[] = {
       { "%shiftl",   of_SHIFTL,   1, {OA_NUMBER, OA_NONE,   OA_NONE} },
       { "%shiftr",   of_SHIFTR,   1, {OA_NUMBER, OA_NONE,   OA_NONE} },
       { "%shiftr/s", of_SHIFTR_S, 1, {OA_NUMBER, OA_NONE,   OA_NONE} },
+      { "%split/vec4",   of_SPLIT_VEC4,    1, {OA_NUMBER,   OA_NONE, OA_NONE} },
       { "%store/dar/r",  of_STORE_DAR_R,   1, {OA_FUNC_PTR, OA_NONE, OA_NONE} },
       { "%store/dar/str",of_STORE_DAR_STR, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE} },
       { "%store/obj",   of_STORE_OBJ,   1, {OA_FUNC_PTR,OA_NONE, OA_NONE} },
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index cfcced0d9..9d3d42150 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -373,6 +373,12 @@ of it as passing the tail, then the head, concatenating them, and
 pushing the result. The stack starts with two strings in the stack,
 and ends with one string in the stack.
 
+* %concat/vec4
+
+Pop two vec4 vectors, concatenate them, and push the combined
+result. The top of the vec4 stack supplies the LSBs of the result, and
+the next value in the stack supplies the MSBs of the result.
+
 * %cvt/sr <bit-l>
 * %cvt/rs <bit-l>
 
@@ -1149,6 +1155,15 @@ These instructions shift the top value in the vec4 stack left (towards
 MSB) or right, possibly signed. The <idx> is the address of the index
 register that contains the amount to shift.
 
+* %split/vec4 <wid>
+
+Pull the top vec4 vector from the stack and split it into two
+parts. Split off <wid> bits from the LSB, then push the remaining bits
+of the original (the MSB) back to the stack. Then push the split off
+LSB vector.
+
+The <wid> must be less than the width of the original, unsplit vector.
+
 * %store/obj <var-label>
 
 This pops the top of the object stack and writes it to the object
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 951cd50dc..b44424210 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -2132,6 +2132,22 @@ bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
+/*
+ * %concat/vec4 -- pop the LSB part, then the MSB part; push {MSB, LSB}.
+ */
+bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t)
+{
+      vvp_vector4_t lsb = thr->pop_vec4(); // first pop: low-order part
+      vvp_vector4_t msb = thr->pop_vec4(); // second pop: high-order part
+
+      vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X);
+      res.set_vec(0, lsb);          // lsb starts at bit 0
+      res.set_vec(lsb.size(), msb); // msb starts just above lsb
+
+      thr->push_vec4(res);
+      return true;
+}
+
 bool of_CVT_RS(vthread_t thr, vvp_code_t cp)
 {
       int64_t r = thr->words[cp->bit_idx[0]].w_int;
@@ -5792,6 +5808,23 @@ bool of_SHIFTR_S(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
+/*
+ * %split/vec4 <wid> -- pop a vec4; push its MSB part, then its low <wid> bits.
+ */
+bool of_SPLIT_VEC4(vthread_t thr, vvp_code_t cp)
+{
+      unsigned lsb_wid = cp->number; // <wid> operand of the instruction
+
+      vvp_vector4_t val = thr->pop_vec4();
+      assert(lsb_wid < val.size()); // strict: the MSB part must be non-empty
+
+      vvp_vector4_t lsb = val.subvalue(0, lsb_wid);
+      vvp_vector4_t msb = val.subvalue(lsb_wid, val.size()-lsb_wid);
+
+      thr->push_vec4(msb);
+      thr->push_vec4(lsb); // pushed last, so lsb ends up on top
+      return true;
+}
 
 bool of_STORE_DAR_R(vthread_t thr, vvp_code_t cp)
 {