diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c
index d4f341cd7..b18761c98 100644
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@@ -2109,7 +2109,6 @@ static struct vector_info draw_select_signal(ivl_expr_t sube,
 {
       ivl_signal_t sig = ivl_expr_signal(sube);
       struct vector_info res;
-      unsigned idx;
 
 	/* Use this word of the signal. */
       unsigned use_word = 0;
@@ -2153,23 +2152,23 @@ static struct vector_info draw_select_signal(ivl_expr_t sube,
 	    return res;
       }
 
-      draw_eval_expr_into_integer(bit_idx, 0);
-
 	/* Alas, do it the hard way. */
+
+      draw_eval_expr_into_integer(bit_idx, 1);
+
       res.base = allocate_vector(wid);
       res.wid = wid;
       assert(res.base);
 
-      for (idx = 0 ;  idx < res.wid ;  idx += 1) {
-	    if (idx >= bit_wid) {
-		  fprintf(vvp_out, "   %%movi %u, 0, %u; Pad from %u to %u\n",
-			  res.base+idx, res.wid-idx,
-			  ivl_expr_width(sube), wid);
-		  break;
-	    }
-	    fprintf(vvp_out, "   %%load/x.p %u, v%p_%u, 0;\n",
-		    res.base+idx, sig, use_word);
-      }
+      unsigned use_wid = res.wid;
+      if (use_wid > bit_wid)
+	    use_wid = bit_wid;
+
+      fprintf(vvp_out, "   %%load/x1p %u, v%p_%u, %u;\n",
+	      res.base, sig, use_word, use_wid);
+      if (use_wid < res.wid)
+	    fprintf(vvp_out, "   %%movi %u, 0, %u;\n",
+		    res.base + use_wid, res.wid - use_wid);
 
       return res;
 }
diff --git a/vvp/codes.h b/vvp/codes.h
index 26fa1bf0d..37341e6a1 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -101,7 +101,7 @@ extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
-extern bool of_LOAD_XP(vthread_t thr, vvp_code_t code);
+extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
 extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MOD(vthread_t thr, vvp_code_t code);
 extern bool of_MOD_S(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index 2c163df84..45919b724 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -144,7 +144,7 @@ const static struct opcode_table_s opcode_table[] = {
       { "%load/v", of_LOAD_VEC,3, {OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
       { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
       { "%load/wr",of_LOAD_WR,2,  {OA_BIT1,     OA_VPI_PTR,  OA_BIT2} },
-      { "%load/x.p",of_LOAD_XP, 3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
+      { "%load/x1p",of_LOAD_X1P,3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
       { "%loadi/wr",of_LOADI_WR,3,{OA_BIT1,     OA_NUMBER,   OA_BIT2} },
       { "%mod",    of_MOD,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%mod/s",  of_MOD_S,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index 735a08d95..3a8935a94 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -455,17 +455,17 @@ small vector with a large immediate offset indexing an array.
 This instruction reads a real value from the vpi-like object to a word
 register.
 
-* %load/x.p <bit>, <functor-label>, <idx>
+* %load/x1p <bit>, <functor-label>, <wid>
 
-This is an indexed load. It uses the contents of the specified index
-register to select a bit from a vector functor at <functor-label>. The
-bit is pulled from the indexed bit of the addressed functor and loaded
-into the destination thread bit. If the indexed value is beyond the
-width of the vector, then the result is X.
+This is an indexed load. It uses the contents of index register 1 to
+select a part from a vector functor at <functor-label>. The
+part is pulled starting at the indexed bit of the addressed functor and
+loaded into the thread bits starting at <bit>. The <wid> is the width of
+the part. If any bit of the desired value is outside the vector, then
+that bit is set to X. The index register 1 is interpreted as a signed value.
 
-The %load/x.p is the same, but when the operation is done, it
-increments the specified index register. This provides a basic
-auto-increment feature.
+The index register 1 is only read by this instruction; it is not
+modified, so there is no auto-increment after the load.
 
 * %loadi/wr <bit>, <mant>, <exp>
 
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index dd4706977..b20131ea1 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -2558,20 +2558,19 @@ bool of_LOAD_WR(vthread_t thr, vvp_code_t cp)
 }
 
 /*
- * %load/x <bit>, <functor>, <index>
+ * %load/x1p <bit>, <functor>, <wid>
  *
  * <bit> is the destination thread bit and must be >= 4.
  */
-static bool of_LOAD_X(vthread_t thr, vvp_code_t cp)
+bool of_LOAD_X1P(vthread_t thr, vvp_code_t cp)
 {
 	// <bit> is the thread bit to load
       assert(cp->bit_idx[0] >= 4);
       unsigned bit = cp->bit_idx[0];
+      int wid = cp->bit_idx[1];
 
-	// <index> is the index register to use. The actual index into
-	// the vector is the value of the index register.
-      unsigned index_idx = cp->bit_idx[1];
-      unsigned index = thr->words[index_idx].w_int;
+	// <index> is the canonical base address of the part select.
+      long index = thr->words[1].w_int;
 
 	// <functor> is converted to a vvp_net_t pointer from which we
 	// read our value.
@@ -2582,20 +2581,16 @@ static bool of_LOAD_X(vthread_t thr, vvp_code_t cp)
       vvp_fun_signal_vec*sig = dynamic_cast<vvp_fun_signal_vec*> (net->fun);
       assert(sig);
 
-      vvp_bit4_t val = index >= sig->size()? BIT4_X : sig->value(index);
-      thr_put_bit(thr, bit, val);
+      for (long idx = 0 ; idx < wid ; idx += 1) {
+	    long use_index = index + idx;
+	    vvp_bit4_t val;
+	    if (use_index < 0 || use_index >= sig->size())
+		  val = BIT4_X;
+	    else
+		  val = sig->value(use_index);
 
-      return true;
-}
-
-bool of_LOAD_XP(vthread_t thr, vvp_code_t cp)
-{
-	// First do the normal handling of the %load/x
-      of_LOAD_X(thr, cp);
-
-	// Now do the post-increment
-      unsigned index_idx = cp->bit_idx[1];
-      thr->words[index_idx].w_int += 1;
+	    thr_put_bit(thr, bit+idx, val);
+      }
 
       return true;
 }