diff --git a/tgt-vvp/draw_ufunc.c b/tgt-vvp/draw_ufunc.c
index c7fc80081..3cc547e12 100644
--- a/tgt-vvp/draw_ufunc.c
+++ b/tgt-vvp/draw_ufunc.c
@@ -24,7 +24,6 @@
 
 static void function_argument_logic(ivl_signal_t port, ivl_expr_t expr)
 {
-      struct vector_info res;
       unsigned ewidth, pwidth;
 
 	/* ports cannot be arrays. */
@@ -32,16 +31,12 @@ static void function_argument_logic(ivl_signal_t port, ivl_expr_t expr)
 
       ewidth = ivl_expr_width(expr);
       pwidth = ivl_signal_width(port);
-	/* Just like a normal assignment the function arguments need to
-	 * be evaluated at either their width or the argument width if
-	 * it is larger. */
-      if (ewidth < pwidth) ewidth = pwidth;
-      res = draw_eval_expr_wid(expr, ewidth, 0);
 
-	/* We could have extra bits so only select the ones we need. */
-      fprintf(vvp_out, "    %%set/v v%p_0, %u, %u;\n", port, res.base, pwidth);
+      draw_eval_vec4(expr, 0);
+      if (ewidth < pwidth)
+	    fprintf(vvp_out, "    %%pad/u %u;\n", pwidth);
 
-      clr_vector(res);
+      fprintf(vvp_out, "    %%store/vec4 v%p_0, %u;\n", port, pwidth);
 }
 
 static void function_argument_real(ivl_signal_t port, ivl_expr_t expr)
@@ -153,13 +148,10 @@ static void draw_ufunc_epilogue(ivl_expr_t expr)
  * parameter 0 of the function definition.
  */
 
-struct vector_info draw_ufunc_expr(ivl_expr_t expr, unsigned wid)
+void draw_ufunc_vec4(ivl_expr_t expr)
 {
-      unsigned swid = ivl_expr_width(expr);
       ivl_scope_t def = ivl_expr_def(expr);
       ivl_signal_t retval = ivl_scope_port(def, 0);
-      struct vector_info res;
-      unsigned load_wid;
 
 	/* Take in arguments to function and call function code. */
       draw_ufunc_preamble(expr);
@@ -167,36 +159,10 @@ struct vector_info draw_ufunc_expr(ivl_expr_t expr, unsigned wid)
 	/* Fresh basic block starts after the join. */
       clear_expression_lookaside();
 
-	/* The return value is in a signal that has the name of the
-	   expression. Load that into the thread and return the
-	   vector result. */
-
-      res.base = allocate_vector(wid);
-      res.wid  = wid;
-      if (res.base == 0) {
-	    fprintf(stderr, "%s:%u: vvp.tgt error: "
-		    "Unable to allocate %u thread bits for function result.\n",
-		    ivl_expr_file(expr), ivl_expr_lineno(expr), wid);
-	    vvp_errors += 1;
-	    return res;
-      }
-
-      assert(res.base != 0);
-
-      load_wid = swid;
-      if (load_wid > ivl_signal_width(retval))
-	    load_wid = ivl_signal_width(retval);
-
       assert(ivl_signal_dimensions(retval) == 0);
-      fprintf(vvp_out, "    %%load/v  %u, v%p_0, %u;\n",
-	      res.base, retval, load_wid);
-
-	/* Pad the signal value with zeros. */
-      if (load_wid < wid)
-	    pad_expr_in_place(expr, res, swid);
+      fprintf(vvp_out, "    %%load/vec4  v%p_0;\n", retval);
 
       draw_ufunc_epilogue(expr);
-      return res;
 }
 
 void draw_ufunc_real(ivl_expr_t expr)
diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c
index c6f50038d..0f6988998 100644
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@@ -3685,11 +3685,11 @@ struct vector_info draw_eval_expr_wid(ivl_expr_t expr, unsigned wid,
 	  case IVL_EX_SFUNC:
 	    res = draw_sfunc_expr(expr, wid);
 	    break;
-
+#if 0
 	  case IVL_EX_UFUNC:
 	    res = draw_ufunc_expr(expr, wid);
 	    break;
-
+#endif
 	  case IVL_EX_UNARY:
 	    res = draw_unary_expr(expr, wid);
 	    break;
diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c
index 154d66fa7..8e3fdcee7 100644
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@@ -157,6 +157,29 @@ static void draw_binary_vec4_compare(ivl_expr_t expr, int stuff_ok_flag)
       }
 }
 
+static void draw_binary_vec4_land(ivl_expr_t expr, int stuff_ok_flag)
+{
+      ivl_expr_t le = ivl_expr_oper1(expr);
+      ivl_expr_t re = ivl_expr_oper2(expr);
+
+	/* Logical &&: push the left operand and, if it is wider than
+	   one bit, collapse it to a single bit with %or/r. */
+      draw_eval_vec4(le, STUFF_OK_XZ);
+      if (ivl_expr_width(le) > 1)
+	    fprintf(vvp_out, "    %%or/r;\n");
+
+	/* Now push the right operand and reduce it the same way. The
+	   stuff_ok_flag argument is unused; both get STUFF_OK_XZ. */
+      draw_eval_vec4(re, STUFF_OK_XZ);
+      if (ivl_expr_width(re) > 1)
+	    fprintf(vvp_out, "    %%or/r;\n");
+
+      fprintf(vvp_out, "    %%and;\n"); /* combine the two operands */
+
+      if (ivl_expr_width(expr) > 1) /* widen the result to the expr width */
+	    fprintf(vvp_out, "    %%pad/u %u;\n", ivl_expr_width(expr));
+}
+
 static void draw_binary_vec4_le_real(ivl_expr_t expr)
 {
       ivl_expr_t le = ivl_expr_oper1(expr);
@@ -312,6 +335,10 @@ static void draw_binary_vec4_lrs(ivl_expr_t expr, int stuff_ok_flag)
 static void draw_binary_vec4(ivl_expr_t expr, int stuff_ok_flag)
 {
       switch (ivl_expr_opcode(expr)) {
+	  case 'a': /* Logical && */
+	    draw_binary_vec4_land(expr, stuff_ok_flag);
+	    break;
+
 	  case '+':
 	  case '-':
 	  case '*':
@@ -618,6 +645,10 @@ void draw_eval_vec4(ivl_expr_t expr, int stuff_ok_flag)
 	    draw_ternary_vec4(expr, stuff_ok_flag);
 	    return;
 
+	  case IVL_EX_UFUNC:
+	    draw_ufunc_vec4(expr);
+	    return;
+
 	  case IVL_EX_UNARY:
 	    draw_unary_vec4(expr, stuff_ok_flag);
 	    return;
diff --git a/tgt-vvp/vvp_priv.h b/tgt-vvp/vvp_priv.h
index d390b0b40..7621a1471 100644
--- a/tgt-vvp/vvp_priv.h
+++ b/tgt-vvp/vvp_priv.h
@@ -104,7 +104,7 @@ extern int draw_scope(ivl_scope_t scope, ivl_scope_t parent);
 
 extern void draw_lpm_mux(ivl_lpm_t net);
 
-extern struct vector_info draw_ufunc_expr(ivl_expr_t expr, unsigned wid);
+extern void draw_ufunc_vec4(ivl_expr_t expr);
 extern void draw_ufunc_real(ivl_expr_t expr);
 extern void draw_ufunc_string(ivl_expr_t expr);
 extern void draw_ufunc_object(ivl_expr_t expr);
diff --git a/tgt-vvp/vvp_process.c b/tgt-vvp/vvp_process.c
index 0f82915f7..c18a39af4 100644
--- a/tgt-vvp/vvp_process.c
+++ b/tgt-vvp/vvp_process.c
@@ -1654,11 +1654,15 @@ static int show_stmt_repeat(ivl_statement_t net, ivl_scope_t sscope)
 	/* Calculate the repeat count onto the top of the vec4 stack. */
       draw_eval_vec4(expr, STUFF_OK_XZ);
 
-	/* Test that 0 < expr */
+	/* Test that 0 < expr, escape if expr <= 0. If the expr is
+	   unsigned, then we only need to try to escape if expr==0 as
+	   it will never be <0. */
       fprintf(vvp_out, "T_%u.%u %%dup/vec4;\n", thread_count, lab_top);
       fprintf(vvp_out, "    %%pushi/vec4 0, 0, %u;\n", ivl_expr_width(expr));
       fprintf(vvp_out, "    %%cmp/%s;\n", sign);
-      fprintf(vvp_out, "    %%jmp/1xz T_%u.%u, 5;\n", thread_count, lab_out);
+      if (ivl_expr_signed(expr))
+	    fprintf(vvp_out, "    %%jmp/1xz T_%u.%u, 5;\n", thread_count, lab_out);
+      fprintf(vvp_out, "    %%jmp/1 T_%u.%u, 4;\n", thread_count, lab_out);
 	/* This adds -1 (all ones in 2's complement) to the count. */
       fprintf(vvp_out, "    %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(expr));
       fprintf(vvp_out, "    %%sub;\n");