From 604a62379c941d6f62c1086907a9c568710d5918 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sun, 31 Jan 2016 15:29:52 -0800 Subject: [PATCH] Make .scope aware of return type, and extend %ret/vec4 operands The .scope needs to be aware of return types so that the %callf/vec4 opcode knows how to initialize the return value. We also need to extend the %ret/vec4 to support writing parts of the return value. --- ivl_target.h | 10 ++++++ t-dll-api.cc | 23 +++++++++++++ t-dll.cc | 27 +++++++++++++-- t-dll.h | 5 +++ tgt-vvp/stmt_assign.c | 71 ++++++++++++++++++++++++++++++--------- tgt-vvp/vvp_scope.c | 29 ++++++++++++++-- vvp/compile.cc | 2 +- vvp/opcodes.txt | 14 ++++++-- vvp/vpi_priv.h | 29 ++++++++++++++-- vvp/vpi_scope.cc | 78 ++++++++++++++++++++++++++++--------------- vvp/vthread.cc | 70 ++++++++++++++++++++++++++++++++++---- 11 files changed, 300 insertions(+), 58 deletions(-) diff --git a/ivl_target.h b/ivl_target.h index 1d44f3a5c..a5d27ed80 100644 --- a/ivl_target.h +++ b/ivl_target.h @@ -1745,6 +1745,13 @@ extern unsigned ivl_parameter_lineno(ivl_parameter_t net); * ivl_scope_lineno * Returns the instantiation file and line for this scope. * + * ivl_scope_func_type + * ivl_scope_func_signed + * ivl_scope_func_width + * + * If the scope is a function, these functions can be used to get + * the type of the return value. + * * ivl_scope_is_auto * Is the task or function declared to be automatic? 
* @@ -1863,6 +1870,9 @@ extern const char* ivl_scope_tname(ivl_scope_t net); extern int ivl_scope_time_precision(ivl_scope_t net); extern int ivl_scope_time_units(ivl_scope_t net); +extern ivl_variable_type_t ivl_scope_func_type(ivl_scope_t net); +extern int ivl_scope_func_signed(ivl_scope_t net); +extern unsigned ivl_scope_func_width(ivl_scope_t net); /* SIGNALS * Signals are named things in the Verilog source, like wires and diff --git a/t-dll-api.cc b/t-dll-api.cc index 79bc73a2b..d74938e4b 100644 --- a/t-dll-api.cc +++ b/t-dll-api.cc @@ -2043,6 +2043,29 @@ extern "C" const char*ivl_scope_file(ivl_scope_t net) return net->file.str(); } +extern "C" ivl_variable_type_t ivl_scope_func_type(ivl_scope_t net) +{ + assert(net); + assert(net->type_ == IVL_SCT_FUNCTION); + return net->func_type; +} + +extern "C" int ivl_scope_func_signed(ivl_scope_t net) +{ + assert(net); + assert(net->type_ == IVL_SCT_FUNCTION); + assert(net->func_type == IVL_VT_LOGIC); + return net->func_signed? !0 : 0; +} + +extern "C" unsigned ivl_scope_func_width(ivl_scope_t net) +{ + assert(net); + assert(net->type_ == IVL_SCT_FUNCTION); + assert(net->func_type == IVL_VT_LOGIC); + return net->func_width; +} + extern "C" unsigned ivl_scope_is_auto(ivl_scope_t net) { assert(net); diff --git a/t-dll.cc b/t-dll.cc index c3ad49d0e..067826466 100644 --- a/t-dll.cc +++ b/t-dll.cc @@ -103,6 +103,13 @@ inline const char*dlerror(void) { return strerror( errno ); } #endif +ivl_scope_s::ivl_scope_s() +{ + func_type = IVL_VT_NO_TYPE; + func_signed = false; + func_width = 0; +} + /* * The custom new operator for the ivl_nexus_s type allows us to * allocate nexus objects in blocks. 
There are generally lots of them @@ -589,6 +596,21 @@ void dll_target::make_scope_param_expr(ivl_parameter_t cur_par, NetExpr*etmp) expr_ = 0; } +static void fill_in_scope_function(ivl_scope_t scope, const NetScope*net) +{ + scope->type_ = IVL_SCT_FUNCTION; + const NetFuncDef*def = net->func_def(); + assert(def); + + const NetNet*return_sig = def->return_sig(); + assert(return_sig); + + scope->tname_ = def->scope()->basename(); + scope->func_type = return_sig->data_type(); + scope->func_signed = return_sig->get_signed(); + scope->func_width = return_sig->vector_width(); +} + void dll_target::add_root(const NetScope *s) { ivl_scope_t root_ = new struct ivl_scope_s; @@ -635,7 +657,7 @@ void dll_target::add_root(const NetScope *s) } break; case NetScope::FUNC: - root_->type_ = IVL_SCT_FUNCTION; + fill_in_scope_function(root_, s); break; default: assert(0); @@ -2508,8 +2530,7 @@ void dll_target::scope(const NetScope*net) break; } case NetScope::FUNC: - scop->type_ = IVL_SCT_FUNCTION; - scop->tname_ = net->func_def()->scope()->basename(); + fill_in_scope_function(scop, net); break; case NetScope::BEGIN_END: scop->type_ = IVL_SCT_BEGIN; diff --git a/t-dll.h b/t-dll.h index 97ed7607f..a94137383 100644 --- a/t-dll.h +++ b/t-dll.h @@ -653,6 +653,8 @@ struct ivl_process_s { * there. */ struct ivl_scope_s { + ivl_scope_s(); + ivl_scope_t parent; std::map children; // This is just like the children map above, but in vector @@ -686,6 +688,9 @@ struct ivl_scope_s { /* Scopes that are tasks/functions have a definition. 
*/ ivl_statement_t def; unsigned is_auto; + ivl_variable_type_t func_type; + bool func_signed; + unsigned func_width; unsigned is_cell; diff --git a/tgt-vvp/stmt_assign.c b/tgt-vvp/stmt_assign.c index a28f04ff5..c0f4032de 100644 --- a/tgt-vvp/stmt_assign.c +++ b/tgt-vvp/stmt_assign.c @@ -202,6 +202,49 @@ static void get_vec_from_lval(ivl_statement_t net, struct vec_slice_info*slices) } +static void put_vec_to_ret_slice(ivl_signal_t sig, struct vec_slice_info*slice, + unsigned wid) +{ + int part_off_idx; + + /* If the slice of the l-value is a BOOL variable, then cast + the data to a BOOL vector so that the stores can be valid. */ + if (ivl_signal_data_type(sig) == IVL_VT_BOOL) { + fprintf(vvp_out, " %%cast2;\n"); + } + + switch (slice->type) { + default: + fprintf(vvp_out, " ; XXXX slice->type=%d\n", slice->type); + assert(0); + break; + + case SLICE_SIMPLE_VECTOR: + assert(slice->u_.simple_vector.use_word == 0); + fprintf(vvp_out, " %%ret/vec4 0, 0, %u;\n", wid); + break; + + case SLICE_PART_SELECT_STATIC: + part_off_idx = allocate_word(); + fprintf(vvp_out, " %%ix/load %d, %lu, 0;\n", + part_off_idx, slice->u_.part_select_static.part_off); + fprintf(vvp_out, " %%flag_set/imm 4, 0;\n"); + fprintf(vvp_out, " %%ret/vec4 0, %d, %u;\n", part_off_idx, wid); + clr_word(part_off_idx); + break; + + case SLICE_PART_SELECT_DYNAMIC: + fprintf(vvp_out, " %%flag_mov 4, %u;\n", + slice->u_.part_select_dynamic.x_flag); + fprintf(vvp_out, " %%ret/vec4 0, %d, %u;\n", + slice->u_.part_select_dynamic.word_idx_reg, wid); + clr_word(slice->u_.part_select_dynamic.word_idx_reg); + clr_flag(slice->u_.part_select_dynamic.x_flag); + break; + + } +} + static void put_vec_to_lval_slice(ivl_lval_t lval, struct vec_slice_info*slice, unsigned wid) { @@ -214,9 +257,7 @@ static void put_vec_to_lval_slice(ivl_lval_t lval, struct vec_slice_info*slice, and the scope is a function, then this is an assign to a return value and should be handled differently. 
*/ if (signal_is_return_value(sig)) { - assert(ivl_signal_dimensions(sig) == 0); - fprintf(vvp_out, " %%ret/vec4 0; Assign to %s\n", - ivl_signal_basename(sig)); + put_vec_to_ret_slice(sig, slice, wid); return; } @@ -356,16 +397,6 @@ static void store_vec4_to_lval(ivl_statement_t net) if (lidx+1 < ivl_stmt_lvals(net)) fprintf(vvp_out, " %%split/vec4 %u;\n", lwid); - /* Special Case: If the l-value signal is named after its scope, - and the scope is a function, then this is an assign to a return - value and should be handled differently. */ - if (signal_is_return_value(lsig)) { - assert(ivl_signal_dimensions(lsig) == 0); - fprintf(vvp_out, " %%ret/vec4 0; Assign to %s (store_vec4_to_lval)\n", - ivl_signal_basename(lsig)); - continue; - } - if (word_ex) { /* Handle index into an array */ int word_index = allocate_word(); @@ -396,7 +427,10 @@ static void store_vec4_to_lval(ivl_statement_t net) draw_eval_expr_into_integer(part_off_ex, offset_index); /* Note that flag4 is set by the eval above. */ assert(lsig); - if (ivl_signal_type(lsig)==IVL_SIT_UWIRE) { + if (signal_is_return_value(lsig)) { + fprintf(vvp_out, " %%ret/vec4 0, %d, %u; Assign to %s (store_vec4_to_lval)\n", + offset_index, lwid, ivl_signal_basename(lsig)); + } else if (ivl_signal_type(lsig)==IVL_SIT_UWIRE) { fprintf(vvp_out, " %%force/vec4/off v%p_0, %d;\n", lsig, offset_index); } else { @@ -411,6 +445,7 @@ static void store_vec4_to_lval(ivl_statement_t net) member. We will use a property assign function. */ assert(!lsig); + assert(!signal_is_return_value(lsig)); ivl_type_t sub_type = draw_lval_expr(nest); assert(ivl_type_base(sub_type) == IVL_VT_CLASS); fprintf(vvp_out, " %%store/prop/v %d, %u;\n", @@ -421,7 +456,13 @@ static void store_vec4_to_lval(ivl_statement_t net) /* No offset expression, so use simpler store function. 
*/ assert(lsig); assert(lwid == ivl_signal_width(lsig)); - fprintf(vvp_out, " %%store/vec4 v%p_0, 0, %u;\n", lsig, lwid); + if (signal_is_return_value(lsig)) { + fprintf(vvp_out, " %%ret/vec4 0, 0, %u; Assign to %s (store_vec4_to_lval)\n", + lwid, ivl_signal_basename(lsig)); + } else { + fprintf(vvp_out, " %%store/vec4 v%p_0, 0, %u;\n", + lsig, lwid); + } } } } diff --git a/tgt-vvp/vvp_scope.c b/tgt-vvp/vvp_scope.c index 51a2e2615..56c3ca928 100644 --- a/tgt-vvp/vvp_scope.c +++ b/tgt-vvp/vvp_scope.c @@ -2235,6 +2235,9 @@ int draw_scope(ivl_scope_t net, ivl_scope_t parent) const char *type; const char*prefix = ivl_scope_is_auto(net) ? "auto" : ""; + char suffix[32]; + + suffix[0] = 0; switch (ivl_scope_type(net)) { case IVL_SCT_MODULE: type = "module"; break; @@ -2248,8 +2251,30 @@ int draw_scope(ivl_scope_t net, ivl_scope_t parent) default: type = "?"; assert(0); } - fprintf(vvp_out, "S_%p .scope %s%s, \"%s\" \"%s\" %u %u", - net, prefix, type, + if (ivl_scope_type(net)==IVL_SCT_FUNCTION) { + switch (ivl_scope_func_type(net)) { + case IVL_VT_LOGIC: + snprintf(suffix, sizeof suffix, ".vec4.%c%u", + ivl_scope_func_signed(net)? 
's' : 'u', + ivl_scope_func_width(net)); + break; + case IVL_VT_REAL: + snprintf(suffix, sizeof suffix, ".real"); + break; + case IVL_VT_STRING: + snprintf(suffix, sizeof suffix, ".str"); + break; + case IVL_VT_CLASS: + snprintf(suffix, sizeof suffix, ".obj"); + break; + default: + assert(0); + break; + } + } + + fprintf(vvp_out, "S_%p .scope %s%s%s, \"%s\" \"%s\" %u %u", + net, prefix, type, suffix, vvp_mangle_name(ivl_scope_basename(net)), vvp_mangle_name(ivl_scope_tname(net)), ivl_file_table_index(ivl_scope_file(net)), diff --git a/vvp/compile.cc b/vvp/compile.cc index 2191cd2a6..3bb461e0d 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -250,7 +250,7 @@ static const struct opcode_table_s opcode_table[] = { { "%release/wr", of_RELEASE_WR, 2,{OA_FUNC_PTR,OA_BIT1,OA_NONE} }, { "%replicate", of_REPLICATE, 1,{OA_NUMBER, OA_NONE,OA_NONE} }, { "%ret/real", of_RET_REAL, 1,{OA_NUMBER, OA_NONE,OA_NONE} }, - { "%ret/vec4", of_RET_VEC4, 1,{OA_NUMBER, OA_NONE,OA_NONE} }, + { "%ret/vec4", of_RET_VEC4, 3,{OA_NUMBER, OA_BIT1,OA_BIT2} }, { "%retload/real",of_RETLOAD_REAL,1,{OA_NUMBER, OA_NONE,OA_NONE} }, { "%retload/vec4",of_RETLOAD_VEC4,1,{OA_NUMBER, OA_NONE,OA_NONE} }, { "%scopy", of_SCOPY, 0, {OA_NONE, OA_NONE, OA_NONE} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index 6403fc969..e7c1d50ae 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -1007,7 +1007,7 @@ See also the %concat instruction. * %ret/obj <index> * %ret/real <index> * %ret/str <index> -* %ret/vec4 <index> +* %ret/vec4 <index>, <offset>, <wid> Write a value to the indexed function argument. The value is popped from the appropriate stack and written into the argument. The return @@ -1016,6 +1016,14 @@ for example to store the return value for a real function, use "%ret/real 0;". It is up to the function caller to set up the argument references. +The %ret/vec4 opcode works very much like the %store/vec4 opcode. The +<offset> and <wid> operands are the offset and width of the subvector of +the destination value that is written by the value popped from the 
Off the is zero, then the literal offset is +zero. If the is non-zero, then it selects an index register +that contains the actual offset. In this case, flag-4 is tested, and +if not 1, the assign is suppressed. + * %retload/vec4 Read a value from the indexed function argument. The value is read @@ -1137,7 +1145,9 @@ popped off the top of the stack and written to the variable. The value is then optionally truncated to bits and assigned to the variable. It is an error for the value to be fewer then bits. The is the index register that contains a part offset -for writing into a part of the variable. +for writing into a part of the variable. If the is 0, then +use the literal value 0 instead of getting an offset from index +register 0. The %store/vec4a is similar, but the target is an array of vec4, the is an index register that contains the canonical address, and diff --git a/vvp/vpi_priv.h b/vvp/vpi_priv.h index e09d762dc..027754aa4 100644 --- a/vvp/vpi_priv.h +++ b/vvp/vpi_priv.h @@ -282,17 +282,42 @@ class __vpiScope : public __vpiHandle { signed int time_precision :8; protected: - __vpiScope(const char*nam, const char*tnam); + __vpiScope(const char*nam, const char*tnam, bool is_auto_flag =false); private: /* The scope has a name. 
*/ const char*name_; const char*tname_; - protected: /* the scope may be "automatic" */ bool is_automatic_; }; +class vpiScopeFunction : public __vpiScope { + public: + inline vpiScopeFunction(const char*nam, const char*tnam, + bool auto_flag, int func_type, unsigned func_wid) + : __vpiScope(nam,tnam, auto_flag), func_type_(func_type), func_wid_(func_wid) + { } + + int get_type_code(void) const { return vpiFunction; } + int vpi_get(int code) + { + switch (code) { + case vpiFuncType: + return func_type_; + default: + return __vpiScope::vpi_get(code); + } + } + + public: + inline unsigned get_func_width(void) const { return func_wid_; } + + private: + int func_type_; + unsigned func_wid_; +}; + extern __vpiScope* vpip_peek_current_scope(void); extern void vpip_attach_to_scope(__vpiScope*scope, vpiHandle obj); extern void vpip_attach_to_current_scope(vpiHandle obj); diff --git a/vvp/vpi_scope.cc b/vvp/vpi_scope.cc index ab4b56d75..2cc862a34 100644 --- a/vvp/vpi_scope.cc +++ b/vvp/vpi_scope.cc @@ -327,8 +327,8 @@ static vpiHandle module_iter(int code, vpiHandle obj) } -__vpiScope::__vpiScope(const char*nam, const char*tnam) -: is_automatic_(false) +__vpiScope::__vpiScope(const char*nam, const char*tnam, bool auto_flag) +: is_automatic_(auto_flag) { name_ = vpip_name_string(nam); tname_ = vpip_name_string(tnam? 
tnam : ""); @@ -376,7 +376,7 @@ vpiHandle __vpiScope::vpi_iterate(int code) class vpiScopeModule : public __vpiScope { public: inline vpiScopeModule(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { } + : __vpiScope(nam,tnam,false) { } int get_type_code(void) const { return vpiModule; } }; @@ -394,34 +394,20 @@ struct vpiScopeTask : public __vpiScope { struct vpiScopeTaskAuto : public __vpiScope { inline vpiScopeTaskAuto(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { is_automatic_=true; } + : __vpiScope(nam,tnam,true) { } int get_type_code(void) const { return vpiTask; } }; -class vpiScopeFunction : public __vpiScope { - public: - inline vpiScopeFunction(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { } - int get_type_code(void) const { return vpiFunction; } -}; - -class vpiScopeFunctionAuto : public __vpiScope { - public: - inline vpiScopeFunctionAuto(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { is_automatic_=true; } - int get_type_code(void) const { return vpiFunction; } -}; - struct vpiScopeBegin : public __vpiScope { inline vpiScopeBegin(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { } + : __vpiScope(nam,tnam,false) { } int get_type_code(void) const { return vpiNamedBegin; } }; class vpiScopeBeginAuto : public __vpiScope { public: inline vpiScopeBeginAuto(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { is_automatic_=true; } + : __vpiScope(nam,tnam,true) { } int get_type_code(void) const { return vpiNamedBegin; } }; @@ -433,14 +419,14 @@ struct vpiScopeGenerate : public __vpiScope { struct vpiScopeFork : public __vpiScope { inline vpiScopeFork(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { } + : __vpiScope(nam,tnam,false) { } int get_type_code(void) const { return vpiNamedFork; } }; class vpiScopeForkAuto : public __vpiScope { public: inline vpiScopeForkAuto(const char*nam, const char*tnam) - : __vpiScope(nam,tnam) { is_automatic_=true; } + : __vpiScope(nam,tnam,true) { } int 
get_type_code(void) const { return vpiNamedFork; } }; @@ -475,14 +461,54 @@ compile_scope_decl(char*label, char*type, char*name, char*tname, long def_file_idx, long def_lineno, long is_cell) { count_vpi_scopes += 1; + char sign_flag; + unsigned wid; __vpiScope*scope; if (strcmp(type,"module") == 0) { scope = new vpiScopeModule(name, tname); - } else if (strcmp(type,"function") == 0) { - scope = new vpiScopeFunction(name, tname); - } else if (strcmp(type,"autofunction") == 0) { - scope = new vpiScopeFunctionAuto(name, tname); + } else if ( sscanf(type, "function.vec4.%c%u", &sign_flag, &wid) == 2 ) { + int type_code; + if (sign_flag=='s') { + type_code = vpiSizedSignedFunc; + } else if (sign_flag=='u') { + type_code = vpiSizedFunc; + } else if (sign_flag=='i') { + type_code = vpiIntFunc; + } else { + assert(0); + type_code = vpiSizedFunc; + } + scope = new vpiScopeFunction(name, tname, false, type_code, wid); + + } else if ( sscanf(type, "autofunction.vec4.%c%u", &sign_flag, &wid) == 2 ) { + int type_code; + switch (sign_flag) { + case 's': + type_code = vpiSizedSignedFunc; + break; + case 'u': + type_code = vpiSizedFunc; + break; + default: + assert(0); + type_code = vpiSizedFunc; + break; + } + scope = new vpiScopeFunction(name, tname, true, type_code, wid); + + } else if (strcmp(type,"function.obj") == 0) { + scope = new vpiScopeFunction(name, tname, false, vpiSizedFunc, 0); + } else if (strcmp(type,"autofunction.obj") == 0) { + scope = new vpiScopeFunction(name, tname, true, vpiSizedFunc, 0); + } else if (strcmp(type,"function.real") == 0) { + scope = new vpiScopeFunction(name, tname, false, vpiRealFunc, 0); + } else if (strcmp(type,"autofunction.real") == 0) { + scope = new vpiScopeFunction(name, tname, true, vpiRealFunc, 0); + } else if (strcmp(type,"function.str") == 0) { + scope = new vpiScopeFunction(name, tname, false, vpiSizedFunc, 0); + } else if (strcmp(type,"autofunction.str") == 0) { + scope = new vpiScopeFunction(name, tname, true, vpiSizedFunc, 0); 
} else if (strcmp(type,"task") == 0) { scope = new vpiScopeTask(name, tname); } else if (strcmp(type,"autotask") == 0) { diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 4725d8b0b..0bc396471 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -1356,9 +1356,12 @@ bool of_CALLF_VEC4(vthread_t thr, vvp_code_t cp) { vthread_t child = vthread_new(cp->cptr2, cp->scope); + vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(cp->scope); + assert(scope_func); + // This is the return value. Push a place-holder value. The function // will replace this with the actual value using a %ret/real instruction. - thr->push_vec4(vvp_vector4_t()); + thr->push_vec4(vvp_vector4_t(scope_func->get_func_width())); child->args_vec4.push_back(0); return do_callf_void(thr, child); @@ -4976,18 +4979,68 @@ bool of_RET_REAL(vthread_t thr, vvp_code_t cp) } /* - * %ret/vec4 <index> + * %ret/vec4 <index>, <offset>, <wid> */ bool of_RET_VEC4(vthread_t thr, vvp_code_t cp) { size_t index = cp->number; - vvp_vector4_t val = thr->pop_vec4(); + unsigned off_index = cp->bit_idx[0]; + int wid = cp->bit_idx[1]; assert(index >= 0 && index < thr->args_vec4.size()); unsigned depth = thr->args_vec4[index]; - // Use the depth to put the value into the stack of - // the parent thread. - thr->parent->poke_vec4(depth, val); + + int off = off_index? 
thr->words[off_index].w_int : 0; + const int sig_value_size = thr->parent->peek_vec4(depth).size(); + + vvp_vector4_t&val = thr->peek_vec4(); + unsigned val_size = val.size(); + + if (off_index!=0 && thr->flags[4] == BIT4_1) { + thr->pop_vec4(1); + return true; + } + + if (off <= -wid) { + thr->pop_vec4(1); + return true; + } + + if (off >= sig_value_size) { + thr->pop_vec4(1); + return true; + } + + // If the index is below the vector, then only assign the high + // bits that overlap with the target + if (off < 0) { + int use_off = -off; + wid -= use_off; + val = val.subvalue(use_off, wid); + val_size = wid; + off = 0; + } + + // If the value is partly above the target, then only assign + // the bits that overlap + if ((off+wid) > sig_value_size) { + wid = sig_value_size - off; + val = val.subvalue(0, wid); + val.resize(wid); + val_size = wid; + } + + if (off==0 && val_size==(unsigned)sig_value_size) { + thr->parent->poke_vec4(depth, val); + + } else { + vvp_vector4_t tmp_dst = thr->parent->peek_vec4(depth); + assert(wid>=0 && val.size() == (unsigned)wid); + tmp_dst.set_vec(off, val); + thr->parent->poke_vec4(depth, tmp_dst); + } + + thr->pop_vec4(1); return true; } @@ -6033,9 +6086,12 @@ bool of_EXEC_UFUNC_VEC4(vthread_t thr, vvp_code_t cp) __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope(); assert(child_scope); + vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(child_scope); + assert(scope_func); + /* Create a temporary thread and run it immediately. */ vthread_t child = vthread_new(cp->cptr, child_scope); - thr->push_vec4(vvp_vector4_t()); + thr->push_vec4(vvp_vector4_t(scope_func->get_func_width())); child->args_vec4.push_back(0); return do_exec_ufunc(thr, cp, child);