From 5ef077fdf6a27dbf729dadf789024b8d0148b054 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Fri, 27 Dec 2013 17:04:42 +0200 Subject: [PATCH] Start work on converting vec4 expressions to use stack. Instead of using a bit4 space to hold thread vectors, create a vec4 stack--much like the real, string, and object stacks--to hold intermediate values. --- tgt-vvp/Makefile.in | 1 + tgt-vvp/eval_expr.c | 46 +- tgt-vvp/eval_vec4.c | 498 ++++++++++++++++++ tgt-vvp/stmt_assign.c | 35 +- tgt-vvp/vvp.c | 26 + tgt-vvp/vvp_priv.h | 12 + tgt-vvp/vvp_process.c | 192 ++++--- vvp/codes.h | 29 +- vvp/compile.cc | 45 +- vvp/opcodes.txt | 210 ++++++-- vvp/vthread.cc | 1169 +++++++++++++++++++++++++++++++---------- 11 files changed, 1802 insertions(+), 461 deletions(-) create mode 100644 tgt-vvp/eval_vec4.c diff --git a/tgt-vvp/Makefile.in b/tgt-vvp/Makefile.in index 0aee3a915..0a793a29c 100644 --- a/tgt-vvp/Makefile.in +++ b/tgt-vvp/Makefile.in @@ -50,6 +50,7 @@ LDFLAGS = @LDFLAGS@ O = vvp.o draw_class.o draw_enum.o draw_mux.o draw_net_input.o \ draw_switch.o draw_ufunc.o draw_vpi.o \ eval_bool.o eval_expr.o eval_object.o eval_real.o eval_string.o \ + eval_vec4.o \ modpath.o stmt_assign.o vector.o \ vvp_process.o vvp_scope.o diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index c315039bc..a9d15198f 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -189,7 +189,7 @@ uint64_t get_number_immediate64(ivl_expr_t expr) return imm; } -static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) +void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) { switch (ivl_expr_type(expr)) { @@ -200,7 +200,7 @@ static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) if (number_is_unknown(expr)) { /* We are loading a 'bx so mimic %ix/get. 
*/ fprintf(vvp_out, " %%ix/load %u, 0, 0;\n", ix); - fprintf(vvp_out, " %%mov 4, 1, 1;\n"); + fprintf(vvp_out, " %%flag_set/imm 4, 1;\n"); break; } long imm = get_number_immediate(expr); @@ -210,11 +210,14 @@ static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) fprintf(vvp_out, " %%ix/load %u, 0, 0; loading %ld\n", ix, imm); fprintf(vvp_out, " %%ix/sub %u, %ld, 0;\n", ix, -imm); } - /* This can not have have a X/Z value so clear bit 4. */ - fprintf(vvp_out, " %%mov 4, 0, 1;\n"); + /* This cannot have an X/Z value, so clear flag 4. */ + fprintf(vvp_out, " %%flag_set/imm 4, 0;\n"); } break; + /* Special case: There is an %ix instruction for + reading index values directly from variables. In + this case, try to use that special instruction. */ case IVL_EX_SIGNAL: { ivl_signal_t sig = ivl_expr_signal(expr); @@ -227,11 +230,8 @@ static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) variable array. In this case, the ix/getv will not work, so do it the hard way. */ if (ivl_signal_type(sig) == IVL_SIT_REG) { - struct vector_info rv; - rv = draw_eval_expr(expr, 0); - fprintf(vvp_out, " %%ix/get%s %u, %u, %u;\n", - type, ix, rv.base, rv.wid); - clr_vector(rv); + draw_eval_vec4(expr, 0); + fprintf(vvp_out, " %%ix/vec4%s %u;\n", type, ix); break; } @@ -240,11 +240,8 @@ static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) assert(! number_is_unknown(ixe)); word = get_number_immediate(ixe); } else { - struct vector_info rv; - rv = draw_eval_expr(expr, 0); - fprintf(vvp_out, " %%ix/get%s %u, %u, %u;\n", - type, ix, rv.base, rv.wid); - clr_vector(rv); + draw_eval_vec4(expr, 0); + fprintf(vvp_out, " %%ix/vec4%s %u;\n", type, ix); break; } } @@ -254,20 +251,15 @@ static void eval_logic_into_integer(ivl_expr_t expr, unsigned ix) break; } - default: { - struct vector_info rv; - rv = draw_eval_expr(expr, 0); - /* Is this a signed expression? 
*/ - if (ivl_expr_signed(expr)) { - fprintf(vvp_out, " %%ix/get/s %u, %u, %u;\n", - ix, rv.base, rv.wid); - } else { - fprintf(vvp_out, " %%ix/get %u, %u, %u;\n", - ix, rv.base, rv.wid); - } - clr_vector(rv); - break; + default: + draw_eval_vec4(expr, 0); + /* Is this a signed expression? */ + if (ivl_expr_signed(expr)) { + fprintf(vvp_out, " %%ix/vec4/s %u;\n", ix); + } else { + fprintf(vvp_out, " %%ix/vec4 %u;\n", ix); } + break; } } diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c new file mode 100644 index 000000000..7781625b4 --- /dev/null +++ b/tgt-vvp/eval_vec4.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2013 Stephen Williams (steve@icarus.com) + * + * This source code is free software; you can redistribute it + * and/or modify it in source code form under the terms of the GNU + * General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/* + * This file includes functions for evaluating VECTOR expressions. 
+ */ +# include "vvp_priv.h" +# include +# include +# include +# include +# include + +static void draw_binary_vec4_arith(ivl_expr_t expr, int stuff_ok_flag) +{ + draw_eval_vec4(ivl_expr_oper1(expr), stuff_ok_flag); + draw_eval_vec4(ivl_expr_oper2(expr), stuff_ok_flag); + + switch (ivl_expr_opcode(expr)) { + case '+': + fprintf(vvp_out, " %%add;\n"); + break; + case '-': + fprintf(vvp_out, " %%sub;\n"); + break; + case '*': + fprintf(vvp_out, " %%mul;\n"); + break; + default: + assert(0); + break; + } +} + +static void draw_binary_vec4_bitwise(ivl_expr_t expr, int stuff_ok_flag) +{ + draw_eval_vec4(ivl_expr_oper1(expr), stuff_ok_flag); + draw_eval_vec4(ivl_expr_oper2(expr), stuff_ok_flag); + + switch (ivl_expr_opcode(expr)) { + case '&': + fprintf(vvp_out, " %%and;\n"); + break; + case '|': + fprintf(vvp_out, " %%or;\n"); + break; + default: + assert(0); + break; + } +} + +static void draw_binary_vec4_compare_real(ivl_expr_t expr) +{ + draw_eval_real(ivl_expr_oper1(expr)); + draw_eval_real(ivl_expr_oper2(expr)); + + switch (ivl_expr_opcode(expr)) { + case 'e': /* == */ + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + break; + case 'n': /* != */ + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + fprintf(vvp_out, " %%inv;\n"); + break; + default: + assert(0); + } +} + +static void draw_binary_vec4_compare(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t le = ivl_expr_oper1(expr); + ivl_expr_t re = ivl_expr_oper2(expr); + + if ((ivl_expr_value(le) == IVL_VT_REAL) + || (ivl_expr_value(re) == IVL_VT_REAL)) { + draw_binary_vec4_compare_real(expr); + return; + } + + draw_eval_vec4(le, stuff_ok_flag); + draw_eval_vec4(re, stuff_ok_flag); + + switch (ivl_expr_opcode(expr)) { + case 'e': /* == */ + fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + break; + case 'n': /* != */ + fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + fprintf(vvp_out, " 
%%inv;\n"); + break; + case 'E': /* === */ + fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%flag_get/vec4 6;\n"); + break; + case 'N': /* !== */ + fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%flag_get/vec4 6;\n"); + fprintf(vvp_out, " %%inv;\n"); + break; + default: + assert(0); + } +} + +static void draw_binary_vec4_le_real(ivl_expr_t expr) +{ + ivl_expr_t le = ivl_expr_oper1(expr); + ivl_expr_t re = ivl_expr_oper2(expr); + + switch (ivl_expr_opcode(expr)) { + case '<': + draw_eval_real(le); + draw_eval_real(re); + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + break; + + case 'L': /* <= */ + draw_eval_real(le); + draw_eval_real(re); + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + fprintf(vvp_out, " %%or;\n"); + break; + + case '>': + draw_eval_real(re); + draw_eval_real(le); + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + break; + + case 'G': /* >= */ + draw_eval_real(re); + draw_eval_real(le); + fprintf(vvp_out, " %%cmp/wr;\n"); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + fprintf(vvp_out, " %%or;\n"); + break; + + default: + assert(0); + break; + } +} + +static void draw_binary_vec4_le(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t le = ivl_expr_oper1(expr); + ivl_expr_t re = ivl_expr_oper2(expr); + ivl_expr_t tmp; + + if ((ivl_expr_value(le) == IVL_VT_REAL) + || (ivl_expr_value(re) == IVL_VT_REAL)) { + draw_binary_vec4_le_real(expr); + return; + } + + char use_opcode = ivl_expr_opcode(expr); + char s_flag = (ivl_expr_signed(le) && ivl_expr_signed(re)) ? 's' : 'u'; + + /* If this is a > or >=, then convert it to < or <= by + swapping the operands. Adjust the opcode to match. 
*/ + switch (use_opcode) { + case 'G': + tmp = le; + le = re; + re = tmp; + use_opcode = 'L'; + break; + case '>': + tmp = le; + le = re; + re = tmp; + use_opcode = '<'; + break; + } + + draw_eval_vec4(le, stuff_ok_flag); + draw_eval_vec4(re, stuff_ok_flag); + + switch (use_opcode) { + case 'L': + fprintf(vvp_out, " %%cmp/%c;\n", s_flag); + fprintf(vvp_out, " %%flag_get/vec4 4;\n"); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + fprintf(vvp_out, " %%or;\n"); + break; + case '<': + fprintf(vvp_out, " %%cmp/%c;\n", s_flag); + fprintf(vvp_out, " %%flag_get/vec4 5;\n"); + break; + default: + assert(0); + break; + } +} + +static void draw_binary_vec4_lor(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t le = ivl_expr_oper1(expr); + ivl_expr_t re = ivl_expr_oper2(expr); + + /* Push the left expression. Reduce it to a single bit if + necessary. */ + draw_eval_vec4(le, STUFF_OK_XZ); + if (ivl_expr_width(le) > 1) + fprintf(vvp_out, " %%or/r;\n"); + + /* Now push the right expression. Again, reduce to a single + bit if necessary. */ + draw_eval_vec4(re, STUFF_OK_XZ); + if (ivl_expr_width(re) > 1) + fprintf(vvp_out, " %%or/r;\n"); + + fprintf(vvp_out, " %%or;\n"); + + if (ivl_expr_width(expr) > 1) + fprintf(vvp_out, " %%pad/u %u;\n", ivl_expr_width(expr)); +} + +static void draw_binary_vec4_lrs(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t le = ivl_expr_oper1(expr); + ivl_expr_t re = ivl_expr_oper2(expr); + + // Push the left expression onto the stack. + draw_eval_vec4(le, stuff_ok_flag); + + // Calculate the shift amount into an index register. + int use_index_reg = allocate_word(); + assert(use_index_reg >= 0); + draw_eval_expr_into_integer(re, use_index_reg); + + // Emit the actual shift instruction. This will pop the top of + // the stack and replace it with the result of the shift. 
+ switch (ivl_expr_opcode(expr)) { + case 'l': /* << */ + fprintf(vvp_out, " %%shiftl %u;\n", use_index_reg); + break; + case 'r': /* >> */ + fprintf(vvp_out, " %%shiftr %u;\n", use_index_reg); + break; + case 'R': /* >>> */ + fprintf(vvp_out, " %%shiftrs %u;\n", use_index_reg); + break; + default: + assert(0); + break; + } + + clr_word(use_index_reg); +} + +static void draw_binary_vec4(ivl_expr_t expr, int stuff_ok_flag) +{ + switch (ivl_expr_opcode(expr)) { + case '+': + case '-': + case '*': + draw_binary_vec4_arith(expr, stuff_ok_flag); + break; + + case '&': + case '|': + draw_binary_vec4_bitwise(expr, stuff_ok_flag); + break; + + case 'e': /* == */ + case 'E': /* === */ + case 'n': /* != */ + case 'N': /* !== */ + draw_binary_vec4_compare(expr, stuff_ok_flag); + break; + + case 'G': /* >= */ + case 'L': /* <= */ + case '>': + case '<': + draw_binary_vec4_le(expr, stuff_ok_flag); + break; + + case 'l': /* << */ + case 'r': /* >> */ + case 'R': /* >>> */ + draw_binary_vec4_lrs(expr, stuff_ok_flag); + break; + + case 'o': /* || (logical or) */ + draw_binary_vec4_lor(expr, stuff_ok_flag); + break; + + default: + fprintf(stderr, "vvp.tgt error: unsupported binary (%c)\n", + ivl_expr_opcode(expr)); + assert(0); + } +} + +static void draw_number_vec4(ivl_expr_t expr) +{ + unsigned long val0 = 0; + unsigned long valx = 0; + unsigned wid = ivl_expr_width(expr); + const char*bits = ivl_expr_bits(expr); + + int idx; + + assert(wid <= 64); + + for (idx = 0 ; idx < wid ; idx += 1) { + val0 <<= 1; + valx <<= 1; + switch (bits[wid-idx-1]) { + case '0': + break; + case '1': + val0 |= 1; + break; + case 'x': + val0 |= 1; + valx |= 1; + break; + case 'z': + val0 |= 0; + valx |= 1; + break; + default: + assert(0); + break; + } + } + fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %u;\n", val0, valx, wid); +} + +static void draw_select_vec4(ivl_expr_t expr) +{ + // This is the sub-expression to part-select. 
+ ivl_expr_t subexpr = ivl_expr_oper1(expr); + // This is the base of the part select + ivl_expr_t base = ivl_expr_oper2(expr); + // This is the part select width + unsigned wid = ivl_expr_width(expr); + + draw_eval_vec4(subexpr, 0); + draw_eval_vec4(base, 0); + fprintf(vvp_out, " %%part %u;\n", wid); +} + +static void draw_select_pad_vec4(ivl_expr_t expr, int stuff_ok_flag) +{ + // This is the sub-expression to pad/truncate + ivl_expr_t subexpr = ivl_expr_oper1(expr); + // This is the target width of the expression + unsigned wid = ivl_expr_width(expr); + + // Push the sub-expression onto the stack. + draw_eval_vec4(subexpr, stuff_ok_flag); + + // Special case: The expression is already the correct width, + // so there is nothing to be done. + if (wid == ivl_expr_width(subexpr)) + return; + + if (ivl_expr_signed(expr)) + fprintf(vvp_out, " %%pad/s %u;\n", wid); + else + fprintf(vvp_out, " %%pad/u %u;\n", wid); +} + +static void draw_signal_vec4(ivl_expr_t expr) +{ + ivl_signal_t sig = ivl_expr_signal(expr); + + assert(ivl_signal_dimensions(sig) == 0); + fprintf(vvp_out, " %%load/vec4 v%p_0;\n", sig); +} + +static void draw_ternary_vec4(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t cond = ivl_expr_oper1(expr); + ivl_expr_t true_ex = ivl_expr_oper2(expr); + ivl_expr_t false_ex = ivl_expr_oper3(expr); + + unsigned lab_true = local_count++; + unsigned lab_out = local_count++; + + int use_flag = allocate_flag(); + + /* Evaluate the condition expression, including optionally + reducing it to a single bit. Put the result into a flag bit + for use by all the tests. */ + draw_eval_vec4(cond, STUFF_OK_XZ); + if (ivl_expr_width(cond) > 1) + fprintf(vvp_out, " %%or/r;\n"); + fprintf(vvp_out, " %%flag_set/vec4 %d;\n", use_flag); + + fprintf(vvp_out, " %%jmp/0 T_%u.%u, %d;\n", thread_count, lab_true, use_flag); + + /* If the condition is true or xz (not false), we need the true + expression. If the condition is true, then we ONLY need the + true expression. 
*/ + draw_eval_vec4(true_ex, stuff_ok_flag); + fprintf(vvp_out, " %%jmp/1 T_%u.%u, %d;\n", thread_count, lab_out, use_flag); + fprintf(vvp_out, "T_%u.%u ; End of true expr.\n", thread_count, lab_true); + + /* If the condition is false or xz (not true), we need the false + expression. If the condition is false, then we ONLY need + the false expression. */ + draw_eval_vec4(false_ex, stuff_ok_flag); + fprintf(vvp_out, " %%jmp/0 T_%u.%u, %d;\n", thread_count, lab_out, use_flag); + fprintf(vvp_out, " ; End of false expr.\n"); + + /* Here, the condition is not true or false, it is xz. Both + the true and false expressions have been pushed onto the + stack, we just need to blend the bits. */ + fprintf(vvp_out, " %%blend;\n"); + fprintf(vvp_out, "T_%u.%u;\n", thread_count, lab_out); + + clr_flag(use_flag); +} + +static void draw_unary_vec4(ivl_expr_t expr, int stuff_ok_flag) +{ + ivl_expr_t sub = ivl_expr_oper1(expr); + + switch (ivl_expr_opcode(expr)) { + case '~': + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%inv;\n"); + break; + default: + fprintf(stderr, "XXXX Unary operator %c no implemented\n", ivl_expr_opcode(expr)); + break; + } +} + +void draw_eval_vec4(ivl_expr_t expr, int stuff_ok_flag) +{ + switch (ivl_expr_type(expr)) { + case IVL_EX_BINARY: + draw_binary_vec4(expr, stuff_ok_flag); + return; + + case IVL_EX_NUMBER: + draw_number_vec4(expr); + return; + + case IVL_EX_SELECT: + if (ivl_expr_oper2(expr)==0) + draw_select_pad_vec4(expr, stuff_ok_flag); + else + draw_select_vec4(expr); + return; + + case IVL_EX_SIGNAL: + draw_signal_vec4(expr); + return; + + case IVL_EX_TERNARY: + draw_ternary_vec4(expr, stuff_ok_flag); + return; + + case IVL_EX_UNARY: + draw_unary_vec4(expr, stuff_ok_flag); + return; + + default: + break; + } + + fprintf(stderr, "XXXX Evaluate VEC4 expression (%d)\n", ivl_expr_type(expr)); + fprintf(vvp_out, "; XXXX Evaluate VEC4 expression (%d)\n", ivl_expr_type(expr)); +} diff --git a/tgt-vvp/stmt_assign.c b/tgt-vvp/stmt_assign.c 
index 4201778aa..db4b08638 100644 --- a/tgt-vvp/stmt_assign.c +++ b/tgt-vvp/stmt_assign.c @@ -339,6 +339,7 @@ static ivl_type_t draw_lval_expr(ivl_lval_t lval) return ivl_type_prop_type(sub_type, ivl_lval_property_idx(lval)); } +#if 0 static void set_vec_to_lval_slice_nest(ivl_lval_t lval, unsigned bit, unsigned wid) { ivl_lval_t lval_nest = ivl_lval_nest(lval); @@ -349,7 +350,9 @@ static void set_vec_to_lval_slice_nest(ivl_lval_t lval, unsigned bit, unsigned w ivl_lval_property_idx(lval), bit, wid); fprintf(vvp_out, " %%pop/obj 1, 0;\n"); } +#endif +#if 0 static void set_vec_to_lval_slice(ivl_lval_t lval, unsigned bit, unsigned wid) { ivl_signal_t sig = ivl_lval_sig(lval); @@ -507,8 +510,8 @@ static void set_vec_to_lval_slice(ivl_lval_t lval, unsigned bit, unsigned wid) } } - - +#endif +#if 0 /* * This is a private function to generate %set code for the * statement. At this point, the r-value is evaluated and stored in @@ -542,6 +545,24 @@ static void set_vec_to_lval(ivl_statement_t net, struct vector_info res) cur_rbit += bit_limit; } } +#endif + +/* + * Store a vector from the vec4 stack to the statement l-values. This + * all assumes that the value to be assigned is already on the top of + * the stack. + */ +static void store_vec4_to_lval(ivl_statement_t net) +{ + assert(ivl_stmt_lvals(net) == 1); + + ivl_lval_t lval = ivl_stmt_lval(net,0); + ivl_signal_t lsig = ivl_lval_sig(lval); + + assert(ivl_lval_width(lval) == ivl_signal_width(lsig)); + + fprintf(vvp_out, " %%store/vec4 v%p_0, %u;\n", lsig, ivl_signal_width(lsig)); +} static int show_stmt_assign_vector(ivl_statement_t net) { @@ -554,7 +575,7 @@ static int show_stmt_assign_vector(ivl_statement_t net) of the l-value. We need these values as part of the r-value calculation. 
*/ if (ivl_stmt_opcode(net) != 0) { - slices = calloc(ivl_stmt_lvals(net), sizeof(struct vec_slice_info)); + slices = calloc(ivl_stmt_lvals(net), sizeof(struct vec_slice_info)); lres = get_vec_from_lval(net, slices); } @@ -563,7 +584,7 @@ static int show_stmt_assign_vector(ivl_statement_t net) result to a vector. Then store that vector into the l-value. */ if (ivl_expr_value(rval) == IVL_VT_REAL) { - draw_eval_real(rval); + draw_eval_real(rval); /* This is the accumulated with of the l-value of the assignment. */ unsigned wid = ivl_stmt_lwidth(net); @@ -582,12 +603,14 @@ static int show_stmt_assign_vector(ivl_statement_t net) fprintf(vvp_out, " %%cvt/vr %u, %u;\n", res.base, res.wid); } else { - res = draw_eval_expr(rval, 0); + draw_eval_vec4(rval, 0); + res.base = 0; // XXXX This is just to suppress the clr_vector below. + res.wid = 0; } switch (ivl_stmt_opcode(net)) { case 0: - set_vec_to_lval(net, res); + store_vec4_to_lval(net); break; case '+': diff --git a/tgt-vvp/vvp.c b/tgt-vvp/vvp.c index c7af7e4ce..fd32dd9a5 100644 --- a/tgt-vvp/vvp.c +++ b/tgt-vvp/vvp.c @@ -48,6 +48,8 @@ FILE*vvp_out = 0; int vvp_errors = 0; unsigned show_file_line = 0; +static uint32_t allocate_flag_mask = 0x00ff; + __inline__ static void draw_execute_header(ivl_design_t des) { const char*cp = ivl_design_flag(des, "VVP_EXECUTABLE"); @@ -85,6 +87,30 @@ __inline__ static void draw_module_declarations(ivl_design_t des) } } +int allocate_flag(void) +{ + int idx; + for (idx = 0 ; idx < 8*sizeof(allocate_flag_mask) ; idx += 1) { + uint32_t mask = 1 << idx; + if (allocate_flag_mask & mask) + continue; + + allocate_flag_mask |= mask; + return idx; + } + + return -1; +} + +void clr_flag(int idx) +{ + assert(idx < 8*sizeof(allocate_flag_mask)); + uint32_t mask = 1 << idx; + + assert(allocate_flag_mask & mask); + + allocate_flag_mask &= ~mask; +} int target_design(ivl_design_t des) diff --git a/tgt-vvp/vvp_priv.h b/tgt-vvp/vvp_priv.h index 40d8d87dc..957288e16 100644 --- a/tgt-vvp/vvp_priv.h +++ 
b/tgt-vvp/vvp_priv.h @@ -306,6 +306,12 @@ extern int number_is_immediate(ivl_expr_t ex, unsigned lim_wid, int negative_is_ extern long get_number_immediate(ivl_expr_t ex); extern uint64_t get_number_immediate64(ivl_expr_t ex); +/* + * draw_eval_vec4 evaluates vec4 expressions. The result of the + * evaluation is the vec4 result in the top of the vec4 expression stack. + */ +extern void draw_eval_vec4(ivl_expr_t ex, int stuff_ok_flag); + /* * draw_eval_real evaluates real value expressions. The result of the * evaluation is the real result in the top of the real expression stack. @@ -342,6 +348,12 @@ extern void show_stmt_file_line(ivl_statement_t net, const char*desc); extern int allocate_word(void); extern void clr_word(int idx); +/* + * These functions manage flag bit allocation. + */ +extern int allocate_flag(void); +extern void clr_flag(int idx); + /* * These are used to count labels as I generate code. */ diff --git a/tgt-vvp/vvp_process.c b/tgt-vvp/vvp_process.c index f9aca0528..f4808f669 100644 --- a/tgt-vvp/vvp_process.c +++ b/tgt-vvp/vvp_process.c @@ -209,9 +209,9 @@ static void assign_to_array_word(ivl_signal_t lsig, ivl_expr_t word_ix, clear_expression_lookaside(); } -static void assign_to_lvector(ivl_lval_t lval, unsigned bit, +static void assign_to_lvector(ivl_lval_t lval, uint64_t delay, ivl_expr_t dexp, - unsigned width, unsigned nevents) + unsigned nevents) { ivl_signal_t sig = ivl_lval_sig(lval); ivl_expr_t part_off_ex = ivl_lval_part_off(lval); @@ -221,9 +221,13 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, const unsigned long use_word = 0; if (ivl_signal_dimensions(sig) > 0) { +#if 0 assert(word_ix); assign_to_array_word(sig, word_ix, bit, delay, dexp, part_off_ex, width, nevents); +#else + fprintf(stderr, "XXXX %%assign to array word not supported yet.\n"); +#endif return; } @@ -247,9 +251,13 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, draw_eval_expr_into_integer(part_off_ex, 1); /* If the index expression 
has XZ bits, skip the assign. */ fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_assign); +#if 0 fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); fprintf(vvp_out, " %%assign/v0/x1/d v%p_%lu, %d, %u;\n", sig, use_word, delay_index, bit); +#else + assert(0); // XXXX +#endif fprintf(vvp_out, "t_%u ;\n", skip_assign); clr_word(delay_index); } else if (nevents != 0) { @@ -257,9 +265,13 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, draw_eval_expr_into_integer(part_off_ex, 1); /* If the index expression has XZ bits, skip the assign. */ fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_assign); +#if 0 fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); fprintf(vvp_out, " %%assign/v0/x1/e v%p_%lu, %u;\n", sig, use_word, bit); +#else + assert(0); // XXXX +#endif fprintf(vvp_out, "t_%u ;\n", skip_assign); fprintf(vvp_out, " %%evctl/c;\n"); } else { @@ -267,6 +279,7 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, draw_eval_expr_into_integer(part_off_ex, 1); /* If the index expression has XZ bits, skip the assign. */ fprintf(vvp_out, " %%jmp/1 t_%u, 4;\n", skip_assign); +#if 0 fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); /* * The %assign can only take a 32 bit delay. For a larger @@ -285,10 +298,14 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, " %%assign/v0/x1 v%p_%lu, %lu, %u;\n", sig, use_word, low_d, bit); } +#else + assert(0); // XXXX +#endif fprintf(vvp_out, "t_%u ;\n", skip_assign); } } else if (part_off>0 || ivl_lval_width(lval)!=ivl_signal_width(sig)) { +#if 0 /* There is no mux expression, but a constant part offset. Load that into index x1 and generate a single-bit set instruction. 
*/ @@ -331,23 +348,41 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, sig, use_word, low_d, bit); } } +#else + if (dexp != 0) { + assert(0); // XXXX + + } else if (nevents != 0) { + assert(0); // XXXX + + } else { + int offset_index = allocate_word(); + int delay_index = allocate_word(); + fprintf(vvp_out, " %%ix/load %d, %lu, 0;\n", offset_index, part_off); + if (dexp) + draw_eval_expr_into_integer(dexp,delay_index); + else + fprintf(vvp_out, " %%ix/load %d, %lu, %lu;\n", + delay_index, low_d, hig_d); + fprintf(vvp_out, " %%assign/vec4/off/d v%p_%lu, %d, %d;\n", + sig, use_word, offset_index, delay_index); + clr_word(offset_index); + clr_word(delay_index); + } +#endif } else if (dexp != 0) { /* Calculated delay... */ int delay_index = allocate_word(); draw_eval_expr_into_integer(dexp, delay_index); - fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); - fprintf(vvp_out, " %%assign/v0/d v%p_%lu, %d, %u;\n", - sig, use_word, delay_index, bit); + fprintf(vvp_out, " %%assign/vec4/d v%p_%lu, %d;\n", + sig, use_word, delay_index); clr_word(delay_index); } else if (nevents != 0) { /* Event control delay... */ - fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); - fprintf(vvp_out, " %%assign/v0/e v%p_%lu, %u;\n", - sig, use_word, bit); + fprintf(vvp_out, " %%assign/vec4/e v%p_%lu;\n", + sig, use_word); } else { - /* Constant delay... */ - fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width); /* * The %assign can only take a 32 bit delay. For a larger * delay we need to put it into an index register. 
@@ -356,12 +391,12 @@ static void assign_to_lvector(ivl_lval_t lval, unsigned bit, int delay_index = allocate_word(); fprintf(vvp_out, " %%ix/load %d, %lu, %lu;\n", delay_index, low_d, hig_d); - fprintf(vvp_out, " %%assign/v0/d v%p_%lu, %d, %u;\n", - sig, use_word, delay_index, bit); + fprintf(vvp_out, " %%assign/vec4/d v%p_%lu, %d;\n", + sig, use_word, delay_index); clr_word(delay_index); } else { - fprintf(vvp_out, " %%assign/v0 v%p_%lu, %lu, %u;\n", - sig, use_word, low_d, bit); + fprintf(vvp_out, " %%assign/vec4 v%p_%lu, %lu;\n", + sig, use_word, low_d); } } } @@ -546,7 +581,7 @@ static int show_stmt_assign_nb(ivl_statement_t net) } - { struct vector_info res; + { struct vector_info res = {0,0}; unsigned wid; unsigned lidx; unsigned cur_rbit = 0; @@ -574,21 +609,29 @@ static int show_stmt_assign_nb(ivl_statement_t net) res.base, res.wid); } else { - res = draw_eval_expr(rval, 0); - wid = res.wid; + wid = ivl_stmt_lwidth(net); + draw_eval_vec4(rval, 0); + if (ivl_expr_width(rval) != wid) { + if (ivl_expr_signed(rval)) + fprintf(vvp_out, " %%pad/s %u;\n", wid); + else + fprintf(vvp_out, " %%pad/u %u;\n", wid); + } } + /* Spread the r-value vector over the bits of the l-value. */ for (lidx = 0 ; lidx < ivl_stmt_lvals(net) ; lidx += 1) { unsigned bit_limit = wid - cur_rbit; - unsigned bidx; lval = ivl_stmt_lval(net, lidx); if (bit_limit > ivl_lval_width(lval)) bit_limit = ivl_lval_width(lval); - bidx = res.base < 4? 
res.base : (res.base+cur_rbit); - assign_to_lvector(lval, bidx, delay, del, bit_limit, nevents); + /* XXXX For now, don't know how to actually split + vectors */ + assert(lidx == 0); + assign_to_lvector(lval, delay, del, nevents); cur_rbit += bit_limit; @@ -655,7 +698,6 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) { int rc = 0; ivl_expr_t expr = ivl_stmt_cond_expr(net); - struct vector_info cond = draw_eval_expr(expr, 0); unsigned count = ivl_stmt_case_count(net); unsigned local_base = local_count; @@ -666,6 +708,11 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) local_count += count + 1; + /* Evaluate the case condition to the top of the vec4 + stack. This expression will be compared multiple times to + each case guard. */ + draw_eval_vec4(expr,0); + /* First draw the branch table. All the non-default cases generate a branch out of here, to the code that implements the case. The default will fall through all the tests. */ @@ -673,55 +720,34 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) for (idx = 0 ; idx < count ; idx += 1) { ivl_expr_t cex = ivl_stmt_case_expr(net, idx); - struct vector_info cvec; if (cex == 0) { default_case = idx; continue; } - /* Is the guard expression something I can pass to a - %cmpi/u instruction? If so, use that instead. */ - - if ((ivl_statement_type(net) == IVL_ST_CASE) - && (ivl_expr_type(cex) == IVL_EX_NUMBER) - && (! number_is_unknown(cex)) - && number_is_immediate(cex, 16, 0)) { - - unsigned long imm = get_number_immediate(cex); - - fprintf(vvp_out, " %%cmpi/u %u, %lu, %u;\n", - cond.base, imm, cond.wid); - fprintf(vvp_out, " %%jmp/1 T_%u.%u, 6;\n", - thread_count, local_base+idx); - - continue; - } - - /* Oh well, do this case the hard way. */ - - cvec = draw_eval_expr_wid(cex, cond.wid, STUFF_OK_RO); - assert(cvec.wid == cond.wid); + /* Duplicate the case expression so that the cmp + instructions below do not completely erase the + value. 
Do this in front of each compare. */ + fprintf(vvp_out, " %%dup/vec4;\n"); + draw_eval_vec4(cex, STUFF_OK_RO); switch (ivl_statement_type(net)) { case IVL_ST_CASE: - fprintf(vvp_out, " %%cmp/u %u, %u, %u;\n", - cond.base, cvec.base, cond.wid); + fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%jmp/1 T_%u.%u, 6;\n", thread_count, local_base+idx); break; case IVL_ST_CASEX: - fprintf(vvp_out, " %%cmp/x %u, %u, %u;\n", - cond.base, cvec.base, cond.wid); + fprintf(vvp_out, " %%cmp/x;\n"); fprintf(vvp_out, " %%jmp/1 T_%u.%u, 4;\n", thread_count, local_base+idx); break; case IVL_ST_CASEZ: - fprintf(vvp_out, " %%cmp/z %u, %u, %u;\n", - cond.base, cvec.base, cond.wid); + fprintf(vvp_out, " %%cmp/z;\n"); fprintf(vvp_out, " %%jmp/1 T_%u.%u, 4;\n", thread_count, local_base+idx); break; @@ -729,14 +755,8 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) default: assert(0); } - - /* Done with the case expression */ - clr_vector(cvec); } - /* Done with the condition expression */ - clr_vector(cond); - /* Emit code for the default case. */ if (default_case < count) { ivl_statement_t cst = ivl_stmt_case_stmt(net, default_case); @@ -757,6 +777,7 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) clear_expression_lookaside(); rc += show_statement(cst, sscope); + /* Statement is done, jump to the out of the case. */ fprintf(vvp_out, " %%jmp T_%u.%u;\n", thread_count, local_base+count); @@ -765,6 +786,10 @@ static int show_stmt_case(ivl_statement_t net, ivl_scope_t sscope) /* The out of the case. */ fprintf(vvp_out, "T_%u.%u ;\n", thread_count, local_base+count); + /* The case tests will leave the case expression on the top of + the stack, but we are done with it now. Pop it. 
*/ + fprintf(vvp_out, " %%pop/vec4 1;\n"); + clear_expression_lookaside(); return rc; @@ -1238,23 +1263,20 @@ static int show_stmt_condit(ivl_statement_t net, ivl_scope_t sscope) int rc = 0; unsigned lab_false, lab_out; ivl_expr_t expr = ivl_stmt_cond_expr(net); - struct vector_info cond; show_stmt_file_line(net, "If statement."); - cond = draw_eval_expr(expr, STUFF_OK_XZ|STUFF_OK_47|STUFF_OK_RO); - - assert(cond.wid == 1); + draw_eval_vec4(expr, STUFF_OK_XZ|STUFF_OK_47|STUFF_OK_RO); lab_false = local_count++; lab_out = local_count++; - fprintf(vvp_out, " %%jmp/0xz T_%u.%u, %u;\n", - thread_count, lab_false, cond.base); - - /* Done with the condition expression. */ - if (cond.base >= 8) - clr_vector(cond); + int use_flag = allocate_flag(); + /* The %flag_set/vec4 instruction pops the vec4 bit and puts it into the flag. */ + fprintf(vvp_out, " %%flag_set/vec4 %d;\n", use_flag); + fprintf(vvp_out, " %%jmp/0xz T_%u.%u, %d;\n", + thread_count, lab_false, use_flag); + clr_flag(use_flag); if (ivl_stmt_cond_true(net)) rc += show_statement(ivl_stmt_cond_true(net), sscope); @@ -1320,20 +1342,19 @@ static int show_stmt_delayx(ivl_statement_t net, ivl_scope_t sscope) show_stmt_file_line(net, "Delay statement."); + int use_idx = allocate_word(); switch (ivl_expr_value(expr)) { case IVL_VT_BOOL: case IVL_VT_LOGIC: { - struct vector_info del = draw_eval_expr(expr, 0); - fprintf(vvp_out, " %%ix/get 0, %u, %u;\n", - del.base, del.wid); - clr_vector(del); + draw_eval_vec4(expr, 0); + fprintf(vvp_out, " %%ix/vec4 %d;\n", use_idx); break; } case IVL_VT_REAL: { draw_eval_real(expr); - fprintf(vvp_out, " %%cvt/ur 0;\n"); + fprintf(vvp_out, " %%cvt/ur %d;\n", use_idx); break; } @@ -1341,7 +1362,9 @@ static int show_stmt_delayx(ivl_statement_t net, ivl_scope_t sscope) assert(0); } - fprintf(vvp_out, " %%delayx 0;\n"); + fprintf(vvp_out, " %%delayx %d;\n", use_idx); + clr_word(use_idx); + /* Lots of things can happen during a delay. 
*/ clear_expression_lookaside(); @@ -1755,7 +1778,6 @@ static int show_stmt_wait(ivl_statement_t net, ivl_scope_t sscope) static int show_stmt_while(ivl_statement_t net, ivl_scope_t sscope) { int rc = 0; - struct vector_info cvec; unsigned top_label = local_count++; unsigned out_label = local_count++; @@ -1771,14 +1793,16 @@ static int show_stmt_while(ivl_statement_t net, ivl_scope_t sscope) /* Draw the evaluation of the condition expression, and test the result. If the expression evaluates to false, then branch to the out label. */ - cvec = draw_eval_expr(ivl_stmt_cond_expr(net), STUFF_OK_XZ|STUFF_OK_47); - if (cvec.wid > 1) - cvec = reduction_or(cvec); + draw_eval_vec4(ivl_stmt_cond_expr(net), STUFF_OK_XZ|STUFF_OK_47); + if (ivl_expr_width(ivl_stmt_cond_expr(net)) > 1) { + fprintf(vvp_out, " %%or/r;\n"); + } + int use_flag = allocate_flag(); + fprintf(vvp_out, " %%flag_set/vec4 %d;\n", use_flag); fprintf(vvp_out, " %%jmp/0xz T_%u.%u, %u;\n", - thread_count, out_label, cvec.base); - if (cvec.base >= 8) - clr_vector(cvec); + thread_count, out_label, use_flag); + clr_flag(use_flag); /* Draw the body of the loop. */ rc += show_statement(ivl_stmt_sub_stmt(net), sscope); @@ -1966,7 +1990,7 @@ static unsigned is_repeat_event_assign(ivl_scope_t scope, */ static unsigned is_wait(ivl_scope_t scope, ivl_statement_t stmt) { - ivl_statement_t while_wait, wait, wait_stmt; + ivl_statement_t while_wait, wait_x, wait_stmt; ivl_expr_t while_expr, expr; const char *bits; /* We must have two block elements. */ @@ -1975,9 +1999,9 @@ static unsigned is_wait(ivl_scope_t scope, ivl_statement_t stmt) while_wait = ivl_stmt_block_stmt(stmt, 0); if (ivl_statement_type(while_wait) != IVL_ST_WHILE) return 0; /* That has a wait with a NOOP statement. 
*/ - wait = ivl_stmt_sub_stmt(while_wait); - if (ivl_statement_type(wait) != IVL_ST_WAIT) return 0; - wait_stmt = ivl_stmt_sub_stmt(wait); + wait_x = ivl_stmt_sub_stmt(while_wait); + if (ivl_statement_type(wait_x) != IVL_ST_WAIT) return 0; + wait_stmt = ivl_stmt_sub_stmt(wait_x); if (ivl_statement_type(wait_stmt) != IVL_ST_NOOP) return 0; /* Check that the while condition has the correct form. */ while_expr = ivl_stmt_cond_expr(while_wait); @@ -1994,7 +2018,7 @@ static unsigned is_wait(ivl_scope_t scope, ivl_statement_t stmt) /* And finally the two statements that represent the wait must * have the same line number as the block. */ if ((ivl_stmt_lineno(stmt) != ivl_stmt_lineno(while_wait)) || - (ivl_stmt_lineno(stmt) != ivl_stmt_lineno(wait))) { + (ivl_stmt_lineno(stmt) != ivl_stmt_lineno(wait_x))) { return 0; } diff --git a/vvp/codes.h b/vvp/codes.h index 3cab77057..4921b3056 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -47,9 +47,10 @@ extern bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_AVE(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_D(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_MV(vthread_t thr, vvp_code_t code); -extern bool of_ASSIGN_V0(vthread_t thr, vvp_code_t code); -extern bool of_ASSIGN_V0D(vthread_t thr, vvp_code_t code); -extern bool of_ASSIGN_V0E(vthread_t thr, vvp_code_t code); +extern bool of_ASSIGN_VEC4(vthread_t thr, vvp_code_t code); +extern bool of_ASSIGN_VEC4D(vthread_t thr, vvp_code_t code); +extern bool of_ASSIGN_VEC4E(vthread_t thr, vvp_code_t code); +extern bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_V0X1(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_V0X1D(vthread_t thr, vvp_code_t code); extern bool of_ASSIGN_V0X1E(vthread_t thr, vvp_code_t code); @@ -86,6 +87,7 @@ extern bool of_CVT_UR(vthread_t thr, vvp_code_t code); extern bool of_CVT_VR(vthread_t thr, vvp_code_t code); extern bool of_DEASSIGN(vthread_t thr, vvp_code_t code); extern bool 
of_DEASSIGN_WR(vthread_t thr, vvp_code_t code); +extern bool of_DEBUG_THR(vthread_t thr, vvp_code_t code); extern bool of_DELAY(vthread_t thr, vvp_code_t code); extern bool of_DELAYX(vthread_t thr, vvp_code_t code); extern bool of_DELETE_OBJ(vthread_t thr, vvp_code_t code); @@ -95,12 +97,16 @@ extern bool of_DIV(vthread_t thr, vvp_code_t code); extern bool of_DIV_S(vthread_t thr, vvp_code_t code); extern bool of_DIV_WR(vthread_t thr, vvp_code_t code); extern bool of_DUP_REAL(vthread_t thr, vvp_code_t code); +extern bool of_DUP_VEC4(vthread_t thr, vvp_code_t code); extern bool of_END(vthread_t thr, vvp_code_t code); extern bool of_EVCTL(vthread_t thr, vvp_code_t code); extern bool of_EVCTLC(vthread_t thr, vvp_code_t code); extern bool of_EVCTLI(vthread_t thr, vvp_code_t code); extern bool of_EVCTLS(vthread_t thr, vvp_code_t code); extern bool of_FILE_LINE(vthread_t thr, vvp_code_t code); +extern bool of_FLAG_GET_VEC4(vthread_t thr, vvp_code_t code); +extern bool of_FLAG_SET_IMM(vthread_t thr, vvp_code_t code); +extern bool of_FLAG_SET_VEC4(vthread_t thr, vvp_code_t code); extern bool of_FORCE_LINK(vthread_t thr, vvp_code_t code); extern bool of_FORCE_V(vthread_t thr, vvp_code_t code); extern bool of_FORCE_WR(vthread_t thr, vvp_code_t code); @@ -117,6 +123,8 @@ extern bool of_IX_LOAD(vthread_t thr, vvp_code_t code); extern bool of_IX_MOV(vthread_t thr, vvp_code_t code); extern bool of_IX_MUL(vthread_t thr, vvp_code_t code); extern bool of_IX_SUB(vthread_t thr, vvp_code_t code); +extern bool of_IX_VEC4(vthread_t thr, vvp_code_t code); +extern bool of_IX_VEC4_S(vthread_t thr, vvp_code_t code); extern bool of_JMP(vthread_t thr, vvp_code_t code); extern bool of_JMP0(vthread_t thr, vvp_code_t code); extern bool of_JMP0XZ(vthread_t thr, vvp_code_t code); @@ -135,7 +143,7 @@ extern bool of_LOAD_DAR_STR(vthread_t thr, vvp_code_t code); extern bool of_LOAD_OBJ(vthread_t thr, vvp_code_t code); extern bool of_LOAD_STR(vthread_t thr, vvp_code_t code); extern bool 
of_LOAD_STRA(vthread_t thr, vvp_code_t code); -extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code); +extern bool of_LOAD_VEC4(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code); extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code); @@ -160,10 +168,13 @@ extern bool of_NORR(vthread_t thr, vvp_code_t code); extern bool of_NULL(vthread_t thr, vvp_code_t code); extern bool of_OR(vthread_t thr, vvp_code_t code); extern bool of_ORR(vthread_t thr, vvp_code_t code); -extern bool of_PAD(vthread_t thr, vvp_code_t code); +extern bool of_PAD_S(vthread_t thr, vvp_code_t code); +extern bool of_PAD_U(vthread_t thr, vvp_code_t code); +extern bool of_PART(vthread_t thr, vvp_code_t code); extern bool of_POP_OBJ(vthread_t thr, vvp_code_t code); extern bool of_POP_REAL(vthread_t thr, vvp_code_t code); extern bool of_POP_STR(vthread_t thr, vvp_code_t code); +extern bool of_POP_VEC4(vthread_t thr, vvp_code_t code); extern bool of_POW(vthread_t thr, vvp_code_t code); extern bool of_POW_S(vthread_t thr, vvp_code_t code); extern bool of_POW_WR(vthread_t thr, vvp_code_t code); @@ -173,6 +184,7 @@ extern bool of_PROP_STR(vthread_t thr, vvp_code_t code); extern bool of_PROP_V(vthread_t thr, vvp_code_t code); extern bool of_PUSHI_STR(vthread_t thr, vvp_code_t code); extern bool of_PUSHI_REAL(vthread_t thr, vvp_code_t code); +extern bool of_PUSHI_VEC4(vthread_t thr, vvp_code_t code); extern bool of_PUSHV_STR(vthread_t thr, vvp_code_t code); extern bool of_PUTC_STR_V(vthread_t thr, vvp_code_t code); extern bool of_RELEASE_NET(vthread_t thr, vvp_code_t code); @@ -187,9 +199,9 @@ extern bool of_SET_DAR_OBJ_STR(vthread_t thr, vvp_code_t code); extern bool of_SET_VEC(vthread_t thr, vvp_code_t code); extern bool of_SET_X0(vthread_t thr, vvp_code_t code); extern bool of_SET_X0_X(vthread_t thr, vvp_code_t code); -extern bool of_SHIFTL_I0(vthread_t thr, vvp_code_t code); -extern bool 
of_SHIFTR_I0(vthread_t thr, vvp_code_t code); -extern bool of_SHIFTR_S_I0(vthread_t thr, vvp_code_t code); +extern bool of_SHIFTL(vthread_t thr, vvp_code_t code); +extern bool of_SHIFTR(vthread_t thr, vvp_code_t code); +extern bool of_SHIFTR_S(vthread_t thr, vvp_code_t code); extern bool of_STORE_DAR_R(vthread_t thr, vvp_code_t code); extern bool of_STORE_DAR_STR(vthread_t thr, vvp_code_t code); extern bool of_STORE_OBJ(vthread_t thr, vvp_code_t code); @@ -201,6 +213,7 @@ extern bool of_STORE_REAL(vthread_t thr, vvp_code_t code); extern bool of_STORE_REALA(vthread_t thr, vvp_code_t code); extern bool of_STORE_STR(vthread_t thr, vvp_code_t code); extern bool of_STORE_STRA(vthread_t thr, vvp_code_t code); +extern bool of_STORE_VEC4(vthread_t thr, vvp_code_t code); extern bool of_SUB(vthread_t thr, vvp_code_t code); extern bool of_SUB_WR(vthread_t thr, vvp_code_t code); extern bool of_SUBI(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index f79b5b883..85b551656 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -85,11 +85,11 @@ struct opcode_table_s { static const struct opcode_table_s opcode_table[] = { { "%abs/wr", of_ABS_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%add", of_ADD, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%add", of_ADD, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%add/wr", of_ADD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} }, - { "%and", of_AND, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%and/r", of_ANDR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%andi", of_ANDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%assign/ar",of_ASSIGN_AR,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} }, @@ -98,17 +98,18 @@ static const struct opcode_table_s opcode_table[] = { { "%assign/av",of_ASSIGN_AV,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} }, { "%assign/av/d",of_ASSIGN_AVD,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} }, { 
"%assign/av/e",of_ASSIGN_AVE,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} }, - { "%assign/v0",of_ASSIGN_V0,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} }, - { "%assign/v0/d",of_ASSIGN_V0D,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} }, - { "%assign/v0/e",of_ASSIGN_V0E,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} }, { "%assign/v0/x1",of_ASSIGN_V0X1,3,{OA_FUNC_PTR,OA_BIT1,OA_BIT2} }, { "%assign/v0/x1/d",of_ASSIGN_V0X1D,3,{OA_FUNC_PTR,OA_BIT1,OA_BIT2} }, { "%assign/v0/x1/e",of_ASSIGN_V0X1E,2,{OA_FUNC_PTR,OA_BIT1,OA_NONE} }, + { "%assign/vec4", of_ASSIGN_VEC4, 2,{OA_FUNC_PTR, OA_BIT1, OA_NONE} }, + { "%assign/vec4/d",of_ASSIGN_VEC4D,2,{OA_FUNC_PTR, OA_BIT1, OA_NONE} }, + { "%assign/vec4/e",of_ASSIGN_VEC4E,1,{OA_FUNC_PTR, OA_NONE, OA_NONE} }, + { "%assign/vec4/off/d",of_ASSIGN_VEC4_OFF_D, 3,{OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%assign/wr", of_ASSIGN_WR, 2,{OA_VPI_PTR, OA_BIT1, OA_NONE} }, { "%assign/wr/d",of_ASSIGN_WRD,2,{OA_VPI_PTR, OA_BIT1, OA_NONE} }, { "%assign/wr/e",of_ASSIGN_WRE,1,{OA_VPI_PTR, OA_NONE, OA_NONE} }, { "%assign/x0",of_ASSIGN_X0,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} }, - { "%blend", of_BLEND, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%blend", of_BLEND, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%blend/wr", of_BLEND_WR,0, {OA_NONE, OA_NONE, OA_NONE} }, { "%breakpoint", of_BREAKPOINT, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cassign/link",of_CASSIGN_LINK,2,{OA_FUNC_PTR,OA_FUNC_PTR2,OA_NONE} }, @@ -116,9 +117,9 @@ static const struct opcode_table_s opcode_table[] = { { "%cassign/wr",of_CASSIGN_WR,1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%cassign/x0",of_CASSIGN_X0,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} }, { "%cast2", of_CAST2, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, - { "%cmp/s", of_CMPS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%cmp/u", of_CMPU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/wr", of_CMPWR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/ws", of_CMPWS, 
2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, @@ -137,6 +138,7 @@ static const struct opcode_table_s opcode_table[] = { { "%cvt/vr", of_CVT_VR, 2, {OA_BIT1, OA_NUMBER, OA_NONE} }, { "%deassign",of_DEASSIGN,3,{OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%deassign/wr",of_DEASSIGN_WR,1,{OA_FUNC_PTR, OA_NONE, OA_NONE} }, + { "%debug/thr", of_DEBUG_THR, 0,{OA_NONE, OA_NONE, OA_NONE} }, { "%delay", of_DELAY, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%delayx", of_DELAYX, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%delete/obj",of_DELETE_OBJ,1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, @@ -145,17 +147,21 @@ static const struct opcode_table_s opcode_table[] = { { "%div/s", of_DIV_S, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%div/wr", of_DIV_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%dup/real", of_DUP_REAL,0, {OA_NONE, OA_NONE, OA_NONE} }, + { "%dup/vec4", of_DUP_VEC4,0, {OA_NONE, OA_NONE, OA_NONE} }, { "%end", of_END, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%evctl", of_EVCTL, 2, {OA_FUNC_PTR, OA_BIT1, OA_NONE} }, { "%evctl/c",of_EVCTLC, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%evctl/i",of_EVCTLI, 2, {OA_FUNC_PTR, OA_BIT1, OA_NONE} }, { "%evctl/s",of_EVCTLS, 2, {OA_FUNC_PTR, OA_BIT1, OA_NONE} }, + { "%flag_get/vec4", of_FLAG_GET_VEC4, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%flag_set/imm", of_FLAG_SET_IMM, 2, {OA_NUMBER, OA_BIT1, OA_NONE} }, + { "%flag_set/vec4", of_FLAG_SET_VEC4, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%force/link",of_FORCE_LINK,2,{OA_FUNC_PTR,OA_FUNC_PTR2,OA_NONE} }, { "%force/v",of_FORCE_V,3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%force/wr",of_FORCE_WR,1,{OA_FUNC_PTR, OA_NONE, OA_NONE} }, { "%force/x0",of_FORCE_X0,3,{OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%free", of_FREE, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} }, - { "%inv", of_INV, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, + { "%inv", of_INV, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%ix/add", of_IX_ADD, 3, {OA_NUMBER, OA_BIT1, OA_BIT2} }, { "%ix/get", of_IX_GET, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { 
"%ix/get/s",of_IX_GET_S,3,{OA_BIT1, OA_BIT2, OA_NUMBER} }, @@ -165,6 +171,8 @@ static const struct opcode_table_s opcode_table[] = { { "%ix/mov", of_IX_MOV, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%ix/mul", of_IX_MUL, 3, {OA_NUMBER, OA_BIT1, OA_BIT2} }, { "%ix/sub", of_IX_SUB, 3, {OA_NUMBER, OA_BIT1, OA_BIT2} }, + { "%ix/vec4", of_IX_VEC4, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%ix/vec4/s",of_IX_VEC4_S,1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%jmp", of_JMP, 1, {OA_CODE_PTR, OA_NONE, OA_NONE} }, { "%jmp/0", of_JMP0, 2, {OA_CODE_PTR, OA_BIT1, OA_NONE} }, { "%jmp/0xz",of_JMP0XZ, 2, {OA_CODE_PTR, OA_BIT1, OA_NONE} }, @@ -183,7 +191,7 @@ static const struct opcode_table_s opcode_table[] = { { "%load/real", of_LOAD_REAL,1,{OA_VPI_PTR, OA_NONE, OA_NONE} }, { "%load/str", of_LOAD_STR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%load/stra", of_LOAD_STRA,2,{OA_ARR_PTR, OA_BIT1, OA_NONE} }, - { "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, + { "%load/vec4", of_LOAD_VEC4,1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, @@ -206,12 +214,15 @@ static const struct opcode_table_s opcode_table[] = { { "%nor", of_NOR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%nor/r", of_NORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%null", of_NULL, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%or", of_OR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%or", of_OR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%or/r", of_ORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, - { "%pad", of_PAD, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%pad/s", of_PAD_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%part", of_PART, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%pop/obj", of_POP_OBJ, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%pop/real",of_POP_REAL,1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%pop/str", of_POP_STR, 1, 
{OA_NUMBER, OA_NONE, OA_NONE} }, + { "%pop/vec4",of_POP_VEC4,1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%pow", of_POW, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pow/s", of_POW_S, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pow/wr", of_POW_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, @@ -221,6 +232,7 @@ static const struct opcode_table_s opcode_table[] = { { "%prop/v", of_PROP_V, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pushi/real",of_PUSHI_REAL,2,{OA_BIT1, OA_BIT2, OA_NONE} }, { "%pushi/str", of_PUSHI_STR, 1,{OA_STRING, OA_NONE, OA_NONE} }, + { "%pushi/vec4",of_PUSHI_VEC4,3,{OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pushv/str", of_PUSHV_STR, 2, {OA_BIT1,OA_BIT2, OA_NONE} }, { "%putc/str/v",of_PUTC_STR_V,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} }, { "%release/net",of_RELEASE_NET,3,{OA_FUNC_PTR,OA_BIT1,OA_BIT2} }, @@ -234,9 +246,9 @@ static const struct opcode_table_s opcode_table[] = { { "%set/dar/obj/str", of_SET_DAR_OBJ_STR, 1,{OA_NUMBER,OA_NONE,OA_NONE} }, { "%set/v", of_SET_VEC,3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%set/x0", of_SET_X0, 3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, - { "%shiftl/i0", of_SHIFTL_I0, 2, {OA_BIT1,OA_NUMBER, OA_NONE} }, - { "%shiftr/i0", of_SHIFTR_I0, 2, {OA_BIT1,OA_NUMBER, OA_NONE} }, - { "%shiftr/s/i0", of_SHIFTR_S_I0,2,{OA_BIT1,OA_NUMBER, OA_NONE} }, + { "%shiftl", of_SHIFTL, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%shiftr", of_SHIFTR, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%shiftr/s", of_SHIFTR_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%store/dar/r", of_STORE_DAR_R, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE} }, { "%store/dar/str",of_STORE_DAR_STR, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE} }, { "%store/obj", of_STORE_OBJ, 1, {OA_FUNC_PTR,OA_NONE, OA_NONE} }, @@ -248,7 +260,8 @@ static const struct opcode_table_s opcode_table[] = { { "%store/reala", of_STORE_REALA, 2, {OA_ARR_PTR, OA_BIT1, OA_NONE} }, { "%store/str", of_STORE_STR, 1, {OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%store/stra", of_STORE_STRA, 2, {OA_ARR_PTR, OA_BIT1, OA_NONE} }, - { "%sub", of_SUB, 3, {OA_BIT1, OA_BIT2, 
OA_NUMBER} }, + { "%store/vec4", of_STORE_VEC4, 2, {OA_FUNC_PTR,OA_BIT1, OA_NONE} }, + { "%sub", of_SUB, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%sub/wr", of_SUB_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%subi", of_SUBI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%substr", of_SUBSTR, 2,{OA_BIT1, OA_BIT2, OA_NONE} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index d3e702356..cfcced0d9 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -32,12 +32,19 @@ experience of implementing it for strings, I'll want to change other types around to using this method as well. Keep this in mind whenever considering adding new instructions to vvp. +FLAGS + +There are up to 16 bits in each thread that are available for +flags. These are used as destinations for operations that return +boolean values, for example comparisons. They are also used as inputs +for test and branch opcodes. + * %abs/wr , This instruction calculates the absolute value of a real value. It uses the fabs() function in the run-time to do the work. -* %add , , +* %add , , (XXXX Old version) This instruction adds the right vector into the left vector, the vectors having the width . If any of the bits of either vector @@ -46,6 +53,13 @@ sum. See also the %sub instruction. +* %add + +This opcode pops and adds two vec4 values from the vec4 stack, adds +them, and pushes the result back to the stack. The input values must +have the same size, and the pushed result will have the same width. + +See also the %sub instruction. * %add/wr , @@ -67,17 +81,20 @@ is zero extended to match any width. This instruction allocates the storage for a new instance of an automatically allocated scope. -* %and , , +* %and -Perform the bitwise AND of the two vectors, and store the result in -the left vector. Each bit is calculated independent of other bits. AND -means the following: +Perform the bitwise AND of the two vectors popped from the vec4 stack, +and push the result. Each bit is calculated independent of other +bits. 
AND means the following: 0 and ? --> 0 ? and 0 --> 0 1 and 1 --> 1 otherwise x +The input vectors must be the same width, and the output vector will +be the width of the input. + * %assign/ar , * %assign/ar/d , * %assign/ar/e @@ -123,9 +140,9 @@ The %assign/av/e variation uses the information in the thread event control registers to determine when to perform the assign. %evctl is used to set the event control information. -* %assign/v0 , , -* %assign/v0/d , , -* %assign/v0/e , +* %assign/v0 , , (XXXX Old description) +* %assign/v0/d , , (XXXX Old description) +* %assign/v0/e , (XXXX Old description) The %assign/v0 instruction is a vector version of non-blocking assignment. The is the number of clock ticks in the future @@ -152,6 +169,27 @@ This is similar to the %assign/v0 instruction, but adds the index-1 index register with the canonical index of the destination where the vector is to be written. This allows for part writes into the vector. +* %assign/vec4 , +* %assign/vec4/d , +* %assign/vec4/e + +The %assign/vec4 instruction is a vec4 version of non-blocking +assignment. The is the number of clock ticks in the future +where the assignment should schedule, and the value to assign is +pulled from the vec4 stack. + +The %assign/vec4/d instruction is the same, but gets its delay value +from the index register instead. + +* %assign/vec4/off/d , , + +This is for writing parts to the target variable. The is +the variable to write, as usual. The selects an index +register that holds the offset into the target variable, and the + selects the index register that contains the delay. The +offset is in canonical bits. The width that is written is taken from +the width of the value on the stack. + * %assign/wr , * %assign/wr/d , * %assign/wr/e @@ -180,10 +218,12 @@ The is the address of the thread register that contains the bit value to assign. -* %blend , , +* %blend -This instruction blends the bits of a vector into the destination in a -manner like the expression (x ? : ). 
The truth table is: +This instruction blends the bits of two vectors into a result in a +manner like the expression ('bx ? : ). The two source vectors +are popped from the vec4 stack (and must have the same width) and the +result pushed in their place. The truth table for each bit is: 1 1 --> 1 0 0 --> 0 @@ -238,8 +278,8 @@ Convert the source vector, of type logic, to a bool vector by changing all the X and Z bits to 0. The source and destinations may overlap. -* %cmp/u , , -* %cmp/s , , +* %cmp/u , , (XXXX Old meaning) +* %cmp/s , , (XXXX Old meaning) These instructions perform a generic comparison of two vectors of equal size. The and numbers address the least-significant @@ -268,6 +308,21 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The compare. In either case, if either operand contains x or z, then lt bit gets the x value. +* %cmp/s +* %cmp/u + +These instructions perform a generic comparison of two vectors of +equal size. Two values are pulled from the top of the stack, and not +replaced. The results are written into flag bits 4,5,6. The +expressions (a, , * %cmpi/u , , @@ -424,6 +479,7 @@ right operand is 0, then the result is NaN. * dup/real +* dup/vec4 These opcodes duplicate the value on the top of the stack for the corresponding type. @@ -458,6 +514,18 @@ the format of the output is: is a string, if string is 0 then the following default message is used: "Procedural tracing.". +* %flag_set/imm , + +This instruction sets an immediate value into a flag bit. This is a +single bit, and the value is 0==0, 1==1, 2==z, 3==x. + +* %flag_get/vec4 +* %flag_set/vec4 + +These instructions provide a means for accessing flag bits. The +%flag_get/vec4 loads the numbered flag as a vec4 on top of the vec4 +stack, and the %flag_set/vec4 pops the top of the vec4 stack and +writes the LSB to the selected flag. * %force/v