From e5eb7541508512ab9289819717154a934247e5be Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sun, 5 Jan 2014 14:12:27 -0800 Subject: [PATCH] vec4 versions of a bunch of unary operators. --- tgt-vvp/eval_vec4.c | 81 +++++++++++++++++++++- vvp/compile.cc | 14 ++-- vvp/opcodes.txt | 14 ++-- vvp/vthread.cc | 160 ++++++++++++++++++-------------------------- 4 files changed, 163 insertions(+), 106 deletions(-) diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c index 54f4e1cda..086e42be3 100644 --- a/tgt-vvp/eval_vec4.c +++ b/tgt-vvp/eval_vec4.c @@ -609,6 +609,21 @@ static void draw_unary_vec4(ivl_expr_t expr, int stuff_ok_flag) ivl_expr_t sub = ivl_expr_oper1(expr); switch (ivl_expr_opcode(expr)) { + case '&': + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%and/r;\n"); + break; + + case '|': + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%or/r;\n"); + break; + + case '^': + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%xor/r;\n"); + break; + case '~': draw_eval_vec4(sub, stuff_ok_flag); fprintf(vvp_out, " %%inv;\n"); @@ -619,8 +634,72 @@ static void draw_unary_vec4(ivl_expr_t expr, int stuff_ok_flag) fprintf(vvp_out, " %%nor/r;\n"); break; + case '-': + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%inv;\n"); + fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub)); + fprintf(vvp_out, " %%add;\n"); + break; + + case 'A': /* nand (~&) */ + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%nand/r;\n"); + break; + + case 'N': /* nor (~|) */ + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%nor/r;\n"); + break; + + case 'X': /* xnor (~^) */ + draw_eval_vec4(sub, stuff_ok_flag); + fprintf(vvp_out, " %%xnor/r;\n"); + break; + + case 'm': /* abs(m) */ + draw_eval_vec4(sub, stuff_ok_flag); + if (! 
ivl_expr_signed(sub)) + break; + + /* Test if (m) < 0 */ + fprintf(vvp_out, " %%dup/vec4;\n"); + fprintf(vvp_out, " %%pushi/vec4 0, 0, %u;\n", ivl_expr_width(sub)); + fprintf(vvp_out, " %%cmp/s;\n"); + fprintf(vvp_out, " %%jmp/0xz T_%u.%u, 5;\n", thread_count, local_count); + /* If so, calculate -(m) */ + fprintf(vvp_out, " %%inv;\n"); + fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub)); + fprintf(vvp_out, " %%add;\n"); + fprintf(vvp_out, "T_%u.%u ;\n", thread_count, local_count); + break; + + case 'v': /* Cast real to vec4 */ + assert(ivl_expr_value(sub) == IVL_VT_REAL); + draw_eval_real(sub); + fprintf(vvp_out, " %%cvt/vr %u;\n", ivl_expr_width(expr)); + break; + + case '2': /* Cast expression to bool */ + switch (ivl_expr_value(sub)) { + case IVL_VT_LOGIC: + draw_eval_vec4(sub, STUFF_OK_XZ); + fprintf(vvp_out, " %%cast2;\n"); + break; + case IVL_VT_BOOL: + draw_eval_vec4(sub, 0); + break; + case IVL_VT_REAL: + draw_eval_real(sub); + fprintf(vvp_out, " %%cvt/vr;\n"); + break; + default: + assert(0); + break; + } + break; + default: - fprintf(stderr, "XXXX Unary operator %c no implemented\n", ivl_expr_opcode(expr)); + fprintf(stderr, "XXXX Unary operator %c not implemented\n", ivl_expr_opcode(expr)); break; } } diff --git a/vvp/compile.cc b/vvp/compile.cc index 8ebf6bb71..e34adeed8 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -90,7 +90,7 @@ static const struct opcode_table_s opcode_table[] = { { "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} }, { "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%and/r", of_ANDR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%and/r", of_ANDR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%andi", of_ANDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%assign/ar",of_ASSIGN_AR,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} }, { "%assign/ar/d",of_ASSIGN_ARD,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} }, @@ -117,7 +117,7 @@ static const struct opcode_table_s opcode_table[] = { { 
"%cassign/vec4", of_CASSIGN_VEC4, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} }, { "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, - { "%cast2", of_CAST2, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} }, @@ -137,7 +137,7 @@ static const struct opcode_table_s opcode_table[] = { { "%cvt/rv/s", of_CVT_RV_S,2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cvt/sr", of_CVT_SR, 1, {OA_BIT1, OA_NONE, OA_NONE} }, { "%cvt/ur", of_CVT_UR, 1, {OA_BIT1, OA_NONE, OA_NONE} }, - { "%cvt/vr", of_CVT_VR, 2, {OA_BIT1, OA_NUMBER, OA_NONE} }, + { "%cvt/vr", of_CVT_VR, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%deassign",of_DEASSIGN,3,{OA_FUNC_PTR, OA_BIT1, OA_BIT2} }, { "%deassign/wr",of_DEASSIGN_WR,1,{OA_FUNC_PTR, OA_NONE, OA_NONE} }, { "%debug/thr", of_DEBUG_THR, 0,{OA_NONE, OA_NONE, OA_NONE} }, @@ -210,7 +210,7 @@ static const struct opcode_table_s opcode_table[] = { { "%mul/wr", of_MUL_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%muli", of_MULI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%nand", of_NAND, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%nand/r", of_NANDR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%nand/r", of_NANDR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%new/cobj", of_NEW_COBJ, 1, {OA_VPI_PTR,OA_NONE, OA_NONE} }, { "%new/darray",of_NEW_DARRAY,2, {OA_BIT1, OA_STRING,OA_NONE} }, { "%noop", of_NOOP, 0, {OA_NONE, OA_NONE, OA_NONE} }, @@ -218,7 +218,7 @@ static const struct opcode_table_s opcode_table[] = { { "%nor/r", of_NORR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%null", of_NULL, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%or", of_OR, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%or/r", of_ORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%or/r", of_ORR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%pad/s", of_PAD_S, 1, {OA_NUMBER, 
OA_NONE, OA_NONE} }, { "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%part/s", of_PART_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, @@ -276,9 +276,9 @@ static const struct opcode_table_s opcode_table[] = { { "%wait", of_WAIT, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE} }, { "%wait/fork",of_WAIT_FORK,0,{OA_NONE, OA_NONE, OA_NONE} }, { "%xnor", of_XNOR, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%xnor/r", of_XNORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%xnor/r", of_XNORR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%xor", of_XOR, 0, {OA_NONE, OA_NONE, OA_NONE} }, - { "%xor/r", of_XORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%xor/r", of_XORR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { 0, of_NOOP, 0, {OA_NONE, OA_NONE, OA_NONE} } }; diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index c1a1e995b..bb1dd3a43 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -95,6 +95,11 @@ bits. AND means the following: The input vectors must be the same width, and the output vector will be the width of the input. +* %and/r + +Pop the top value from the vec4 stack, perform a reduction &, then +return the single-bit result. + * %assign/ar , * %assign/ar/d , * %assign/ar/e @@ -414,11 +419,12 @@ value stack. Precision may be lost in the conversion. The %cvt/rv/s instruction is the same as %cvt/rv, but treats the thread vector as a signed value. -* %cvt/vr <bit>, <wid> +* %cvt/vr <wid> -The %cvt/vr opcode converts a real word from the stack to a thread vector -starting at <bit> and with the width <wid>. Non-integer precision is -lost in the conversion, and the real value is popped from the stack. +The %cvt/vr opcode converts a real word from the stack to a vec4 that +is <wid> wide. Non-integer precision is lost in the conversion, and +the real value is popped from the stack. The result is pushed to the +vec4 stack. 
* %deassign , , diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 71791c3cf..c8ffb6ba4 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -1742,36 +1742,25 @@ bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp) return true; } - -bool of_CAST2(vthread_t thr, vvp_code_t cp) +/* + * %cast2 + */ +bool of_CAST2(vthread_t thr, vvp_code_t) { -#if 0 - unsigned dst = cp->bit_idx[0]; - unsigned src = cp->bit_idx[1]; - unsigned wid = cp->number; + vvp_vector4_t val = thr->pop_vec4(); + unsigned wid = val.size(); - thr_check_addr(thr, dst+wid-1); - thr_check_addr(thr, src+wid-1); - - vvp_vector4_t res; - switch (src) { - case 0: - case 2: - case 3: - res = vvp_vector4_t(wid, BIT4_0); - break; - case 1: - res = vvp_vector4_t(wid, BIT4_1); - break; - default: - res = vector2_to_vector4(vvp_vector2_t(vthread_bits_to_vector(thr, src, wid)), wid); - break; + for (unsigned idx = 0 ; idx < wid ; idx += 1) { + switch (val.value(idx)) { + case BIT4_1: + val.set_bit(idx, BIT4_1); + break; + default: + val.set_bit(idx, BIT4_0); + break; + } } - - thr->bits4.set_vec(dst, res); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%cast2 ...\n"); -#endif + thr->push_vec4(val); return true; } @@ -2272,22 +2261,15 @@ bool of_CVT_UR(vthread_t thr, vvp_code_t cp) } /* - * %cvt/vr + * %cvt/vr */ bool of_CVT_VR(vthread_t thr, vvp_code_t cp) { -#if 0 double r = thr->pop_real(); - unsigned base = cp->bit_idx[0]; unsigned wid = cp->number; - vvp_vector4_t tmp(wid, r); - /* Make sure there is enough space for the new vector. 
*/ - thr_check_addr(thr, base+wid-1); - thr->bits4.set_vec(base, tmp); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%cvt/vr ...\n"); -#endif + vvp_vector4_t tmp(wid, r); + thr->push_vec4(tmp); return true; } @@ -4709,45 +4691,43 @@ bool of_NULL(vthread_t thr, vvp_code_t) return true; } - -bool of_ANDR(vthread_t thr, vvp_code_t cp) +/* + * %and/r + */ +bool of_ANDR(vthread_t thr, vvp_code_t) { -#if 0 - assert(cp->bit_idx[0] >= 4); + vvp_vector4_t val = thr->pop_vec4(); vvp_bit4_t lb = BIT4_1; - unsigned idx2 = cp->bit_idx[1]; - for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { - - vvp_bit4_t rb = thr_get_bit(thr, idx2+idx); + for (unsigned idx = 0 ; idx < val.size() ; idx += 1) { + vvp_bit4_t rb = val.value(idx); if (rb == BIT4_0) { lb = BIT4_0; break; } - if (rb != BIT4_1) + if (rb != 1) lb = BIT4_X; } - thr_put_bit(thr, cp->bit_idx[0], lb); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%and/r ...\n"); -#endif + vvp_vector4_t res (1, lb); + thr->push_vec4(res); + return true; } -bool of_NANDR(vthread_t thr, vvp_code_t cp) +/* + * %nand/r + */ +bool of_NANDR(vthread_t thr, vvp_code_t) { -#if 0 - assert(cp->bit_idx[0] >= 4); + vvp_vector4_t val = thr->pop_vec4(); vvp_bit4_t lb = BIT4_0; - unsigned idx2 = cp->bit_idx[1]; + for (unsigned idx = 0 ; idx < val.size() ; idx += 1) { - for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { - - vvp_bit4_t rb = thr_get_bit(thr, idx2+idx); + vvp_bit4_t rb = val.value(idx); if (rb == BIT4_0) { lb = BIT4_1; break; @@ -4757,24 +4737,22 @@ bool of_NANDR(vthread_t thr, vvp_code_t cp) lb = BIT4_X; } - thr_put_bit(thr, cp->bit_idx[0], lb); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%nand/r ...\n"); -#endif + vvp_vector4_t res (1, lb); + thr->push_vec4(res); + return true; } -bool of_ORR(vthread_t thr, vvp_code_t cp) +/* + * %or/r + */ +bool of_ORR(vthread_t thr, vvp_code_t) { -#if 0 - assert(cp->bit_idx[0] >= 4); + vvp_vector4_t val = thr->pop_vec4(); vvp_bit4_t lb = BIT4_0; - unsigned idx2 = cp->bit_idx[1]; - - for 
(unsigned idx = 0 ; idx < cp->number ; idx += 1) { - - vvp_bit4_t rb = thr_get_bit(thr, idx2+idx); + for (unsigned idx = 0 ; idx < val.size() ; idx += 1) { + vvp_bit4_t rb = val.value(idx); if (rb == BIT4_1) { lb = BIT4_1; break; @@ -4784,24 +4762,22 @@ bool of_ORR(vthread_t thr, vvp_code_t cp) lb = BIT4_X; } - thr_put_bit(thr, cp->bit_idx[0], lb); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%orr ...\n"); -#endif + vvp_vector4_t res (1, lb); + thr->push_vec4(res); return true; } -bool of_XORR(vthread_t thr, vvp_code_t cp) +/* + * %xor/r + */ +bool of_XORR(vthread_t thr, vvp_code_t) { -#if 0 - assert(cp->bit_idx[0] >= 4); + vvp_vector4_t val = thr->pop_vec4(); vvp_bit4_t lb = BIT4_0; - unsigned idx2 = cp->bit_idx[1]; + for (unsigned idx = 0 ; idx < val.size() ; idx += 1) { - for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { - - vvp_bit4_t rb = thr_get_bit(thr, idx2+idx); + vvp_bit4_t rb = val.value(idx); if (rb == BIT4_1) lb = ~lb; else if (rb != BIT4_0) { @@ -4810,24 +4786,22 @@ bool of_XORR(vthread_t thr, vvp_code_t cp) } } - thr_put_bit(thr, cp->bit_idx[0], lb); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%xorr ...\n"); -#endif + vvp_vector4_t res (1, lb); + thr->push_vec4(res); return true; } +/* + * %xnor/r + */ bool of_XNORR(vthread_t thr, vvp_code_t cp) { -#if 0 - assert(cp->bit_idx[0] >= 4); + vvp_vector4_t val = thr->pop_vec4(); vvp_bit4_t lb = BIT4_1; - unsigned idx2 = cp->bit_idx[1]; + for (unsigned idx = 0 ; idx < val.size() ; idx += 1) { - for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { - - vvp_bit4_t rb = thr_get_bit(thr, idx2+idx); + vvp_bit4_t rb = val.value(idx); if (rb == BIT4_1) lb = ~lb; else if (rb != BIT4_0) { @@ -4836,10 +4810,8 @@ bool of_XNORR(vthread_t thr, vvp_code_t cp) } } - thr_put_bit(thr, cp->bit_idx[0], lb); -#else - fprintf(stderr, "XXXX NOT IMPLEMENTED: %%xnorr...\n"); -#endif + vvp_vector4_t res (1, lb); + thr->push_vec4(res); return true; }