From 5d750b7779555aa4e643e0d9c0a498d075f0fff9 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Wed, 3 Oct 2007 20:58:40 -0700 Subject: [PATCH] Optimize runtime using immediate compare Implement compare-immediate instructions and generate code to use these new instructions to improve runtime performance. Signed-off-by: Stephen Williams --- tgt-vvp/eval_expr.c | 89 +++++++++++++++++++++++++++++++++------------ vvp/codes.h | 2 + vvp/compile.cc | 2 + vvp/opcodes.txt | 6 +++ vvp/vthread.cc | 63 ++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 23 deletions(-) diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index 46447f1ac..4627ecf77 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -36,6 +36,9 @@ int number_is_unknown(ivl_expr_t ex) const char*bits; unsigned idx; + if (ivl_expr_type(ex) == IVL_EX_ULONG) + return 0; + assert(ivl_expr_type(ex) == IVL_EX_NUMBER); bits = ivl_expr_bits(ex); @@ -66,6 +69,10 @@ int number_is_immediate(ivl_expr_t ex, unsigned lim_wid) if (bits[idx] != '0') return 0; + /* Negative numbers are not "immediate". */ + if (ivl_expr_signed(ex) && bits[ivl_expr_width(ex)-1]=='1') + return 0; + return 1; } @@ -290,13 +297,7 @@ static struct vector_info draw_binary_expr_eq(ivl_expr_t exp, return draw_binary_expr_eq_real(exp); } - if ((ivl_expr_type(re) == IVL_EX_ULONG) - && (0 == (ivl_expr_uvalue(re) & ~0xffff))) - return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag); - - if ((ivl_expr_type(re) == IVL_EX_NUMBER) - && (! 
number_is_unknown(re)) - && number_is_immediate(re, 16)) + if (number_is_immediate(re,16) && !number_is_unknown(re)) return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag); assert(ivl_expr_value(le) == IVL_VT_LOGIC @@ -683,42 +684,78 @@ static struct vector_info draw_binary_expr_le(ivl_expr_t exp, assert(ivl_expr_value(re) == IVL_VT_LOGIC || ivl_expr_value(re) == IVL_VT_BOOL); - lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ); - rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ); + lv.wid = 0; + rv.wid = 0; switch (ivl_expr_opcode(exp)) { case 'G': - assert(lv.wid == rv.wid); - fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, - rv.base, lv.base, lv.wid); + rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ); + if (number_is_immediate(le,16) && !number_is_unknown(le)) { + unsigned imm = get_number_immediate(le); + assert(imm >= 0); + fprintf(vvp_out, " %%cmpi/%c %u, %u, %u;\n", s_flag, + rv.base, imm, rv.wid); + } else { + lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ); + assert(lv.wid == rv.wid); + fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, + rv.base, lv.base, lv.wid); + } fprintf(vvp_out, " %%or 5, 4, 1;\n"); break; case 'L': - assert(lv.wid == rv.wid); - fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, - lv.base, rv.base, lv.wid); + lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ); + if (number_is_immediate(re,16) && !number_is_unknown(re)) { + unsigned imm = get_number_immediate(re); + assert(imm >= 0); + fprintf(vvp_out, " %%cmpi/%c %u, %u, %u;\n", s_flag, + lv.base, imm, lv.wid); + } else { + rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ); + assert(lv.wid == rv.wid); + fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, + lv.base, rv.base, lv.wid); + } fprintf(vvp_out, " %%or 5, 4, 1;\n"); break; case '<': - assert(lv.wid == rv.wid); - fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, - lv.base, rv.base, lv.wid); + lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ); + if (number_is_immediate(re,16) && !number_is_unknown(re)) { + unsigned imm = 
get_number_immediate(re); + assert(imm >= 0); + fprintf(vvp_out, " %%cmpi/%c %u, %u, %u;\n", s_flag, + lv.base, imm, lv.wid); + } else { + rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ); + assert(lv.wid == rv.wid); + fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, + lv.base, rv.base, lv.wid); + } break; case '>': - assert(lv.wid == rv.wid); - fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, - rv.base, lv.base, lv.wid); + rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ); + if (number_is_immediate(le,16) && !number_is_unknown(le)) { + unsigned imm = get_number_immediate(le); + assert(imm >= 0); + fprintf(vvp_out, " %%cmpi/%c %u, %u, %u;\n", s_flag, + rv.base, imm, rv.wid); + } else { + lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ); + assert(lv.wid == rv.wid); + fprintf(vvp_out, " %%cmp/%c %u, %u, %u;\n", s_flag, + rv.base, lv.base, lv.wid); + } break; default: assert(0); } - clr_vector(lv); - clr_vector(rv); + if (lv.wid > 0) clr_vector(lv); + if (rv.wid > 0) clr_vector(rv); if ((stuff_ok_flag&STUFF_OK_47) && (wid == 1)) { lv.base = 5; @@ -1365,6 +1402,12 @@ static struct vector_info draw_number_expr(ivl_expr_t exp, unsigned wid) load the constant bit values. 
*/ res.base = allocate_vector(wid); + if ((!number_is_unknown(exp)) && number_is_immediate(exp, 16)) { + int val = get_number_immediate(exp); + fprintf(vvp_out, " %%movi %u, %d, %u;\n", res.base, val, wid); + return res; + } + idx = 0; while (idx < nwid) { unsigned cnt; diff --git a/vvp/codes.h b/vvp/codes.h index 7243202dc..c70093494 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -54,6 +54,7 @@ extern bool of_BLEND(vthread_t thr, vvp_code_t code); extern bool of_BREAKPOINT(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_LINK(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_V(vthread_t thr, vvp_code_t code); +extern bool of_CMPIS(vthread_t thr, vvp_code_t code); extern bool of_CMPIU(vthread_t thr, vvp_code_t code); extern bool of_CMPS(vthread_t thr, vvp_code_t code); extern bool of_CMPU(vthread_t thr, vvp_code_t code); @@ -103,6 +104,7 @@ extern bool of_MOD_S(vthread_t thr, vvp_code_t code); extern bool of_MOD_WR(vthread_t thr, vvp_code_t code); extern bool of_MOV(vthread_t thr, vvp_code_t code); extern bool of_MOV_WR(vthread_t thr, vvp_code_t code); +extern bool of_MOVI(vthread_t thr, vvp_code_t code); extern bool of_MUL(vthread_t thr, vvp_code_t code); extern bool of_MUL_WR(vthread_t thr, vvp_code_t code); extern bool of_MULI(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index 9a4dcb9fa..505c41d37 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -124,6 +124,7 @@ const static struct opcode_table_s opcode_table[] = { { "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cmp/x", of_CMPX, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmp/z", of_CMPZ, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cvt/ir", of_CVT_IR, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cvt/ri", of_CVT_RI, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, @@ -164,6 +165,7 @@ const static struct opcode_table_s opcode_table[] = { { "%mod/wr", of_MOD_WR, 2, 
{OA_BIT1, OA_BIT2, OA_NONE} }, { "%mov", of_MOV, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%mov/wr", of_MOV_WR, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, + { "%movi", of_MOVI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%mul", of_MUL, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%mul/wr", of_MUL_WR, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%muli", of_MULI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index bb2d5d91d..74a64f21b 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -189,6 +189,12 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The compare. In either case, if either operand contains x or z, then lt bit gets the x value. +* %cmpi/s <bit-l>, <imm>, <wid> +* %cmpi/u <bit-l>, <imm>, <wid> + +These instructions are similar to the %cmp instructions above, except +that the right hand operand is an immediate value. This is a positive +number that the vector is compared with. * %cmp/wr <bit-l>, <bit-r> diff --git a/vvp/vthread.cc b/vvp/vthread.cc index a7fddbc43..1e8707684 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -891,6 +891,55 @@ bool of_CMPS(vthread_t thr, vvp_code_t cp) return true; } +bool of_CMPIS(vthread_t thr, vvp_code_t cp) +{ + vvp_bit4_t eq = BIT4_1; + vvp_bit4_t eeq = BIT4_1; + vvp_bit4_t lt = BIT4_0; + + unsigned idx1 = cp->bit_idx[0]; + unsigned imm = cp->bit_idx[1]; + + const unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1; + thr_check_addr(thr, end1); + const vvp_bit4_t sig1 = thr_get_bit(thr, end1); + + for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { + vvp_bit4_t lv = thr_get_bit(thr, idx1); + vvp_bit4_t rv = (imm & 1)? 
BIT4_1 : BIT4_0; + imm >>= 1; + + if (lv > rv) { + lt = BIT4_0; + eeq = BIT4_0; + } else if (lv < rv) { + lt = BIT4_1; + eeq = BIT4_0; + } + if (eq != BIT4_X) { + if ((lv == BIT4_0) && (rv != BIT4_0)) + eq = BIT4_0; + if ((lv == BIT4_1) && (rv != BIT4_1)) + eq = BIT4_0; + if (bit4_is_xz(lv) || bit4_is_xz(rv)) + eq = BIT4_X; + } + + if (idx1 >= 4) idx1 += 1; + } + + if (eq == BIT4_X) + lt = BIT4_X; + else if (sig1 == BIT4_1) + lt = BIT4_1; + + thr_put_bit(thr, 4, eq); + thr_put_bit(thr, 5, lt); + thr_put_bit(thr, 6, eeq); + + return true; +} + bool of_CMPIU(vthread_t thr, vvp_code_t cp) { vvp_bit4_t eq = BIT4_1; @@ -2531,6 +2580,20 @@ bool of_MOV_WR(vthread_t thr, vvp_code_t cp) return true; } +bool of_MOVI(vthread_t thr, vvp_code_t cp) +{ + unsigned dst = cp->bit_idx[0]; + unsigned val = cp->bit_idx[1]; + unsigned wid = cp->number; + + thr_check_addr(thr, dst+wid); + + for (unsigned idx = 0 ; idx < wid ; idx += 1, val >>= 1) + thr->bits4.set_bit(dst+idx, (val&1)? BIT4_1 : BIT4_0); + + return true; +} + bool of_MUL(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4);