From b08120e2233cafe64b1f9f4306c7da0bfed3c6c7 Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Tue, 1 Feb 2011 22:44:01 +0000 Subject: [PATCH] Patch to improve sign extension efficiency in vvp. Currently the vvp target emits multiple single bit %mov instructions to perform sign extension. This patch adds a new %pad instruction that allows sign extension to be performed with just one instruction. --- tgt-vvp/eval_expr.c | 76 +++++++++++++++++++-------------------------- vvp/codes.h | 1 + vvp/compile.cc | 1 + vvp/opcodes.txt | 8 +++++ vvp/vthread.cc | 18 +++++++++++ 5 files changed, 60 insertions(+), 44 deletions(-) diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index 90709ec6c..9a20299b4 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -395,7 +395,7 @@ static struct vector_info draw_eq_immediate(ivl_expr_t expr, unsigned ewid, lv.base = base; lv.wid = ewid; if (ewid > 1) - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, ewid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, ewid-1); } else if (lv.wid < ewid) { unsigned base = allocate_vector(ewid); @@ -411,7 +411,7 @@ static struct vector_info draw_eq_immediate(ivl_expr_t expr, unsigned ewid, clr_vector(lv); fprintf(vvp_out, " %%mov %u, %u, %u;\n", base, lv.base, lv.wid); - fprintf(vvp_out, " %%mov %u, 0, %u;\n", + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+lv.wid, ewid-lv.wid); lv.base = base; lv.wid = ewid; @@ -576,7 +576,7 @@ static struct vector_info draw_binary_expr_eq(ivl_expr_t expr, lv.base = base; lv.wid = ewid; if (ewid > 1) - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, ewid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, ewid-1); } return lv; @@ -659,7 +659,7 @@ static struct vector_info draw_binary_expr_land(ivl_expr_t expr, unsigned wid) clr_vector(lv); lv.base = base; lv.wid = wid; - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, wid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, wid-1); } return lv; @@ -770,7 +770,7 @@ static struct vector_info 
draw_binary_expr_lor(ivl_expr_t expr, unsigned wid, if (lv.base >= 8) clr_vector(lv); lv.base = base; lv.wid = wid; - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, wid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, wid-1); } return lv; @@ -883,7 +883,7 @@ static struct vector_info draw_binary_expr_le_bool(ivl_expr_t expr, tmp.base = base; tmp.wid = wid; if (wid > 1) - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, wid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, wid-1); } return tmp; @@ -1018,7 +1018,7 @@ static struct vector_info draw_binary_expr_le(ivl_expr_t expr, lv.base = base; lv.wid = wid; if (wid > 1) - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base+1, wid-1); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base+1, wid-1); } return lv; @@ -1762,16 +1762,15 @@ static struct vector_info draw_concat_expr(ivl_expr_t expr, unsigned wid, /* Pad the expression when needed. */ if (wid > concat_wid) { + unsigned base = res.base+concat_wid; + unsigned count = wid-concat_wid; /* We can get a signed concatenation with $signed({...}). */ if (ivl_expr_signed(expr)) { - unsigned base = res.base+concat_wid-1; - for (idx = 1; idx <= wid-concat_wid; idx += 1) { - fprintf(vvp_out, " %%mov %u, %u, 1;\n", - base+idx, base); - } + fprintf(vvp_out, " %%pad %u, %u, %u;\n", + base, base-1, count); } else { - fprintf(vvp_out, " %%mov %u, 0, %u;\n", - res.base+concat_wid, wid-concat_wid); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", + base, count); } } } else { @@ -1918,19 +1917,19 @@ static struct vector_info draw_number_expr(ivl_expr_t expr, unsigned wid) /* Pad the number up to the expression width. 
*/ if (idx < wid) { if (ivl_expr_signed(expr) && bits[nwid-1] == '1') - fprintf(vvp_out, " %%mov %u, 1, %u;\n", + fprintf(vvp_out, " %%pad %u, 1, %u;\n", res.base+idx, wid-idx); else if (bits[nwid-1] == 'x') - fprintf(vvp_out, " %%mov %u, 2, %u;\n", + fprintf(vvp_out, " %%pad %u, 2, %u;\n", res.base+idx, wid-idx); else if (bits[nwid-1] == 'z') - fprintf(vvp_out, " %%mov %u, 3, %u;\n", + fprintf(vvp_out, " %%pad %u, 3, %u;\n", res.base+idx, wid-idx); else - fprintf(vvp_out, " %%mov %u, 0, %u;\n", + fprintf(vvp_out, " %%pad %u, 0, %u;\n", res.base+idx, wid-idx); } @@ -1948,14 +1947,14 @@ static struct vector_info draw_number_expr(ivl_expr_t expr, unsigned wid) */ static void pad_in_place(struct vector_info dest, unsigned sub_width, int signed_flag) { + unsigned base = dest.base+sub_width; + unsigned count = dest.wid-sub_width; if (signed_flag) { - unsigned idx; - for (idx = sub_width ; idx < dest.wid ; idx += 1) - fprintf(vvp_out, " %%mov %u, %u, 1;\n", - dest.base+idx, dest.base+sub_width-1); + fprintf(vvp_out, " %%pad %u, %u, %u;\n", + base, base-1, count); } else { - fprintf(vvp_out, " %%mov %u, 0, %u;\n", - dest.base+sub_width, dest.wid - sub_width); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", + base, count); } } @@ -2152,7 +2151,7 @@ static struct vector_info draw_string_expr(ivl_expr_t expr, unsigned wid) /* Pad the number up to the expression width. 
*/ if (idx < wid) - fprintf(vvp_out, " %%mov %u, 0, %u;\n", res.base+idx, wid-idx); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", res.base+idx, wid-idx); if (res.base >= 8) save_expression_lookaside(res.base, expr, wid); @@ -2173,21 +2172,18 @@ void pad_expr_in_place(ivl_expr_t expr, struct vector_info res, unsigned swid) if (res.wid <= swid) return; + unsigned base = res.base+swid; + unsigned count = res.wid-swid; if (ivl_expr_signed(expr)) { - unsigned idx; - for (idx = swid ; idx < res.wid ; idx += 1) - fprintf(vvp_out, " %%mov %u, %u, 1;\n", - res.base+idx, res.base+swid-1); - + fprintf(vvp_out, " %%pad %u, %u, %u;\n", + base, base-1, count); } else { - unsigned base = res.base+swid; - unsigned count = res.wid-swid; /* The %movi is faster for larger widths, but for very - small counts, the %mov is faster. */ + small counts, the %pad is faster. */ if (count > 4) fprintf(vvp_out, " %%movi %u, 0, %u;\n", base, count); else - fprintf(vvp_out, " %%mov %u, 0, %u;\n", base, count); + fprintf(vvp_out, " %%pad %u, 0, %u;\n", base, count); } } @@ -2609,16 +2605,8 @@ static struct vector_info draw_select_unsized_literal(ivl_expr_t expr, assert(res.base); fprintf(vvp_out, " %%mov %u, %u, %u; Pad sub-expression to match width\n", res.base, subv.base, subv.wid); - if (ivl_expr_signed(sube)) { - unsigned idx; - for (idx = subv.wid ; idx < res.wid ; idx += 1) { - fprintf(vvp_out, " %%mov %u, %u, 1;\n", - res.base+idx, subv.base+subv.wid-1); - } - } else { - fprintf(vvp_out, " %%mov %u, 0, %u\n", - res.base+subv.wid, wid-subv.wid); - } + + pad_in_place(res, subv.wid, ivl_expr_signed(sube)); subv = res; } diff --git a/vvp/codes.h b/vvp/codes.h index 2d4a86466..32dc25e66 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -141,6 +141,7 @@ extern bool of_NOR(vthread_t thr, vvp_code_t code); extern bool of_NORR(vthread_t thr, vvp_code_t code); extern bool of_OR(vthread_t thr, vvp_code_t code); extern bool of_ORR(vthread_t thr, vvp_code_t code); +extern bool of_PAD(vthread_t thr, vvp_code_t 
code); extern bool of_POW(vthread_t thr, vvp_code_t code); extern bool of_POW_S(vthread_t thr, vvp_code_t code); extern bool of_POW_WR(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index c913deb33..e7641f47b 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -184,6 +184,7 @@ static const struct opcode_table_s opcode_table[] = { { "%nor/r", of_NORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%or", of_OR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%or/r", of_ORR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, + { "%pad", of_PAD, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pow", of_POW, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pow/s", of_POW_S, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%pow/wr", of_POW_WR, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index 6a55fbd7f..fc3b62106 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -706,6 +706,14 @@ and the <dst> is a writable scalar. The <dst> gets the value of the or of all the bits of the src vector. +* %pad <dst>, <src>, <wid> + +This instruction replicates a single bit in register space into a +destination vector in register space. The destination may overlap +the source bit. The <dst> may not be 0-3. This is useful for zero +or sign extending a vector. + + * %pow <bit-l>, <bit-r>, <wid> * %pow/s <bit-l>, <bit-r>, <wid> diff --git a/vvp/vthread.cc b/vvp/vthread.cc index af75ced8a..40d9fb854 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -3558,6 +3558,24 @@ bool of_MOV(vthread_t thr, vvp_code_t cp) return true; } +bool of_PAD(vthread_t thr, vvp_code_t cp) +{ + assert(cp->bit_idx[0] >= 4); + + vvp_bit4_t pad_bit; + if (cp->bit_idx[1] < 4) + pad_bit = thr_index_to_bit4[cp->bit_idx[1]]; + else + pad_bit = thr->bits4.value(cp->bit_idx[1]); + + thr_check_addr(thr, cp->bit_idx[0]+cp->number-1); + vvp_vector4_t tmp (cp->number, pad_bit); + thr->bits4.set_vec(cp->bit_idx[0], tmp); + return true; + + return true; +} + /* * %mov/wr <dst>, <src> */