tgt-vvp: Replace `%pushi ...; %op` with `%opi`

For sequences where the tgt-vvp backend generates `%pushi` followed by an
operation that has an immediate version, emit the immediate version of
that operation instead. This is slightly more efficient.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
This commit is contained in:
Lars-Peter Clausen 2022-06-05 22:43:33 +02:00
parent 8c56b2d116
commit de9d5e98b1
3 changed files with 8 additions and 16 deletions

View File

@ -1191,8 +1191,7 @@ static void draw_unary_inc_dec(ivl_expr_t sub, bool incr, bool pre)
if (pre) {
/* prefix means we add the result first, and store the
result, as well as leaving a copy on the stack. */
fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", wid);
fprintf(vvp_out, " %s;\n", cmd);
fprintf(vvp_out, " %si 1, 0, %u;\n", cmd, wid);
fprintf(vvp_out, " %%dup/vec4;\n");
fprintf(vvp_out, " %%store/vec4 v%p_0, 0, %u;\n", sig, wid);
@ -1200,8 +1199,7 @@ static void draw_unary_inc_dec(ivl_expr_t sub, bool incr, bool pre)
/* The post-fix decrement returns the non-decremented
version, so there is a slight re-arrange. */
fprintf(vvp_out, " %%dup/vec4;\n");
fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", wid);
fprintf(vvp_out, " %s;\n", cmd);
fprintf(vvp_out, " %si 1, 0, %u;\n", cmd, wid);
fprintf(vvp_out, " %%store/vec4 v%p_0, 0, %u;\n", sig, wid);
}
}
@ -1245,8 +1243,7 @@ static void draw_unary_vec4(ivl_expr_t expr)
case '-':
draw_eval_vec4(sub);
fprintf(vvp_out, " %%inv;\n");
fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub));
fprintf(vvp_out, " %%add;\n");
fprintf(vvp_out, " %%addi 1, 0, %u;\n", ivl_expr_width(sub));
break;
case 'A': /* nand (~&) */
@ -1287,13 +1284,11 @@ static void draw_unary_vec4(ivl_expr_t expr)
/* Test if (m) < 0 */
fprintf(vvp_out, " %%dup/vec4;\n");
fprintf(vvp_out, " %%pushi/vec4 0, 0, %u;\n", ivl_expr_width(sub));
fprintf(vvp_out, " %%cmp/s;\n");
fprintf(vvp_out, " %%cmpi/s 0, 0, %u;\n", ivl_expr_width(sub));
fprintf(vvp_out, " %%jmp/0xz T_%u.%u, 5;\n", thread_count, local_count);
/* If so, calculate -(m) */
fprintf(vvp_out, " %%inv;\n");
fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub));
fprintf(vvp_out, " %%add;\n");
fprintf(vvp_out, " %%addi 1, 0, %u;\n", ivl_expr_width(sub));
fprintf(vvp_out, "T_%u.%u ;\n", thread_count, local_count);
local_count += 1;
break;

View File

@ -134,8 +134,7 @@ static void get_vec_from_lval_slice(ivl_lval_t lval, struct vec_slice_info*slice
} else {
fprintf(vvp_out, " %%load/vec4 v%p_%lu;\n", sig, use_word);
}
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, 32;\n", part_off);
fprintf(vvp_out, " %%part/u %u;\n", wid);
fprintf(vvp_out, " %%parti/u %u, %lu, 32;\n", wid, part_off);
} else if (ivl_signal_dimensions(sig)==0 && part_off_ex!=0 && word_ix==0) {

View File

@ -164,14 +164,12 @@ int show_stmt_repeat(ivl_statement_t net, ivl_scope_t sscope)
unsigned, then we only need to try to escape if expr==0 as
it will never be <0. */
fprintf(vvp_out, "T_%u.%u %%dup/vec4;\n", thread_count, lab_top);
fprintf(vvp_out, " %%pushi/vec4 0, 0, %u;\n", ivl_expr_width(expr));
fprintf(vvp_out, " %%cmp/%s;\n", sign);
fprintf(vvp_out, " %%cmpi/%s 0, 0, %u;\n", sign, ivl_expr_width(expr));
if (ivl_expr_signed(expr))
fprintf(vvp_out, " %%jmp/1xz T_%u.%u, 5;\n", thread_count, lab_out);
fprintf(vvp_out, " %%jmp/1 T_%u.%u, 4;\n", thread_count, lab_out);
/* This adds -1 (all ones in 2's complement) to the count. */
fprintf(vvp_out, " %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(expr));
fprintf(vvp_out, " %%sub;\n");
fprintf(vvp_out, " %%subi 1, 0, %u;\n", ivl_expr_width(expr));
rc += show_statement(ivl_stmt_sub_stmt(net), sscope);