Add and use %concati/vec4 and %addi instructions.
Also, clean up some warnings, and optimize some existing opcodes.
This commit is contained in:
parent
1612c6d638
commit
301edf69d3
|
|
@ -38,6 +38,66 @@ void resize_vec4_wid(ivl_expr_t expr, unsigned wid)
|
||||||
fprintf(vvp_out, " %%pad/u %u;\n", wid);
|
fprintf(vvp_out, " %%pad/u %u;\n", wid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test if the draw_immediate_vec4 instruction can be used.
|
||||||
|
*/
|
||||||
|
static int test_immediate_vec4_ok(ivl_expr_t re)
|
||||||
|
{
|
||||||
|
const char*bits;
|
||||||
|
unsigned idx;
|
||||||
|
|
||||||
|
if (ivl_expr_type(re) != IVL_EX_NUMBER)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (ivl_expr_width(re) <= 32)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
bits = ivl_expr_bits(re);
|
||||||
|
|
||||||
|
for (idx = 32 ; idx < ivl_expr_width(re) ; idx += 1) {
|
||||||
|
if (bits[idx] != '0')
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
|
||||||
|
{
|
||||||
|
unsigned long val0 = 0;
|
||||||
|
unsigned long valx = 0;
|
||||||
|
unsigned wid = ivl_expr_width(re);
|
||||||
|
const char*bits = ivl_expr_bits(re);
|
||||||
|
|
||||||
|
unsigned idx;
|
||||||
|
|
||||||
|
for (idx = 0 ; idx < wid ; idx += 1) {
|
||||||
|
assert( ((val0|valx)&0x80000000UL) == 0UL );
|
||||||
|
val0 <<= 1;
|
||||||
|
valx <<= 1;
|
||||||
|
switch (bits[wid-idx-1]) {
|
||||||
|
case '0':
|
||||||
|
break;
|
||||||
|
case '1':
|
||||||
|
val0 |= 1;
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
val0 |= 1;
|
||||||
|
valx |= 1;
|
||||||
|
break;
|
||||||
|
case 'z':
|
||||||
|
val0 |= 0;
|
||||||
|
valx |= 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(vvp_out, " %s %lu, %lu, %u;\n", opcode, val0, valx, wid);
|
||||||
|
}
|
||||||
|
|
||||||
static void draw_binary_vec4_arith(ivl_expr_t expr)
|
static void draw_binary_vec4_arith(ivl_expr_t expr)
|
||||||
{
|
{
|
||||||
ivl_expr_t le = ivl_expr_oper1(expr);
|
ivl_expr_t le = ivl_expr_oper1(expr);
|
||||||
|
|
@ -58,6 +118,21 @@ static void draw_binary_vec4_arith(ivl_expr_t expr)
|
||||||
if (lwid != ewid) {
|
if (lwid != ewid) {
|
||||||
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(le)? 's' : 'u', ewid);
|
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(le)? 's' : 'u', ewid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Special case: If the re expression can be collected into an
|
||||||
|
immediate operand, and the instruction supports it, then
|
||||||
|
generate an immediate instruction instead of the generic
|
||||||
|
version. */
|
||||||
|
if (rwid==ewid && test_immediate_vec4_ok(re)) {
|
||||||
|
switch (ivl_expr_opcode(expr)) {
|
||||||
|
case '+':
|
||||||
|
draw_immediate_vec4(re, "%addi");
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
draw_eval_vec4(re);
|
draw_eval_vec4(re);
|
||||||
if (rwid != ewid) {
|
if (rwid != ewid) {
|
||||||
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(re)? 's' : 'u', ewid);
|
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(re)? 's' : 'u', ewid);
|
||||||
|
|
@ -618,40 +693,18 @@ static void draw_binary_vec4(ivl_expr_t expr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void draw_concat_vec4(ivl_expr_t expr)
|
|
||||||
{
|
|
||||||
/* Repeat the concatenation this many times to make a
|
|
||||||
super-concatenation. */
|
|
||||||
unsigned repeat = ivl_expr_repeat(expr);
|
|
||||||
/* This is the number of expressions that go into the
|
|
||||||
concatenation. */
|
|
||||||
unsigned num_sube = ivl_expr_parms(expr);
|
|
||||||
unsigned sub_idx;
|
|
||||||
|
|
||||||
assert(num_sube > 0);
|
|
||||||
|
|
||||||
/* Start with the least-significant bits. */
|
|
||||||
draw_eval_vec4(ivl_expr_parm(expr, 0));
|
|
||||||
|
|
||||||
for (sub_idx = 1 ; sub_idx < num_sube ; sub_idx += 1) {
|
|
||||||
/* Concatenate progressively higher parts. */
|
|
||||||
draw_eval_vec4(ivl_expr_parm(expr, sub_idx));
|
|
||||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (repeat > 1) {
|
|
||||||
fprintf(vvp_out, " %%replicate %u;\n", repeat);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Push a number into the vec4 stack using %pushi/vec4
|
* This handles two special cases:
|
||||||
* instructions. The %pushi/vec4 instruction can only handle up to 32
|
* 1) Making a large IVL_EX_NUMBER as an immediate value. In this
|
||||||
* non-zero bits, so if there are more than that, then generate
|
* case, start with a %pushi/vec4 to get the stack started, then
|
||||||
* multiple %pushi/vec4 statements, and use %concat/vec4 statements to
|
* continue with %concati/vec4 instructions to build that number
|
||||||
* concatenate the vectors into the desired result.
|
* up.
|
||||||
|
*
|
||||||
|
* 2) Concatenating a large IVL_EX_NUMBER to the current top of the
|
||||||
|
* stack. In this case, start with %concati/vec4 and continue
|
||||||
|
* generating %concati/vec4 instructions to finish up the large number.
|
||||||
*/
|
*/
|
||||||
static void draw_number_vec4(ivl_expr_t expr)
|
static void draw_concat_number_vec4(ivl_expr_t expr, int as_concati)
|
||||||
{
|
{
|
||||||
unsigned long val0 = 0;
|
unsigned long val0 = 0;
|
||||||
unsigned long valx = 0;
|
unsigned long valx = 0;
|
||||||
|
|
@ -660,7 +713,7 @@ static void draw_number_vec4(ivl_expr_t expr)
|
||||||
|
|
||||||
unsigned idx;
|
unsigned idx;
|
||||||
int accum = 0;
|
int accum = 0;
|
||||||
int count_pushi = 0;
|
int count_pushi = as_concati? 1 : 0;
|
||||||
|
|
||||||
/* Scan the literal bits, MSB first. */
|
/* Scan the literal bits, MSB first. */
|
||||||
for (idx = 0 ; idx < wid ; idx += 1) {
|
for (idx = 0 ; idx < wid ; idx += 1) {
|
||||||
|
|
@ -693,27 +746,82 @@ static void draw_number_vec4(ivl_expr_t expr)
|
||||||
then write it out, generate a %concat/vec4, and set
|
then write it out, generate a %concat/vec4, and set
|
||||||
up to handle more bits. */
|
up to handle more bits. */
|
||||||
if ( (val0|valx) & 0x80000000UL ) {
|
if ( (val0|valx) & 0x80000000UL ) {
|
||||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %d;\n", val0, valx, accum);
|
if (count_pushi) {
|
||||||
|
fprintf(vvp_out, " %%concati/vec4 %lu, %lu, %d;\n",
|
||||||
|
val0, valx, accum);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %d;\n",
|
||||||
|
val0, valx, accum);
|
||||||
|
}
|
||||||
|
|
||||||
accum = 0;
|
accum = 0;
|
||||||
val0 = 0;
|
val0 = 0;
|
||||||
valx = 0;
|
valx = 0;
|
||||||
/* If there is already at least 1 pushi, then
|
|
||||||
concatenate this result to what we've done
|
|
||||||
already. */
|
|
||||||
if (count_pushi)
|
|
||||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
|
||||||
count_pushi += 1;
|
count_pushi += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (accum) {
|
if (accum) {
|
||||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %u;\n", val0, valx, accum);
|
if (count_pushi) {
|
||||||
if (count_pushi)
|
fprintf(vvp_out, " %%concati/vec4 %lu, %lu, %u;\n",
|
||||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
val0, valx, accum);
|
||||||
count_pushi += 1;
|
} else {
|
||||||
|
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %u;\n",
|
||||||
|
val0, valx, accum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void draw_concat_vec4(ivl_expr_t expr)
|
||||||
|
{
|
||||||
|
/* Repeat the concatenation this many times to make a
|
||||||
|
super-concatenation. */
|
||||||
|
unsigned repeat = ivl_expr_repeat(expr);
|
||||||
|
/* This is the number of expressions that go into the
|
||||||
|
concatenation. */
|
||||||
|
unsigned num_sube = ivl_expr_parms(expr);
|
||||||
|
unsigned sub_idx;
|
||||||
|
|
||||||
|
assert(num_sube > 0);
|
||||||
|
|
||||||
|
/* Start with the most-significant bits. */
|
||||||
|
draw_eval_vec4(ivl_expr_parm(expr, 0));
|
||||||
|
|
||||||
|
for (sub_idx = 1 ; sub_idx < num_sube ; sub_idx += 1) {
|
||||||
|
/* Concatenate progressively lower parts. */
|
||||||
|
ivl_expr_t sube = ivl_expr_parm(expr, sub_idx);
|
||||||
|
|
||||||
|
/* Special case: The next expression is a NUMBER that
|
||||||
|
can be concatenated using %concati/vec4
|
||||||
|
instructions. */
|
||||||
|
if (ivl_expr_type(sube) == IVL_EX_NUMBER) {
|
||||||
|
draw_concat_number_vec4(sube, 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
draw_eval_vec4(sube);
|
||||||
|
fprintf(vvp_out, " %%concat/vec4; draw_concat_vec4\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (repeat > 1) {
|
||||||
|
fprintf(vvp_out, " %%replicate %u;\n", repeat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Push a number into the vec4 stack using %pushi/vec4
|
||||||
|
* instructions. The %pushi/vec4 instruction can only handle up to 32
|
||||||
|
* non-zero bits, so if there are more than that, then generate
|
||||||
|
* an initial %pushi/vec4 statement followed by %concati/vec4 statements
|
||||||
|
* that append the remaining bits to build up the desired result.
|
||||||
|
*/
|
||||||
|
static void draw_number_vec4(ivl_expr_t expr)
|
||||||
|
{
|
||||||
|
draw_concat_number_vec4(expr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void draw_property_vec4(ivl_expr_t expr)
|
static void draw_property_vec4(ivl_expr_t expr)
|
||||||
{
|
{
|
||||||
ivl_signal_t sig = ivl_expr_signal(expr);
|
ivl_signal_t sig = ivl_expr_signal(expr);
|
||||||
|
|
@ -873,20 +981,20 @@ static void draw_string_vec4(ivl_expr_t expr)
|
||||||
p += 1;
|
p += 1;
|
||||||
tmp_wid += 8;
|
tmp_wid += 8;
|
||||||
if (tmp_wid == 32) {
|
if (tmp_wid == 32) {
|
||||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, 32;\n", tmp);
|
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, 32; draw_string_vec4\n", tmp);
|
||||||
tmp = 0;
|
tmp = 0;
|
||||||
tmp_wid = 0;
|
tmp_wid = 0;
|
||||||
if (push_flag == 0)
|
if (push_flag == 0)
|
||||||
push_flag += 1;
|
push_flag += 1;
|
||||||
else
|
else
|
||||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
fprintf(vvp_out, " %%concat/vec4; draw_string_vec4\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tmp_wid > 0) {
|
if (tmp_wid > 0) {
|
||||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, %u;\n", tmp, tmp_wid);
|
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, %u; draw_string_vec4\n", tmp, tmp_wid);
|
||||||
if (push_flag != 0)
|
if (push_flag != 0)
|
||||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
fprintf(vvp_out, " %%concat/vec4; draw_string_vec4\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
free(fp);
|
free(fp);
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ typedef bool (*vvp_code_fun)(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_ABS_WR(vthread_t thr, vvp_code_t code);
|
extern bool of_ABS_WR(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_ADD(vthread_t thr, vvp_code_t code);
|
extern bool of_ADD(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
|
extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_ADDI(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_ALLOC(vthread_t thr, vvp_code_t code);
|
extern bool of_ALLOC(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_AND(vthread_t thr, vvp_code_t code);
|
extern bool of_AND(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_ANDR(vthread_t thr, vvp_code_t code);
|
extern bool of_ANDR(vthread_t thr, vvp_code_t code);
|
||||||
|
|
@ -71,6 +72,7 @@ extern bool of_CMPZ(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CONCAT_STR(vthread_t thr, vvp_code_t code);
|
extern bool of_CONCAT_STR(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CONCATI_STR(vthread_t thr, vvp_code_t code);
|
extern bool of_CONCATI_STR(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t code);
|
extern bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CVT_RS(vthread_t thr, vvp_code_t code);
|
extern bool of_CVT_RS(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CVT_RU(vthread_t thr, vvp_code_t code);
|
extern bool of_CVT_RU(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CVT_RV(vthread_t thr, vvp_code_t code);
|
extern bool of_CVT_RV(vthread_t thr, vvp_code_t code);
|
||||||
|
|
|
||||||
|
|
@ -87,6 +87,7 @@ static const struct opcode_table_s opcode_table[] = {
|
||||||
{ "%abs/wr", of_ABS_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%abs/wr", of_ABS_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%add", of_ADD, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%add", of_ADD, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%add/wr", of_ADD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%add/wr", of_ADD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
{ "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||||
{ "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} },
|
{ "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} },
|
||||||
{ "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%and/r", of_ANDR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%and/r", of_ANDR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
|
@ -119,9 +120,10 @@ static const struct opcode_table_s opcode_table[] = {
|
||||||
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||||
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%concat/vec4",of_CONCAT_VEC4,0,{OA_NONE, OA_NONE, OA_NONE} },
|
{ "%concat/vec4", of_CONCAT_VEC4, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%concati/str",of_CONCATI_STR,1,{OA_STRING,OA_NONE, OA_NONE} },
|
{ "%concati/str", of_CONCATI_STR, 1,{OA_STRING,OA_NONE, OA_NONE} },
|
||||||
|
{ "%concati/vec4",of_CONCATI_VEC4,3,{OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||||
{ "%cvt/rs", of_CVT_RS, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
{ "%cvt/rs", of_CVT_RS, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
||||||
{ "%cvt/ru", of_CVT_RU, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
{ "%cvt/ru", of_CVT_RU, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
||||||
{ "%cvt/rv", of_CVT_RV, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cvt/rv", of_CVT_RV, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
|
|
||||||
|
|
@ -54,11 +54,15 @@ sum.
|
||||||
See also the %sub instruction.
|
See also the %sub instruction.
|
||||||
|
|
||||||
* %add
|
* %add
|
||||||
|
* %addi <vala>, <valb>, <wid>
|
||||||
|
|
||||||
This opcode pops two vec4 values from the vec4 stack, adds
|
This opcode pops two vec4 values from the vec4 stack, adds
|
||||||
them, and pushes the result back to the stack. The input values must
|
them, and pushes the result back to the stack. The input values must
|
||||||
have the same size, and the pushed result will have the same width.
|
have the same size, and the pushed result will have the same width.
|
||||||
|
|
||||||
|
The %addi variant takes one operand from the stack; the other is an
|
||||||
|
immediate value (see %pushi/vec4).
|
||||||
|
|
||||||
See also the %sub instruction.
|
See also the %sub instruction.
|
||||||
|
|
||||||
* %add/wr <bit-l>, <bit-r>
|
* %add/wr <bit-l>, <bit-r>
|
||||||
|
|
@ -319,12 +323,20 @@ of it as passing the tail, then the head, concatenating them, and
|
||||||
pushing the result. The stack starts with two strings in the stack,
|
pushing the result. The stack starts with two strings in the stack,
|
||||||
and ends with one string in the stack.
|
and ends with one string in the stack.
|
||||||
|
|
||||||
|
The %concati/str form pops only one value from the stack. The right
|
||||||
|
part comes from the immediate value.
|
||||||
|
|
||||||
* %concat/vec4
|
* %concat/vec4
|
||||||
|
* %concati/vec4 <vala>, <valb>, <wid>
|
||||||
|
|
||||||
Pop two vec4 vectors, concatenate them, and push the combined
|
Pop two vec4 vectors, concatenate them, and push the combined
|
||||||
result. The top of the vec4 stack supplies the LSB bits of the result,
|
result. The top of the vec4 stack supplies the LSB bits of the result,
|
||||||
and the next entry in the stack supplies the MSB bits of the result.
|
and the next entry in the stack supplies the MSB bits of the result.
|
||||||
|
|
||||||
|
The %concati/vec4 form takes an immediate value and appends it as the
|
||||||
|
LSB bits to the value on the top of the stack. See the %pushi/vec4
|
||||||
|
instruction for how to describe the immediate value.
|
||||||
|
|
||||||
* %cvt/sr <bit-l>
|
* %cvt/sr <bit-l>
|
||||||
* %cvt/rs <bit-l>
|
* %cvt/rs <bit-l>
|
||||||
|
|
||||||
|
|
|
||||||
176
vvp/vthread.cc
176
vvp/vthread.cc
|
|
@ -334,7 +334,7 @@ static inline void thr_put_bit(struct vthread_s*thr,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr)
|
vvp_bit4_t vthread_get_bit(struct vthread_s* /*thr*/, unsigned addr)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
if (vpi_mode_flag == VPI_MODE_COMPILETF) return BIT4_X;
|
if (vpi_mode_flag == VPI_MODE_COMPILETF) return BIT4_X;
|
||||||
|
|
@ -345,7 +345,7 @@ vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void vthread_put_bit(struct vthread_s*thr, unsigned addr, vvp_bit4_t bit)
|
void vthread_put_bit(struct vthread_s* /*thr*/, unsigned addr, vvp_bit4_t bit)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
thr_put_bit(thr, addr, bit);
|
thr_put_bit(thr, addr, bit);
|
||||||
|
|
@ -900,24 +900,47 @@ bool of_AND(vthread_t thr, vvp_code_t)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static void get_immediate_rval(vvp_code_t cp, vvp_vector4_t&val)
|
||||||
* %add
|
|
||||||
*
|
|
||||||
* Pop r,
|
|
||||||
* Pop l,
|
|
||||||
* Push l+r
|
|
||||||
*
|
|
||||||
* Pop 2 and push 1 is the same as pop 1 and replace the remaining top
|
|
||||||
* of the stack with a new value. That is what we will do.
|
|
||||||
*/
|
|
||||||
bool of_ADD(vthread_t thr, vvp_code_t)
|
|
||||||
{
|
{
|
||||||
vvp_vector4_t r = thr->pop_vec4();
|
uint32_t vala = cp->bit_idx[0];
|
||||||
// Rather than pop l, use it directly from the stack. When we
|
uint32_t valb = cp->bit_idx[1];
|
||||||
// assign to 'l', that will edit the top of the stack, which
|
unsigned wid = cp->number;
|
||||||
// replaces a pop and a push.
|
|
||||||
vvp_vector4_t&l = thr->peek_vec4();
|
|
||||||
|
|
||||||
|
// The immediate value can be wider than 32 bits, but
|
||||||
|
// only if the high bits are zero. So at most we need to run
|
||||||
|
// through the loop below 32 times. Maybe less, if the target
|
||||||
|
// width is less. We don't have to do anything special on that
|
||||||
|
// because vala/valb bits will shift away so (vala|valb) will
|
||||||
|
// turn to zero at or before 32 shifts.
|
||||||
|
|
||||||
|
for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
|
||||||
|
uint32_t ba = 0;
|
||||||
|
// Convert the vala/valb bits to a ba number that can be
|
||||||
|
// used to select what goes into the value.
|
||||||
|
ba = (valb & 1) << 1;
|
||||||
|
ba |= vala & 1;
|
||||||
|
|
||||||
|
switch (ba) {
|
||||||
|
case 1:
|
||||||
|
val.set_bit(idx, BIT4_1);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
val.set_bit(idx, BIT4_Z);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
val.set_bit(idx, BIT4_X);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
vala >>= 1;
|
||||||
|
valb >>= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool do_ADD(vvp_vector4_t&l, const vvp_vector4_t&r)
|
||||||
|
{
|
||||||
unsigned wid = l.size();
|
unsigned wid = l.size();
|
||||||
assert(wid == r.size());
|
assert(wid == r.size());
|
||||||
|
|
||||||
|
|
@ -946,6 +969,48 @@ bool of_ADD(vthread_t thr, vvp_code_t)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %add
|
||||||
|
*
|
||||||
|
* Pop r,
|
||||||
|
* Pop l,
|
||||||
|
* Push l+r
|
||||||
|
*
|
||||||
|
* Pop 2 and push 1 is the same as pop 1 and replace the remaining top
|
||||||
|
* of the stack with a new value. That is what we will do.
|
||||||
|
*/
|
||||||
|
bool of_ADD(vthread_t thr, vvp_code_t)
|
||||||
|
{
|
||||||
|
vvp_vector4_t r = thr->pop_vec4();
|
||||||
|
// Rather than pop l, use it directly from the stack. When we
|
||||||
|
// assign to 'l', that will edit the top of the stack, which
|
||||||
|
// replaces a pop and a push.
|
||||||
|
vvp_vector4_t&l = thr->peek_vec4();
|
||||||
|
|
||||||
|
return do_ADD(l, r);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %addi <vala>, <valb>, <wid>
|
||||||
|
*
|
||||||
|
* Pop 1 operand, get the other operand from the arguments, and push
|
||||||
|
* the result.
|
||||||
|
*/
|
||||||
|
bool of_ADDI(vthread_t thr, vvp_code_t cp)
|
||||||
|
{
|
||||||
|
unsigned wid = cp->number;
|
||||||
|
|
||||||
|
vvp_vector4_t&l = thr->peek_vec4();
|
||||||
|
|
||||||
|
// I expect that most of the bits of an immediate value are
|
||||||
|
// going to be zero, so start the result vector with all zero
|
||||||
|
// bits. Then we only need to replace the bits that are different.
|
||||||
|
vvp_vector4_t r (wid, BIT4_0);
|
||||||
|
get_immediate_rval (cp, r);
|
||||||
|
|
||||||
|
return do_ADD(l, r);
|
||||||
|
}
|
||||||
|
|
||||||
bool of_ADD_WR(vthread_t thr, vvp_code_t)
|
bool of_ADD_WR(vthread_t thr, vvp_code_t)
|
||||||
{
|
{
|
||||||
double r = thr->pop_real();
|
double r = thr->pop_real();
|
||||||
|
|
@ -1057,7 +1122,7 @@ bool of_ASSIGN_VEC4_A_D(vthread_t thr, vvp_code_t cp)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
int use_off = -off;
|
int use_off = -off;
|
||||||
assert(wid > use_off);
|
assert(wid > (unsigned)use_off);
|
||||||
unsigned use_wid = wid - use_off;
|
unsigned use_wid = wid - use_off;
|
||||||
val = val.subvalue(use_off, use_wid);
|
val = val.subvalue(use_off, use_wid);
|
||||||
off = 0;
|
off = 0;
|
||||||
|
|
@ -1149,7 +1214,7 @@ bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
int use_off = -off;
|
int use_off = -off;
|
||||||
assert(wid > use_off);
|
assert(wid > (unsigned)use_off);
|
||||||
unsigned use_wid = wid - use_off;
|
unsigned use_wid = wid - use_off;
|
||||||
val = val.subvalue(use_off, use_wid);
|
val = val.subvalue(use_off, use_wid);
|
||||||
off = 0;
|
off = 0;
|
||||||
|
|
@ -1488,20 +1553,20 @@ bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
|
||||||
*/
|
*/
|
||||||
bool of_CAST2(vthread_t thr, vvp_code_t)
|
bool of_CAST2(vthread_t thr, vvp_code_t)
|
||||||
{
|
{
|
||||||
vvp_vector4_t val = thr->pop_vec4();
|
vvp_vector4_t&val = thr->peek_vec4();
|
||||||
unsigned wid = val.size();
|
unsigned wid = val.size();
|
||||||
|
|
||||||
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
||||||
switch (val.value(idx)) {
|
switch (val.value(idx)) {
|
||||||
|
case BIT4_0:
|
||||||
case BIT4_1:
|
case BIT4_1:
|
||||||
val.set_bit(idx, BIT4_1);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
val.set_bit(idx, BIT4_0);
|
val.set_bit(idx, BIT4_0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
thr->push_vec4(val);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1818,6 +1883,65 @@ bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %concati/vec4 <vala>, <valb>, <wid>
|
||||||
|
*
|
||||||
|
* Concat the immediate value to the LOW bits of the concatenation.
|
||||||
|
* Get the HIGH bits from the top of the vec4 stack.
|
||||||
|
*/
|
||||||
|
bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t cp)
|
||||||
|
{
|
||||||
|
uint32_t vala = cp->bit_idx[0];
|
||||||
|
uint32_t valb = cp->bit_idx[1];
|
||||||
|
unsigned wid = cp->number;
|
||||||
|
|
||||||
|
vvp_vector4_t&msb = thr->peek_vec4();
|
||||||
|
|
||||||
|
// I expect that most of the bits of an immediate value are
|
||||||
|
// going to be zero, so start the result vector with all zero
|
||||||
|
// bits. Then we only need to replace the bits that are different.
|
||||||
|
vvp_vector4_t lsb (wid, BIT4_0);
|
||||||
|
|
||||||
|
// The %concati/vec4 can create values bigger than 32 bits, but
|
||||||
|
// only if the high bits are zero. So at most we need to run
|
||||||
|
// through the loop below 32 times. Maybe less, if the target
|
||||||
|
// width is less. We don't have to do anything special on that
|
||||||
|
// because vala/valb bits will shift away so (vala|valb) will
|
||||||
|
// turn to zero at or before 32 shifts.
|
||||||
|
|
||||||
|
for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
|
||||||
|
uint32_t ba = 0;
|
||||||
|
// Convert the vala/valb bits to a ba number that can be
|
||||||
|
// used to select what goes into the value.
|
||||||
|
ba = (valb & 1) << 1;
|
||||||
|
ba |= vala & 1;
|
||||||
|
|
||||||
|
switch (ba) {
|
||||||
|
case 1:
|
||||||
|
lsb.set_bit(idx, BIT4_1);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
lsb.set_bit(idx, BIT4_Z);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
lsb.set_bit(idx, BIT4_X);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
vala >>= 1;
|
||||||
|
valb >>= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X);
|
||||||
|
res.set_vec(0, lsb);
|
||||||
|
res.set_vec(lsb.size(), msb);
|
||||||
|
|
||||||
|
msb = res;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool of_CVT_RS(vthread_t thr, vvp_code_t cp)
|
bool of_CVT_RS(vthread_t thr, vvp_code_t cp)
|
||||||
{
|
{
|
||||||
int64_t r = thr->words[cp->bit_idx[0]].w_int;
|
int64_t r = thr->words[cp->bit_idx[0]].w_int;
|
||||||
|
|
@ -4136,12 +4260,14 @@ bool of_XNORR(vthread_t thr, vvp_code_t)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %or
|
||||||
|
*/
|
||||||
bool of_OR(vthread_t thr, vvp_code_t)
|
bool of_OR(vthread_t thr, vvp_code_t)
|
||||||
{
|
{
|
||||||
vvp_vector4_t vala = thr->pop_vec4();
|
|
||||||
vvp_vector4_t valb = thr->pop_vec4();
|
vvp_vector4_t valb = thr->pop_vec4();
|
||||||
|
vvp_vector4_t&vala = thr->peek_vec4();
|
||||||
vala |= valb;
|
vala |= valb;
|
||||||
thr->push_vec4(vala);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue