Add and use %concati/vec4 and %addi instructions.
Also, clean up some warnings, and optimize some existing opcodes.
This commit is contained in:
parent
1612c6d638
commit
301edf69d3
|
|
@ -38,6 +38,66 @@ void resize_vec4_wid(ivl_expr_t expr, unsigned wid)
|
|||
fprintf(vvp_out, " %%pad/u %u;\n", wid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test if the draw_immediate_vec4 instruction can be used.
|
||||
*/
|
||||
static int test_immediate_vec4_ok(ivl_expr_t re)
{
      const char*digits;
      unsigned wid;
      unsigned pos;

	/* Only a literal number can be folded into an immediate
	   operand. */
      if (ivl_expr_type(re) != IVL_EX_NUMBER)
	    return 0;

	/* The immediate carries at most 32 significant bits. A wider
	   number still qualifies, but only when every bit from index
	   32 upward is a literal '0'. */
      wid = ivl_expr_width(re);
      if (wid > 32) {
	    digits = ivl_expr_bits(re);
	    pos = 32;
	    while (pos < wid) {
		  if (digits[pos] != '0')
			return 0;
		  pos += 1;
	    }
      }

      return 1;
}
|
||||
|
||||
static void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
{
      const unsigned wid = ivl_expr_width(re);
      const char*bits = ivl_expr_bits(re);
      unsigned long aval = 0;
      unsigned long bval = 0;
      unsigned remain;

	/* Scan from bits[wid-1] down to bits[0], shifting each digit
	   into the low end of the two accumulators. The digit encoding
	   is: '0' -> (0,0), '1' -> (1,0), 'x' -> (1,1), 'z' -> (0,1)
	   for the (aval,bval) pair. */
      for (remain = wid ; remain > 0 ; remain -= 1) {
	    char digit;

	      /* Bit 31 must still be clear in both accumulators before
		 we shift, otherwise the value needs more than 32 bits. */
	    assert( ((aval|bval)&0x80000000UL) == 0UL );
	    aval <<= 1;
	    bval <<= 1;

	    digit = bits[remain-1];
	    if (digit == '1') {
		  aval |= 1;
	    } else if (digit == 'x') {
		  aval |= 1;
		  bval |= 1;
	    } else if (digit == 'z') {
		  bval |= 1;
	    } else if (digit != '0') {
		    /* Not a recognized bit character. */
		  assert(0);
	    }
      }

	/* Emit "<opcode> <aval>, <bval>, <wid>;" */
      fprintf(vvp_out, " %s %lu, %lu, %u;\n", opcode, aval, bval, wid);
}
|
||||
|
||||
static void draw_binary_vec4_arith(ivl_expr_t expr)
|
||||
{
|
||||
ivl_expr_t le = ivl_expr_oper1(expr);
|
||||
|
|
@ -58,6 +118,21 @@ static void draw_binary_vec4_arith(ivl_expr_t expr)
|
|||
if (lwid != ewid) {
|
||||
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(le)? 's' : 'u', ewid);
|
||||
}
|
||||
|
||||
/* Special case: If the re expression can be collected into an
|
||||
immediate operand, and the instruction supports it, then
|
||||
generate an immediate instruction instead of the generic
|
||||
version. */
|
||||
if (rwid==ewid && test_immediate_vec4_ok(re)) {
|
||||
switch (ivl_expr_opcode(expr)) {
|
||||
case '+':
|
||||
draw_immediate_vec4(re, "%addi");
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
draw_eval_vec4(re);
|
||||
if (rwid != ewid) {
|
||||
fprintf(vvp_out, " %%pad/%c %u;\n", ivl_expr_signed(re)? 's' : 'u', ewid);
|
||||
|
|
@ -618,40 +693,18 @@ static void draw_binary_vec4(ivl_expr_t expr)
|
|||
}
|
||||
}
|
||||
|
||||
static void draw_concat_vec4(ivl_expr_t expr)
{
	/* Number of times the whole concatenation is replicated. */
      const unsigned replication = ivl_expr_repeat(expr);
	/* Number of operand expressions in the concatenation. */
      const unsigned operand_count = ivl_expr_parms(expr);
      unsigned pos = 1;

      assert(operand_count > 0);

	/* Operand 0 goes onto the stack first. Each later operand is
	   pushed on top and joined with %concat/vec4, which places the
	   top of the stack in the low bits of the result, so operand 0
	   ends up in the high bits. */
      draw_eval_vec4(ivl_expr_parm(expr, 0));

      while (pos < operand_count) {
	    draw_eval_vec4(ivl_expr_parm(expr, pos));
	    fprintf(vvp_out, " %%concat/vec4;\n");
	    pos += 1;
      }

	/* A repeat count of 0 or 1 needs no extra instruction. */
      if (replication >= 2)
	    fprintf(vvp_out, " %%replicate %u;\n", replication);
}
|
||||
|
||||
/*
|
||||
* Push a number into the vec4 stack using %pushi/vec4
|
||||
* instructions. The %pushi/vec4 instruction can only handle up to 32
|
||||
* non-zero bits, so if there are more than that, then generate
|
||||
* multiple %pushi/vec4 statements, and use %concat/vec4 statements to
|
||||
* concatenate the vectors into the desired result.
|
||||
* This handles two special cases:
|
||||
* 1) Making a large IVL_EX_NUMBER as an immediate value. In this
|
||||
* case, start with a %pushi/vec4 to get the stack started, then
|
||||
* continue with %concati/vec4 instructions to build that number
|
||||
* up.
|
||||
*
|
||||
* 2) Concatenating a large IVL_EX_NUMBER to the current top of the
|
||||
* stack. In this case, start with %concati/vec4 and continue
|
||||
* generating %concati/vec4 instructions to finish up the large number.
|
||||
*/
|
||||
static void draw_number_vec4(ivl_expr_t expr)
|
||||
static void draw_concat_number_vec4(ivl_expr_t expr, int as_concati)
|
||||
{
|
||||
unsigned long val0 = 0;
|
||||
unsigned long valx = 0;
|
||||
|
|
@ -660,7 +713,7 @@ static void draw_number_vec4(ivl_expr_t expr)
|
|||
|
||||
unsigned idx;
|
||||
int accum = 0;
|
||||
int count_pushi = 0;
|
||||
int count_pushi = as_concati? 1 : 0;
|
||||
|
||||
/* Scan the literal bits, MSB first. */
|
||||
for (idx = 0 ; idx < wid ; idx += 1) {
|
||||
|
|
@ -693,27 +746,82 @@ static void draw_number_vec4(ivl_expr_t expr)
|
|||
then write it out, generate a %concat/vec4, and set
|
||||
up to handle more bits. */
|
||||
if ( (val0|valx) & 0x80000000UL ) {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %d;\n", val0, valx, accum);
|
||||
if (count_pushi) {
|
||||
fprintf(vvp_out, " %%concati/vec4 %lu, %lu, %d;\n",
|
||||
val0, valx, accum);
|
||||
|
||||
} else {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %d;\n",
|
||||
val0, valx, accum);
|
||||
}
|
||||
|
||||
accum = 0;
|
||||
val0 = 0;
|
||||
valx = 0;
|
||||
/* If there is already at least 1 pushi, then
|
||||
concatenate this result to what we've done
|
||||
already. */
|
||||
if (count_pushi)
|
||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
||||
|
||||
count_pushi += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (accum) {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %u;\n", val0, valx, accum);
|
||||
if (count_pushi)
|
||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
||||
count_pushi += 1;
|
||||
if (count_pushi) {
|
||||
fprintf(vvp_out, " %%concati/vec4 %lu, %lu, %u;\n",
|
||||
val0, valx, accum);
|
||||
} else {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, %lu, %u;\n",
|
||||
val0, valx, accum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void draw_concat_vec4(ivl_expr_t expr)
{
	/* Number of times the whole concatenation is replicated. */
      const unsigned replication = ivl_expr_repeat(expr);
	/* Number of operand expressions in the concatenation. */
      const unsigned operand_count = ivl_expr_parms(expr);
      unsigned pos;

      assert(operand_count > 0);

	/* Operand 0 holds the most-significant bits; it is always
	   evaluated onto the stack in the generic way. */
      draw_eval_vec4(ivl_expr_parm(expr, 0));

	/* Append the remaining operands, each one lower than the last. */
      for (pos = 1 ; pos < operand_count ; pos += 1) {
	    ivl_expr_t operand = ivl_expr_parm(expr, pos);

	    if (ivl_expr_type(operand) != IVL_EX_NUMBER) {
		    /* General case: evaluate the operand, then join
		       it with what is already on the stack. */
		  draw_eval_vec4(operand);
		  fprintf(vvp_out, " %%concat/vec4; draw_concat_vec4\n");
	    } else {
		    /* A literal number is appended directly with
		       %concati/vec4 instructions, with no separate
		       push. */
		  draw_concat_number_vec4(operand, 1);
	    }
      }

	/* A repeat count of 0 or 1 needs no extra instruction. */
      if (replication >= 2)
	    fprintf(vvp_out, " %%replicate %u;\n", replication);
}
|
||||
|
||||
/*
|
||||
* Push a number into the vec4 stack using %pushi/vec4
|
||||
* instructions. The %pushi/vec4 instruction can only handle up to 32
|
||||
* non-zero bits, so if there are more than that, then generate
|
||||
* multiple %pushi/vec4 statements, and use %concat/vec4 statements to
|
||||
* concatenate the vectors into the desired result.
|
||||
*/
|
||||
static void draw_number_vec4(ivl_expr_t expr)
|
||||
{
|
||||
draw_concat_number_vec4(expr, 0);
|
||||
}
|
||||
|
||||
static void draw_property_vec4(ivl_expr_t expr)
|
||||
{
|
||||
ivl_signal_t sig = ivl_expr_signal(expr);
|
||||
|
|
@ -873,20 +981,20 @@ static void draw_string_vec4(ivl_expr_t expr)
|
|||
p += 1;
|
||||
tmp_wid += 8;
|
||||
if (tmp_wid == 32) {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, 32;\n", tmp);
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, 32; draw_string_vec4\n", tmp);
|
||||
tmp = 0;
|
||||
tmp_wid = 0;
|
||||
if (push_flag == 0)
|
||||
push_flag += 1;
|
||||
else
|
||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
||||
fprintf(vvp_out, " %%concat/vec4; draw_string_vec4\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (tmp_wid > 0) {
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, %u;\n", tmp, tmp_wid);
|
||||
fprintf(vvp_out, " %%pushi/vec4 %lu, 0, %u; draw_string_vec4\n", tmp, tmp_wid);
|
||||
if (push_flag != 0)
|
||||
fprintf(vvp_out, " %%concat/vec4;\n");
|
||||
fprintf(vvp_out, " %%concat/vec4; draw_string_vec4\n");
|
||||
}
|
||||
|
||||
free(fp);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ typedef bool (*vvp_code_fun)(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_ABS_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ADD(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ADDI(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ALLOC(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_AND(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ANDR(vthread_t thr, vvp_code_t code);
|
||||
|
|
@ -71,6 +72,7 @@ extern bool of_CMPZ(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_CONCAT_STR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CONCATI_STR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CVT_RS(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CVT_RU(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CVT_RV(vthread_t thr, vvp_code_t code);
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ static const struct opcode_table_s opcode_table[] = {
|
|||
{ "%abs/wr", of_ABS_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%add", of_ADD, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%add/wr", of_ADD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} },
|
||||
{ "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%and/r", of_ANDR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
|
|
@ -119,9 +120,10 @@ static const struct opcode_table_s opcode_table[] = {
|
|||
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%concat/vec4",of_CONCAT_VEC4,0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%concati/str",of_CONCATI_STR,1,{OA_STRING,OA_NONE, OA_NONE} },
|
||||
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%concat/vec4", of_CONCAT_VEC4, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%concati/str", of_CONCATI_STR, 1,{OA_STRING,OA_NONE, OA_NONE} },
|
||||
{ "%concati/vec4",of_CONCATI_VEC4,3,{OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%cvt/rs", of_CVT_RS, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
||||
{ "%cvt/ru", of_CVT_RU, 1, {OA_BIT1, OA_NONE, OA_NONE} },
|
||||
{ "%cvt/rv", of_CVT_RV, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
|
|
|
|||
|
|
@ -54,11 +54,15 @@ sum.
|
|||
See also the %sub instruction.
|
||||
|
||||
* %add
|
||||
* %addi <vala>, <valb>, <wid>
|
||||
|
||||
This opcode pops two vec4 values from the vec4 stack, adds
|
||||
them, and pushes the result back to the stack. The input values must
|
||||
have the same size, and the pushed result will have the same width.
|
||||
|
||||
The %addi variant takes one operand from the stack; the other is an
|
||||
immediate value (See %pushi/vec4).
|
||||
|
||||
See also the %sub instruction.
|
||||
|
||||
* %add/wr <bit-l>, <bit-r>
|
||||
|
|
@ -319,12 +323,20 @@ of it as passing the tail, then the head, concatenating them, and
|
|||
pushing the result. The stack starts with two strings in the stack,
|
||||
and ends with one string in the stack.
|
||||
|
||||
The %concati/str form pops only one value from the stack. The right
|
||||
part comes from the immediate value.
|
||||
|
||||
* %concat/vec4
|
||||
* %concati/vec4 <vala>, <valb>, <wid>
|
||||
|
||||
Pop two vec4 vectors, concatenate them, and push the combined
|
||||
result. The top of the vec4 stack is the LSB of the result, and the
|
||||
next in this stack is the MSB bits of the result.
|
||||
|
||||
The %concati/vec4 form takes an immediate value and appends it (lsb)
|
||||
to the value on the top of the stack. See the %pushi/vec4 instruction
|
||||
for how to describe the immediate value.
|
||||
|
||||
* %cvt/sr <bit-l>
|
||||
* %cvt/rs <bit-l>
|
||||
|
||||
|
|
|
|||
176
vvp/vthread.cc
176
vvp/vthread.cc
|
|
@ -334,7 +334,7 @@ static inline void thr_put_bit(struct vthread_s*thr,
|
|||
}
|
||||
#endif
|
||||
|
||||
vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr)
|
||||
vvp_bit4_t vthread_get_bit(struct vthread_s* /*thr*/, unsigned addr)
|
||||
{
|
||||
#if 0
|
||||
if (vpi_mode_flag == VPI_MODE_COMPILETF) return BIT4_X;
|
||||
|
|
@ -345,7 +345,7 @@ vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
void vthread_put_bit(struct vthread_s*thr, unsigned addr, vvp_bit4_t bit)
|
||||
void vthread_put_bit(struct vthread_s* /*thr*/, unsigned addr, vvp_bit4_t bit)
|
||||
{
|
||||
#if 0
|
||||
thr_put_bit(thr, addr, bit);
|
||||
|
|
@ -900,24 +900,47 @@ bool of_AND(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %add
|
||||
*
|
||||
* Pop r,
|
||||
* Pop l,
|
||||
* Push l+r
|
||||
*
|
||||
* Pop 2 and push 1 is the same as pop 1 and replace the remaining top
|
||||
* of the stack with a new value. That is what we will do.
|
||||
*/
|
||||
bool of_ADD(vthread_t thr, vvp_code_t)
|
||||
/*
 * Decode the immediate operand of an instruction into val.
 *
 * The immediate is carried as two bit masks, cp->bit_idx[0] (vala)
 * and cp->bit_idx[1] (valb), plus a width in cp->number. For each bit
 * position the (valb,vala) pair selects the 4-state value:
 *
 *    valb vala
 *      0    0    bit of val is left untouched
 *      0    1    BIT4_1
 *      1    0    BIT4_Z
 *      1    1    BIT4_X
 *
 * Because (0,0) writes nothing, the caller must pass val already
 * filled with the value it wants for those positions, and val must
 * be at least cp->number bits wide.
 */
static void get_immediate_rval(vvp_code_t cp, vvp_vector4_t&val)
{
      uint32_t vala = cp->bit_idx[0];
      uint32_t valb = cp->bit_idx[1];
      unsigned wid  = cp->number;

	// The immediate value can be bigger than 32 bits, but only if
	// the high bits are zero. So at most we need to run through
	// the loop below 32 times. Maybe less, if the target width is
	// less. We don't have to do anything special for that, because
	// the vala/valb bits shift away, so (vala|valb) turns to zero
	// at or before 32 shifts.

      for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
	      // Combine the low bit of valb (as bit 1) and the low bit
	      // of vala (as bit 0) into a 2-bit selector.
	    uint32_t ba = ((valb & 1) << 1) | (vala & 1);

	    switch (ba) {
		case 1:
		  val.set_bit(idx, BIT4_1);
		  break;
		case 2:
		  val.set_bit(idx, BIT4_Z);
		  break;
		case 3:
		  val.set_bit(idx, BIT4_X);
		  break;
		default:
		    // Selector 0: keep whatever the caller put there.
		  break;
	    }

	    vala >>= 1;
	    valb >>= 1;
      }
}
|
||||
|
||||
static bool do_ADD(vvp_vector4_t&l, const vvp_vector4_t&r)
|
||||
{
|
||||
unsigned wid = l.size();
|
||||
assert(wid == r.size());
|
||||
|
||||
|
|
@ -946,6 +969,48 @@ bool of_ADD(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %add
|
||||
*
|
||||
* Pop r,
|
||||
* Pop l,
|
||||
* Push l+r
|
||||
*
|
||||
* Pop 2 and push 1 is the same as pop 1 and replace the remaining top
|
||||
* of the stack with a new value. That is what we will do.
|
||||
*/
|
||||
bool of_ADD(vthread_t thr, vvp_code_t)
{
	// The right operand is on top of the vec4 stack; remove it.
      vvp_vector4_t rhs = thr->pop_vec4();

	// Rather than pop the left operand too, hand do_ADD a
	// reference to the entry that is now on top of the stack.
	// That stands in for a pop followed by a push.
      return do_ADD(thr->peek_vec4(), rhs);
}
|
||||
|
||||
/*
|
||||
* %addi <vala>, <valb>, <wid>
|
||||
*
|
||||
* Pop1 operand, get the other operand from the arguments, and push
|
||||
* the result.
|
||||
*/
|
||||
bool of_ADDI(vthread_t thr, vvp_code_t cp)
{
	// The left operand is used in place, as a reference to the
	// top of the vec4 stack.
      vvp_vector4_t&lhs = thr->peek_vec4();

	// Build the right operand from the instruction's immediate
	// fields. Most immediate bits are expected to be zero, so
	// start from an all-zero vector of the encoded width and let
	// get_immediate_rval overwrite only the bits that differ.
      vvp_vector4_t rhs (cp->number, BIT4_0);
      get_immediate_rval (cp, rhs);

      return do_ADD(lhs, rhs);
}
|
||||
|
||||
bool of_ADD_WR(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
double r = thr->pop_real();
|
||||
|
|
@ -1057,7 +1122,7 @@ bool of_ASSIGN_VEC4_A_D(vthread_t thr, vvp_code_t cp)
|
|||
return true;
|
||||
|
||||
int use_off = -off;
|
||||
assert(wid > use_off);
|
||||
assert(wid > (unsigned)use_off);
|
||||
unsigned use_wid = wid - use_off;
|
||||
val = val.subvalue(use_off, use_wid);
|
||||
off = 0;
|
||||
|
|
@ -1149,7 +1214,7 @@ bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
|
|||
return true;
|
||||
|
||||
int use_off = -off;
|
||||
assert(wid > use_off);
|
||||
assert(wid > (unsigned)use_off);
|
||||
unsigned use_wid = wid - use_off;
|
||||
val = val.subvalue(use_off, use_wid);
|
||||
off = 0;
|
||||
|
|
@ -1488,20 +1553,20 @@ bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
|
|||
*/
|
||||
bool of_CAST2(vthread_t thr, vvp_code_t)
{
	// Work directly on the top of the vec4 stack. Editing the
	// value through this reference replaces a pop followed by a
	// push of the converted vector.
      vvp_vector4_t&val = thr->peek_vec4();
      unsigned wid = val.size();

      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
	    switch (val.value(idx)) {
		case BIT4_0:
		case BIT4_1:
		    // Already a 2-state value: keep it as is. (Do NOT
		    // rewrite it; forcing BIT4_1 here would turn
		    // every 0 bit into a 1.)
		  break;
		default:
		    // Any bit that is neither 0 nor 1 becomes 0.
		  val.set_bit(idx, BIT4_0);
		  break;
	    }
      }

      return true;
}
|
||||
|
||||
|
|
@ -1818,6 +1883,65 @@ bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %concati/vec4 <vala>, <valb>, <wid>
|
||||
*
|
||||
* Concat the immediate value to the LOW bits of the concatenation.
|
||||
* Get the HIGH bits from the top of the vec4 stack.
|
||||
*/
|
||||
bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t cp)
{
	// Width, in bits, of the immediate part.
      unsigned wid = cp->number;

	// The high part of the result is the current top of the vec4
	// stack. It is replaced in place with the concatenation.
      vvp_vector4_t&msb = thr->peek_vec4();

	// I expect that most of the bits of an immediate value are
	// going to be zero, so start the low part with all zero bits
	// and let the shared immediate decoder (the same one %addi
	// uses) overwrite only the bits that are different. The
	// immediate can be wider than 32 bits, but only if the high
	// bits are zero; those bits simply keep their initial zero.
      vvp_vector4_t lsb (wid, BIT4_0);
      get_immediate_rval (cp, lsb);

	// Assemble {msb, lsb}: the immediate occupies bits [0,wid),
	// and the old stack value sits directly above it.
      vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X);
      res.set_vec(0, lsb);
      res.set_vec(lsb.size(), msb);

      msb = res;
      return true;
}
|
||||
|
||||
bool of_CVT_RS(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
int64_t r = thr->words[cp->bit_idx[0]].w_int;
|
||||
|
|
@ -4136,12 +4260,14 @@ bool of_XNORR(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %or
|
||||
*/
|
||||
bool of_OR(vthread_t thr, vvp_code_t)
{
	// Pop only one operand. The other is edited in place through
	// a reference to the new top of the vec4 stack, which stands
	// in for a second pop followed by a push of the result. (A
	// push after the in-place update would leave an extra entry
	// on the stack.)
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t&vala = thr->peek_vec4();

      vala |= valb;
      return true;
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue