Better use of immediate operands.
Clarify that operands are typically 32 bits, and have the code generator make better use of this. Also improve the %movi implementation to work well with larger vectors. Add the %andi instruction to use immediate operands.
This commit is contained in:
parent
f6fede5aae
commit
5a0fe9ff83
|
|
@ -92,7 +92,7 @@ unsigned long get_number_immediate(ivl_expr_t ex)
|
|||
case '0':
|
||||
break;
|
||||
case '1':
|
||||
imm |= 1 << idx;
|
||||
imm |= 1UL << idx;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
@ -909,12 +909,43 @@ static struct vector_info draw_binary_expr_le(ivl_expr_t exp,
|
|||
return lv;
|
||||
}
|
||||
|
||||
static struct vector_info draw_logic_immediate(ivl_expr_t exp,
|
||||
ivl_expr_t le,
|
||||
ivl_expr_t re,
|
||||
unsigned wid)
|
||||
{
|
||||
struct vector_info lv = draw_eval_expr_wid(le, wid, STUFF_OK_XZ);
|
||||
unsigned long imm = get_number_immediate(re);
|
||||
|
||||
assert(lv.base >= 4);
|
||||
|
||||
switch (ivl_expr_opcode(exp)) {
|
||||
|
||||
case '&':
|
||||
fprintf(vvp_out, " %%andi %u, %lu, %u;\n", lv.base, imm, lv.wid);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return lv;
|
||||
}
|
||||
|
||||
static struct vector_info draw_binary_expr_logic(ivl_expr_t exp,
|
||||
unsigned wid)
|
||||
{
|
||||
ivl_expr_t le = ivl_expr_oper1(exp);
|
||||
ivl_expr_t re = ivl_expr_oper2(exp);
|
||||
|
||||
if (ivl_expr_opcode(exp) == '&') {
|
||||
if (number_is_immediate(re, IMM_WID))
|
||||
return draw_logic_immediate(exp, le, re, wid);
|
||||
if (number_is_immediate(le, IMM_WID))
|
||||
return draw_logic_immediate(exp, re, le, wid);
|
||||
}
|
||||
|
||||
struct vector_info lv;
|
||||
struct vector_info rv;
|
||||
|
||||
|
|
@ -1167,15 +1198,15 @@ static struct vector_info draw_add_immediate(ivl_expr_t le,
|
|||
imm = get_number_immediate(re);
|
||||
|
||||
/* Now generate enough %addi instructions to add the entire
|
||||
immediate value to the destination. The adds are done 16
|
||||
bits at a time, but 17 bits are done to push the carry into
|
||||
immediate value to the destination. The adds are done IMM_WID
|
||||
bits at a time, but +1 bits are done to push the carry into
|
||||
the higher bits if needed. */
|
||||
{ unsigned base;
|
||||
for (base = 0 ; base < lv.wid ; base += 16) {
|
||||
unsigned long tmp = imm & 0xffffUL;
|
||||
for (base = 0 ; base < lv.wid ; base += IMM_WID) {
|
||||
unsigned long tmp = imm & 0xffffffffUL;
|
||||
unsigned add_wid = lv.wid - base;
|
||||
|
||||
imm >>= 16;
|
||||
imm >>= IMM_WID;
|
||||
|
||||
fprintf(vvp_out, " %%addi %u, %lu, %u;\n",
|
||||
lv.base+base, tmp, add_wid);
|
||||
|
|
@ -1203,7 +1234,7 @@ static struct vector_info draw_sub_immediate(ivl_expr_t le,
|
|||
assert(lv.wid == wid);
|
||||
|
||||
imm = get_number_immediate(re);
|
||||
assert( (imm & ~0xffff) == 0 );
|
||||
assert( (imm & ~0xffffffffUL) == 0 );
|
||||
|
||||
switch (lv.base) {
|
||||
case 0:
|
||||
|
|
@ -1299,13 +1330,13 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
|
|||
if ((ivl_expr_opcode(exp) == '-')
|
||||
&& (ivl_expr_type(re) == IVL_EX_NUMBER)
|
||||
&& (! number_is_unknown(re))
|
||||
&& number_is_immediate(re, 16))
|
||||
&& number_is_immediate(re, IMM_WID))
|
||||
return draw_sub_immediate(le, re, wid);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '*')
|
||||
&& (ivl_expr_type(re) == IVL_EX_NUMBER)
|
||||
&& (! number_is_unknown(re))
|
||||
&& number_is_immediate(re, 16))
|
||||
&& number_is_immediate(re, IMM_WID))
|
||||
return draw_mul_immediate(le, re, wid);
|
||||
|
||||
lv = draw_eval_expr_wid(le, wid, STUFF_OK_XZ);
|
||||
|
|
@ -1612,9 +1643,9 @@ static struct vector_info draw_number_expr(ivl_expr_t exp, unsigned wid)
|
|||
vvp_errors += 1;
|
||||
}
|
||||
|
||||
if ((!number_is_unknown(exp)) && number_is_immediate(exp, 16)) {
|
||||
int val = get_number_immediate(exp);
|
||||
fprintf(vvp_out, " %%movi %u, %d, %u;\n", res.base, val, wid);
|
||||
if ((!number_is_unknown(exp)) && number_is_immediate(exp, IMM_WID)) {
|
||||
unsigned long val = get_number_immediate(exp);
|
||||
fprintf(vvp_out, " %%movi %u, %lu, %u;\n", res.base, val, wid);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -1836,8 +1867,8 @@ static struct vector_info draw_string_expr(ivl_expr_t exp, unsigned wid)
|
|||
idx = 0;
|
||||
while (idx < nwid) {
|
||||
unsigned bits;
|
||||
unsigned trans = 16;
|
||||
if (nwid-idx < 16)
|
||||
unsigned trans = IMM_WID;
|
||||
if (nwid-idx < trans)
|
||||
trans = nwid-idx;
|
||||
|
||||
bits = *p;
|
||||
|
|
@ -1845,6 +1876,14 @@ static struct vector_info draw_string_expr(ivl_expr_t exp, unsigned wid)
|
|||
if (trans > 8) {
|
||||
bits |= *p << 8;
|
||||
p -= 1;
|
||||
if (trans > 16) {
|
||||
bits |= *p << 16;
|
||||
p -= 1;
|
||||
if (trans > 24) {
|
||||
bits |= *p << 24;
|
||||
p -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(vvp_out, " %%movi %u, %u, %u;\n", res.base+idx,bits,trans);
|
||||
|
||||
|
|
@ -1881,8 +1920,14 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
|
|||
res.base+idx, res.base+swid-1);
|
||||
|
||||
} else {
|
||||
fprintf(vvp_out, " %%mov %u, 0, %u;\n",
|
||||
res.base+swid, res.wid-swid);
|
||||
unsigned base = res.base+swid;
|
||||
unsigned count = res.wid-swid;
|
||||
/* The %movi is faster for larger widths, but for very
|
||||
small counts, the %mov is faster. */
|
||||
if (count > 4)
|
||||
fprintf(vvp_out, " %%movi %u, 0, %u;\n", base, count);
|
||||
else
|
||||
fprintf(vvp_out, " %%mov %u, 0, %u;\n", base, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2086,7 +2131,7 @@ static struct vector_info draw_select_signal(ivl_expr_t sube,
|
|||
|
||||
for (idx = 0 ; idx < res.wid ; idx += 1) {
|
||||
if (idx >= bit_wid) {
|
||||
fprintf(vvp_out, " %%mov %u, 0, %u; Pad from %u to %u\n",
|
||||
fprintf(vvp_out, " %%movi %u, 0, %u; Pad from %u to %u\n",
|
||||
res.base+idx, res.wid-idx,
|
||||
ivl_expr_width(sube), wid);
|
||||
break;
|
||||
|
|
@ -2410,7 +2455,7 @@ static struct vector_info draw_unary_expr(ivl_expr_t exp, unsigned wid)
|
|||
|
||||
fprintf(vvp_out, " %%mov %u, %u, %u;\n",
|
||||
tmp.base, res.base, res.wid);
|
||||
fprintf(vvp_out, " %%mov %u, 0, %u;\n",
|
||||
fprintf(vvp_out, " %%movi %u, 0, %u;\n",
|
||||
tmp.base+res.wid, tmp.wid-res.wid);
|
||||
clr_vector(res);
|
||||
res = tmp;
|
||||
|
|
@ -2460,7 +2505,7 @@ static struct vector_info draw_unary_expr(ivl_expr_t exp, unsigned wid)
|
|||
assert(res.base);
|
||||
fprintf(vvp_out, " %%mov %u, %u, %u;\n",
|
||||
tmp.base, res.base, res.wid);
|
||||
fprintf(vvp_out, " %%mov %u, 0, %u;\n",
|
||||
fprintf(vvp_out, " %%movi %u, 0, %u;\n",
|
||||
tmp.base+res.wid, tmp.wid-res.wid);
|
||||
clr_vector(res);
|
||||
res = tmp;
|
||||
|
|
|
|||
|
|
@ -39,6 +39,12 @@ struct vector_info {
|
|||
unsigned wid;
|
||||
};
|
||||
|
||||
/*
|
||||
* Convenient constants...
|
||||
*/
|
||||
/* Width limit for typical immediate arguments. */
|
||||
# define IMM_WID 32
|
||||
|
||||
/*
|
||||
* Mangle all non-symbol characters in an identifier, quotes in names
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ extern bool of_ADD(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ADDI(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_AND(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ANDI(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ANDR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ASSIGN_AV(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t code);
|
||||
|
|
@ -167,7 +168,7 @@ struct vvp_code_s {
|
|||
};
|
||||
|
||||
union {
|
||||
unsigned bit_idx[2];
|
||||
uint32_t bit_idx[2];
|
||||
vvp_net_t *net2;
|
||||
vvp_code_t cptr2;
|
||||
struct ufunc_core*ufunc_core_ptr;
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ const static struct opcode_table_s opcode_table[] = {
|
|||
{ "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%and", of_AND, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%and/r", of_ANDR, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%andi", of_ANDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%assign/av",of_ASSIGN_AV,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} },
|
||||
{ "%assign/av/d",of_ASSIGN_AVD,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} },
|
||||
{ "%assign/v0",of_ASSIGN_V0,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} },
|
||||
|
|
|
|||
|
|
@ -531,7 +531,8 @@ is one of the 4 constant bits, the effect is to replicate the value
|
|||
into the destination vector. This is useful for filling a vector.
|
||||
|
||||
The %movi variant moves a binary value, LSB first, into the
|
||||
destination vector.
|
||||
destination vector. The immediate value is up to 32 bits, padded with
|
||||
zeros to fill out the width.
|
||||
|
||||
* %mul <bit-l>, <bit-r>, <wid>
|
||||
|
||||
|
|
|
|||
|
|
@ -533,6 +533,28 @@ bool of_AND(vthread_t thr, vvp_code_t cp)
|
|||
}
|
||||
|
||||
|
||||
bool of_ANDI(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned idx1 = cp->bit_idx[0];
|
||||
unsigned long imm = cp->bit_idx[1];
|
||||
unsigned wid = cp->number;
|
||||
|
||||
assert(idx1 >= 4);
|
||||
|
||||
vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
|
||||
vvp_vector4_t imv (wid, BIT4_0);
|
||||
|
||||
unsigned trans = wid;
|
||||
if (trans > CPU_WORD_BITS)
|
||||
trans = CPU_WORD_BITS;
|
||||
imv.setarray(0, trans, &imm);
|
||||
|
||||
val &= imv;
|
||||
|
||||
thr->bits4.set_vec(idx1, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool of_ADD(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
assert(cp->bit_idx[0] >= 4);
|
||||
|
|
@ -2950,13 +2972,24 @@ bool of_MOV_WR(vthread_t thr, vvp_code_t cp)
|
|||
bool of_MOVI(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned dst = cp->bit_idx[0];
|
||||
unsigned val = cp->bit_idx[1];
|
||||
static unsigned long val[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
unsigned wid = cp->number;
|
||||
|
||||
thr_check_addr(thr, dst+wid-1);
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1, val >>= 1)
|
||||
thr->bits4.set_bit(dst+idx, (val&1)? BIT4_1 : BIT4_0);
|
||||
val[0] = cp->bit_idx[1];
|
||||
|
||||
while (wid > 0) {
|
||||
unsigned trans = wid;
|
||||
if (trans > 8*CPU_WORD_BITS)
|
||||
trans = 8*CPU_WORD_BITS;
|
||||
|
||||
thr->bits4.setarray(dst, trans, val);
|
||||
|
||||
val[0] = 0;
|
||||
wid -= trans;
|
||||
dst += trans;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue