Implement the %parti/X instructions
This allows part selects with a constant base to be handled optimally. Also update several more instructions to work more efficiently with the vec4 stack.
This commit is contained in:
parent
a9db765f98
commit
b96f04ccce
|
|
@ -62,7 +62,7 @@ int test_immediate_vec4_ok(ivl_expr_t re)
|
|||
return 1;
|
||||
}
|
||||
|
||||
void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
|
||||
static void make_immediate_vec4_words(ivl_expr_t re, unsigned long*val0p, unsigned long*valxp, unsigned*widp)
|
||||
{
|
||||
unsigned long val0 = 0;
|
||||
unsigned long valx = 0;
|
||||
|
|
@ -95,6 +95,16 @@ void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
|
|||
}
|
||||
}
|
||||
|
||||
*val0p = val0;
|
||||
*valxp = valx;
|
||||
*widp = wid;
|
||||
}
|
||||
|
||||
/*
 * Emit one instruction line of the form "<opcode> <val0>, <valx>, <wid>;"
 * to vvp_out. The three operands are whatever make_immediate_vec4_words()
 * produces for the expression `re`; `opcode` is printed verbatim, so the
 * caller supplies the complete mnemonic.
 */
void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
{
      unsigned long word0, wordx;
      unsigned width;

	/* Let the shared helper work out the immediate operands. */
      make_immediate_vec4_words(re, &word0, &wordx, &width);

      fprintf(vvp_out, " %s %lu, %lu, %u;\n", opcode, word0, wordx, width);
}
|
||||
|
||||
|
|
@ -879,10 +889,22 @@ static void draw_select_vec4(ivl_expr_t expr)
|
|||
return;
|
||||
}
|
||||
|
||||
if (test_immediate_vec4_ok(base)) {
|
||||
unsigned long val0, valx;
|
||||
unsigned base_wid;
|
||||
make_immediate_vec4_words(base, &val0, &valx, &base_wid);
|
||||
assert(valx == 0);
|
||||
|
||||
draw_eval_vec4(subexpr);
|
||||
fprintf(vvp_out, " %%parti/%c %u, %lu, %u;\n",
|
||||
sign_suff, wid, val0, base_wid);
|
||||
|
||||
} else {
|
||||
draw_eval_vec4(subexpr);
|
||||
draw_eval_vec4(base);
|
||||
fprintf(vvp_out, " %%part/%c %u;\n", sign_suff, wid);
|
||||
}
|
||||
}
|
||||
|
||||
static void draw_select_pad_vec4(ivl_expr_t expr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -164,6 +164,8 @@ extern bool of_PAD_S(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_PAD_U(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_PART_S(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_PART_U(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_PARTI_S(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_PARTI_U(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_POP_OBJ(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_POP_REAL(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_POP_STR(vthread_t thr, vvp_code_t code);
|
||||
|
|
|
|||
|
|
@ -212,6 +212,8 @@ static const struct opcode_table_s opcode_table[] = {
|
|||
{ "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||
{ "%part/s", of_PART_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||
{ "%part/u", of_PART_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||
{ "%parti/s",of_PARTI_S,3, {OA_NUMBER, OA_BIT1, OA_BIT2} },
|
||||
{ "%parti/u",of_PARTI_U,3, {OA_NUMBER, OA_BIT1, OA_BIT2} },
|
||||
{ "%pop/obj", of_POP_OBJ, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||
{ "%pop/real",of_POP_REAL,1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||
{ "%pop/str", of_POP_STR, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||
|
|
|
|||
104
vvp/vthread.cc
104
vvp/vthread.cc
|
|
@ -2406,7 +2406,7 @@ static void negate_words(unsigned long*val, unsigned words)
|
|||
bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t valb = thr->pop_vec4();
|
||||
vvp_vector4_t vala = thr->pop_vec4();
|
||||
vvp_vector4_t&vala = thr->peek_vec4();
|
||||
|
||||
assert(vala.size()== valb.size());
|
||||
unsigned wid = vala.size();
|
||||
|
|
@ -2418,7 +2418,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
|||
unsigned long*ap = vala.subarray(0, wid);
|
||||
if (ap == 0) {
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2426,7 +2426,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
|||
if (bp == 0) {
|
||||
delete[]ap;
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2444,14 +2444,13 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
|||
if (wid <= CPU_WORD_BITS) {
|
||||
if (bp[0] == 0) {
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
} else {
|
||||
long tmpa = (long) ap[0];
|
||||
long tmpb = (long) bp[0];
|
||||
long res = tmpa / tmpb;
|
||||
ap[0] = ((unsigned long)res) & ~sign_mask;
|
||||
vala.setarray(0, wid, ap);
|
||||
thr->push_vec4(vala);
|
||||
}
|
||||
delete[]ap;
|
||||
delete[]bp;
|
||||
|
|
@ -2475,7 +2474,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
|||
delete[]ap;
|
||||
delete[]bp;
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2486,7 +2485,6 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
|||
result[words-1] &= ~sign_mask;
|
||||
|
||||
vala.setarray(0, wid, result);
|
||||
thr->push_vec4(vala);
|
||||
delete[]ap;
|
||||
delete[]bp;
|
||||
delete[]result;
|
||||
|
|
@ -3787,7 +3785,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
|||
unsigned wid = cp->number;
|
||||
|
||||
vvp_vector4_t base4 = thr->pop_vec4();
|
||||
vvp_vector4_t value = thr->pop_vec4();
|
||||
vvp_vector4_t&value = thr->peek_vec4();
|
||||
|
||||
vvp_vector4_t res (wid, BIT4_X);
|
||||
|
||||
|
|
@ -3795,17 +3793,17 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
|||
int32_t base;
|
||||
bool value_ok = vector4_to_value(base4, base, signed_flag);
|
||||
if (! value_ok) {
|
||||
thr->push_vec4(res);
|
||||
value = res;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (base >= (int32_t)value.size()) {
|
||||
thr->push_vec4(res);
|
||||
value = res;
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((base+(int)wid) <= 0) {
|
||||
thr->push_vec4(res);
|
||||
value = res;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3821,7 +3819,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
|||
}
|
||||
|
||||
res .set_vec(vbase, value.subvalue(base, wid));
|
||||
thr->push_vec4(res);
|
||||
value = res;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -3836,6 +3834,65 @@ bool of_PART_U(vthread_t thr, vvp_code_t cp)
|
|||
return of_PART_base(thr, cp, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* %parti/s <wid>, <basei>, <base_wid>
|
||||
* %parti/u <wid>, <basei>, <base_wid>
|
||||
*
|
||||
* Pop the value to be selected. The result is pushed back to the stack.
|
||||
*/
|
||||
static bool of_PARTI_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
||||
{
|
||||
unsigned wid = cp->number;
|
||||
uint32_t base = cp->bit_idx[0];
|
||||
uint32_t bwid = cp->bit_idx[1];
|
||||
|
||||
vvp_vector4_t&value = thr->peek_vec4();
|
||||
|
||||
vvp_vector4_t res (wid, BIT4_X);
|
||||
|
||||
// NOTE: This is treating the vector as signed. Is that correct?
|
||||
int32_t use_base = base;
|
||||
if (signed_flag && bwid < 32 && (base&(1<<(bwid-1)))) {
|
||||
use_base |= (-1) << bwid;
|
||||
}
|
||||
|
||||
if (use_base >= (int32_t)value.size()) {
|
||||
value = res;
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((use_base+(int32_t)wid) <= 0) {
|
||||
value = res;
|
||||
return true;
|
||||
}
|
||||
|
||||
long vbase = 0;
|
||||
if (use_base < 0) {
|
||||
vbase = -use_base;
|
||||
wid -= vbase;
|
||||
use_base = 0;
|
||||
}
|
||||
|
||||
if ((use_base+wid) > value.size()) {
|
||||
wid = value.size() - use_base;
|
||||
}
|
||||
|
||||
res .set_vec(vbase, value.subvalue(use_base, wid));
|
||||
value = res;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Opcode entry point for %parti/s: forwards to of_PARTI_base with the
 * signed flag set.
 */
bool of_PARTI_S(vthread_t thr, vvp_code_t cp)
{
      const bool signed_flag = true;
      return of_PARTI_base(thr, cp, signed_flag);
}
|
||||
|
||||
/*
 * Opcode entry point for %parti/u: forwards to of_PARTI_base with the
 * signed flag clear.
 */
bool of_PARTI_U(vthread_t thr, vvp_code_t cp)
{
      const bool signed_flag = false;
      return of_PARTI_base(thr, cp, signed_flag);
}
|
||||
|
||||
/*
|
||||
* %mov/wu <dst>, <src>
|
||||
*/
|
||||
|
|
@ -3853,15 +3910,15 @@ bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
|
|||
*/
|
||||
bool of_MUL(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t vala = thr->pop_vec4();
|
||||
vvp_vector4_t valb = thr->pop_vec4();
|
||||
vvp_vector4_t&vala = thr->peek_vec4();
|
||||
assert(vala.size() == valb.size());
|
||||
unsigned wid = vala.size();
|
||||
|
||||
unsigned long*ap = vala.subarray(0, wid);
|
||||
if (ap == 0) {
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3869,7 +3926,7 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
|||
if (bp == 0) {
|
||||
delete[]ap;
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3877,7 +3934,6 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
|||
if (wid <= CPU_WORD_BITS) {
|
||||
ap[0] *= bp[0];
|
||||
vala.setarray(0, wid, ap);
|
||||
thr->push_vec4(vala);
|
||||
delete[]ap;
|
||||
delete[]bp;
|
||||
return true;
|
||||
|
|
@ -3903,7 +3959,6 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
|||
}
|
||||
|
||||
vala.setarray(0, wid, res);
|
||||
thr->push_vec4(vala);
|
||||
delete[]ap;
|
||||
delete[]bp;
|
||||
delete[]res;
|
||||
|
|
@ -5284,7 +5339,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
|||
int off = off_index? thr->words[off_index].w_int : 0;
|
||||
const int sig_value_size = sig->value_size();
|
||||
|
||||
vvp_vector4_t val = thr->pop_vec4();
|
||||
vvp_vector4_t&val = thr->peek_vec4();
|
||||
|
||||
if (val.size() < (unsigned)wid) {
|
||||
cerr << "XXXX Internal error: val.size()=" << val.size()
|
||||
|
|
@ -5296,13 +5351,19 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
|||
|
||||
// If there is a problem loading the index register, flags-4
|
||||
// will be set to 1, and we know here to skip the actual assignment.
|
||||
if (off_index!=0 && thr->flags[4] == BIT4_1)
|
||||
if (off_index!=0 && thr->flags[4] == BIT4_1) {
|
||||
thr->pop_vec4(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (off <= -wid)
|
||||
if (off <= -wid) {
|
||||
thr->pop_vec4(1);
|
||||
return true;
|
||||
if (off >= sig_value_size)
|
||||
}
|
||||
if (off >= sig_value_size) {
|
||||
thr->pop_vec4(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// If the index is below the vector, then only assign the high
|
||||
// bits that overlap with the target.
|
||||
|
|
@ -5327,6 +5388,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
|||
else
|
||||
vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context);
|
||||
|
||||
thr->pop_vec4(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue