Implement the %parti/X instructions
This allows part selects with a constant base to be handled optimally. Also update some more instructions to work more efficiently with the vec4 stack.
This commit is contained in:
parent
a9db765f98
commit
b96f04ccce
|
|
@ -62,7 +62,7 @@ int test_immediate_vec4_ok(ivl_expr_t re)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
|
static void make_immediate_vec4_words(ivl_expr_t re, unsigned long*val0p, unsigned long*valxp, unsigned*widp)
|
||||||
{
|
{
|
||||||
unsigned long val0 = 0;
|
unsigned long val0 = 0;
|
||||||
unsigned long valx = 0;
|
unsigned long valx = 0;
|
||||||
|
|
@ -95,6 +95,16 @@ void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*val0p = val0;
|
||||||
|
*valxp = valx;
|
||||||
|
*widp = wid;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Emit a single immediate-operand instruction for the constant
 * expression "re". The expression is reduced to its value word, its
 * x/z word and its width by make_immediate_vec4_words(), and those
 * three numbers are written after the given opcode to vvp_out.
 */
void draw_immediate_vec4(ivl_expr_t re, const char*opcode)
{
      unsigned long value_word;
      unsigned long xz_word;
      unsigned expr_wid;

      make_immediate_vec4_words(re, &value_word, &xz_word, &expr_wid);

      fprintf(vvp_out, " %s %lu, %lu, %u;\n",
              opcode, value_word, xz_word, expr_wid);
}
|
||||||
|
|
||||||
|
|
@ -879,9 +889,21 @@ static void draw_select_vec4(ivl_expr_t expr)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
draw_eval_vec4(subexpr);
|
if (test_immediate_vec4_ok(base)) {
|
||||||
draw_eval_vec4(base);
|
unsigned long val0, valx;
|
||||||
fprintf(vvp_out, " %%part/%c %u;\n", sign_suff, wid);
|
unsigned base_wid;
|
||||||
|
make_immediate_vec4_words(base, &val0, &valx, &base_wid);
|
||||||
|
assert(valx == 0);
|
||||||
|
|
||||||
|
draw_eval_vec4(subexpr);
|
||||||
|
fprintf(vvp_out, " %%parti/%c %u, %lu, %u;\n",
|
||||||
|
sign_suff, wid, val0, base_wid);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
draw_eval_vec4(subexpr);
|
||||||
|
draw_eval_vec4(base);
|
||||||
|
fprintf(vvp_out, " %%part/%c %u;\n", sign_suff, wid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void draw_select_pad_vec4(ivl_expr_t expr)
|
static void draw_select_pad_vec4(ivl_expr_t expr)
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,8 @@ extern bool of_PAD_S(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_PAD_U(vthread_t thr, vvp_code_t code);
|
extern bool of_PAD_U(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_PART_S(vthread_t thr, vvp_code_t code);
|
extern bool of_PART_S(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_PART_U(vthread_t thr, vvp_code_t code);
|
extern bool of_PART_U(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_PARTI_S(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_PARTI_U(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_POP_OBJ(vthread_t thr, vvp_code_t code);
|
extern bool of_POP_OBJ(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_POP_REAL(vthread_t thr, vvp_code_t code);
|
extern bool of_POP_REAL(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_POP_STR(vthread_t thr, vvp_code_t code);
|
extern bool of_POP_STR(vthread_t thr, vvp_code_t code);
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,8 @@ static const struct opcode_table_s opcode_table[] = {
|
||||||
{ "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
{ "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||||
{ "%part/s", of_PART_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
{ "%part/s", of_PART_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||||
{ "%part/u", of_PART_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
{ "%part/u", of_PART_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||||
|
{ "%parti/s",of_PARTI_S,3, {OA_NUMBER, OA_BIT1, OA_BIT2} },
|
||||||
|
{ "%parti/u",of_PARTI_U,3, {OA_NUMBER, OA_BIT1, OA_BIT2} },
|
||||||
{ "%pop/obj", of_POP_OBJ, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
{ "%pop/obj", of_POP_OBJ, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||||
{ "%pop/real",of_POP_REAL,1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
{ "%pop/real",of_POP_REAL,1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||||
{ "%pop/str", of_POP_STR, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
{ "%pop/str", of_POP_STR, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
|
||||||
|
|
|
||||||
104
vvp/vthread.cc
104
vvp/vthread.cc
|
|
@ -2406,7 +2406,7 @@ static void negate_words(unsigned long*val, unsigned words)
|
||||||
bool of_DIV_S(vthread_t thr, vvp_code_t)
|
bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
{
|
{
|
||||||
vvp_vector4_t valb = thr->pop_vec4();
|
vvp_vector4_t valb = thr->pop_vec4();
|
||||||
vvp_vector4_t vala = thr->pop_vec4();
|
vvp_vector4_t&vala = thr->peek_vec4();
|
||||||
|
|
||||||
assert(vala.size()== valb.size());
|
assert(vala.size()== valb.size());
|
||||||
unsigned wid = vala.size();
|
unsigned wid = vala.size();
|
||||||
|
|
@ -2418,7 +2418,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
unsigned long*ap = vala.subarray(0, wid);
|
unsigned long*ap = vala.subarray(0, wid);
|
||||||
if (ap == 0) {
|
if (ap == 0) {
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2426,7 +2426,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
if (bp == 0) {
|
if (bp == 0) {
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2444,14 +2444,13 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
if (wid <= CPU_WORD_BITS) {
|
if (wid <= CPU_WORD_BITS) {
|
||||||
if (bp[0] == 0) {
|
if (bp[0] == 0) {
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
} else {
|
} else {
|
||||||
long tmpa = (long) ap[0];
|
long tmpa = (long) ap[0];
|
||||||
long tmpb = (long) bp[0];
|
long tmpb = (long) bp[0];
|
||||||
long res = tmpa / tmpb;
|
long res = tmpa / tmpb;
|
||||||
ap[0] = ((unsigned long)res) & ~sign_mask;
|
ap[0] = ((unsigned long)res) & ~sign_mask;
|
||||||
vala.setarray(0, wid, ap);
|
vala.setarray(0, wid, ap);
|
||||||
thr->push_vec4(vala);
|
|
||||||
}
|
}
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
delete[]bp;
|
delete[]bp;
|
||||||
|
|
@ -2475,7 +2474,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
delete[]bp;
|
delete[]bp;
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2486,7 +2485,6 @@ bool of_DIV_S(vthread_t thr, vvp_code_t)
|
||||||
result[words-1] &= ~sign_mask;
|
result[words-1] &= ~sign_mask;
|
||||||
|
|
||||||
vala.setarray(0, wid, result);
|
vala.setarray(0, wid, result);
|
||||||
thr->push_vec4(vala);
|
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
delete[]bp;
|
delete[]bp;
|
||||||
delete[]result;
|
delete[]result;
|
||||||
|
|
@ -3787,7 +3785,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
||||||
unsigned wid = cp->number;
|
unsigned wid = cp->number;
|
||||||
|
|
||||||
vvp_vector4_t base4 = thr->pop_vec4();
|
vvp_vector4_t base4 = thr->pop_vec4();
|
||||||
vvp_vector4_t value = thr->pop_vec4();
|
vvp_vector4_t&value = thr->peek_vec4();
|
||||||
|
|
||||||
vvp_vector4_t res (wid, BIT4_X);
|
vvp_vector4_t res (wid, BIT4_X);
|
||||||
|
|
||||||
|
|
@ -3795,17 +3793,17 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
||||||
int32_t base;
|
int32_t base;
|
||||||
bool value_ok = vector4_to_value(base4, base, signed_flag);
|
bool value_ok = vector4_to_value(base4, base, signed_flag);
|
||||||
if (! value_ok) {
|
if (! value_ok) {
|
||||||
thr->push_vec4(res);
|
value = res;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (base >= (int32_t)value.size()) {
|
if (base >= (int32_t)value.size()) {
|
||||||
thr->push_vec4(res);
|
value = res;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((base+(int)wid) <= 0) {
|
if ((base+(int)wid) <= 0) {
|
||||||
thr->push_vec4(res);
|
value = res;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3821,7 +3819,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
||||||
}
|
}
|
||||||
|
|
||||||
res .set_vec(vbase, value.subvalue(base, wid));
|
res .set_vec(vbase, value.subvalue(base, wid));
|
||||||
thr->push_vec4(res);
|
value = res;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -3836,6 +3834,65 @@ bool of_PART_U(vthread_t thr, vvp_code_t cp)
|
||||||
return of_PART_base(thr, cp, false);
|
return of_PART_base(thr, cp, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %parti/s <wid>, <basei>, <base_wid>
|
||||||
|
* %parti/u <wid>, <basei>, <base_wid>
|
||||||
|
*
|
||||||
|
* Pop the value to be selected. The result is pushed back to the stack.
|
||||||
|
*/
|
||||||
|
static bool of_PARTI_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
|
||||||
|
{
|
||||||
|
unsigned wid = cp->number;
|
||||||
|
uint32_t base = cp->bit_idx[0];
|
||||||
|
uint32_t bwid = cp->bit_idx[1];
|
||||||
|
|
||||||
|
vvp_vector4_t&value = thr->peek_vec4();
|
||||||
|
|
||||||
|
vvp_vector4_t res (wid, BIT4_X);
|
||||||
|
|
||||||
|
// NOTE: This is treating the vector as signed. Is that correct?
|
||||||
|
int32_t use_base = base;
|
||||||
|
if (signed_flag && bwid < 32 && (base&(1<<(bwid-1)))) {
|
||||||
|
use_base |= (-1) << bwid;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (use_base >= (int32_t)value.size()) {
|
||||||
|
value = res;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((use_base+(int32_t)wid) <= 0) {
|
||||||
|
value = res;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
long vbase = 0;
|
||||||
|
if (use_base < 0) {
|
||||||
|
vbase = -use_base;
|
||||||
|
wid -= vbase;
|
||||||
|
use_base = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((use_base+wid) > value.size()) {
|
||||||
|
wid = value.size() - use_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
res .set_vec(vbase, value.subvalue(use_base, wid));
|
||||||
|
value = res;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * %parti/s: part select with an immediate base, with the signed flag
 * of the shared implementation set.
 */
bool of_PARTI_S(vthread_t thr, vvp_code_t cp)
{
      const bool base_is_signed = true;
      return of_PARTI_base(thr, cp, base_is_signed);
}
|
||||||
|
|
||||||
|
/*
 * %parti/u: part select with an immediate base, with the signed flag
 * of the shared implementation cleared.
 */
bool of_PARTI_U(vthread_t thr, vvp_code_t cp)
{
      const bool base_is_signed = false;
      return of_PARTI_base(thr, cp, base_is_signed);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* %mov/wu <dst>, <src>
|
* %mov/wu <dst>, <src>
|
||||||
*/
|
*/
|
||||||
|
|
@ -3853,15 +3910,15 @@ bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
|
||||||
*/
|
*/
|
||||||
bool of_MUL(vthread_t thr, vvp_code_t)
|
bool of_MUL(vthread_t thr, vvp_code_t)
|
||||||
{
|
{
|
||||||
vvp_vector4_t vala = thr->pop_vec4();
|
|
||||||
vvp_vector4_t valb = thr->pop_vec4();
|
vvp_vector4_t valb = thr->pop_vec4();
|
||||||
|
vvp_vector4_t&vala = thr->peek_vec4();
|
||||||
assert(vala.size() == valb.size());
|
assert(vala.size() == valb.size());
|
||||||
unsigned wid = vala.size();
|
unsigned wid = vala.size();
|
||||||
|
|
||||||
unsigned long*ap = vala.subarray(0, wid);
|
unsigned long*ap = vala.subarray(0, wid);
|
||||||
if (ap == 0) {
|
if (ap == 0) {
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3869,7 +3926,7 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
||||||
if (bp == 0) {
|
if (bp == 0) {
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->push_vec4(tmp);
|
vala = tmp;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3877,7 +3934,6 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
||||||
if (wid <= CPU_WORD_BITS) {
|
if (wid <= CPU_WORD_BITS) {
|
||||||
ap[0] *= bp[0];
|
ap[0] *= bp[0];
|
||||||
vala.setarray(0, wid, ap);
|
vala.setarray(0, wid, ap);
|
||||||
thr->push_vec4(vala);
|
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
delete[]bp;
|
delete[]bp;
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -3903,7 +3959,6 @@ bool of_MUL(vthread_t thr, vvp_code_t)
|
||||||
}
|
}
|
||||||
|
|
||||||
vala.setarray(0, wid, res);
|
vala.setarray(0, wid, res);
|
||||||
thr->push_vec4(vala);
|
|
||||||
delete[]ap;
|
delete[]ap;
|
||||||
delete[]bp;
|
delete[]bp;
|
||||||
delete[]res;
|
delete[]res;
|
||||||
|
|
@ -5284,7 +5339,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
||||||
int off = off_index? thr->words[off_index].w_int : 0;
|
int off = off_index? thr->words[off_index].w_int : 0;
|
||||||
const int sig_value_size = sig->value_size();
|
const int sig_value_size = sig->value_size();
|
||||||
|
|
||||||
vvp_vector4_t val = thr->pop_vec4();
|
vvp_vector4_t&val = thr->peek_vec4();
|
||||||
|
|
||||||
if (val.size() < (unsigned)wid) {
|
if (val.size() < (unsigned)wid) {
|
||||||
cerr << "XXXX Internal error: val.size()=" << val.size()
|
cerr << "XXXX Internal error: val.size()=" << val.size()
|
||||||
|
|
@ -5296,13 +5351,19 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
||||||
|
|
||||||
// If there is a problem loading the index register, flags-4
|
// If there is a problem loading the index register, flags-4
|
||||||
// will be set to 1, and we know here to skip the actual assignment.
|
// will be set to 1, and we know here to skip the actual assignment.
|
||||||
if (off_index!=0 && thr->flags[4] == BIT4_1)
|
if (off_index!=0 && thr->flags[4] == BIT4_1) {
|
||||||
|
thr->pop_vec4(1);
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (off <= -wid)
|
if (off <= -wid) {
|
||||||
|
thr->pop_vec4(1);
|
||||||
return true;
|
return true;
|
||||||
if (off >= sig_value_size)
|
}
|
||||||
|
if (off >= sig_value_size) {
|
||||||
|
thr->pop_vec4(1);
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// If the index is below the vector, then only assign the high
|
// If the index is below the vector, then only assign the high
|
||||||
// bits that overlap with the target.
|
// bits that overlap with the target.
|
||||||
|
|
@ -5327,6 +5388,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
|
||||||
else
|
else
|
||||||
vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context);
|
vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context);
|
||||||
|
|
||||||
|
thr->pop_vec4(1);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue