Remove a bunch of dead vvp opcodes.

This commit is contained in:
Stephen Williams 2014-10-24 15:13:28 -07:00
parent 62fce50f8c
commit 48d3972299
5 changed files with 9 additions and 1368 deletions

View File

@ -290,50 +290,7 @@ static void assign_to_lvector(ivl_lval_t lval,
}
} else if (part_off>0 || ivl_lval_width(lval)!=ivl_signal_width(sig)) {
#if 0
/* There is no mux expression, but a constant part
offset. Load that into index x1 and generate a
single-bit set instruction. */
assert(ivl_lval_width(lval) == width);
if (dexp != 0) {
/* Calculated delay... */
int delay_index = allocate_word();
draw_eval_expr_into_integer(dexp, delay_index);
fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width);
fprintf(vvp_out, " %%ix/load 1, %lu, 0;\n", part_off);
fprintf(vvp_out, " %%assign/v0/x1/d v%p_%lu, %d, %u;\n",
sig, use_word, delay_index, bit);
clr_word(delay_index);
} else if (nevents != 0) {
/* Event control delay... */
fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width);
fprintf(vvp_out, " %%ix/load 1, %lu, 0;\n", part_off);
fprintf(vvp_out, " %%assign/v0/x1/e v%p_%lu, %u;\n",
sig, use_word, bit);
} else {
/* Constant delay... */
fprintf(vvp_out, " %%ix/load 0, %u, 0;\n", width);
fprintf(vvp_out, " %%ix/load 1, %lu, 0;\n", part_off);
/*
* The %assign can only take a 32 bit delay. For a larger
* delay we need to put it into an index register.
*/
if (hig_d != 0) {
int delay_index = allocate_word();
fprintf(vvp_out, " %%ix/load %d, %lu, %lu;\n",
delay_index, low_d, hig_d);
fprintf(vvp_out,
" %%assign/v0/x1/d v%p_%lu, %d, %u;\n",
sig, use_word, delay_index, bit);
clr_word(delay_index);
} else {
fprintf(vvp_out,
" %%assign/v0/x1 v%p_%lu, %lu, %u;\n",
sig, use_word, low_d, bit);
}
}
#else
if (nevents != 0) {
assert(dexp==0);
int offset_index = allocate_word();
@ -364,7 +321,6 @@ static void assign_to_lvector(ivl_lval_t lval,
clr_word(offset_index);
clr_word(delay_index);
}
#endif
} else if (dexp != 0) {
/* Calculated delay... */

View File

@ -34,16 +34,12 @@ typedef bool (*vvp_code_fun)(vthread_t thr, vvp_code_t code);
extern bool of_ABS_WR(vthread_t thr, vvp_code_t code);
extern bool of_ADD(vthread_t thr, vvp_code_t code);
extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
extern bool of_ADDI(vthread_t thr, vvp_code_t code);
extern bool of_ALLOC(vthread_t thr, vvp_code_t code);
extern bool of_AND(vthread_t thr, vvp_code_t code);
extern bool of_ANDI(vthread_t thr, vvp_code_t code);
extern bool of_ANDR(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_AR(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_ARD(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_ARE(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_AV(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_D(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_MV(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_VEC4(vthread_t thr, vvp_code_t code);
@ -53,13 +49,9 @@ extern bool of_ASSIGN_VEC4_A_D(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_VEC4_A_E(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_VEC4_OFF_E(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_V0X1(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_V0X1D(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_V0X1E(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_WR(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_WRD(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_WRE(vthread_t thr, vvp_code_t code);
extern bool of_ASSIGN_X0(vthread_t thr, vvp_code_t code);
extern bool of_BLEND(vthread_t thr, vvp_code_t code);
extern bool of_BLEND_WR(vthread_t thr, vvp_code_t code);
extern bool of_BREAKPOINT(vthread_t thr, vvp_code_t code);
@ -68,8 +60,6 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
extern bool of_CMPIU(vthread_t thr, vvp_code_t code);
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
extern bool of_CMPU(vthread_t thr, vvp_code_t code);
@ -137,10 +127,6 @@ extern bool of_JMP1XZ(vthread_t thr, vvp_code_t code);
extern bool of_JOIN(vthread_t thr, vvp_code_t code);
extern bool of_JOIN_DETACH(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AR(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AV(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AVP0_S(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_REAL(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_DAR_R(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_DAR_STR(vthread_t thr, vvp_code_t code);
@ -153,18 +139,14 @@ extern bool of_LOAD_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VEC4A(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
extern bool of_MAX_WR(vthread_t thr, vvp_code_t code);
extern bool of_MIN_WR(vthread_t thr, vvp_code_t code);
extern bool of_MOD(vthread_t thr, vvp_code_t code);
extern bool of_MOD_S(vthread_t thr, vvp_code_t code);
extern bool of_MOD_WR(vthread_t thr, vvp_code_t code);
extern bool of_MOV(vthread_t thr, vvp_code_t code);
extern bool of_MOV_WU(vthread_t thr, vvp_code_t code);
extern bool of_MOVI(vthread_t thr, vvp_code_t code);
extern bool of_MUL(vthread_t thr, vvp_code_t code);
extern bool of_MUL_WR(vthread_t thr, vvp_code_t code);
extern bool of_MULI(vthread_t thr, vvp_code_t code);
extern bool of_NAND(vthread_t thr, vvp_code_t code);
extern bool of_NANDR(vthread_t thr, vvp_code_t code);
extern bool of_NEW_COBJ(vthread_t thr, vvp_code_t code);
@ -204,13 +186,11 @@ extern bool of_RELEASE_REG(vthread_t thr, vvp_code_t code);
extern bool of_RELEASE_WR(vthread_t thr, vvp_code_t code);
extern bool of_REPLICATE(vthread_t thr, vvp_code_t code);
extern bool of_SCOPY(vthread_t thr, vvp_code_t code);
extern bool of_SET_AV(vthread_t thr, vvp_code_t code);
extern bool of_SET_QB(vthread_t thr, vvp_code_t code);
extern bool of_SET_QF(vthread_t thr, vvp_code_t code);
extern bool of_SET_DAR_OBJ_REAL(vthread_t thr, vvp_code_t code);
extern bool of_SET_DAR_OBJ_STR(vthread_t thr, vvp_code_t code);
extern bool of_SET_DAR_OBJ_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_SET_X0(vthread_t thr, vvp_code_t code);
extern bool of_SET_X0_X(vthread_t thr, vvp_code_t code);
extern bool of_SHIFTL(vthread_t thr, vvp_code_t code);
extern bool of_SHIFTR(vthread_t thr, vvp_code_t code);
@ -239,7 +219,6 @@ extern bool of_STORE_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_STORE_VEC4A(vthread_t thr, vvp_code_t code);
extern bool of_SUB(vthread_t thr, vvp_code_t code);
extern bool of_SUB_WR(vthread_t thr, vvp_code_t code);
extern bool of_SUBI(vthread_t thr, vvp_code_t code);
extern bool of_SUBSTR(vthread_t thr, vvp_code_t code);
extern bool of_SUBSTR_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_TEST_NUL(vthread_t thr, vvp_code_t code);

View File

@ -87,19 +87,12 @@ static const struct opcode_table_s opcode_table[] = {
{ "%abs/wr", of_ABS_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%add", of_ADD, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%add/wr", of_ADD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%addi", of_ADDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%alloc", of_ALLOC, 1, {OA_VPI_PTR, OA_NONE, OA_NONE} },
{ "%and", of_AND, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%and/r", of_ANDR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%andi", of_ANDI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%assign/ar",of_ASSIGN_AR,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} },
{ "%assign/ar/d",of_ASSIGN_ARD,2,{OA_ARR_PTR,OA_BIT1, OA_NONE} },
{ "%assign/ar/e",of_ASSIGN_ARE,1,{OA_ARR_PTR,OA_NONE, OA_NONE} },
{ "%assign/av",of_ASSIGN_AV,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} },
{ "%assign/av/d",of_ASSIGN_AVD,3,{OA_ARR_PTR,OA_BIT1, OA_BIT2} },
{ "%assign/v0/x1",of_ASSIGN_V0X1,3,{OA_FUNC_PTR,OA_BIT1,OA_BIT2} },
{ "%assign/v0/x1/d",of_ASSIGN_V0X1D,3,{OA_FUNC_PTR,OA_BIT1,OA_BIT2} },
{ "%assign/v0/x1/e",of_ASSIGN_V0X1E,2,{OA_FUNC_PTR,OA_BIT1,OA_NONE} },
{ "%assign/vec4", of_ASSIGN_VEC4, 2, {OA_FUNC_PTR, OA_BIT1, OA_NONE} },
{ "%assign/vec4/a/d", of_ASSIGN_VEC4_A_D, 3, {OA_ARR_PTR, OA_BIT1, OA_BIT2} },
{ "%assign/vec4/a/e", of_ASSIGN_VEC4_A_E, 2, {OA_ARR_PTR, OA_BIT1, OA_NONE} },
@ -110,7 +103,6 @@ static const struct opcode_table_s opcode_table[] = {
{ "%assign/wr", of_ASSIGN_WR, 2,{OA_VPI_PTR, OA_BIT1, OA_NONE} },
{ "%assign/wr/d",of_ASSIGN_WRD,2,{OA_VPI_PTR, OA_BIT1, OA_NONE} },
{ "%assign/wr/e",of_ASSIGN_WRE,1,{OA_VPI_PTR, OA_NONE, OA_NONE} },
{ "%assign/x0",of_ASSIGN_X0,3,{OA_FUNC_PTR,OA_BIT1, OA_BIT2} },
{ "%blend", of_BLEND, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%blend/wr", of_BLEND_WR,0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%breakpoint", of_BREAKPOINT, 0, {OA_NONE, OA_NONE, OA_NONE} },
@ -127,8 +119,6 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
{ "%concat/vec4",of_CONCAT_VEC4,0,{OA_NONE, OA_NONE, OA_NONE} },
{ "%concati/str",of_CONCATI_STR,1,{OA_STRING,OA_NONE, OA_NONE} },
@ -185,10 +175,6 @@ static const struct opcode_table_s opcode_table[] = {
{ "%join", of_JOIN, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%join/detach",of_JOIN_DETACH,1,{OA_NUMBER,OA_NONE, OA_NONE} },
{ "%load/ar",of_LOAD_AR,2, {OA_ARR_PTR, OA_BIT1, OA_NONE} },
{ "%load/av",of_LOAD_AV,3, {OA_BIT1, OA_ARR_PTR, OA_BIT2} },
{ "%load/avp0",of_LOAD_AVP0,3, {OA_BIT1, OA_ARR_PTR, OA_BIT2} },
{ "%load/avp0/s",of_LOAD_AVP0_S,3,{OA_BIT1,OA_ARR_PTR, OA_BIT2} },
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
{ "%load/dar/r", of_LOAD_DAR_R, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE}},
{ "%load/dar/str",of_LOAD_DAR_STR, 1, {OA_FUNC_PTR, OA_NONE, OA_NONE} },
{ "%load/dar/vec4",of_LOAD_DAR_VEC4,1, {OA_FUNC_PTR, OA_NONE, OA_NONE} },
@ -201,18 +187,14 @@ static const struct opcode_table_s opcode_table[] = {
{ "%load/vec4a", of_LOAD_VEC4A,2,{OA_ARR_PTR, OA_BIT1, OA_NONE} },
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%max/wr", of_MAX_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%min/wr", of_MIN_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%mod", of_MOD, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%mod/s", of_MOD_S, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%mod/wr", of_MOD_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%mov", of_MOV, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%mov/wu", of_MOV_WU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
{ "%movi", of_MOVI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%mul", of_MUL, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%mul/wr", of_MUL_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%muli", of_MULI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%nand", of_NAND, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%nand/r", of_NANDR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%new/cobj", of_NEW_COBJ, 1, {OA_VPI_PTR,OA_NONE, OA_NONE} },
@ -252,13 +234,11 @@ static const struct opcode_table_s opcode_table[] = {
{ "%release/wr", of_RELEASE_WR, 2,{OA_FUNC_PTR,OA_BIT1,OA_NONE} },
{ "%replicate", of_REPLICATE, 1,{OA_NUMBER, OA_NONE,OA_NONE} },
{ "%scopy", of_SCOPY, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%set/av", of_SET_AV, 3, {OA_ARR_PTR, OA_BIT1, OA_BIT2} },
{ "%set/dar/obj/real",of_SET_DAR_OBJ_REAL,1,{OA_NUMBER,OA_NONE,OA_NONE} },
{ "%set/dar/obj/str", of_SET_DAR_OBJ_STR, 1,{OA_NUMBER,OA_NONE,OA_NONE} },
{ "%set/dar/obj/vec4",of_SET_DAR_OBJ_VEC4,1,{OA_NUMBER,OA_NONE,OA_NONE} },
{ "%set/qb", of_SET_QB, 3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} },
{ "%set/qf", of_SET_QF, 3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} },
{ "%set/x0", of_SET_X0, 3, {OA_FUNC_PTR, OA_BIT1, OA_BIT2} },
{ "%shiftl", of_SHIFTL, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
{ "%shiftr", of_SHIFTR, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
{ "%shiftr/s", of_SHIFTR_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} },
@ -286,7 +266,6 @@ static const struct opcode_table_s opcode_table[] = {
{ "%store/vec4a", of_STORE_VEC4A, 3, {OA_ARR_PTR, OA_BIT1, OA_BIT2} },
{ "%sub", of_SUB, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%sub/wr", of_SUB_WR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%subi", of_SUBI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%substr", of_SUBSTR, 2,{OA_BIT1, OA_BIT2, OA_NONE} },
{ "%substr/vec4",of_SUBSTR_VEC4,2,{OA_BIT1, OA_BIT2, OA_NONE} },
{ "%test_nul", of_TEST_NUL, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },

View File

@ -69,13 +69,6 @@ pushed in place
See also the %sub/wr instruction.
* %addi <bit-l>, <imm>, <wid>
This instruction adds the immediate value (no x or z bits) into the
left vector. The imm value is limited to 16 significant bits, but it
is zero extended to match any width.
* %alloc <scope-label>
This instruction allocates the storage for a new instance of an
@ -120,31 +113,6 @@ The %assign/ar/e variation uses the information in the thread
event control registers to determine when to perform the assign.
%evctl is used to set the event control information.
* %assign/av <array-label>, <delay>, <bit> (XXXX Old definition)
* %assign/av/d <array-label>, <delayx>, <bit> (XXXX Old definition)
* %assign/av/e <array-label>, <bit> (XXXX Old definition)
The %assign/av instruction assigns a vector value to a word in the
labeled array. The <delay> is the delay in simulation time to the
assignment (0 for non-blocking assignment) and the <bit> is the base
of the vector to write.
The width of the vector is retrieved from index register 0.
The base of a part select is retrieved from index register 1.
The address of the word in the memory is from index register 3. The
address is canonical form.
The %assign/av/d variation reads the delay from an integer register that
is given by the <delayx> value. This should not be 0, 1 or 3, of course,
since these registers contain the vector width, base part select and
word address.
The %assign/av/e variation uses the information in the thread
event control registers to determine when to perform the assign.
%evctl is used to set the event control information.
* %assign/v0 <var-label>, <delay>, <bit> (XXXX Old description)
* %assign/v0/d <var-label>, <delayx>, <bit> (XXXX Old description)
* %assign/v0/e <var-label>, <bit> (XXXX Old description)
@ -166,14 +134,6 @@ event control registers to determine when to perform the assign.
The <var-label> references a .var object that can receive non-blocking
assignments. For blocking assignments, see %set/v.
* %assign/v0/x1 <var-label>, <delay>, <bit>
* %assign/v0/x1/d <var-label>, <delayx>, <bit>
* %assign/v0/x1/e <var-label>, <bit>
This is similar to the %assign/v0 instruction, but adds the index-1
index register with the canonical index of the destination where the
vector is to be written. This allows for part writes into the vector.
* %assign/vec4 <var-label>, <delay>
* %assign/vec4/d <var-label>, <delayx>
* %assign/vec4/e <var-label>
@ -230,19 +190,6 @@ The %assign/wr/e variation uses the information in the thread
event control registers to determine when to perform the assign.
%evctl is used to set the event control information.
* %assign/x0 <var-label>, <delay>, <bit> (OBSOLETE -- See %assign/v0x)
This does a non-blocking assignment to a functor, similar to the
%assign instruction. The <var-label> identifies the base functor of
the affected variable, and the <delay> gives the delay when the
assignment takes place. The delay may be 0. The actual functor used is
calculated by using <var-label> as a base, and indexing with the
index[0] index register. This supports indexed assignment.
The <bit> is the address of the thread register that contains the bit
value to assign.
* %blend
This instruction blends the bits of two vectors into a result in a
@ -344,13 +291,6 @@ The results of the comparison go into flags 4, 5, 6 and 7:
5: lt (less than)
6: eeq (case equal)
* %cmpi/s <bit-l>, <immr>, <wid>
* %cmpi/u <bit-l>, <immr>, <wid>
These instructions are similar to the %cmp instructions above, except
that the right hand operand is an immediate value. This is a positive
number that the vector is compared with.
* %cmp/wr
Compare real values for equality and less-than. This opcode pops to
@ -715,43 +655,6 @@ the children, and none of those children may be automatic. This
instruction is used to implement join_none and join_any from the
Verilog source.
* %load/av <bit>, <array-label>, <wid>
This instruction loads a word from the specified array. The word
address is in index register 3. Like %load/v below the width does
not have to match the width of the array word. See the %load/v
description for more information.
* %load/avp0 <bit>, <array-label>, <wid>
* %load/avp0/s <bit>, <array-label>, <wid>
This instruction is a mix of %load/av and %load/vp0. It loads an array
value like %load/av and then adds a value from index register 0 to the
result like %load/vp0. The loaded value is zero-extended to <wid>,
then added arithmetically to the signed index register 0. The result
is then stored in <bit>.
The %load/avp0/s instruction is the same, except that the loaded
vector is sign extended (instead of 0-extended) before the addition.
* %load/avx.p <bit>, <array-label>, <index>
This instruction is similar to %load/av, but it loads only a single
bit, and the <index> is the selector for the bit to use. If <index> is
out of range, then x is loaded. The index value is incremented by one
if it is defined (bit 4 is not 1).
* %load/dar <bit>, <functor-label>, <wid> (XXXX Old implementation)
This instruction loads an array word from a dynamic array. The
<label> refers to the variable object, and the <bit>/<wid> are the
location in local vector space where the extracted word goes. The
index is implicitly extracted from index register 3.
The dar/r variant reads a real-value into a real-valued register.
(See also %set/dar)
* %load/obj <var-label>
This instruction loads an object handle and pushes it to the top of
@ -824,23 +727,6 @@ The %load/ar instruction reads a real value from an array. The <index>
is the index register that contains the canonical word address into
the array.
* %load/x1p <bit>, <functor-label>, <wid>
This is an indexed load. It uses the contents of index register 1 to
select a part from a vector functor at <functor-label>. The
part is pulled from the indexed bit of the addressed functor and loaded
into the destination thread bit. The <wid> is the width of the
part. If any bit of the desired value is outside the vector, then that
bit is set to X.
The index register 1 is interpreted as a signed value. Even though the
address is canonical (from 0 to the width of the signal) the value in
index register 1 may be <0 or >=wid. The load instruction handles
filling in the out-of-bounds bits with x.
When the operation is done, the <wid> is added to index register 1, to
provide a basic auto-increment behavior.
* %loadi/wr <bit>, <mant>, <exp>
This opcode loads an immediate value, floating point, into the word
@ -874,19 +760,7 @@ The /s form does signed %.
This opcode is the real-valued modulus of the two real values.
* %mov <dst>, <src>, <wid>
* %mov/wu <dst>, <src>
* %movi <dst>, <value>, <wid>
This instruction copies a vector from one place in register space to
another. The destination and source vectors are assumed to be the same
width and non-overlapping. The <dst> may not be 0-3, but if the <src>
is one of the 4 constant bits, the effect is to replicate the value
into the destination vector. This is useful for filling a vector.
The %movi variant moves a binary value, LSB first, into the
destination vector. The immediate value is up to 32 bits, padded with
zeros to fill out the width.
* %mul
@ -901,13 +775,6 @@ result is pushed back on the vec4 stack.
This opcode multiplies two real words together.
* %muli <bit-l>, <imm>, <wid>
This instruction is the same as %mul, but the second operand is an
immediate value that is padded to the width of the result.
* %nand
Perform the bitwise NAND of two vec4 vectors, and push the result. Each
@ -1161,41 +1028,12 @@ register that contains the LSB of the vector, and the <wid> is the
size of the vector. The width must exactly match the width of the
signal.
* %set/av <array-label>, <bit>, <wid> (XXXX Old definition)
This sets a thread vector to an array word. The <array-label>
addresses an array device, and the <bit>,<wid> describe a vector to be
written. Index register 3 contains the address of the word within the
array.
The base of a part select is retrieved from index register 1. The
width is implied from the <wid> that is the argument. This is the part
*within* the word.
The address (in canonical form) is precalculated and loaded into index
register 3. This is the address of the word within the array.
* %set/qb <var-label>, <bit>, <wid>
* %set/qf <var-label>, <bit>, <wid>
This sets the vector value into the back (qb) or front (qf) of a queue
variable.
* %set/x0 <var-label>, <bit>, <wid>
This sets the part of a signal vector, the address calculated by
using the index register 0 to index the base within the vector of
<var-label>. The destination must be a signal of some sort. Otherwise,
the instruction will fail.
The addressing is canonical (0-based) so the compiler must figure out
non-zero offsets, if any. The width is the width of the part being
written. The other bits of the vector are not touched.
The index may be signed, and if less than 0, the beginning bits are
not assigned. Also, if the bits go beyond the end of the signal, those
bits are not written anywhere.
* %shiftl <idx>
* %shiftr <idx>
* %shiftr/s <idx>
@ -1327,15 +1165,6 @@ from the left, and the result pushed.
See also the %add instruction.
* %subi <bit-l>, <imm>, <wid>
This instruction arithmetically subtracts the immediate value from the
left vector. The <imm> value is a 16-bit unsigned value zero-extended to
the <wid> of the left vector. The result replaces the left vector.
See also the %addi instruction.
* %sub/wr
This instruction operates on real values in word registers. The right

File diff suppressed because it is too large Load Diff