/*
 * Copyright (c) 2001-2013 Stephen Williams (steve@icarus.com)
 *
 * This source code is free software; you can redistribute it
 * and/or modify it in source code form under the terms of the GNU
 * General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

# include "config.h"
# include "vthread.h"
# include "codes.h"
# include "schedule.h"
# include "ufunc.h"
# include "event.h"
# include "vpi_priv.h"
# include "vvp_net_sig.h"
# include "vvp_cobject.h"
# include "vvp_darray.h"
# include "class_type.h"
#ifdef CHECK_WITH_VALGRIND
# include "vvp_cleanup.h"
#endif
# include <set>
# include <typeinfo>
# include <vector>
# include <cstdlib>
# include <climits>
# include <cstring>
# include <cmath>
# include <cassert>

# include <iostream>
# include <cstdio>

using namespace std;

/* This is the size of an unsigned long in bits. This is just a
   convenience macro. */
# define CPU_WORD_BITS (8*sizeof(unsigned long))
# define TOP_BIT (1UL << (CPU_WORD_BITS-1))
/*
 * This vthread_s structure describes all there is to know about a
 * thread, including its program counter, all the private bits it
 * holds, and its place in other lists.
 *
 *
 * ** Notes On The Interactions of %fork/%join/%end:
 *
 * The %fork instruction creates a new thread and pushes that into a
 * set of children for the thread. This new thread, then, becomes a
 * child of the current thread, and the current thread a parent of the
 * new thread. Any child can be reaped by a %join.
 *
 * Children placed into an automatic scope are given special
 * treatment, which is required to make the function/task calls that
 * they represent work correctly. These automatic children are copied
 * into an automatic_children set to mark them for this handling. %join
 * operations will guarantee that automatic threads are joined first,
 * before any non-automatic threads.
 *
 * It is a programming error for a thread that created threads to not
 * %join (or %join/detach) as many as it created before it %ends. The
 * children set will get messed up otherwise.
 *
 * The i_am_joining flag is a clue to children that the parent is
 * blocked in a %join and may need to be scheduled. The %end
 * instruction will check this flag in the parent to see if it should
 * notify the parent that something interesting has happened.
 *
 * The i_have_ended flag, on the other hand, is used by threads to
 * tell their parents that they are already dead. A thread that
 * executes %end will set its own i_have_ended flag and let its parent
 * reap it when the parent does the %join. If a thread has its
 * schedule_parent_on_end flag set already when it %ends, then it
 * reaps itself and simply schedules its parent. If a child has its
 * i_have_ended flag set when a thread executes %join, then it is free
 * to reap the child immediately.
 */
struct vthread_s {
      vthread_s();

      /* This is the program counter. */
      vvp_code_t pc;
      /* These hold the private thread bits. */
      vvp_vector4_t bits4;

      /* These are the word registers. */
      union {
            int64_t w_int;
            uint64_t w_uint;
      } words[16];

    private:
      vector<double> stack_real_;
    public:
      inline double pop_real(void)
      {
            assert(! stack_real_.empty());
            double val = stack_real_.back();
            stack_real_.pop_back();
            return val;
      }
      inline void push_real(double val)
      {
            stack_real_.push_back(val);
      }
      inline double peek_real(unsigned depth)
      {
            assert(depth < stack_real_.size());
            unsigned use_index = stack_real_.size()-1-depth;
            return stack_real_[use_index];
      }
      inline void pop_real(unsigned cnt)
      {
            while (cnt > 0) {
                  stack_real_.pop_back();
                  cnt -= 1;
            }
      }

      /* Strings are operated on using a forth-like operator
         set. Items at the top of the stack (back()) are the objects
         operated on except for special cases. New objects are
         pushed onto the top (back()) and pulled from the top
         (back()) only. */
    private:
      vector<string> stack_str_;
    public:
      inline string pop_str(void)
      {
            assert(! stack_str_.empty());
            string val = stack_str_.back();
            stack_str_.pop_back();
            return val;
      }
      inline void push_str(const string&val)
      {
            stack_str_.push_back(val);
      }
      inline string&peek_str(unsigned depth)
      {
            assert(depth<stack_str_.size());
            unsigned use_index = stack_str_.size()-1-depth;
            return stack_str_[use_index];
      }
      inline void pop_str(unsigned cnt)
      {
            while (cnt > 0) {
                  stack_str_.pop_back();
                  cnt -= 1;
            }
      }

      /* Objects are also operated on in a stack. */
    private:
      enum { STACK_OBJ_MAX_SIZE = 32 };
      vvp_object_t stack_obj_[STACK_OBJ_MAX_SIZE];
      int stack_obj_size_;
    public:
      inline vvp_object_t& peek_object(void)
      {
            assert(stack_obj_size_ > 0);
            return stack_obj_[stack_obj_size_-1];
      }
      inline void pop_object(vvp_object_t&obj)
      {
            assert(stack_obj_size_ > 0);
            stack_obj_size_ -= 1;
            obj = stack_obj_[stack_obj_size_];
            stack_obj_[stack_obj_size_].reset(0);
      }
      inline void pop_object(unsigned cnt)
      {
            assert(cnt <= stack_obj_size_);
            for (size_t idx = stack_obj_size_-cnt ; idx < stack_obj_size_ ; idx += 1)
                  stack_obj_[idx].reset(0);
            stack_obj_size_ -= cnt;
      }
      inline void push_object(const vvp_object_t&obj)
      {
            assert(stack_obj_size_ < STACK_OBJ_MAX_SIZE);
            stack_obj_[stack_obj_size_] = obj;
            stack_obj_size_ += 1;
      }

      /* My parent sets this when it wants me to wake it up. */
      unsigned i_am_joining :1;
      unsigned i_have_ended :1;
      unsigned waiting_for_event :1;
      unsigned is_scheduled :1;
      unsigned delay_delete :1;
      /* This points to the children of the thread. */
      set<struct vthread_s*>children;
      /* No more than 1 of the children are automatic. */
      set<vthread_s*>automatic_children;
      /* This points to my parent, if I have one. */
      struct vthread_s*parent;
      /* This points to the containing scope. */
      struct __vpiScope*parent_scope;
      /* This is used for keeping wait queues. */
      struct vthread_s*wait_next;
      /* These are used to access automatically allocated items. */
      vvp_context_t wt_context, rd_context;
      /* These are used to pass non-blocking event control information. */
      vvp_net_t*event;
      uint64_t ecount;
};

inline vthread_s::vthread_s()
{
      stack_obj_size_ = 0;
}

static bool test_joinable(vthread_t thr, vthread_t child);
static void do_join(vthread_t thr, vthread_t child);

struct __vpiScope* vthread_scope(struct vthread_s*thr)
{
      return thr->parent_scope;
}

struct vthread_s*running_thread = 0;

// this table maps the thread special index bit addresses to
// vvp_bit4_t bit values.
static vvp_bit4_t thr_index_to_bit4[4] = { BIT4_0, BIT4_1, BIT4_X, BIT4_Z };
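
// Thread bit addresses 0-3 always hold the constant values 0, 1, x
// and z (they are initialized in vthread_new() below); addresses 4
// and up are the thread's scratch bits. This is why the opcode
// implementations treat a bit index below 4 as an immediate constant
// rather than as an address into the thread vector.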

static inline void thr_check_addr(struct vthread_s*thr, unsigned addr)
{
      if (thr->bits4.size() <= addr)
            thr->bits4.resize(addr+1);
}

static inline vvp_bit4_t thr_get_bit(struct vthread_s*thr, unsigned addr)
{
      assert(addr < thr->bits4.size());
      return thr->bits4.value(addr);
}

static inline void thr_put_bit(struct vthread_s*thr,
                               unsigned addr, vvp_bit4_t val)
{
      thr_check_addr(thr, addr);
      thr->bits4.set_bit(addr, val);
}

// REMOVE ME
static inline void thr_clr_bit_(struct vthread_s*thr, unsigned addr)
{
      thr->bits4.set_bit(addr, BIT4_0);
}

vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr)
{
      if (vpi_mode_flag == VPI_MODE_COMPILETF) return BIT4_X;
      else return thr_get_bit(thr, addr);
}

void vthread_put_bit(struct vthread_s*thr, unsigned addr, vvp_bit4_t bit)
{
      thr_put_bit(thr, addr, bit);
}

void vthread_push_real(struct vthread_s*thr, double val)
{
      thr->push_real(val);
}

void vthread_pop_real(struct vthread_s*thr, unsigned depth)
{
      thr->pop_real(depth);
}

void vthread_pop_str(struct vthread_s*thr, unsigned depth)
{
      thr->pop_str(depth);
}

const string&vthread_get_str_stack(struct vthread_s*thr, unsigned depth)
{
      return thr->peek_str(depth);
}

double vthread_get_real_stack(struct vthread_s*thr, unsigned depth)
{
      return thr->peek_real(depth);
}

template <class T> T coerce_to_width(const T&that, unsigned width)
{
      if (that.size() == width)
            return that;

      assert(that.size() > width);
      T res (width);
      for (unsigned idx = 0 ; idx < width ; idx += 1)
            res.set_bit(idx, that.value(idx));

      return res;
}
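
/*
 * Pull <wid> bits of the thread vector starting at <addr> into a
 * newly allocated array of unsigned long words (the caller must
 * delete[] it). Addresses 0 and 1 are the constant bits, so they
 * expand to all-zero and all-one arrays; addresses 2 and 3 (the
 * constant x and z bits) cannot be represented in two-state words,
 * so a null pointer is returned and callers fall back to a slower
 * four-state path.
 */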
static unsigned long* vector_to_array(struct vthread_s*thr,
                                      unsigned addr, unsigned wid)
{
      if (addr == 0) {
            unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS);
            unsigned long*val = new unsigned long[awid];
            for (unsigned idx = 0 ; idx < awid ; idx += 1)
                  val[idx] = 0;
            return val;
      }
      if (addr == 1) {
            unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS);
            unsigned long*val = new unsigned long[awid];
            for (unsigned idx = 0 ; idx < awid ; idx += 1)
                  val[idx] = -1UL;

            wid -= (awid-1) * CPU_WORD_BITS;
            if (wid < CPU_WORD_BITS)
                  val[awid-1] &= (-1UL) >> (CPU_WORD_BITS-wid);

            return val;
      }

      if (addr < 4)
            return 0;

      return thr->bits4.subarray(addr, wid);
}

/*
 * This function gets from the thread a vector of bits starting from
 * the addressed location and for the specified width.
 */
static vvp_vector4_t vthread_bits_to_vector(struct vthread_s*thr,
                                            unsigned bit, unsigned wid)
{
      /* Make a vector of the desired width. */

      if (bit >= 4) {
            return vvp_vector4_t(thr->bits4, bit, wid);

      } else {
            return vvp_vector4_t(wid, thr_index_to_bit4[bit]);
      }
}

/*
 * Some of the instructions do wide addition to arrays of long. They
 * use this add_with_carry function to help.
 */
static inline unsigned long add_with_carry(unsigned long a, unsigned long b,
                                           unsigned long&carry)
{
      unsigned long tmp = b + carry;
      unsigned long sum = a + tmp;
      carry = 0;
      if (tmp < b)
            carry = 1;
      if (sum < tmp)
            carry = 1;
      if (sum < a)
            carry = 1;
      return sum;
}
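
/*
 * Multiply two CPU words and return the low word of the product. The
 * high word of the full double-width product is returned through
 * <carry>. The work is done schoolbook-style on half-word digits so
 * that no partial product can overflow an unsigned long.
 */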
static unsigned long multiply_with_carry(unsigned long a, unsigned long b,
                                         unsigned long&carry)
{
      const unsigned long mask = (1UL << (CPU_WORD_BITS/2)) - 1;
      unsigned long a0 = a & mask;
      unsigned long a1 = (a >> (CPU_WORD_BITS/2)) & mask;
      unsigned long b0 = b & mask;
      unsigned long b1 = (b >> (CPU_WORD_BITS/2)) & mask;

      unsigned long tmp = a0 * b0;

      unsigned long r00 = tmp & mask;
      unsigned long c00 = (tmp >> (CPU_WORD_BITS/2)) & mask;

      tmp = a0 * b1;

      unsigned long r01 = tmp & mask;
      unsigned long c01 = (tmp >> (CPU_WORD_BITS/2)) & mask;

      tmp = a1 * b0;

      unsigned long r10 = tmp & mask;
      unsigned long c10 = (tmp >> (CPU_WORD_BITS/2)) & mask;

      tmp = a1 * b1;

      unsigned long r11 = tmp & mask;
      unsigned long c11 = (tmp >> (CPU_WORD_BITS/2)) & mask;

      unsigned long r1 = c00 + r01 + r10;
      unsigned long r2 = (r1 >> (CPU_WORD_BITS/2)) & mask;
      r1 &= mask;
      r2 += c01 + c10 + r11;
      unsigned long r3 = (r2 >> (CPU_WORD_BITS/2)) & mask;
      r2 &= mask;
      r3 += c11;
      r3 &= mask;

      carry = (r3 << (CPU_WORD_BITS/2)) + r2;
      return (r1 << (CPU_WORD_BITS/2)) + r00;
}
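
/*
 * Multiply the multi-word value <val> by the single-word immediate
 * <imm>, writing the product into <res>. Both arrays are <words>
 * words long; carry out of the top word is simply dropped.
 */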
static void multiply_array_imm(unsigned long*res, unsigned long*val,
                               unsigned words, unsigned long imm)
{
      for (unsigned idx = 0 ; idx < words ; idx += 1)
            res[idx] = 0;

      for (unsigned mul_idx = 0 ; mul_idx < words ; mul_idx += 1) {
            unsigned long sum;
            unsigned long tmp = multiply_with_carry(val[mul_idx], imm, sum);

            unsigned long carry = 0;
            res[mul_idx] = add_with_carry(res[mul_idx], tmp, carry);
            for (unsigned add_idx = mul_idx+1 ; add_idx < words ; add_idx += 1) {
                  res[add_idx] = add_with_carry(res[add_idx], sum, carry);
                  sum = 0;
            }
      }
}

/*
 * Allocate a context for use by a child thread. By preference, use
 * the last freed context. If none available, create a new one. Add
 * it to the list of live contexts in that scope.
 */
static vvp_context_t vthread_alloc_context(struct __vpiScope*scope)
{
      assert(scope->is_automatic);

      vvp_context_t context = scope->free_contexts;
      if (context) {
            scope->free_contexts = vvp_get_next_context(context);
            for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
                  scope->item[idx]->reset_instance(context);
            }
      } else {
            context = vvp_allocate_context(scope->nitem);
            for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
                  scope->item[idx]->alloc_instance(context);
            }
      }

      vvp_set_next_context(context, scope->live_contexts);
      scope->live_contexts = context;

      return context;
}

/*
 * Free a context previously allocated to a child thread by pushing it
 * onto the freed context stack. Remove it from the list of live contexts
 * in that scope.
 */
static void vthread_free_context(vvp_context_t context, struct __vpiScope*scope)
{
      assert(scope->is_automatic);
      assert(context);

      if (context == scope->live_contexts) {
            scope->live_contexts = vvp_get_next_context(context);
      } else {
            vvp_context_t tmp = scope->live_contexts;
            while (context != vvp_get_next_context(tmp)) {
                  assert(tmp);
                  tmp = vvp_get_next_context(tmp);
            }
            vvp_set_next_context(tmp, vvp_get_next_context(context));
      }

      vvp_set_next_context(context, scope->free_contexts);
      scope->free_contexts = context;
}

#ifdef CHECK_WITH_VALGRIND
void contexts_delete(struct __vpiScope*scope)
{
      vvp_context_t context = scope->free_contexts;

      while (context) {
            scope->free_contexts = vvp_get_next_context(context);
            for (unsigned idx = 0; idx < scope->nitem; idx += 1) {
                  scope->item[idx]->free_instance(context);
            }
            free(context);
            context = scope->free_contexts;
      }
      free(scope->item);
}
#endif

/*
 * Create a new thread with the given start address.
 */
vthread_t vthread_new(vvp_code_t pc, struct __vpiScope*scope)
{
      vthread_t thr = new struct vthread_s;
      thr->pc = pc;
      thr->bits4 = vvp_vector4_t(32);
      thr->parent = 0;
      thr->parent_scope = scope;
      thr->wait_next = 0;
      thr->wt_context = 0;
      thr->rd_context = 0;

      thr->i_am_joining = 0;
      thr->is_scheduled = 0;
      thr->i_have_ended = 0;
      thr->delay_delete = 0;
      thr->waiting_for_event = 0;
      thr->event = 0;
      thr->ecount = 0;

      thr_put_bit(thr, 0, BIT4_0);
      thr_put_bit(thr, 1, BIT4_1);
      thr_put_bit(thr, 2, BIT4_X);
      thr_put_bit(thr, 3, BIT4_Z);

      scope->threads.insert(thr);
      return thr;
}

#ifdef CHECK_WITH_VALGRIND
#if 0
/*
 * These are not currently correct. If you use them you will get
 * double delete messages. There is still a leak related to a
 * waiting event that needs to be investigated.
 */

static void wait_next_delete(vthread_t base)
{
      while (base) {
            vthread_t tmp = base->wait_next;
            delete base;
            base = tmp;
            if (base->waiting_for_event == 0) break;
      }
}

static void child_delete(vthread_t base)
{
      while (base) {
            vthread_t tmp = base->child;
            delete base;
            base = tmp;
      }
}
#endif

void vthreads_delete(struct __vpiScope*scope)
{
      for (std::set<vthread_t>::iterator cur = scope->threads.begin()
                 ; cur != scope->threads.end() ; ++ cur ) {
            delete *cur;
      }
      scope->threads.clear();
}
#endif

/*
 * Reaping pulls the thread out of the stack of threads. If I have a
 * child, then hand it over to my parent.
 */
static void vthread_reap(vthread_t thr)
{
      if (! thr->children.empty()) {
            for (set<vthread_t>::iterator cur = thr->children.begin()
                       ; cur != thr->children.end() ; ++cur) {
                  vthread_t curp = *cur;
                  assert(curp->parent == thr);
                  curp->parent = thr->parent;
            }
      }
      if (thr->parent) {
            //assert(thr->parent->child == thr);
            thr->parent->children.erase(thr);
      }

      thr->parent = 0;

      // Remove myself from the containing scope.
      thr->parent_scope->threads.erase(thr);

      thr->pc = codespace_null();

      /* If this thread is not scheduled, then it is safe to delete
         it now. Otherwise, let the schedule event (which will
         execute the thread at of_ZOMBIE) delete the object. */
      if ((thr->is_scheduled == 0) && (thr->waiting_for_event == 0)) {
            assert(thr->children.empty());
            assert(thr->wait_next == 0);
            if (thr->delay_delete)
                  schedule_del_thr(thr);
            else
                  vthread_delete(thr);
      }
}

void vthread_delete(vthread_t thr)
{
      thr->bits4 = vvp_vector4_t();
      delete thr;
}

void vthread_mark_scheduled(vthread_t thr)
{
      while (thr != 0) {
            assert(thr->is_scheduled == 0);
            thr->is_scheduled = 1;
            thr = thr->wait_next;
      }
}

void vthread_delay_delete()
{
      if (running_thread)
            running_thread->delay_delete = 1;
}

/*
 * This function runs each thread by fetching an instruction,
 * incrementing the PC, and executing the instruction. The thread may
 * be the head of a list, so each thread is run so far as possible.
 */
void vthread_run(vthread_t thr)
{
      while (thr != 0) {
            vthread_t tmp = thr->wait_next;
            thr->wait_next = 0;

            assert(thr->is_scheduled);
            thr->is_scheduled = 0;

            running_thread = thr;

            for (;;) {
                  vvp_code_t cp = thr->pc;
                  thr->pc += 1;

                  /* Run the opcode implementation. If the execution of
                     the opcode returns false, then the thread is meant to
                     be paused, so break out of the loop. */
                  bool rc = (cp->opcode)(thr, cp);
                  if (rc == false)
                        break;
            }

            thr = tmp;
      }
      running_thread = 0;
}

/*
 * The CHUNK_LINK instruction is a special next pointer for linking
 * chunks of code space. It's like a simplified %jmp.
 */
bool of_CHUNK_LINK(vthread_t thr, vvp_code_t code)
{
      assert(code->cptr);
      thr->pc = code->cptr;
      return true;
}

/*
 * This is called by an event functor to wake up all the threads on
 * its list. I in fact created that list in the %wait instruction, and
 * I also am certain that the waiting_for_event flag is set.
 */
void vthread_schedule_list(vthread_t thr)
{
      for (vthread_t cur = thr ; cur ; cur = cur->wait_next) {
            assert(cur->waiting_for_event);
            cur->waiting_for_event = 0;
      }

      schedule_vthread(thr, 0);
}

vvp_context_t vthread_get_wt_context()
{
      if (running_thread)
            return running_thread->wt_context;
      else
            return 0;
}

vvp_context_t vthread_get_rd_context()
{
      if (running_thread)
            return running_thread->rd_context;
      else
            return 0;
}

vvp_context_item_t vthread_get_wt_context_item(unsigned context_idx)
{
      assert(running_thread && running_thread->wt_context);
      return vvp_get_context_item(running_thread->wt_context, context_idx);
}

vvp_context_item_t vthread_get_rd_context_item(unsigned context_idx)
{
      assert(running_thread && running_thread->rd_context);
      return vvp_get_context_item(running_thread->rd_context, context_idx);
}

bool of_ABS_WR(vthread_t thr, vvp_code_t)
{
      thr->push_real( fabs(thr->pop_real()) );
      return true;
}

bool of_ALLOC(vthread_t thr, vvp_code_t cp)
{
      /* Allocate a context. */
      vvp_context_t child_context = vthread_alloc_context(cp->scope);

      /* Push the allocated context onto the write context stack. */
      vvp_set_stacked_context(child_context, thr->wt_context);
      thr->wt_context = child_context;

      return true;
}
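
/*
 * %and <dst>, <src>, <wid>
 * Bitwise-AND two vectors in thread bit space, leaving the result in
 * place of the left operand. of_AND dispatches to a narrow or a wide
 * implementation and patches the opcode pointer so that later
 * executions of the same instruction skip the width test.
 */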
static bool of_AND_wide(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
      val &= vthread_bits_to_vector(thr, idx2, wid);
      thr->bits4.set_vec(idx1, val);

      return true;
}

static bool of_AND_narrow(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);
            thr_put_bit(thr, idx1, lb&rb);
            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}

bool of_AND(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      if (cp->number <= 4)
            cp->opcode = &of_AND_narrow;
      else
            cp->opcode = &of_AND_wide;

      return cp->opcode(thr, cp);
}


bool of_ANDI(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned long imm = cp->bit_idx[1];
      unsigned wid = cp->number;

      assert(idx1 >= 4);

      vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
      vvp_vector4_t imv (wid, BIT4_0);

      unsigned trans = wid;
      if (trans > CPU_WORD_BITS)
            trans = CPU_WORD_BITS;
      imv.setarray(0, trans, &imm);

      val &= imv;

      thr->bits4.set_vec(idx1, val);
      return true;
}

bool of_ADD(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number);
      unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number);
      if (lva == 0 || lvb == 0)
            goto x_out;

      unsigned long carry;
      carry = 0;
      for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1)
            lva[idx] = add_with_carry(lva[idx], lvb[idx], carry);

      /* We know from the vector_to_array that the address is valid
         in the thr->bits4 vector, so just do the set bit. */

      thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);

      delete[]lva;
      delete[]lvb;

      return true;

 x_out:
      delete[]lva;
      delete[]lvb;

      vvp_vector4_t tmp(cp->number, BIT4_X);
      thr->bits4.set_vec(cp->bit_idx[0], tmp);

      return true;
}

bool of_ADD_WR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();
      thr->push_real(l + r);
      return true;
}

/*
 * This is %addi, add-immediate. The first value is a vector, the
 * second value is the immediate value in the bit_idx[1] position. The
 * immediate value can be up to 16 bits, which are then padded to the
 * width of the vector with zero.
 */
bool of_ADDI(vthread_t thr, vvp_code_t cp)
{
      // Collect arguments
      unsigned bit_addr = cp->bit_idx[0];
      unsigned long imm_value = cp->bit_idx[1];
      unsigned bit_width = cp->number;

      assert(bit_addr >= 4);

      unsigned word_count = (bit_width+CPU_WORD_BITS-1)/CPU_WORD_BITS;

      unsigned long*lva = vector_to_array(thr, bit_addr, bit_width);
      if (lva == 0)
            goto x_out;


      unsigned long carry;
      carry = 0;
      for (unsigned idx = 0 ; idx < word_count ; idx += 1) {
            lva[idx] = add_with_carry(lva[idx], imm_value, carry);
            imm_value = 0;
      }

      /* We know from the vector_to_array that the address is valid
         in the thr->bits4 vector, so just do the set bit. */

      thr->bits4.setarray(bit_addr, bit_width, lva);

      delete[]lva;

      return true;

 x_out:
      delete[]lva;

      vvp_vector4_t tmp (bit_width, BIT4_X);
      thr->bits4.set_vec(bit_addr, tmp);

      return true;
}

/* %assign/ar <array>, <delay>
 * Generate an assignment event to a real array. Index register 3
 * contains the canonical address of the word in the memory. <delay>
 * is the delay in simulation time. <bit> is the index register
 * containing the real value.
 */
bool of_ASSIGN_AR(vthread_t thr, vvp_code_t cp)
{
      long adr = thr->words[3].w_int;
      unsigned delay = cp->bit_idx[0];
      double value = thr->pop_real();

      if (adr >= 0) {
            schedule_assign_array_word(cp->array, adr, value, delay);
      }

      return true;
}

/* %assign/ar/d <array>, <delay_idx>
 * Generate an assignment event to a real array. Index register 3
 * contains the canonical address of the word in the memory.
 * <delay_idx> is the integer register that contains the delay value.
 */
bool of_ASSIGN_ARD(vthread_t thr, vvp_code_t cp)
{
      long adr = thr->words[3].w_int;
      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
      double value = thr->pop_real();

      if (adr >= 0) {
            schedule_assign_array_word(cp->array, adr, value, delay);
      }

      return true;
}

/* %assign/ar/e <array>
 * Generate an assignment event to a real array. Index register 3
 * contains the canonical address of the word in the memory. <bit>
 * is the index register containing the real value. The event
 * information is contained in the thread event control registers
 * and is set with %evctl.
 */
bool of_ASSIGN_ARE(vthread_t thr, vvp_code_t cp)
{
      long adr = thr->words[3].w_int;
      double value = thr->pop_real();

      if (adr >= 0) {
            if (thr->ecount == 0) {
                  schedule_assign_array_word(cp->array, adr, value, 0);
            } else {
                  schedule_evctl(cp->array, adr, value, thr->event,
                                 thr->ecount);
            }
      }

      return true;
}

/* %assign/av <array>, <delay>, <bit>
 * This generates an assignment event to an array. Index register 0
 * contains the width of the vector (and the word) and index register
 * 3 contains the canonical address of the word in memory.
 */
bool of_ASSIGN_AV(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      long adr = thr->words[3].w_int;
      unsigned delay = cp->bit_idx[0];
      unsigned bit = cp->bit_idx[1];

      if (adr < 0) return true;

      long vwidth = get_array_word_size(cp->array);
      // We fell off the MSB end.
      if (off >= vwidth) return true;
      // Trim the bits after the MSB
      if (off + (long)wid > vwidth) {
            wid += vwidth - off - wid;
      } else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off > wid ) return true;
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      schedule_assign_array_word(cp->array, adr, off, value, delay);
      return true;
}

/* %assign/av/d <array>, <delay_idx>, <bit>
 * This generates an assignment event to an array. Index register 0
 * contains the width of the vector (and the word) and index register
 * 3 contains the canonical address of the word in memory. The named
 * index register contains the delay.
 */
bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      long adr = thr->words[3].w_int;
      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
      unsigned bit = cp->bit_idx[1];

      if (adr < 0) return true;

      long vwidth = get_array_word_size(cp->array);
      // We fell off the MSB end.
      if (off >= vwidth) return true;
      // Trim the bits after the MSB
      if (off + (long)wid > vwidth) {
            wid += vwidth - off - wid;
      } else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off > wid ) return true;
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      schedule_assign_array_word(cp->array, adr, off, value, delay);
      return true;
}
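
/* %assign/av/e <array>, <bit>
 * Like %assign/av, but instead of a fixed delay the assignment is
 * gated by the event control information (event handle and repeat
 * count) that a preceding %evctl left in the thread registers.
 */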
bool of_ASSIGN_AVE(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      long adr = thr->words[3].w_int;
      unsigned bit = cp->bit_idx[0];

      if (adr < 0) return true;

      long vwidth = get_array_word_size(cp->array);
      // We fell off the MSB end.
      if (off >= vwidth) return true;
      // Trim the bits after the MSB
      if (off + (long)wid > vwidth) {
            wid += vwidth - off - wid;
      } else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off > wid ) return true;
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);
      // If the count is zero then just put the value.
      if (thr->ecount == 0) {
            schedule_assign_array_word(cp->array, adr, off, value, 0);
      } else {
            schedule_evctl(cp->array, adr, value, off, thr->event, thr->ecount);
      }
      return true;
}

/*
 * This is %assign/v0 <label>, <delay>, <bit>
 * Index register 0 contains a vector width.
 */
bool of_ASSIGN_V0(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      assert(wid > 0);
      unsigned delay = cp->bit_idx[0];
      unsigned bit = cp->bit_idx[1];

      vvp_net_ptr_t ptr (cp->net, 0);
      if (bit >= 4) {
            // If the vector is not a synthetic one, then have the
            // scheduler pluck it directly out of my vector space.
            schedule_assign_plucked_vector(ptr, delay, thr->bits4, bit, wid);
      } else {
            vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);
            schedule_assign_plucked_vector(ptr, delay, value, 0, wid);
      }

      return true;
}

/*
 * This is %assign/v0/d <label>, <delay_idx>, <bit>
 * Index register 0 contains a vector width, and the named index
 * register contains the delay.
 */
bool of_ASSIGN_V0D(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      assert(wid > 0);

      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
      unsigned bit = cp->bit_idx[1];

      vvp_net_ptr_t ptr (cp->net, 0);

      if (bit >= 4) {
            schedule_assign_plucked_vector(ptr, delay, thr->bits4, bit, wid);
      } else {
            vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);
            schedule_assign_plucked_vector(ptr, delay, value, 0, wid);
      }

      return true;
}

/*
 * This is %assign/v0/e <label>, <bit>
 * Index register 0 contains a vector width.
 */
bool of_ASSIGN_V0E(vthread_t thr, vvp_code_t cp)
{
      assert(thr->event != 0);
      unsigned wid = thr->words[0].w_int;
      assert(wid > 0);
      unsigned bit = cp->bit_idx[0];

      vvp_net_ptr_t ptr (cp->net, 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);
      // If the count is zero then just put the value.
      if (thr->ecount == 0) {
            schedule_assign_plucked_vector(ptr, 0, value, 0, wid);
      } else {
            schedule_evctl(ptr, value, 0, 0, thr->event, thr->ecount);
      }

      thr->event = 0;
      thr->ecount = 0;

      return true;
}

/*
 * This is %assign/v0/x1 <label>, <delay>, <bit>
 * Index register 0 contains a vector part width.
 * Index register 1 contains the offset into the destination vector.
 */
bool of_ASSIGN_V0X1(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      unsigned delay = cp->bit_idx[0];
      unsigned bit = cp->bit_idx[1];

      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
      assert(sig);

      // We fell off the MSB end.
      if (off >= (long)sig->value_size()) return true;
      else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off >= wid ) return true;
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      vvp_net_ptr_t ptr (cp->net, 0);
      schedule_assign_vector(ptr, off, sig->value_size(), value, delay);

      return true;
}

/*
 * This is %assign/v0/x1/d <label>, <delayx>, <bit>
 * Index register 0 contains a vector part width.
 * Index register 1 contains the offset into the destination vector.
 */
bool of_ASSIGN_V0X1D(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
      unsigned bit = cp->bit_idx[1];

      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
      assert(sig);

      // We fell off the MSB end.
      if (off >= (long)sig->value_size()) return true;
      else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off >= wid ) return true;
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      vvp_net_ptr_t ptr (cp->net, 0);
      schedule_assign_vector(ptr, off, sig->value_size(), value, delay);

      return true;
}

/*
 * This is %assign/v0/x1/e <label>, <bit>
 * Index register 0 contains a vector part width.
 * Index register 1 contains the offset into the destination vector.
 */
bool of_ASSIGN_V0X1E(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = thr->words[0].w_int;
      long off = thr->words[1].w_int;
      unsigned bit = cp->bit_idx[0];

      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
      assert(sig);

      // We fell off the MSB end.
      if (off >= (long)sig->value_size()) {
            thr->event = 0;
            thr->ecount = 0;
            return true;
      } else if (off < 0 ) {
            // We fell off the LSB end.
            if ((unsigned)-off >= wid ) {
                  thr->event = 0;
                  thr->ecount = 0;
                  return true;
            }
            // Trim the bits before the LSB
            wid += off;
            bit -= off;
            off = 0;
      }

      assert(wid > 0);

      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      vvp_net_ptr_t ptr (cp->net, 0);
      // If the count is zero then just put the value.
      if (thr->ecount == 0) {
            schedule_assign_vector(ptr, off, sig->value_size(), value, 0);
      } else {
            schedule_evctl(ptr, value, off, sig->value_size(), thr->event,
                           thr->ecount);
      }

      thr->event = 0;
      thr->ecount = 0;

      return true;
}

/*
 * This is %assign/wr <vpi-label>, <delay>
 *
 * This assigns (after a delay) a value to a real variable. Use the
 * vpi_put_value function to do the assign, with the delay written
 * into the vpiInertialDelay carrying the desired delay.
 */
bool of_ASSIGN_WR(vthread_t thr, vvp_code_t cp)
{
      unsigned delay = cp->bit_idx[0];
      double value = thr->pop_real();
      s_vpi_time del;

      del.type = vpiSimTime;
      vpip_time_to_timestruct(&del, delay);

      __vpiHandle*tmp = cp->handle;

      t_vpi_value val;
      val.format = vpiRealVal;
      val.value.real = value;
      vpi_put_value(tmp, &val, &del, vpiTransportDelay);

      return true;
}

bool of_ASSIGN_WRD(vthread_t thr, vvp_code_t cp)
{
      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
      double value = thr->pop_real();
      s_vpi_time del;

      del.type = vpiSimTime;
      vpip_time_to_timestruct(&del, delay);

      __vpiHandle*tmp = cp->handle;

      t_vpi_value val;
      val.format = vpiRealVal;
      val.value.real = value;
      vpi_put_value(tmp, &val, &del, vpiTransportDelay);

      return true;
}

bool of_ASSIGN_WRE(vthread_t thr, vvp_code_t cp)
{
      assert(thr->event != 0);
      double value = thr->pop_real();
      __vpiHandle*tmp = cp->handle;

      // If the count is zero then just put the value.
      if (thr->ecount == 0) {
            t_vpi_value val;

            val.format = vpiRealVal;
            val.value.real = value;
            vpi_put_value(tmp, &val, 0, vpiNoDelay);
      } else {
            schedule_evctl(tmp, value, thr->event, thr->ecount);
      }

      thr->event = 0;
      thr->ecount = 0;

      return true;
}

bool of_ASSIGN_X0(vthread_t, vvp_code_t)
{
#if 0
      unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]);
      vvp_ipoint_t itmp = ipoint_index(cp->iptr, thr->words[0].w_int);
      schedule_assign(itmp, bit_val, cp->bit_idx[0]);
#else
      fprintf(stderr, "XXXX forgot how to implement %%assign/x0\n");
#endif
      return true;
}
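
/*
 * %blend <dst>, <src>, <wid>
 * Merge two vectors in thread bit space: wherever the two operands
 * disagree, the result bit becomes x. The result replaces the left
 * operand.
 */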
bool of_BLEND(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);

            if (lb != rb)
                  thr_put_bit(thr, idx1, BIT4_X);

            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}

bool of_BLEND_WR(vthread_t thr, vvp_code_t)
{
      double f = thr->pop_real();
      double t = thr->pop_real();
      thr->push_real((t == f) ? t : 0.0);
      return true;
}

bool of_BREAKPOINT(vthread_t, vvp_code_t)
{
      return true;
}

/*
 * the %cassign/link instruction connects a source node to a
 * destination node. The destination node must be a signal, as it is
 * marked with the source of the cassign so that it may later be
 * unlinked without specifically knowing the source that this
 * instruction used.
 */
bool of_CASSIGN_LINK(vthread_t, vvp_code_t cp)
{
      vvp_net_t*dst = cp->net;
      vvp_net_t*src = cp->net2;

      vvp_fun_signal_base*sig
            = dynamic_cast<vvp_fun_signal_base*>(dst->fun);
      assert(sig);

      /* Detect the special case that we are already continuous
         assigning the source onto the destination. */
      if (sig->cassign_link == src)
            return true;

      /* If there is an existing cassign driving this node, then
         unlink it. We can have only 1 cassign at a time. */
      if (sig->cassign_link != 0) {
            vvp_net_ptr_t tmp (dst, 1);
            sig->cassign_link->unlink(tmp);
      }

      sig->cassign_link = src;

      /* Link the output of the src to the port[1] (the cassign
         port) of the destination. */
      vvp_net_ptr_t dst_ptr (dst, 1);
      src->link(dst_ptr);

      return true;
}

/*
 * the %cassign/v instruction invokes a continuous assign of a
 * constant value to a signal. The instruction arguments are:
 *
 *     %cassign/v <net>, <base>, <wid> ;
 *
 * Where the <net> is the net label assembled into a vvp_net pointer,
 * and the <base> and <wid> are stashed in the bit_idx array.
 *
 * This instruction writes vvp_vector4_t values to port-1 of the
 * target signal.
 */
bool of_CASSIGN_V(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

      /* Collect the thread bits into a vector4 item. */
      vvp_vector4_t value = vthread_bits_to_vector(thr, base, wid);

      /* set the value into port 1 of the destination. */
      vvp_net_ptr_t ptr (net, 1);
      vvp_send_vec4(ptr, value, 0);

      return true;
}

bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      double value = thr->pop_real();

      /* Set the value into port 1 of the destination. */
      vvp_net_ptr_t ptr (net, 1);
      vvp_send_real(ptr, value, 0);

      return true;
}

bool of_CASSIGN_X0(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

      // Implicitly, we get the base into the target vector from the
      // X0 register.
      long index = thr->words[0].w_int;

      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);

      if (index < 0 && (wid <= (unsigned)-index))
            return true;

      if (index >= (long)sig->value_size())
            return true;

      if (index < 0) {
            wid -= (unsigned) -index;
            index = 0;
      }

      if (index+wid > sig->value_size())
            wid = sig->value_size() - index;

      vvp_vector4_t vector = vthread_bits_to_vector(thr, base, wid);

      vvp_net_ptr_t ptr (net, 1);
      vvp_send_vec4_pv(ptr, vector, index, wid, sig->value_size(), 0);

      return true;
}
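
/*
 * %cast2 <dst>, <src>, <wid>
 * Copy a vector into <dst>, coercing it to two-state form. The
 * constant source addresses 0-3 collapse to all-0 (or all-1 for the
 * constant 1 bit); any other source is round-tripped through
 * vvp_vector2_t, the two-state representation.
 */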
bool of_CAST2(vthread_t thr, vvp_code_t cp)
{
      unsigned dst = cp->bit_idx[0];
      unsigned src = cp->bit_idx[1];
      unsigned wid = cp->number;

      thr_check_addr(thr, dst+wid-1);
      thr_check_addr(thr, src+wid-1);

      vvp_vector4_t res;
      switch (src) {
          case 0:
          case 2:
          case 3:
            res = vvp_vector4_t(wid, BIT4_0);
            break;
          case 1:
            res = vvp_vector4_t(wid, BIT4_1);
            break;
          default:
            res = vector2_to_vector4(vvp_vector2_t(vthread_bits_to_vector(thr, src, wid)), wid);
            break;
      }

      thr->bits4.set_vec(dst, res);
      return true;
}
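
/*
 * The comparison opcodes below all leave their results in the same
 * fixed thread bits: bit 4 gets the equality flag, bit 5 the
 * less-than flag, and (for the vector compares) bit 6 the case
 * equality (===) flag.
 */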
bool of_CMPS(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;
      vvp_bit4_t eeq = BIT4_1;
      vvp_bit4_t lt = BIT4_0;

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      const unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1;
      const unsigned end2 = (idx2 < 4)? idx2 : idx2 + cp->number - 1;

      if (end1 > end2)
            thr_check_addr(thr, end1);
      else
            thr_check_addr(thr, end2);

      const vvp_bit4_t sig1 = thr_get_bit(thr, end1);
      const vvp_bit4_t sig2 = thr_get_bit(thr, end2);

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lv = thr_get_bit(thr, idx1);
            vvp_bit4_t rv = thr_get_bit(thr, idx2);

            if (lv > rv) {
                  lt = BIT4_0;
                  eeq = BIT4_0;
            } else if (lv < rv) {
                  lt = BIT4_1;
                  eeq = BIT4_0;
            }
            if (eq != BIT4_X) {
                  if ((lv == BIT4_0) && (rv != BIT4_0))
                        eq = BIT4_0;
                  if ((lv == BIT4_1) && (rv != BIT4_1))
                        eq = BIT4_0;
                  if (bit4_is_xz(lv) || bit4_is_xz(rv))
                        eq = BIT4_X;
            }

            if (idx1 >= 4) idx1 += 1;
            if (idx2 >= 4) idx2 += 1;
      }

      if (eq == BIT4_X)
            lt = BIT4_X;
      else if ((sig1 == BIT4_1) && (sig2 == BIT4_0))
            lt = BIT4_1;
      else if ((sig1 == BIT4_0) && (sig2 == BIT4_1))
            lt = BIT4_0;

      /* Correct the lt bit to account for the sign of the parameters. */
      if (lt != BIT4_X) {
            /* If the first is negative and the last positive, then
               a < b for certain. */
            if ((sig1 == BIT4_1) && (sig2 == BIT4_0))
                  lt = BIT4_1;

            /* If the first is positive and the last negative, then
               a > b for certain. */
            if ((sig1 == BIT4_0) && (sig2 == BIT4_1))
                  lt = BIT4_0;
      }

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);
      thr_put_bit(thr, 6, eeq);

      return true;
}

bool of_CMPSTR(vthread_t thr, vvp_code_t)
{
      string re = thr->pop_str();
      string le = thr->pop_str();

      int rc = strcmp(le.c_str(), re.c_str());

      vvp_bit4_t eq;
      vvp_bit4_t lt;

      if (rc == 0) {
            eq = BIT4_1;
            lt = BIT4_0;
      } else if (rc < 0) {
            eq = BIT4_0;
            lt = BIT4_1;
      } else {
            eq = BIT4_0;
            lt = BIT4_0;
      }

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);

      return true;
}

bool of_CMPIS(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;
      vvp_bit4_t eeq = BIT4_1;
      vvp_bit4_t lt = BIT4_0;

      unsigned idx1 = cp->bit_idx[0];
      unsigned imm = cp->bit_idx[1];

      const unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1;
      thr_check_addr(thr, end1);
      const vvp_bit4_t sig1 = thr_get_bit(thr, end1);

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lv = thr_get_bit(thr, idx1);
            vvp_bit4_t rv = (imm & 1)? BIT4_1 : BIT4_0;
            imm >>= 1;

            if (lv > rv) {
                  lt = BIT4_0;
                  eeq = BIT4_0;
            } else if (lv < rv) {
                  lt = BIT4_1;
                  eeq = BIT4_0;
            }
            if (eq != BIT4_X) {
                  if ((lv == BIT4_0) && (rv != BIT4_0))
                        eq = BIT4_0;
                  if ((lv == BIT4_1) && (rv != BIT4_1))
                        eq = BIT4_0;
                  if (bit4_is_xz(lv) || bit4_is_xz(rv))
                        eq = BIT4_X;
            }

            if (idx1 >= 4) idx1 += 1;
      }

      if (eq == BIT4_X)
            lt = BIT4_X;
      else if (sig1 == BIT4_1)
            lt = BIT4_1;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);
      thr_put_bit(thr, 6, eeq);

      return true;
}

/*
 * The of_CMPIU below punts to this function if there are any xz bits
 * in the vector part of the instruction. In this case we know that
 * there is at least 1 xz bit in the left expression (and there are
 * none in the imm value) so the eeq result must be false. Otherwise,
 * the eq result may be 0 or x, and the lt bit is x.
 */
static bool of_CMPIU_the_hard_way(vthread_t thr, vvp_code_t cp)
{

      unsigned idx1 = cp->bit_idx[0];
      unsigned long imm = cp->bit_idx[1];
      unsigned wid = cp->number;
      if (idx1 >= 4)
            thr_check_addr(thr, idx1+wid-1);

      vvp_bit4_t lv = thr_get_bit(thr, idx1);
      vvp_bit4_t eq = BIT4_1;
      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
            vvp_bit4_t rv = (imm & 1UL)? BIT4_1 : BIT4_0;
            imm >>= 1UL;

            if (bit4_is_xz(lv)) {
                  eq = BIT4_X;
            } else if (lv != rv) {
                  eq = BIT4_0;
                  break;
            }

            if (idx1 >= 4) {
                  idx1 += 1;
                  if ((idx+1) < wid)
                        lv = thr_get_bit(thr, idx1);
            }
      }

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, BIT4_X);
      thr_put_bit(thr, 6, BIT4_0);

      return true;
}

bool of_CMPIU(vthread_t thr, vvp_code_t cp)
{
      unsigned addr = cp->bit_idx[0];
      unsigned long imm = cp->bit_idx[1];
      unsigned wid = cp->number;

      unsigned long*array = vector_to_array(thr, addr, wid);
      // If there are xz bits in the right hand expression, then we
      // have to do the compare the hard way. That is because even
      // though we know that eeq must be false (the immediate value
      // cannot have x or z bits) we don't know what the EQ or LT
      // bits will be.
      if (array == 0)
            return of_CMPIU_the_hard_way(thr, cp);

      unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
      vvp_bit4_t eq = BIT4_1;
      vvp_bit4_t lt = BIT4_0;
      for (unsigned idx = 0 ; idx < words ; idx += 1, imm = 0UL) {
            if (array[idx] == imm)
                  continue;

            eq = BIT4_0;
            lt = (array[idx] < imm) ? BIT4_1 : BIT4_0;
      }

      delete[]array;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);
      thr_put_bit(thr, 6, eq);
      return true;
}

bool of_CMPU_the_hard_way(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;
      vvp_bit4_t eeq = BIT4_1;

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lv = thr_get_bit(thr, idx1);
            vvp_bit4_t rv = thr_get_bit(thr, idx2);

            if (lv != rv)
                  eeq = BIT4_0;

            if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
                  eq = BIT4_X;
            if ((lv == BIT4_0) && (rv==BIT4_1))
                  eq = BIT4_0;
            if ((lv == BIT4_1) && (rv==BIT4_0))
                  eq = BIT4_0;

            if (eq == BIT4_0)
                  break;

            if (idx1 >= 4) idx1 += 1;
            if (idx2 >= 4) idx2 += 1;

      }

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, BIT4_X);
      thr_put_bit(thr, 6, eeq);

      return true;
}

bool of_CMPU(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;
      vvp_bit4_t lt = BIT4_0;

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      unsigned long*larray = vector_to_array(thr, idx1, wid);
      if (larray == 0) return of_CMPU_the_hard_way(thr, cp);

      unsigned long*rarray = vector_to_array(thr, idx2, wid);
      if (rarray == 0) {
            delete[]larray;
            return of_CMPU_the_hard_way(thr, cp);
      }

      unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;

      for (unsigned wdx = 0 ; wdx < words ; wdx += 1) {
            if (larray[wdx] == rarray[wdx])
                  continue;

            eq = BIT4_0;
            if (larray[wdx] < rarray[wdx])
                  lt = BIT4_1;
            else
                  lt = BIT4_0;
      }

      delete[]larray;
      delete[]rarray;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);
      thr_put_bit(thr, 6, eq);

      return true;
}
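
/*
 * %cmp/x and %cmp/z (below) are the casex/casez style comparisons:
 * an x or z bit (for %cmp/x), or a z bit (for %cmp/z), in either
 * operand acts as a wildcard that matches anything. Only the
 * equality flag in thread bit 4 is produced.
 */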
bool of_CMPX(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lv = thr_get_bit(thr, idx1);
            vvp_bit4_t rv = thr_get_bit(thr, idx2);

            if ((lv != rv) && !bit4_is_xz(lv) && !bit4_is_xz(rv)) {
                  eq = BIT4_0;
                  break;
            }

            if (idx1 >= 4) idx1 += 1;
            if (idx2 >= 4) idx2 += 1;
      }

      thr_put_bit(thr, 4, eq);

      return true;
}

bool of_CMPWR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();

      vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
      vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);

      return true;
}

bool of_CMPWS(vthread_t thr, vvp_code_t cp)
{
      int64_t l = thr->words[cp->bit_idx[0]].w_int;
      int64_t r = thr->words[cp->bit_idx[1]].w_int;

      vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
      vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);

      return true;
}

bool of_CMPWU(vthread_t thr, vvp_code_t cp)
{
      uint64_t l = thr->words[cp->bit_idx[0]].w_uint;
      uint64_t r = thr->words[cp->bit_idx[1]].w_uint;

      vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
      vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;

      thr_put_bit(thr, 4, eq);
      thr_put_bit(thr, 5, lt);

      return true;
}

bool of_CMPZ(vthread_t thr, vvp_code_t cp)
{
      vvp_bit4_t eq = BIT4_1;

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
            vvp_bit4_t lv = thr_get_bit(thr, idx1);
            vvp_bit4_t rv = thr_get_bit(thr, idx2);

            if ((lv != BIT4_Z) && (rv != BIT4_Z) && (lv != rv)) {
                  eq = BIT4_0;
                  break;
            }

            if (idx1 >= 4) idx1 += 1;
            if (idx2 >= 4) idx2 += 1;
      }

      thr_put_bit(thr, 4, eq);

      return true;
}

/*
 * %concat/str;
 */
bool of_CONCAT_STR(vthread_t thr, vvp_code_t)
{
      string text = thr->pop_str();
      thr->peek_str(0).append(text);
      return true;
}

/*
 * %concati/str <string>;
 */
bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
{
      const char*text = cp->text;
      thr->peek_str(0).append(text);
      return true;
}

bool of_CVT_RS(vthread_t thr, vvp_code_t cp)
{
      int64_t r = thr->words[cp->bit_idx[0]].w_int;
      thr->push_real( (double)(r) );

      return true;
}

bool of_CVT_RU(vthread_t thr, vvp_code_t cp)
{
      uint64_t r = thr->words[cp->bit_idx[0]].w_uint;
      thr->push_real( (double)(r) );

      return true;
}

bool of_CVT_RV(vthread_t thr, vvp_code_t cp)
{
      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];
      vvp_vector4_t vector = vthread_bits_to_vector(thr, base, wid);
      double val;
      vector4_to_value(vector, val, false);
      thr->push_real(val);

      return true;
}

bool of_CVT_RV_S(vthread_t thr, vvp_code_t cp)
{
      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];
      vvp_vector4_t vector = vthread_bits_to_vector(thr, base, wid);
      double val;
      vector4_to_value(vector, val, true);
      thr->push_real(val);

      return true;
}

/*
 * %cvt/sr <idx>
 * Pop the top value from the real stack, convert it to a 64bit signed
 * and save it to the indexed register.
 */
bool of_CVT_SR(vthread_t thr, vvp_code_t cp)
{
      double r = thr->pop_real();
      thr->words[cp->bit_idx[0]].w_int = i64round(r);

      return true;
}

bool of_CVT_UR(vthread_t thr, vvp_code_t cp)
{
      double r = thr->pop_real();
      if (r >= 0.0)
            thr->words[cp->bit_idx[0]].w_uint = (uint64_t)floor(r+0.5);
      else
            thr->words[cp->bit_idx[0]].w_uint = (uint64_t)ceil(r-0.5);

      return true;
}

/*
 * %cvt/vr <bit> <wid>
 */
bool of_CVT_VR(vthread_t thr, vvp_code_t cp)
{
      double r = thr->pop_real();
      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->number;
      vvp_vector4_t tmp(wid, r);

      /* Make sure there is enough space for the new vector. */
      thr_check_addr(thr, base+wid-1);
      thr->bits4.set_vec(base, tmp);

      return true;
}

/*
 * This implements the %deassign instruction. All we do is write a
 * long(1) to port-3 of the addressed net. This turns off an active
 * continuous assign activated by %cassign/v
 */
bool of_DEASSIGN(vthread_t, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned base = cp->bit_idx[0];
      unsigned width = cp->bit_idx[1];

      vvp_signal_value*fil = dynamic_cast<vvp_signal_value*> (net->fil);
      assert(fil);
      vvp_fun_signal_vec*sig = dynamic_cast<vvp_fun_signal_vec*>(net->fun);
      assert(sig);

      if (base >= fil->value_size()) return true;
      if (base+width > fil->value_size()) width = fil->value_size() - base;

      bool full_sig = base == 0 && width == fil->value_size();

      // This is the net that is forcing me...
      if (vvp_net_t*src = sig->cassign_link) {
            if (!full_sig) {
                  fprintf(stderr, "Sorry: when a signal is assigning a "
                          "register, I cannot deassign part of it.\n");
                  exit(1);
            }
            // And this is the pointer to be removed.
            vvp_net_ptr_t dst_ptr (net, 1);
            src->unlink(dst_ptr);
            sig->cassign_link = 0;
      }

      /* Do we release all or part of the net? */
      if (full_sig) {
            sig->deassign();
      } else {
            sig->deassign_pv(base, width);
      }

      return true;
}

bool of_DEASSIGN_WR(vthread_t, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;

      vvp_fun_signal_real*sig = dynamic_cast<vvp_fun_signal_real*>(net->fun);
      assert(sig);

      // This is the net that is forcing me...
      if (vvp_net_t*src = sig->cassign_link) {
            // And this is the pointer to be removed.
            vvp_net_ptr_t dst_ptr (net, 1);
            src->unlink(dst_ptr);
            sig->cassign_link = 0;
      }

      sig->deassign();

      return true;
}


/*
 * The delay takes two 32bit numbers to make up a 64bit time.
 *
 *   %delay <low>, <hig>
 */
bool of_DELAY(vthread_t thr, vvp_code_t cp)
{
      vvp_time64_t low = cp->bit_idx[0];
      vvp_time64_t hig = cp->bit_idx[1];

      vvp_time64_t res = 32;
      res = hig << res;
      res += low;

      schedule_vthread(thr, res);
      return false;
}

bool of_DELAYX(vthread_t thr, vvp_code_t cp)
{
      vvp_time64_t delay;

      assert(cp->number < 4);
      delay = thr->words[cp->number].w_uint;
      schedule_vthread(thr, delay);
      return false;
}

/* %delete/obj <label>
 *
 * This operator works by assigning a nil to the target object. This
 * causes any value that might be there to be garbage collected, thus
 * deleting the object.
 */
bool of_DELETE_OBJ(vthread_t thr, vvp_code_t cp)
{
      /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);
      vvp_send_object(ptr, vvp_object_t(), thr->wt_context);

      return true;
}

static bool do_disable(vthread_t thr, vthread_t match)
{
      bool flag = false;

      /* Pull the target thread out of its scope. */
      thr->parent_scope->threads.erase(thr);
/* Turn the thread off by setting is program counter to
|
|
zero and setting an OFF bit. */
|
|
thr->pc = codespace_null();
|
|
thr->i_have_ended = 1;
|
|
|
|
/* Turn off all the children of the thread. Simulate a %join
|
|
for as many times as needed to clear the results of all the
|
|
%forks that this thread has done. */
|
|
while (!thr->children.empty()) {
|
|
|
|
vthread_t tmp = *(thr->children.begin());
|
|
assert(tmp);
|
|
assert(tmp->parent == thr);
|
|
thr->i_am_joining = 0;
|
|
if (do_disable(tmp, match))
|
|
flag = true;
|
|
|
|
vthread_reap(tmp);
|
|
}
|
|
|
|
if (thr->parent && thr->parent->i_am_joining) {
|
|
// If a parent is waiting in a %join, wake it up. Note
|
|
// that it is possible to be waiting in a %join yet
|
|
// already scheduled if multiple child threads are
|
|
// ending. So check if the thread is already scheduled
|
|
// before scheduling it again.
|
|
vthread_t parent = thr->parent;
|
|
parent->i_am_joining = 0;
|
|
if (! parent->i_have_ended)
|
|
schedule_vthread(parent, 0, true);
|
|
|
|
// Let the parent do the reaping.
|
|
vthread_reap(thr);
|
|
|
|
} else if (thr->parent) {
|
|
/* If the parent is yet to %join me, let its %join
|
|
do the reaping. */
|
|
//assert(tmp->is_scheduled == 0);
|
|
|
|
} else {
|
|
/* No parent at all. Goodbye. */
|
|
vthread_reap(thr);
|
|
}
|
|
|
|
return flag || (thr == match);
|
|
}

/*
 * Implement the %disable instruction by scanning the target scope for
 * all the target threads. Kill the target threads and wake up a
 * parent that is attempting a %join.
 */
bool of_DISABLE(vthread_t thr, vvp_code_t cp)
{
      struct __vpiScope*scope = (struct __vpiScope*)cp->handle;

      bool disabled_myself_flag = false;

      while (! scope->threads.empty()) {
            set<vthread_t>::iterator cur = scope->threads.begin();

            /* If I am disabling myself, then remember that fact so
               that I can finish this statement differently. */
            if (*cur == thr)
                  disabled_myself_flag = true;

            if (do_disable(*cur, thr))
                  disabled_myself_flag = true;
      }

      return ! disabled_myself_flag;
}

/*
 * This function divides a 2-word number {high, a} by a 1-word
 * number. Assume that high < b.
 */
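/* A small illustrative example using 8-bit "words" instead of real CPU
 * words: dividing {high = 0x01, a = 0x10} (that is, 0x110) by b = 0x30
 * gives 0x110 / 0x30 = 5 with remainder 0x20. The loop below drives
 * "high" down to zero, then finishes with the plain a/b division. */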
static unsigned long divide2words(unsigned long a, unsigned long b,
                                  unsigned long high)
{
      unsigned long result = 0;
      while (high > 0) {
            unsigned long tmp_result = ULONG_MAX / b;
            unsigned long remain = ULONG_MAX % b;

            remain += 1;
            if (remain >= b) {
                  remain -= b;
                  tmp_result += 1;
            }

            // Now 0x1_0...0 = b*tmp_result + remain
            // high*0x1_0...0 = high*(b*tmp_result + remain)
            // high*0x1_0...0 = high*b*tmp_result + high*remain

            // We know that high*0x1_0...0 >= high*b*tmp_result, and
            // we know that high*0x1_0...0 > high*remain. Use
            // high*remain as the remainder for another iteration,
            // and add tmp_result*high into the current estimate of
            // the result.
            result += tmp_result * high;

            // The new iteration starts with high*remain + a.
            remain = multiply_with_carry(high, remain, high);
            a += remain;
            if(a < remain)
                  high += 1;

            // Now result*b + {high,a} == the input {high,a}. It is
            // possible that the new high >= 1. If so, it will
            // certainly be less than high from the previous
            // iteration. Do another iteration and it will shrink,
            // eventually to 0.
      }

      // high is now 0, so a is the remaining remainder, so we can
      // finish off the integer divide with a simple a/b.

      return result + a/b;
}

static unsigned long* divide_bits(unsigned long*ap, unsigned long*bp, unsigned wid)
{
      // Do all our work a cpu-word at a time. The "words" variable
      // is the number of words of the wid.
      unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;

      unsigned btop = words-1;
      while (btop > 0 && bp[btop] == 0)
            btop -= 1;

      // Detect divide by 0, and exit.
      if (btop==0 && bp[0]==0)
            return 0;

      // The result array will eventually accumulate the result. The
      // diff array is a difference that we use in the intermediate.
      unsigned long*diff = new unsigned long[words];
      unsigned long*result= new unsigned long[words];
      for (unsigned idx = 0 ; idx < words ; idx += 1)
            result[idx] = 0;

      for (unsigned cur = words-btop ; cur > 0 ; cur -= 1) {
            unsigned cur_ptr = cur-1;
            unsigned long cur_res;
            if (ap[cur_ptr+btop] >= bp[btop]) {
                  unsigned long high = 0;
                  if (cur_ptr+btop+1 < words)
                        high = ap[cur_ptr+btop+1];
                  cur_res = divide2words(ap[cur_ptr+btop], bp[btop], high);

            } else if (cur_ptr+btop+1 >= words) {
                  continue;

            } else if (ap[cur_ptr+btop+1] == 0) {
                  continue;

            } else {
                  cur_res = divide2words(ap[cur_ptr+btop], bp[btop],
                                         ap[cur_ptr+btop+1]);
            }

            // cur_res is a guesstimate of the result this far. It
            // may be 1 too big. (But it will also be >0) Try it,
            // and if the difference comes out negative, then adjust.

            // diff = (bp * cur_res) << cur_ptr;
            multiply_array_imm(diff+cur_ptr, bp, words-cur_ptr, cur_res);
            // ap -= diff
            unsigned long carry = 1;
            for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
                  ap[idx] = add_with_carry(ap[idx], ~diff[idx], carry);

            // ap has the diff subtracted out of it. If cur_res was
            // too large, then ap will turn negative. (We easily
            // tell that ap turned negative by looking at
            // carry&1. If it is 0, then it is *negative*.) In that
            // case, we know that cur_res was too large by 1. Correct by
            // adding 1b back in and reducing cur_res.
            if ((carry&1) == 0) {
                  // Keep adding b back in until the remainder
                  // becomes positive again.
                  do {
                        cur_res -= 1;
                        carry = 0;
                        for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
                              ap[idx] = add_with_carry(ap[idx], bp[idx-cur_ptr], carry);
                  } while (carry == 0);
            }

            result[cur_ptr] = cur_res;
      }

      // Now ap contains the remainder and result contains the
      // desired result. We should find that:
      // input-a = bp * result + ap;

      delete[]diff;
      return result;
}

bool of_DIV(vthread_t thr, vvp_code_t cp)
{
      unsigned adra = cp->bit_idx[0];
      unsigned adrb = cp->bit_idx[1];
      unsigned wid = cp->number;

      assert(adra >= 4);

      unsigned long*ap = vector_to_array(thr, adra, wid);
      if (ap == 0) {
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      unsigned long*bp = vector_to_array(thr, adrb, wid);
      if (bp == 0) {
            delete[]ap;
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      // If the value fits in a single CPU word, then do it the easy way.
      if (wid <= CPU_WORD_BITS) {
            if (bp[0] == 0) {
                  vvp_vector4_t tmp(wid, BIT4_X);
                  thr->bits4.set_vec(adra, tmp);
            } else {
                  ap[0] /= bp[0];
                  thr->bits4.setarray(adra, wid, ap);
            }
            delete[]ap;
            delete[]bp;
            return true;
      }

      unsigned long*result = divide_bits(ap, bp, wid);
      if (result == 0) {
            delete[]ap;
            delete[]bp;
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      // Now ap contains the remainder and result contains the
      // desired result. We should find that:
      // input-a = bp * result + ap;

      thr->bits4.setarray(adra, wid, result);
      delete[]ap;
      delete[]bp;
      delete[]result;
      return true;
}


static void negate_words(unsigned long*val, unsigned words)
{
      unsigned long carry = 1;
      for (unsigned idx = 0 ; idx < words ; idx += 1)
            val[idx] = add_with_carry(0, ~val[idx], carry);
}

bool of_DIV_S(vthread_t thr, vvp_code_t cp)
{
      unsigned adra = cp->bit_idx[0];
      unsigned adrb = cp->bit_idx[1];
      unsigned wid = cp->number;
      unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;

      assert(adra >= 4);

      // Get the values, left and right, in binary form. If there is
      // a problem with either (caused by an X or Z bit) then we
      // know right away that the entire result is X.
      unsigned long*ap = vector_to_array(thr, adra, wid);
      if (ap == 0) {
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      unsigned long*bp = vector_to_array(thr, adrb, wid);
      if (bp == 0) {
            delete[]ap;
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      // Sign extend the bits in the array to fill out the array.
      unsigned long sign_mask = 0;
      if (unsigned long sign_bits = (words*CPU_WORD_BITS) - wid) {
            sign_mask = -1UL << (CPU_WORD_BITS-sign_bits);
            if (ap[words-1] & (sign_mask>>1))
                  ap[words-1] |= sign_mask;
            if (bp[words-1] & (sign_mask>>1))
                  bp[words-1] |= sign_mask;
      }

      // If the value fits in a single word, then use the native divide.
      if (wid <= CPU_WORD_BITS) {
            if (bp[0] == 0) {
                  vvp_vector4_t tmp(wid, BIT4_X);
                  thr->bits4.set_vec(adra, tmp);
            } else {
                  long tmpa = (long) ap[0];
                  long tmpb = (long) bp[0];
                  long res = tmpa / tmpb;
                  ap[0] = ((unsigned long)res) & ~sign_mask;
                  thr->bits4.setarray(adra, wid, ap);
            }
            delete[]ap;
            delete[]bp;
            return true;
      }

      // We need to do the actual division on positive integers. Make
      // them positive here, and remember the negations.
      bool negate_flag = false;
      if ( ((long) ap[words-1]) < 0 ) {
            negate_flag = true;
            negate_words(ap, words);
      }
      if ( ((long) bp[words-1]) < 0 ) {
            negate_flag ^= true;
            negate_words(bp, words);
      }

      unsigned long*result = divide_bits(ap, bp, wid);
      if (result == 0) {
            delete[]ap;
            delete[]bp;
            vvp_vector4_t tmp(wid, BIT4_X);
            thr->bits4.set_vec(adra, tmp);
            return true;
      }

      if (negate_flag) {
            negate_words(result, words);
      }

      result[words-1] &= ~sign_mask;

      thr->bits4.setarray(adra, wid, result);
      delete[]ap;
      delete[]bp;
      delete[]result;
      return true;
}

bool of_DIV_WR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();
      thr->push_real(l / r);

      return true;
}

bool of_DUP_REAL(vthread_t thr, vvp_code_t)
{
      thr->push_real(thr->peek_real(0));
      return true;
}

/*
 * This terminates the current thread. If there is a parent who is
 * waiting for me to die, then I schedule it. At any rate, I mark
 * myself as a zombie by setting my pc to 0.
 *
 * It is possible for this thread to have children at this %end. This
 * means that my child is really my sibling created by my parent, and
 * my parent will do the proper %joins in due course. For example:
 *
 *     %fork child_1, test;
 *     %fork child_2, test;
 *     ... parent code ...
 *     %join;
 *     %join;
 *     %end;
 *
 *   child_1 ;
 *     %end;
 *   child_2 ;
 *     %end;
 *
 * In this example, the main thread creates threads child_1 and
 * child_2. It is possible that this thread is child_2, so there is a
 * parent pointer and a child pointer, even though I did no
 * %forks or %joins. This means that I have a ->child pointer and a
 * ->parent pointer.
 *
 * If the main thread has executed the first %join, then it is waiting
 * for me, and I will be reaped right away.
 *
 * If the main thread has not executed a %join yet, then this thread
 * becomes a zombie. The main thread executes its %join eventually,
 * reaping me at that time.
 *
 * It does not matter the order that child_1 and child_2 threads call
 * %end -- child_2 will be reaped by the first %join, and child_1 will
 * be reaped by the second %join.
 */
bool of_END(vthread_t thr, vvp_code_t)
{
      assert(! thr->waiting_for_event);
      thr->i_have_ended = 1;
      thr->pc = codespace_null();

      /* If I have a parent who is waiting for me, then mark that I
         have ended, and schedule that parent. Also, finish the
         %join for the parent. */
      if (thr->parent && thr->parent->i_am_joining) {
            vthread_t tmp = thr->parent;

            // Detect that the parent is waiting on an automatic
            // thread. Automatic threads must be reaped first. If
            // the parent is waiting on an auto (other than me) then
            // go into zombie state to be picked up later.
            if (!test_joinable(tmp, thr))
                  return false;

            tmp->i_am_joining = 0;
            schedule_vthread(tmp, 0, true);
            do_join(tmp, thr);
            return false;
      }

      /* If I have no parents, then no one can %join me and there is
         no reason to stick around. This can happen, for example if
         I am an ``initial'' thread.

         If I have children at this point, then I must have been the
         main thread (there is no other parent) and an error (not
         enough %joins) has been detected. */
      if (thr->parent == 0) {
            assert(thr->children.empty());
            vthread_reap(thr);
            return false;
      }

      /* If I make it this far, then I have a parent who may wish
         to %join me. Remain a zombie so that it can. */

      return false;
}
|
|
|
|
bool of_EVCTL(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(thr->event == 0 && thr->ecount == 0);
|
|
thr->event = cp->net;
|
|
thr->ecount = thr->words[cp->bit_idx[0]].w_uint;
|
|
return true;
|
|
}
|
|
bool of_EVCTLC(vthread_t thr, vvp_code_t)
|
|
{
|
|
thr->event = 0;
|
|
thr->ecount = 0;
|
|
return true;
|
|
}
|
|
|
|
bool of_EVCTLI(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(thr->event == 0 && thr->ecount == 0);
|
|
thr->event = cp->net;
|
|
thr->ecount = cp->bit_idx[0];
|
|
return true;
|
|
}
|
|
|
|
bool of_EVCTLS(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(thr->event == 0 && thr->ecount == 0);
|
|
thr->event = cp->net;
|
|
int64_t val = thr->words[cp->bit_idx[0]].w_int;
|
|
if (val < 0) val = 0;
|
|
thr->ecount = val;
|
|
return true;
|
|
}
|
|
|
|
/*
 * The %force/link instruction connects a source node to a
 * destination node. The destination node must be a signal, as it is
 * marked with the source of the force so that it may later be
 * unlinked without specifically knowing the source that this
 * instruction used.
 */
|
|
bool of_FORCE_LINK(vthread_t, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*dst = cp->net;
|
|
vvp_net_t*src = cp->net2;
|
|
|
|
assert(dst->fil);
|
|
dst->fil->force_link(dst, src);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * The %force/v instruction invokes a force assign of a constant value
 * to a signal. The instruction arguments are:
 *
 *   %force/v <net>, <base>, <wid> ;
 *
 * where the <net> is the net label assembled into a vvp_net pointer,
 * and the <base> and <wid> are stashed in the bit_idx array.
 *
 * The instruction writes a vvp_vector4_t value to port-2 of the
 * target signal.
 */
|
|
bool of_FORCE_V(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
unsigned base = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
/* Collect the thread bits into a vector4 item. */
|
|
vvp_vector4_t value = vthread_bits_to_vector(thr, base, wid);
|
|
|
|
/* Send the force value to the filter on the node. */
|
|
|
|
assert(net->fil);
|
|
if (value.size() != net->fil->filter_size())
|
|
value = coerce_to_width(value, net->fil->filter_size());
|
|
|
|
net->force_vec4(value, vvp_vector2_t(vvp_vector2_t::FILL1, net->fil->filter_size()));
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_FORCE_WR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
double value = thr->pop_real();
|
|
|
|
net->force_real(value, vvp_vector2_t(vvp_vector2_t::FILL1, 1));
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool of_FORCE_X0(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
unsigned base = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
assert(net->fil);
|
|
|
|
// Implicitly, we get the base into the target vector from the
|
|
// X0 register.
|
|
long index = thr->words[0].w_int;
|
|
|
|
if (index < 0 && (wid <= (unsigned)-index))
|
|
return true;
|
|
|
|
if (index < 0) {
|
|
wid -= (unsigned) -index;
|
|
index = 0;
|
|
}
|
|
|
|
unsigned use_size = net->fil->filter_size();
|
|
|
|
|
|
if (index >= (long)use_size)
|
|
return true;
|
|
|
|
if (index+wid > use_size)
|
|
wid = use_size - index;
|
|
|
|
vvp_vector2_t mask(vvp_vector2_t::FILL0, use_size);
|
|
for (unsigned idx = 0 ; idx < wid ; idx += 1)
|
|
mask.set_bit(index+idx, 1);
|
|
|
|
vvp_vector4_t vector = vthread_bits_to_vector(thr, base, wid);
|
|
vvp_vector4_t value(use_size, BIT4_Z);
|
|
value.set_vec(index, vector);
|
|
|
|
net->force_vec4(value, mask);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * The %fork instruction causes a new child to be created and pushed
 * in front of any existing child. This causes the new child to be
 * added to the list of children, and for me to be the parent of the
 * new child.
 */
|
|
bool of_FORK(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vthread_t child = vthread_new(cp->cptr2, cp->scope);
|
|
|
|
if (cp->scope->is_automatic) {
|
|
/* The context allocated for this child is the top entry
|
|
on the write context stack. */
|
|
child->wt_context = thr->wt_context;
|
|
child->rd_context = thr->wt_context;
|
|
|
|
thr->automatic_children.insert(child);
|
|
}
|
|
|
|
child->parent = thr;
|
|
thr->children.insert(child);
|
|
|
|
/* If the new child was created to evaluate a function,
|
|
run it immediately, then return to this thread. */
|
|
if (cp->scope->get_type_code() == vpiFunction) {
|
|
child->is_scheduled = 1;
|
|
vthread_run(child);
|
|
running_thread = thr;
|
|
} else {
|
|
schedule_vthread(child, 0, true);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_FREE(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
/* Pop the child context from the read context stack. */
|
|
vvp_context_t child_context = thr->rd_context;
|
|
thr->rd_context = vvp_get_stacked_context(child_context);
|
|
|
|
/* Free the context. */
|
|
vthread_free_context(child_context, cp->scope);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool of_INV_wide(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
|
|
thr->bits4.set_vec(idx1, ~val);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool of_INV_narrow(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
|
vvp_bit4_t lb = thr_get_bit(thr, idx1);
|
|
thr_put_bit(thr, idx1, ~lb);
|
|
idx1 += 1;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_INV(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
if (cp->number <= 4)
|
|
cp->opcode = &of_INV_narrow;
|
|
else
|
|
cp->opcode = &of_INV_wide;
|
|
|
|
return cp->opcode(thr, cp);
|
|
}
|
|
|
|
|
|
/*
|
|
* Index registers, arithmetic.
|
|
*/
|
|
|
|
static inline int64_t get_as_64_bit(uint32_t low_32, uint32_t high_32)
|
|
{
|
|
int64_t low = low_32;
|
|
int64_t res = high_32;
|
|
|
|
res <<= 32;
|
|
res |= low;
|
|
return res;
|
|
}
|
|
|
|
bool of_IX_ADD(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr->words[cp->number].w_int += get_as_64_bit(cp->bit_idx[0],
|
|
cp->bit_idx[1]);
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_SUB(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr->words[cp->number].w_int -= get_as_64_bit(cp->bit_idx[0],
|
|
cp->bit_idx[1]);
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_MUL(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr->words[cp->number].w_int *= get_as_64_bit(cp->bit_idx[0],
|
|
cp->bit_idx[1]);
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_LOAD(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr->words[cp->number].w_int = get_as_64_bit(cp->bit_idx[0],
|
|
cp->bit_idx[1]);
|
|
return true;
|
|
}
|
|
|
|
/*
 * Load a vector into an index register. The format of the
 * opcode is:
 *
 *   %ix/get <ix>, <base>, <wid>
 *
 * where <ix> is the index register, <base> is the base of the
 * vector and <wid> is the width in bits.
 *
 * Index registers only hold binary values, so if any of the
 * bits of the vector are x or z, then set the value to 0,
 * set bit[4] to 1, and give up.
 */
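/* For example (illustrative values only): loading the 4-bit vector
 * 4'b1010 puts 10 into the index register and clears bit[4], while
 * loading 4'b10x0 puts 0 there and sets bit[4] to flag the unknown
 * input. */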
|
|
|
|
static uint64_t vector_to_index(vthread_t thr, unsigned base,
|
|
unsigned width, bool signed_flag)
|
|
{
|
|
uint64_t v = 0;
|
|
bool unknown_flag = false;
|
|
|
|
vvp_bit4_t vv = BIT4_0;
|
|
for (unsigned i = 0 ; i < width ; i += 1) {
|
|
vv = thr_get_bit(thr, base);
|
|
if (bit4_is_xz(vv)) {
|
|
v = 0UL;
|
|
unknown_flag = true;
|
|
break;
|
|
}
|
|
|
|
v |= (uint64_t) vv << i;
|
|
|
|
if (base >= 4)
|
|
base += 1;
|
|
}
|
|
|
|
/* Extend to fill the integer value. */
|
|
if (signed_flag && !unknown_flag) {
|
|
uint64_t pad = vv;
|
|
for (unsigned i = width ; i < 8*sizeof(v) ; i += 1) {
|
|
v |= pad << i;
|
|
}
|
|
}
|
|
|
|
/* Set bit 4 as a flag if the input is unknown. */
|
|
thr_put_bit(thr, 4, unknown_flag ? BIT4_1 : BIT4_0);
|
|
|
|
return v;
|
|
}
|
|
|
|
bool of_IX_GET(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned index = cp->bit_idx[0];
|
|
unsigned base = cp->bit_idx[1];
|
|
unsigned width = cp->number;
|
|
|
|
thr->words[index].w_uint = vector_to_index(thr, base, width, false);
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_GET_S(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned index = cp->bit_idx[0];
|
|
unsigned base = cp->bit_idx[1];
|
|
unsigned width = cp->number;
|
|
|
|
thr->words[index].w_int = vector_to_index(thr, base, width, true);
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned index = cp->bit_idx[0];
|
|
vvp_net_t*net = cp->net;
|
|
|
|
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
|
|
if (sig == 0) {
|
|
assert(net->fil);
|
|
cerr << "%%ix/getv error: Net arg not a vector signal? "
|
|
<< typeid(*net->fil).name() << endl;
|
|
}
|
|
assert(sig);
|
|
|
|
vvp_vector4_t vec;
|
|
sig->vec4_value(vec);
|
|
uint64_t val;
|
|
bool known_flag = vector4_to_value(vec, val);
|
|
|
|
if (known_flag)
|
|
thr->words[index].w_uint = val;
|
|
else
|
|
thr->words[index].w_uint = 0;
|
|
|
|
/* Set bit 4 as a flag if the input is unknown. */
|
|
thr_put_bit(thr, 4, known_flag ? BIT4_0 : BIT4_1);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_IX_GETV_S(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned index = cp->bit_idx[0];
|
|
vvp_net_t*net = cp->net;
|
|
|
|
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
|
|
if (sig == 0) {
|
|
assert(net->fil);
|
|
cerr << "%%ix/getv/s error: Net arg not a vector signal? "
|
|
<< "fun=" << typeid(*net->fil).name()
|
|
<< ", fil=" << (net->fil? typeid(*net->fil).name() : "<>")
|
|
<< endl;
|
|
}
|
|
assert(sig);
|
|
|
|
vvp_vector4_t vec;
|
|
sig->vec4_value(vec);
|
|
int64_t val;
|
|
bool known_flag = vector4_to_value(vec, val, true, true);
|
|
|
|
if (known_flag)
|
|
thr->words[index].w_int = val;
|
|
else
|
|
thr->words[index].w_int = 0;
|
|
|
|
/* Set bit 4 as a flag if the input is unknown. */
|
|
thr_put_bit(thr, 4, known_flag ? BIT4_0 : BIT4_1);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * The various JMP instructions work simply by pulling the new program
 * counter from the instruction and resuming. If the jump is
 * conditional, then test the bit for the expected value first.
 */
|
|
bool of_JMP(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr->pc = cp->cptr;
|
|
|
|
/* Normally, this returns true so that the processor just
|
|
keeps going to the next instruction. However, if there was
|
|
a $stop or vpiStop, returning false here can break the
|
|
simulation out of a hung loop. */
|
|
if (schedule_stopped()) {
|
|
schedule_vthread(thr, 0, false);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_JMP0(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
if (thr_get_bit(thr, cp->bit_idx[0]) == 0)
|
|
thr->pc = cp->cptr;
|
|
|
|
/* Normally, this returns true so that the processor just
|
|
keeps going to the next instruction. However, if there was
|
|
a $stop or vpiStop, returning false here can break the
|
|
simulation out of a hung loop. */
|
|
if (schedule_stopped()) {
|
|
schedule_vthread(thr, 0, false);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_JMP0XZ(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
if (thr_get_bit(thr, cp->bit_idx[0]) != BIT4_1)
|
|
thr->pc = cp->cptr;
|
|
|
|
/* Normally, this returns true so that the processor just
|
|
keeps going to the next instruction. However, if there was
|
|
a $stop or vpiStop, returning false here can break the
|
|
simulation out of a hung loop. */
|
|
if (schedule_stopped()) {
|
|
schedule_vthread(thr, 0, false);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_JMP1(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
if (thr_get_bit(thr, cp->bit_idx[0]) == 1)
|
|
thr->pc = cp->cptr;
|
|
|
|
/* Normally, this returns true so that the processor just
|
|
keeps going to the next instruction. However, if there was
|
|
a $stop or vpiStop, returning false here can break the
|
|
simulation out of a hung loop. */
|
|
if (schedule_stopped()) {
|
|
schedule_vthread(thr, 0, false);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * The %join instruction causes the thread to wait for one child
 * to die. If a child is already dead (and a zombie) then I reap
 * it and go on. Otherwise, I mark myself as waiting in a join so that
 * children know to wake me when they finish.
 */
|
|
|
|
static bool test_joinable(vthread_t thr, vthread_t child)
|
|
{
|
|
set<vthread_t>::iterator auto_cur = thr->automatic_children.find(child);
|
|
if (!thr->automatic_children.empty() && auto_cur == thr->automatic_children.end())
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static void do_join(vthread_t thr, vthread_t child)
|
|
{
|
|
assert(child->parent == thr);
|
|
|
|
/* If the immediate child thread is in an automatic scope... */
|
|
if (thr->automatic_children.erase(child) != 0) {
|
|
/* and is the top level task/function thread... */
|
|
if (thr->wt_context != thr->rd_context) {
|
|
/* Pop the child context from the write context stack. */
|
|
vvp_context_t child_context = thr->wt_context;
|
|
thr->wt_context = vvp_get_stacked_context(child_context);
|
|
|
|
/* Push the child context onto the read context stack */
|
|
vvp_set_stacked_context(child_context, thr->rd_context);
|
|
thr->rd_context = child_context;
|
|
}
|
|
}
|
|
|
|
vthread_reap(child);
|
|
}
|
|
|
|
bool of_JOIN(vthread_t thr, vvp_code_t)
|
|
{
|
|
assert( !thr->i_am_joining );
|
|
assert( !thr->children.empty());
|
|
|
|
// Are there any children that have already ended? If so, then
|
|
// join with that one.
|
|
for (set<vthread_t>::iterator cur = thr->children.begin()
|
|
; cur != thr->children.end() ; ++cur) {
|
|
vthread_t curp = *cur;
|
|
if (!curp->i_have_ended)
|
|
continue;
|
|
|
|
if (!test_joinable(thr, curp))
|
|
continue;
|
|
|
|
// found something!
|
|
do_join(thr, curp);
|
|
return true;
|
|
}
|
|
|
|
// Otherwise, tell my children to awaken me when they end,
|
|
// then pause.
|
|
thr->i_am_joining = 1;
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* This %join/detach <n> instruction causes the thread to detach
|
|
* threads that were created by an earlier %fork.
|
|
*/
|
|
bool of_JOIN_DETACH(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned long count = cp->number;
|
|
|
|
assert(thr->automatic_children.empty());
|
|
assert(count == thr->children.size());
|
|
|
|
while (!thr->children.empty()) {
|
|
vthread_t child = *thr->children.begin();
|
|
assert(child->parent == thr);
|
|
|
|
// We cannot detach automatic tasks/functions
|
|
assert(child->wt_context == 0);
|
|
if (child->i_have_ended) {
|
|
// If the child has already ended, then reap it.
|
|
vthread_reap(child);
|
|
|
|
} else {
|
|
thr->children.erase(child);
|
|
child->parent = 0;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %load/ar <array-label>, <index>;
|
|
*/
|
|
bool of_LOAD_AR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx = cp->bit_idx[0];
|
|
unsigned adr = thr->words[idx].w_int;
|
|
double word;
|
|
|
|
/* The result is 0.0 if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
word = 0.0;
|
|
} else {
|
|
word = array_get_word_r(cp->array, adr);
|
|
}
|
|
|
|
thr->push_real(word);
|
|
return true;
|
|
}
|
|
|
|
/*
 * %load/av <bit>, <array-label>, <wid> ;
 *
 * <bit> is the thread bit address for the result
 * <array-label> is the array to access, and
 * <wid> is the width of the word to read.
 *
 * The address of the word in the array is in index register 3.
 */
|
|
bool of_LOAD_AV(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned bit = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
unsigned adr = thr->words[3].w_int;
|
|
|
|
/* Check the address once, before we scan the vector. */
|
|
thr_check_addr(thr, bit+wid-1);
|
|
|
|
/* The result is 'bx if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
vvp_vector4_t tmp (wid, BIT4_X);
|
|
thr->bits4.set_vec(bit, tmp);
|
|
return true;
|
|
}
|
|
|
|
vvp_vector4_t word = array_get_word(cp->array, adr);
|
|
|
|
if (word.size() > wid)
|
|
word.resize(wid);
|
|
|
|
/* Copy the vector bits into the bits4 vector. Do the copy
|
|
directly to skip the excess calls to thr_check_addr. */
|
|
thr->bits4.set_vec(bit, word);
|
|
|
|
/* If the source is shorter than the desired width, then pad
|
|
with BIT4_X values. */
|
|
for (unsigned idx = word.size() ; idx < wid ; idx += 1)
|
|
thr->bits4.set_bit(bit+idx, BIT4_X);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %load/dar <bit>, <array-label>, <index>;
|
|
*/
|
|
bool of_LOAD_DAR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned bit = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
unsigned adr = thr->words[3].w_int;
|
|
vvp_net_t*net = cp->net;
|
|
|
|
assert(net);
|
|
vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
|
|
assert(obj);
|
|
|
|
vvp_darray*darray = obj->get_object().peek<vvp_darray>();
|
|
assert(darray);
|
|
|
|
vvp_vector4_t word;
|
|
darray->get_word(adr, word);
|
|
assert(word.size() == wid);
|
|
|
|
thr->bits4.set_vec(bit, word);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %load/dar/r <array-label>;
|
|
*/
|
|
bool of_LOAD_DAR_R(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned adr = thr->words[3].w_int;
|
|
vvp_net_t*net = cp->net;
|
|
|
|
assert(net);
|
|
vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
|
|
assert(obj);
|
|
|
|
vvp_darray*darray = obj->get_object().peek<vvp_darray>();
|
|
assert(darray);
|
|
|
|
double word;
|
|
darray->get_word(adr, word);
|
|
|
|
thr->push_real(word);
|
|
return true;
|
|
}
|
|
|
|
bool of_LOAD_DAR_STR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned adr = thr->words[3].w_int;
|
|
vvp_net_t*net = cp->net;
|
|
|
|
assert(net);
|
|
vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
|
|
assert(obj);
|
|
|
|
vvp_darray*darray = obj->get_object().peek<vvp_darray>();
|
|
assert(darray);
|
|
|
|
string word;
|
|
darray->get_word(adr, word);
|
|
thr->push_str(word);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * %load/vp0, %load/vp0/s, %load/avp0 and %load/avp0/s share this function.
 */
#if (SIZEOF_UNSIGNED_LONG >= 8)
# define CPU_WORD_STRIDE CPU_WORD_BITS - 1 // avoid a warning
#else
# define CPU_WORD_STRIDE CPU_WORD_BITS
#endif
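// Note on the stride above: shifting a value by the full width of its
// type is undefined in C++ (and draws a compiler warning), so on hosts
// with 64-bit longs the shift count is reduced to CPU_WORD_BITS-1. An
// arithmetic shift of the signed addend by that amount still yields the
// sign-extension word (all zeros or all ones) needed for higher words.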
|
|
static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
|
|
{
|
|
unsigned bit = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
int64_t addend = thr->words[0].w_int;
|
|
|
|
/* Check the address once, before we scan the vector. */
|
|
thr_check_addr(thr, bit+wid-1);
|
|
|
|
unsigned long*val = sig_value.subarray(0, wid);
|
|
if (val == 0) {
|
|
vvp_vector4_t tmp(wid, BIT4_X);
|
|
thr->bits4.set_vec(bit, tmp);
|
|
return;
|
|
}
|
|
|
|
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
|
|
unsigned long carry = 0;
|
|
unsigned long imm = addend;
|
|
for (unsigned idx = 0 ; idx < words ; idx += 1) {
|
|
val[idx] = add_with_carry(val[idx], imm, carry);
|
|
addend >>= CPU_WORD_STRIDE;
|
|
imm = addend;
|
|
}
|
|
|
|
/* Copy the vector bits into the bits4 vector. Do the copy
|
|
directly to skip the excess calls to thr_check_addr. */
|
|
thr->bits4.setarray(bit, wid, val);
|
|
delete[]val;
|
|
}
|
|
|
|
/*
 * %load/avp0 <bit>, <array-label>, <wid> ;
 *
 * <bit> is the thread bit address for the result
 * <array-label> is the array to access, and
 * <wid> is the width of the word to read.
 *
 * The address of the word in the array is in index register 3.
 * An integer value from index register 0 is added to the value.
 */
|
|
bool of_LOAD_AVP0(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned wid = cp->bit_idx[1];
|
|
unsigned adr = thr->words[3].w_int;
|
|
|
|
/* The result is 'bx if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
unsigned bit = cp->bit_idx[0];
|
|
thr_check_addr(thr, bit+wid-1);
|
|
vvp_vector4_t tmp (wid, BIT4_X);
|
|
thr->bits4.set_vec(bit, tmp);
|
|
return true;
|
|
}
|
|
|
|
/* We need a vector this wide to make the math work correctly.
|
|
* Copy the base bits into the vector, but keep the width. */
|
|
vvp_vector4_t sig_value(wid, BIT4_0);
|
|
sig_value.copy_bits(array_get_word(cp->array, adr));
|
|
|
|
load_vp0_common(thr, cp, sig_value);
|
|
return true;
|
|
}
|
|
|
|
bool of_LOAD_AVP0_S(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned wid = cp->bit_idx[1];
|
|
unsigned adr = thr->words[3].w_int;
|
|
|
|
/* The result is 'bx if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
unsigned bit = cp->bit_idx[0];
|
|
thr_check_addr(thr, bit+wid-1);
|
|
vvp_vector4_t tmp (wid, BIT4_X);
|
|
thr->bits4.set_vec(bit, tmp);
|
|
return true;
|
|
}
|
|
|
|
vvp_vector4_t tmp (array_get_word(cp->array, adr));
|
|
|
|
/* We need a vector this wide to make the math work correctly.
|
|
* Copy the base bits into the vector, but keep the width. */
|
|
vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
|
|
sig_value.copy_bits(tmp);
|
|
|
|
load_vp0_common(thr, cp, sig_value);
|
|
return true;
|
|
}
|
|
|
|
/*
 * %load/avx.p <bit>, <array-label>, <idx> ;
 *
 * <bit> is the thread bit address for the result,
 * <array-label> is the array to access, and
 * <idx> is the index register that holds the bit offset to read
 * (the register is incremented after the read).
 *
 * The address of the word in the array is in index register 3.
 */
|
|
bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned bit = cp->bit_idx[0];
|
|
unsigned index = cp->bit_idx[1];
|
|
unsigned adr = thr->words[3].w_int;
|
|
|
|
/* The result is 'bx if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
thr_put_bit(thr, bit, BIT4_X);
|
|
return true;
|
|
}
|
|
|
|
long use_index = thr->words[index].w_int;
|
|
|
|
vvp_vector4_t word = array_get_word(cp->array, adr);
|
|
|
|
if ((use_index >= (long)word.size()) || (use_index < 0)) {
|
|
thr_put_bit(thr, bit, BIT4_X);
|
|
} else {
|
|
thr_put_bit(thr, bit, word.value(use_index));
|
|
}
|
|
|
|
thr->words[index].w_int = use_index + 1;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %load/obj <var-label>
|
|
*/
|
|
bool of_LOAD_OBJ(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
vvp_fun_signal_object*fun = dynamic_cast<vvp_fun_signal_object*> (net->fun);
|
|
assert(fun);
|
|
|
|
vvp_object_t val = fun->get_object();
|
|
thr->push_object(val);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %load/real <var-label>
|
|
*/
|
|
bool of_LOAD_REAL(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
__vpiHandle*tmp = cp->handle;
|
|
t_vpi_value val;
|
|
|
|
val.format = vpiRealVal;
|
|
vpi_get_value(tmp, &val);
|
|
|
|
thr->push_real(val.value.real);
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool of_LOAD_STR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
|
|
|
|
vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
|
|
assert(fun);
|
|
|
|
const string&val = fun->get_string();
|
|
thr->push_str(val);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_LOAD_STRA(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx = cp->bit_idx[0];
|
|
unsigned adr = thr->words[idx].w_int;
|
|
string word;
|
|
|
|
/* The result is 0.0 if the address is undefined. */
|
|
if (thr_get_bit(thr, 4) == BIT4_1) {
|
|
word = "";
|
|
} else {
|
|
word = array_get_word_str(cp->array, adr);
|
|
}
|
|
|
|
thr->push_str(word);
|
|
return true;
|
|
}
|
|
|
|
/* %load/v <bit>, <label>, <wid>
 *
 * Implement the %load/v instruction. Load the vector value of the
 * requested width from the <label> functor starting in the thread bit
 * <bit>.
 *
 * The <bit> value is the destination in the thread vector store, and
 * is in cp->bit_idx[0].
 *
 * The <wid> value is the expected width of the vector, and is in
 * cp->bit_idx[1].
 *
 * The functor to read from is the vvp_net_t object pointed to by the
 * cp->net pointer.
 */
|
|
static void load_base(vvp_code_t cp, vvp_vector4_t&dst)
|
|
{
|
|
vvp_net_t*net = cp->net;
|
|
|
|
/* For the %load to work, the functor must actually be a
|
|
signal functor. Only signals save their vector value. */
|
|
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
|
|
if (sig == 0) {
|
|
cerr << "%%load/v error: Net arg not a signal? "
|
|
<< (net->fil ? typeid(*net->fil).name() : typeid(*net->fun).name()) << endl;
|
|
assert(sig);
|
|
}
|
|
|
|
sig->vec4_value(dst);
|
|
}
|
|
|
|
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned bit = cp->bit_idx[0];
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
vvp_vector4_t sig_value;
|
|
load_base(cp, sig_value);
|
|
|
|
/* Check the address once, before we scan the vector. */
|
|
thr_check_addr(thr, bit+wid-1);
|
|
|
|
if (sig_value.size() > wid)
|
|
sig_value.resize(wid);
|
|
|
|
/* Copy the vector bits into the bits4 vector. Do the copy
|
|
directly to skip the excess calls to thr_check_addr. */
|
|
thr->bits4.set_vec(bit, sig_value);
|
|
|
|
/* If the source is shorter than the desired width, then pad
|
|
with BIT4_X values. */
|
|
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
|
|
thr->bits4.set_bit(bit+idx, BIT4_X);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * This is like of_LOAD_VEC, but includes an add of an integer value from
 * index 0. The <wid> is the expected result width not the vector width.
 */
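/* Illustrative example: if the signal currently holds 8'h14 and index
 * register 0 holds 3, then %load/vp0 with <wid>=8 leaves 8'h17 in the
 * thread bits; any x/z bit in the signal makes the whole result 'bx
 * instead. */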
|
|
|
|
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
/* We need a vector this wide to make the math work correctly.
|
|
* Copy the base bits into the vector, but keep the width. */
|
|
vvp_vector4_t sig_value(wid, BIT4_0);
|
|
|
|
vvp_vector4_t tmp;
|
|
load_base(cp, tmp);
|
|
sig_value.copy_bits(tmp);
|
|
|
|
load_vp0_common(thr, cp, sig_value);
|
|
return true;
|
|
}
|
|
|
|
bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned wid = cp->bit_idx[1];
|
|
|
|
vvp_vector4_t tmp;
|
|
load_base(cp, tmp);
|
|
|
|
/* We need a vector this wide to make the math work correctly.
|
|
* Copy the base bits into the vector, but keep the width. */
|
|
vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
|
|
sig_value.copy_bits(tmp);
|
|
|
|
load_vp0_common(thr, cp, sig_value);
|
|
return true;
|
|
}
|
|
|
|
/*
 * %load/x1p <bit>, <functor>, <wid>
 *
 * <bit> is the destination thread bit and must be >= 4.
 * The canonical base of the part select is taken from index register 1.
 */
|
|
bool of_LOAD_X1P(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
// <bit> is the thread bit to load
|
|
assert(cp->bit_idx[0] >= 4);
|
|
unsigned bit = cp->bit_idx[0];
|
|
int wid = cp->bit_idx[1];
|
|
|
|
// <index> is the canonical base address of the part select.
|
|
long index = thr->words[1].w_int;
|
|
|
|
// <functor> is converted to a vvp_net_t pointer from which we
|
|
// read our value.
|
|
vvp_net_t*net = cp->net;
|
|
|
|
// For the %load to work, the functor must actually be a
|
|
// signal functor. Only signals save their vector value.
|
|
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
|
|
assert(sig);
|
|
|
|
for (long idx = 0 ; idx < wid ; idx += 1) {
|
|
long use_index = index + idx;
|
|
vvp_bit4_t val;
|
|
if (use_index < 0 || use_index >= (signed)sig->value_size())
|
|
val = BIT4_X;
|
|
else
|
|
val = sig->value(use_index);
|
|
|
|
thr_put_bit(thr, bit+idx, val);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void do_verylong_mod(vthread_t thr, vvp_code_t cp,
|
|
bool left_is_neg, bool right_is_neg)
|
|
{
|
|
bool out_is_neg = left_is_neg;
|
|
int len=cp->number;
|
|
unsigned char *a, *z, *t;
|
|
a = new unsigned char[len+1];
|
|
z = new unsigned char[len+1];
|
|
t = new unsigned char[len+1];
|
|
|
|
unsigned char carry;
|
|
unsigned char temp;
|
|
|
|
int mxa = -1, mxz = -1;
|
|
int i;
|
|
int current, copylen;
|
|
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
unsigned lb_carry = left_is_neg? 1 : 0;
|
|
unsigned rb_carry = right_is_neg? 1 : 0;
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
unsigned lb = thr_get_bit(thr, idx1);
|
|
unsigned rb = thr_get_bit(thr, idx2);
|
|
|
|
if ((lb | rb) & 2) {
|
|
delete []t;
|
|
delete []z;
|
|
delete []a;
|
|
goto x_out;
|
|
}
|
|
|
|
if (left_is_neg) {
|
|
lb = (1-lb) + lb_carry;
|
|
lb_carry = (lb & ~1)? 1 : 0;
|
|
lb &= 1;
|
|
}
|
|
if (right_is_neg) {
|
|
rb = (1-rb) + rb_carry;
|
|
rb_carry = (rb & ~1)? 1 : 0;
|
|
rb &= 1;
|
|
}
|
|
|
|
z[idx]=lb;
|
|
a[idx]=1-rb; // for 2s complement add..
|
|
|
|
idx1 += 1;
|
|
if (idx2 >= 4)
|
|
idx2 += 1;
|
|
}
|
|
|
|
z[len]=0;
|
|
a[len]=1;
|
|
|
|
for(i=len-1;i>=0;i--) {
|
|
if(!a[i]) {
|
|
mxa=i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for(i=len-1;i>=0;i--) {
|
|
if(z[i]) {
|
|
mxz=i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if((mxa>mxz)||(mxa==-1)) {
|
|
if(mxa==-1) {
|
|
delete []t;
|
|
delete []z;
|
|
delete []a;
|
|
goto x_out;
|
|
}
|
|
|
|
goto tally;
|
|
}
|
|
|
|
copylen = mxa + 2;
|
|
current = mxz - mxa;
|
|
|
|
while(current > -1) {
|
|
carry = 1;
|
|
for(i=0;i<copylen;i++) {
|
|
temp = z[i+current] + a[i] + carry;
|
|
t[i] = (temp&1);
|
|
carry = (temp>>1);
|
|
}
|
|
|
|
if(carry) {
|
|
for(i=0;i<copylen;i++) {
|
|
z[i+current] = t[i];
|
|
}
|
|
}
|
|
|
|
current--;
|
|
}
|
|
|
|
tally:
|
|
|
|
carry = out_is_neg? 1 : 0;
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
unsigned ob = z[idx];
|
|
if (out_is_neg) {
|
|
ob = (1-ob) + carry;
|
|
carry = (ob & ~1)? 1 : 0;
|
|
ob = ob & 1;
|
|
}
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, ob?BIT4_1:BIT4_0);
|
|
}
|
|
|
|
delete []t;
|
|
delete []z;
|
|
delete []a;
|
|
return;
|
|
|
|
x_out:
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1)
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, BIT4_X);
|
|
|
|
return;
|
|
}
|
|
|
|
bool of_MAX_WR(vthread_t thr, vvp_code_t)
|
|
{
|
|
double r = thr->pop_real();
|
|
double l = thr->pop_real();
|
|
if (r != r)
|
|
thr->push_real(l);
|
|
else if (l != l)
|
|
thr->push_real(r);
|
|
else if (r < l)
|
|
thr->push_real(l);
|
|
else
|
|
thr->push_real(r);
|
|
return true;
|
|
}
|
|
|
|
bool of_MIN_WR(vthread_t thr, vvp_code_t)
|
|
{
|
|
double r = thr->pop_real();
|
|
double l = thr->pop_real();
|
|
if (r != r)
|
|
thr->push_real(l);
|
|
else if (l != l)
|
|
thr->push_real(r);
|
|
else if (r < l)
|
|
thr->push_real(r);
|
|
else
|
|
thr->push_real(l);
|
|
return true;
|
|
}
|
|
|
|
bool of_MOD(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
if(cp->number <= 8*sizeof(unsigned long long)) {
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
unsigned long long lv = 0, rv = 0;
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
unsigned long long lb = thr_get_bit(thr, idx1);
|
|
unsigned long long rb = thr_get_bit(thr, idx2);
|
|
|
|
if ((lb | rb) & 2)
|
|
goto x_out;
|
|
|
|
lv |= (unsigned long long) lb << idx;
|
|
rv |= (unsigned long long) rb << idx;
|
|
|
|
idx1 += 1;
|
|
if (idx2 >= 4)
|
|
idx2 += 1;
|
|
}
|
|
|
|
if (rv == 0)
|
|
goto x_out;
|
|
|
|
lv %= rv;
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1)?BIT4_1 : BIT4_0);
|
|
lv >>= 1;
|
|
}
|
|
|
|
return true;
|
|
|
|
} else {
|
|
do_verylong_mod(thr, cp, false, false);
|
|
return true;
|
|
}
|
|
|
|
x_out:
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1)
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, BIT4_X);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_MOD_S(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
      /* Handle the case that we can fit the bits into a long-long
         variable. We can use native % to do the work. */
|
|
if(cp->number <= 8*sizeof(long long)) {
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
long long lv = 0, rv = 0;
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
long long lb = thr_get_bit(thr, idx1);
|
|
long long rb = thr_get_bit(thr, idx2);
|
|
|
|
if ((lb | rb) & 2)
|
|
goto x_out;
|
|
|
|
lv |= (long long) lb << idx;
|
|
rv |= (long long) rb << idx;
|
|
|
|
idx1 += 1;
|
|
if (idx2 >= 4)
|
|
idx2 += 1;
|
|
}
|
|
|
|
if (rv == 0)
|
|
goto x_out;
|
|
|
|
/* Sign extend the signed operands when needed. */
|
|
if (cp->number < 8*sizeof(long long)) {
|
|
if (lv & (1LL << (cp->number-1)))
|
|
lv |= -1LL << cp->number;
|
|
if (rv & (1LL << (cp->number-1)))
|
|
rv |= -1LL << cp->number;
|
|
}
|
|
|
|
lv %= rv;
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1)?BIT4_1:BIT4_0);
|
|
lv >>= 1;
|
|
}
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
bool left_is_neg
|
|
= thr_get_bit(thr,cp->bit_idx[0]+cp->number-1) == 1;
|
|
bool right_is_neg
|
|
= thr_get_bit(thr,cp->bit_idx[1]+cp->number-1) == 1;
|
|
do_verylong_mod(thr, cp, left_is_neg, right_is_neg);
|
|
return true;
|
|
}
|
|
|
|
x_out:
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1)
|
|
thr_put_bit(thr, cp->bit_idx[0]+idx, BIT4_X);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %mod/wr
|
|
*/
|
|
bool of_MOD_WR(vthread_t thr, vvp_code_t)
|
|
{
|
|
double r = thr->pop_real();
|
|
double l = thr->pop_real();
|
|
thr->push_real(fmod(l,r));
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * %mov <dest>, <src>, <wid>
 * This instruction is implemented by the of_MOV function
 * below. However, during runtime vvp might notice that the
 * parameters have certain properties that make it possible to
 * replace the of_MOV opcode with a more specific instruction that
 * more directly does the job. All the of_MOV*_ functions are
 * functions that of_MOV might use to replace itself.
 */
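/* For example, when <src> is one of the constant bit addresses 0-3
 * (the 0/1/x/z constants), of_MOV rewrites cp->opcode to of_MOV1XZ_ the
 * first time it executes, so later passes fill the destination directly
 * without re-testing the operands. */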
|
|
|
|
static bool of_MOV1XZ_(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
|
|
vvp_vector4_t tmp (cp->number, thr_index_to_bit4[cp->bit_idx[1]]);
|
|
thr->bits4.set_vec(cp->bit_idx[0], tmp);
|
|
return true;
|
|
}
|
|
|
|
static bool of_MOV_(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
/* This variant implements the general case that we know
|
|
neither the source nor the destination to be <4. Otherwise,
|
|
we copy all the bits manually. */
|
|
|
|
thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
|
|
thr_check_addr(thr, cp->bit_idx[1]+cp->number-1);
|
|
|
|
thr->bits4.mov(cp->bit_idx[0], cp->bit_idx[1], cp->number);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_MOV(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
if (cp->bit_idx[1] >= 4) {
|
|
cp->opcode = &of_MOV_;
|
|
return cp->opcode(thr, cp);
|
|
|
|
} else {
|
|
cp->opcode = &of_MOV1XZ_;
|
|
return cp->opcode(thr, cp);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_PAD(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t pad_bit;
|
|
if (cp->bit_idx[1] < 4)
|
|
pad_bit = thr_index_to_bit4[cp->bit_idx[1]];
|
|
else
|
|
pad_bit = thr->bits4.value(cp->bit_idx[1]);
|
|
|
|
thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
|
|
vvp_vector4_t tmp (cp->number, pad_bit);
|
|
thr->bits4.set_vec(cp->bit_idx[0], tmp);
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* %mov/wu <dst>, <src>
|
|
*/
|
|
bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned dst = cp->bit_idx[0];
|
|
unsigned src = cp->bit_idx[1];
|
|
|
|
thr->words[dst].w_uint = thr->words[src].w_uint;
|
|
return true;
|
|
}
|
|
|
|
bool of_MOVI(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned dst = cp->bit_idx[0];
|
|
static unsigned long val[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
unsigned wid = cp->number;
|
|
|
|
thr_check_addr(thr, dst+wid-1);
|
|
|
|
val[0] = cp->bit_idx[1];
|
|
|
|
while (wid > 0) {
|
|
unsigned trans = wid;
|
|
if (trans > 8*CPU_WORD_BITS)
|
|
trans = 8*CPU_WORD_BITS;
|
|
|
|
thr->bits4.setarray(dst, trans, val);
|
|
|
|
val[0] = 0;
|
|
wid -= trans;
|
|
dst += trans;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_MUL(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned adra = cp->bit_idx[0];
|
|
unsigned adrb = cp->bit_idx[1];
|
|
unsigned wid = cp->number;
|
|
|
|
assert(adra >= 4);
|
|
|
|
unsigned long*ap = vector_to_array(thr, adra, wid);
|
|
if (ap == 0) {
|
|
vvp_vector4_t tmp(wid, BIT4_X);
|
|
thr->bits4.set_vec(adra, tmp);
|
|
return true;
|
|
}
|
|
|
|
unsigned long*bp = vector_to_array(thr, adrb, wid);
|
|
if (bp == 0) {
|
|
delete[]ap;
|
|
vvp_vector4_t tmp(wid, BIT4_X);
|
|
thr->bits4.set_vec(adra, tmp);
|
|
return true;
|
|
}
|
|
|
|
// If the value fits in a single CPU word, then do it the easy way.
|
|
if (wid <= CPU_WORD_BITS) {
|
|
ap[0] *= bp[0];
|
|
thr->bits4.setarray(adra, wid, ap);
|
|
delete[]ap;
|
|
delete[]bp;
|
|
return true;
|
|
}
|
|
|
|
unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
|
|
unsigned long*res = new unsigned long[words];
|
|
for (unsigned idx = 0 ; idx < words ; idx += 1)
|
|
res[idx] = 0;
|
|
|
|
for (unsigned mul_a = 0 ; mul_a < words ; mul_a += 1) {
|
|
for (unsigned mul_b = 0 ; mul_b < (words-mul_a) ; mul_b += 1) {
|
|
unsigned long sum;
|
|
unsigned long tmp = multiply_with_carry(ap[mul_a], bp[mul_b], sum);
|
|
unsigned base = mul_a + mul_b;
|
|
unsigned long carry = 0;
|
|
res[base] = add_with_carry(res[base], tmp, carry);
|
|
for (unsigned add_idx = base+1; add_idx < words; add_idx += 1) {
|
|
res[add_idx] = add_with_carry(res[add_idx], sum, carry);
|
|
sum = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
thr->bits4.setarray(adra, wid, res);
|
|
delete[]ap;
|
|
delete[]bp;
|
|
delete[]res;
|
|
return true;
|
|
}
|
|
|
|
bool of_MUL_WR(vthread_t thr, vvp_code_t)
|
|
{
|
|
double r = thr->pop_real();
|
|
double l = thr->pop_real();
|
|
thr->push_real(l * r);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_MULI(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned adr = cp->bit_idx[0];
|
|
unsigned long imm = cp->bit_idx[1];
|
|
unsigned wid = cp->number;
|
|
|
|
assert(adr >= 4);
|
|
|
|
unsigned long*val = vector_to_array(thr, adr, wid);
|
|
// If there are X bits in the value, then return X.
|
|
if (val == 0) {
|
|
vvp_vector4_t tmp(cp->number, BIT4_X);
|
|
thr->bits4.set_vec(cp->bit_idx[0], tmp);
|
|
return true;
|
|
}
|
|
|
|
// If everything fits in a word, then do it the easy way.
|
|
if (wid <= CPU_WORD_BITS) {
|
|
val[0] *= imm;
|
|
thr->bits4.setarray(adr, wid, val);
|
|
delete[]val;
|
|
return true;
|
|
}
|
|
|
|
unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
|
|
unsigned long*res = new unsigned long[words];
|
|
|
|
multiply_array_imm(res, val, words, imm);
|
|
|
|
thr->bits4.setarray(adr, wid, res);
|
|
delete[]val;
|
|
delete[]res;
|
|
return true;
|
|
}
|
|
|
|
static bool of_NAND_wide(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
unsigned wid = cp->number;
|
|
|
|
vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
|
|
val &= vthread_bits_to_vector(thr, idx2, wid);
|
|
thr->bits4.set_vec(idx1, ~val);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool of_NAND_narrow(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
unsigned idx1 = cp->bit_idx[0];
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
unsigned wid = cp->number;
|
|
|
|
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
|
vvp_bit4_t lb = thr_get_bit(thr, idx1);
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2);
|
|
thr_put_bit(thr, idx1, ~(lb&rb));
|
|
idx1 += 1;
|
|
if (idx2 >= 4)
|
|
idx2 += 1;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_NAND(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
if (cp->number <= 4)
|
|
cp->opcode = &of_NAND_narrow;
|
|
else
|
|
cp->opcode = &of_NAND_wide;
|
|
|
|
return cp->opcode(thr, cp);
|
|
}
|
|
|
|
/*
 * %new/cobj <vpi_object>
 * This creates a new cobject (SystemVerilog class object) and pushes
 * it to the stack. The <vpi-object> is a __vpiHandle that is a
 * vpiClassDefn object that defines the item to be created.
 */
|
|
bool of_NEW_COBJ(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
const class_type*defn = dynamic_cast<const class_type*> (cp->handle);
|
|
assert(defn);
|
|
|
|
vvp_object_t tmp (new vvp_cobject(defn));
|
|
thr->push_object(tmp);
|
|
return true;
|
|
}
|
|
|
|
bool of_NEW_DARRAY(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
const char*text = cp->text;
|
|
size_t size = thr->words[cp->bit_idx[0]].w_int;
|
|
|
|
vvp_object_t obj;
|
|
if (strcmp(text,"b8") == 0) {
|
|
obj = new vvp_darray_atom<uint8_t>(size);
|
|
} else if (strcmp(text,"b16") == 0) {
|
|
obj = new vvp_darray_atom<uint16_t>(size);
|
|
} else if (strcmp(text,"b32") == 0) {
|
|
obj = new vvp_darray_atom<uint32_t>(size);
|
|
} else if (strcmp(text,"b64") == 0) {
|
|
obj = new vvp_darray_atom<uint64_t>(size);
|
|
} else if (strcmp(text,"sb8") == 0) {
|
|
obj = new vvp_darray_atom<int8_t>(size);
|
|
} else if (strcmp(text,"sb16") == 0) {
|
|
obj = new vvp_darray_atom<int16_t>(size);
|
|
} else if (strcmp(text,"sb32") == 0) {
|
|
obj = new vvp_darray_atom<int32_t>(size);
|
|
} else if (strcmp(text,"sb64") == 0) {
|
|
obj = new vvp_darray_atom<int64_t>(size);
|
|
} else if (strcmp(text,"r") == 0) {
|
|
obj = new vvp_darray_real(size);
|
|
} else if (strcmp(text,"S") == 0) {
|
|
obj = new vvp_darray_string(size);
|
|
} else {
|
|
obj = new vvp_darray (size);
|
|
}
|
|
|
|
thr->push_object(obj);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_NOOP(vthread_t, vvp_code_t)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bool of_NORR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_1;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_1) {
|
|
lb = BIT4_0;
|
|
break;
|
|
}
|
|
|
|
if (rb != BIT4_0)
|
|
lb = BIT4_X;
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Push a null to the object stack.
|
|
*/
|
|
bool of_NULL(vthread_t thr, vvp_code_t)
|
|
{
|
|
vvp_object_t tmp;
|
|
thr->push_object(tmp);
|
|
return true;
|
|
}
|
|
|
|
|
|
bool of_ANDR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_1;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_0) {
|
|
lb = BIT4_0;
|
|
break;
|
|
}
|
|
|
|
if (rb != BIT4_1)
|
|
lb = BIT4_X;
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_NANDR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_0;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_0) {
|
|
lb = BIT4_1;
|
|
break;
|
|
}
|
|
|
|
if (rb != BIT4_1)
|
|
lb = BIT4_X;
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_ORR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_0;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_1) {
|
|
lb = BIT4_1;
|
|
break;
|
|
}
|
|
|
|
if (rb != BIT4_0)
|
|
lb = BIT4_X;
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_XORR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_0;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_1)
|
|
lb = ~lb;
|
|
else if (rb != BIT4_0) {
|
|
lb = BIT4_X;
|
|
break;
|
|
}
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool of_XNORR(vthread_t thr, vvp_code_t cp)
|
|
{
|
|
assert(cp->bit_idx[0] >= 4);
|
|
|
|
vvp_bit4_t lb = BIT4_1;
|
|
unsigned idx2 = cp->bit_idx[1];
|
|
|
|
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
|
|
|
vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
|
|
if (rb == BIT4_1)
|
|
lb = ~lb;
|
|
else if (rb != BIT4_0) {
|
|
lb = BIT4_X;
|
|
break;
|
|
}
|
|
}
|
|
|
|
thr_put_bit(thr, cp->bit_idx[0], lb);
|
|
|
|
return true;
|
|
}
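
/*
 * The reduction opcodes above (of_ANDR, of_NANDR, of_ORR, of_NORR,
 * of_XORR, of_XNORR) all follow the same pattern: scan <number> bits
 * starting at bit_idx[1] and fold them into a single result bit at
 * bit_idx[0]. For illustration: reducing the bits 0,x,1 with of_ORR
 * gives 1 (a 1 short-circuits the scan), while reducing 1,x with
 * of_ANDR gives x (no 0 was seen, but not all bits were 1).
 */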

static bool of_OR_wide(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
      val |= vthread_bits_to_vector(thr, idx2, wid);
      thr->bits4.set_vec(idx1, val);

      return true;
}

static bool of_OR_narrow(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);
            thr_put_bit(thr, idx1, lb|rb);
            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}

bool of_OR(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      if (cp->number <= 4)
            cp->opcode = &of_OR_narrow;
      else
            cp->opcode = &of_OR_wide;

      return cp->opcode(thr, cp);
}
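
/*
 * Note the dispatch trick in of_OR (and of_NOR below): the first
 * time the instruction executes, the opcode pointer in the code word
 * is rewritten to the narrow or wide worker that fits the operand
 * width, so later executions of the same instruction dispatch
 * straight to the specialized worker and skip the width test. For
 * illustration, a 1-bit OR is patched to call of_OR_narrow directly
 * on every subsequent pass through the code.
 */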

static bool of_NOR_wide(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      vvp_vector4_t val = vthread_bits_to_vector(thr, idx1, wid);
      val |= vthread_bits_to_vector(thr, idx2, wid);
      thr->bits4.set_vec(idx1, ~val);

      return true;
}

static bool of_NOR_narrow(vthread_t thr, vvp_code_t cp)
{
      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];
      unsigned wid = cp->number;

      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);
            thr_put_bit(thr, idx1, ~(lb|rb));
            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}

bool of_NOR(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      if (cp->number <= 4)
            cp->opcode = &of_NOR_narrow;
      else
            cp->opcode = &of_NOR_wide;

      return cp->opcode(thr, cp);
}

/*
 * %pop/obj <number>
 */
bool of_POP_OBJ(vthread_t thr, vvp_code_t cp)
{
      unsigned cnt = cp->number;
      thr->pop_object(cnt);
      return true;
}

/*
 * %pop/real <number>
 */
bool of_POP_REAL(vthread_t thr, vvp_code_t cp)
{
      unsigned cnt = cp->number;
      for (unsigned idx = 0 ; idx < cnt ; idx += 1) {
            (void) thr->pop_real();
      }
      return true;
}

/*
 * %pop/str <number>
 */
bool of_POP_STR(vthread_t thr, vvp_code_t cp)
{
      unsigned cnt = cp->number;
      thr->pop_str(cnt);
      return true;
}
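
/*
 * The %pop/* opcodes above only discard values; they are emitted to
 * clean up after expressions whose results are not consumed. For
 * illustration, code that evaluates a string expression purely for
 * its side effects may be followed by a "%pop/str 1" to drop the
 * unused result from the string stack.
 */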

bool of_POW(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx = cp->bit_idx[0];
      unsigned idy = cp->bit_idx[1];
      unsigned wid = cp->number;
      vvp_vector2_t xv2 = vvp_vector2_t(vthread_bits_to_vector(thr, idx, wid));
      vvp_vector2_t yv2 = vvp_vector2_t(vthread_bits_to_vector(thr, idy, wid));

        /* If we have an X or Z in the arguments, return X. */
      if (xv2.is_NaN() || yv2.is_NaN()) {
            for (unsigned jdx = 0 ; jdx < wid ; jdx += 1)
                  thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_X);
            return true;
      }

        /* To make the result more manageable, trim off the extra bits. */
      xv2.trim();
      yv2.trim();

      vvp_vector2_t result = pow(xv2, yv2);

        /* If the result is too small, zero pad it. */
      if (result.size() < wid) {
            for (unsigned jdx = wid-1; jdx >= result.size(); jdx -= 1)
                  thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_0);
            wid = result.size();
      }

        /* Copy only what we need of the result. */
      for (unsigned jdx = 0; jdx < wid; jdx += 1)
            thr_put_bit(thr, cp->bit_idx[0]+jdx,
                        result.value(jdx) ? BIT4_1 : BIT4_0);

      return true;
}

bool of_POW_S(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx = cp->bit_idx[0];
      unsigned idy = cp->bit_idx[1];
      unsigned wid = cp->number;
      vvp_vector4_t xv = vthread_bits_to_vector(thr, idx, wid);
      vvp_vector4_t yv = vthread_bits_to_vector(thr, idy, wid);

        /* If we have an X or Z in the arguments, return X. */
      if (xv.has_xz() || yv.has_xz()) {
            for (unsigned jdx = 0 ; jdx < wid ; jdx += 1)
                  thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_X);
            return true;
      }

        /* Calculate the result using the double pow() function. */
      double xd, yd, resd;
      vector4_to_value(xv, xd, true);
      vector4_to_value(yv, yd, true);
        /* 2**-1 and -2**-1 are defined to be zero. */
      if ((yd == -1.0) && (fabs(xd) == 2.0)) resd = 0.0;
      else resd = pow(xd, yd);
      vvp_vector4_t res = vvp_vector4_t(wid, resd);

        /* Copy the result. */
      for (unsigned jdx = 0; jdx < wid; jdx += 1)
            thr_put_bit(thr, cp->bit_idx[0]+jdx, res.value(jdx));

      return true;
}
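
/*
 * For illustration of the signed case: of_POW_S computes 3**4 on a
 * 16-bit operand as pow(3.0, 4.0) == 81.0 and writes 81 back as a
 * 16-bit vector, while 2**-1 (and -2**-1) fall into the special case
 * above and produce 0, matching the integer semantics of the
 * language rather than the 0.5 that the double pow() would return.
 */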

bool of_POW_WR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();
      thr->push_real(pow(l,r));

      return true;
}

/*
 * %prop/r <pid>
 *
 * Load a real value from the cobject and push it onto the real value
 * stack.
 */
bool of_PROP_R(vthread_t thr, vvp_code_t cp)
{
      unsigned pid = cp->number;

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();

      double val = cobj->get_real(pid);
      thr->push_real(val);

      return true;
}

/*
 * %prop/str <pid>
 *
 * Load a string value from the cobject and push it onto the string
 * value stack.
 */
bool of_PROP_STR(vthread_t thr, vvp_code_t cp)
{
      unsigned pid = cp->number;

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();

      string val = cobj->get_string(pid);
      thr->push_str(val);

      return true;
}

/*
 * %prop/v <pid> <base> <wid>
 *
 * Load a property <id> from the cobject on the top of the stack into
 * the vector space at <base>.
 */
bool of_PROP_V(vthread_t thr, vvp_code_t cp)
{
      unsigned pid = cp->bit_idx[0];
      unsigned dst = cp->bit_idx[1];
      unsigned wid = cp->number;

      thr_check_addr(thr, dst+wid-1);
      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();

      vvp_vector4_t val;
      cobj->get_vec4(pid, val);

      if (val.size() > wid)
            val.resize(wid);

      thr->bits4.set_vec(dst, val);

      if (val.size() < wid) {
            for (unsigned idx = val.size() ; idx < wid ; idx += 1)
                  thr->bits4.set_bit(dst+idx, BIT4_X);
      }

      return true;
}
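
/*
 * None of the %prop/* opcodes above pop the object stack, so a
 * sequence of them can read several properties of the same cobject
 * back to back. Note also how %prop/v adjusts widths: a property
 * wider than <wid> is truncated, and a narrower one is padded with
 * 'bx in the upper thread bits. For illustration, loading a 4-bit
 * property into an 8-bit destination leaves bits 4-7 set to x.
 */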

bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp)
{
      double mant = cp->bit_idx[0];
      uint32_t imant = cp->bit_idx[0];
      int exp = cp->bit_idx[1];

        // Detect +infinity
      if (exp==0x3fff && imant==0) {
            thr->push_real(INFINITY);
            return true;
      }
        // Detect -infinity
      if (exp==0x7fff && imant==0) {
            thr->push_real(-INFINITY);
            return true;
      }
        // Detect NaN
      if (exp==0x3fff) {
            thr->push_real(nan(""));
            return true;
      }

      double sign = (exp & 0x4000)? -1.0 : 1.0;

      exp &= 0x1fff;

      mant = sign * ldexp(mant, exp - 0x1000);
      thr->push_real(mant);
      return true;
}
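
/*
 * For illustration of the immediate encoding above: outside the
 * special infinity/NaN patterns, the pushed value is
 * sign * mant * 2**(exp - 0x1000), with the sign carried in bit
 * 0x4000 of the exponent field. So an instruction carrying mant=1
 * and exp=0x0fff pushes 0.5, and mant=3 with exp=0x5001 pushes -6.0.
 */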

bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp)
{
      const char*text = cp->text;
      thr->push_str(string(text));
      return true;
}

bool of_PUSHV_STR(vthread_t thr, vvp_code_t cp)
{
      unsigned src = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

      vvp_vector4_t vec = vthread_bits_to_vector(thr, src, wid);
      size_t slen = (vec.size() + 7)/8;
      vector<char>buf;
      buf.reserve(slen);

      for (size_t idx = 0 ; idx < vec.size() ; idx += 8) {
            char tmp = 0;
            size_t trans = 8;
            if (idx+trans > vec.size())
                  trans = vec.size() - idx;

            for (size_t bdx = 0 ; bdx < trans ; bdx += 1) {
                  if (vec.value(idx+bdx) == BIT4_1)
                        tmp |= 1 << bdx;
            }

            if (tmp != 0)
                  buf.push_back(tmp);
      }

      string val;
      for (vector<char>::reverse_iterator cur = buf.rbegin()
                 ; cur != buf.rend() ; ++cur) {
            val.push_back(*cur);
      }

      thr->push_str(val);
      return true;
}
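
/*
 * For illustration: of_PUSHV_STR on a 16-bit vector holding 16'h4849
 * collects the low byte 0x49 ('I') first and the high byte 0x48
 * ('H') second, then reverses the buffer so the pushed string is
 * "HI". Bytes that come out as 8'h00 are skipped, so embedded nulls
 * simply vanish from the resulting string.
 */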

/*
 * %putc/str/v <var>, <muxr>, <base>
 */
bool of_PUTC_STR_V(vthread_t thr, vvp_code_t cp)
{
      unsigned muxr = cp->bit_idx[0];
      unsigned base = cp->bit_idx[1];

        /* The mux is the index into the string. If it is <0, then
           this operation cannot possibly affect the string, so we are
           done. */
      assert(muxr < 16);
      int32_t mux = thr->words[muxr].w_int;
      if (mux < 0)
            return true;

        /* Extract the character from the vector space. If that byte
           is null (8'h00) then there is nothing more to do. */
      unsigned long*tmp = vector_to_array(thr, base, 8);
      if (tmp == 0)
            return true;
      if (tmp[0] == 0)
            return true;

      char tmp_val = tmp[0]&0xff;

        /* Get the existing value of the string. If we find that the
           index is too big for the string, then give up. */
      vvp_net_t*net = cp->net;
      vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
      assert(fun);

      string val = fun->get_string();
      if (val.size() <= (size_t)mux)
            return true;

        /* If the value to write is the same as the destination, then
           stop now. */
      if (val[mux] == tmp_val)
            return true;

        /* Finally, modify the string and write the new string to the
           variable so that the new value propagates. */
      val[mux] = tmp_val;
      vvp_send_string(vvp_net_ptr_t(cp->net, 0), val, thr->wt_context);

      return true;
}
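
/*
 * For illustration, %putc/str/v implements assignments of the form
 * s[i] = c: the character index comes from the word register named
 * by <muxr> and the new character from the 8 thread bits at <base>.
 * The early returns above make the operation a no-op when the index
 * is negative or past the end of the string, when the byte is null,
 * or when the character is unchanged.
 */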

/*
 * These implement the %release/net and %release/reg instructions. The
 * %release/net instruction applies to a net kind of functor by
 * sending the release/net command to the command port. (See vvp_net.h
 * for details.) The %release/reg instruction is the same, but sends
 * the release/reg command instead. These are very similar to the
 * %deassign instruction.
 */
static bool do_release_vec(vvp_code_t cp, bool net_flag)
{
      vvp_net_t*net = cp->net;
      unsigned base = cp->bit_idx[0];
      unsigned width = cp->bit_idx[1];

      assert(net->fil);

      if (base >= net->fil->filter_size()) return true;
      if (base+width > net->fil->filter_size())
            width = net->fil->filter_size() - base;

      bool full_sig = base == 0 && width == net->fil->filter_size();

        // XXXX Can't really do this if this is a partial release?
      net->fil->force_unlink();

        /* Do we release all or part of the net? */
      vvp_net_ptr_t ptr (net, 0);
      if (full_sig) {
            net->fil->release(ptr, net_flag);
      } else {
            net->fil->release_pv(ptr, base, width, net_flag);
      }
      net->fun->force_flag();

      return true;
}

bool of_RELEASE_NET(vthread_t, vvp_code_t cp)
{
      return do_release_vec(cp, true);
}


bool of_RELEASE_REG(vthread_t, vvp_code_t cp)
{
      return do_release_vec(cp, false);
}

/* The type is 1 for registers and 0 for everything else. */
bool of_RELEASE_WR(vthread_t, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned type = cp->bit_idx[0];

      assert(net->fil);
      net->fil->force_unlink();

        // Send a command to this signal to unforce itself.
      vvp_net_ptr_t ptr (net, 0);
      net->fil->release(ptr, type==0);
      return true;
}

/*
 * This implements the "%set/av <label>, <bit>, <wid>" instruction. In
 * this case, the <label> is an array label, and the <bit> and <wid>
 * are the thread vector of a value to be written in.
 */
bool of_SET_AV(vthread_t thr, vvp_code_t cp)
{
      unsigned bit = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];
      unsigned off = thr->words[1].w_int;
      unsigned adr = thr->words[3].w_int;

        /* Make a vector of the desired width. */
      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      array_set_word(cp->array, adr, off, value);
      return true;
}
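
/*
 * %set/av takes its addressing from the index registers: word
 * register 3 holds the array address and word register 1 the part
 * offset within that word. For illustration, with words[3] == 4 and
 * words[1] == 8, an 8-bit thread vector is written into bits 8-15 of
 * element 4 of the addressed array.
 */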

/*
 * %set/dar <label>, <bit>, <wid>
 */
bool of_SET_DAR(vthread_t thr, vvp_code_t cp)
{
      unsigned bit = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];
      unsigned adr = thr->words[3].w_int;

        /* Make a vector of the desired width. */
      vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid);

      vvp_net_t*net = cp->net;
      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      vvp_darray*darray = obj->get_object().peek<vvp_darray>();
      assert(darray);

      darray->set_word(adr, value);
      return true;
}

/*
 * This implements the "%set/v <label>, <bit>, <wid>" instruction.
 *
 * The <label> is a reference to a vvp_net_t object, and it is in
 * cp->net.
 *
 * The <bit> is the thread bit address, and is in cp->bit_idx[0].
 *
 * The <wid> is the width of the vector I'm to make, and is in
 * cp->bit_idx[1].
 */
bool of_SET_VEC(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[1] > 0);
      unsigned bit = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

        /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);

      vvp_send_vec4(ptr, vthread_bits_to_vector(thr, bit, wid),
                    thr->wt_context);

      return true;
}


/*
 * Implement the %set/x instruction:
 *
 *      %set/x <functor>, <bit>, <wid>
 *
 * The bit values of a vector go into the addressed functor. Do not
 * transfer bits that are outside the signal range. Get the target
 * vector dimensions from the vvp_fun_signal addressed by the vvp_net
 * pointer.
 */
bool of_SET_X0(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned bit = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

        // Implicitly, we get the base into the target vector from the
        // X0 register.
      long index = thr->words[0].w_int;

      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
      assert(sig);

        // If the entire part is below the beginning of the vector,
        // then we are done.
      if (index < 0 && (wid <= (unsigned)-index))
            return true;

        // If the entire part is above the end of the vector, then we
        // are done.
      if (index >= (long)sig->value_size())
            return true;

        // If the part starts below the vector, then skip the first
        // few bits and reduce enough bits to start at the beginning
        // of the vector.
      if (index < 0) {
            if (bit >= 4) bit += (unsigned) -index;
            wid -= (unsigned) -index;
            index = 0;
      }

        // Reduce the width to keep the part inside the vector.
      if (index+wid > sig->value_size())
            wid = sig->value_size() - index;

      vvp_vector4_t bit_vec(wid);
      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
            vvp_bit4_t bit_val = thr_get_bit(thr, bit);
            bit_vec.set_bit(idx, bit_val);
            if (bit >= 4)
                  bit += 1;
      }

      vvp_net_ptr_t ptr (net, 0);
      vvp_send_vec4_pv(ptr, bit_vec, index, wid, sig->value_size(), thr->wt_context);

      return true;
}
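
/*
 * For illustration of the clipping in of_SET_X0: writing a 4-bit
 * part to an 8-bit signal with index register 0 holding -2 first
 * drops the two source bits that fall below the vector, then sends
 * the remaining two bits to target bits 0 and 1. A part that lies
 * entirely off either end of the signal is silently discarded.
 */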

bool of_SHIFTL_I0(vthread_t thr, vvp_code_t cp)
{
      int base = cp->bit_idx[0];
      int wid = cp->number;
      int shift = thr->words[0].w_int;

      assert(base >= 4);
      thr_check_addr(thr, base+wid-1);

      if (thr_get_bit(thr, 4) == BIT4_1) {
              // The result is 'bx if the shift amount is undefined.
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);

      } else if (shift >= wid) {
              // The shift is so far that the entire value is shifted
              // out. Write in a constant 0 result.
            vvp_vector4_t tmp (wid, BIT4_0);
            thr->bits4.set_vec(base, tmp);

      } else if (shift > 0) {
            vvp_vector4_t tmp (thr->bits4, base, wid-shift);
            thr->bits4.set_vec(base+shift, tmp);

              // Fill zeros on the bottom
            vvp_vector4_t fil (shift, BIT4_0);
            thr->bits4.set_vec(base, fil);

      } else if (shift <= -wid) {
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);

      } else if (shift < 0) {
              // For a negative shift we pad with 'bx.
            int idx;
            for (idx = 0 ; (idx-shift) < wid ; idx += 1) {
                  unsigned src = base + idx - shift;
                  unsigned dst = base + idx;
                  thr_put_bit(thr, dst, thr_get_bit(thr, src));
            }
            for ( ; idx < wid ; idx += 1)
                  thr_put_bit(thr, base+idx, BIT4_X);
      }
      return true;
}

/*
 * This is an unsigned right shift:
 *
 *    %shiftr/i0 <bit>, <wid>
 *
 * The vector at address <bit> with width <wid> is shifted right a
 * number of bits stored in index/word register 0.
 */
bool of_SHIFTR_I0(vthread_t thr, vvp_code_t cp)
{
      int base = cp->bit_idx[0];
      int wid = cp->number;
      int shift = thr->words[0].w_int;

      assert(base >= 4);
      thr_check_addr(thr, base+wid-1);

      if (thr_get_bit(thr, 4) == BIT4_1) {
              // The result is 'bx if the shift amount is undefined.
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);

      } else if (shift > wid) {
              // The shift is so far that the entire vector is shifted out.
            vvp_vector4_t tmp (wid, BIT4_0);
            thr->bits4.set_vec(base, tmp);

      } else if (shift > 0) {
              // The mov method should handle overlapped source/dest.
            thr->bits4.mov(base, base+shift, wid-shift);

            vvp_vector4_t tmp (shift, BIT4_0);
            thr->bits4.set_vec(base+wid-shift, tmp);

      } else if (shift < -wid) {
              // The negative shift is so far that the entire value is
              // shifted out. Write in a constant 'bx result.
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);

      } else if (shift < 0) {

              // For a negative shift we pad with 'bx.
            vvp_vector4_t tmp (thr->bits4, base, wid+shift);
            thr->bits4.set_vec(base-shift, tmp);

            vvp_vector4_t fil (-shift, BIT4_X);
            thr->bits4.set_vec(base, fil);
      }
      return true;
}

bool of_SHIFTR_S_I0(vthread_t thr, vvp_code_t cp)
{
      int base = cp->bit_idx[0];
      int wid = cp->number;
      int shift = thr->words[0].w_int;
      vvp_bit4_t sign = thr_get_bit(thr, base+wid-1);

      if (thr_get_bit(thr, 4) == BIT4_1) {
              // The result is 'bx if the shift amount is undefined.
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);
      } else if (shift >= wid) {
            for (int idx = 0 ; idx < wid ; idx += 1)
                  thr_put_bit(thr, base+idx, sign);

      } else if (shift > 0) {
            for (int idx = 0 ; idx < (wid-shift) ; idx += 1) {
                  unsigned src = base + idx + shift;
                  unsigned dst = base + idx;
                  thr_put_bit(thr, dst, thr_get_bit(thr, src));
            }
            for (int idx = (wid-shift) ; idx < wid ; idx += 1)
                  thr_put_bit(thr, base+idx, sign);

      } else if (shift < -wid) {
              // The negative shift is so far that the entire value is
              // shifted out. Write in a constant 'bx result.
            vvp_vector4_t tmp (wid, BIT4_X);
            thr->bits4.set_vec(base, tmp);

      } else if (shift < 0) {

              // For a negative shift we pad with 'bx.
            vvp_vector4_t tmp (thr->bits4, base, wid+shift);
            thr->bits4.set_vec(base-shift, tmp);

            vvp_vector4_t fil (-shift, BIT4_X);
            thr->bits4.set_vec(base, fil);
      }
      return true;
}
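
/*
 * A short worked example of the shift opcodes above, assuming an
 * 8-bit vector at the base address and index register 0 holding 3:
 * of_SHIFTL_I0 moves bits 0-4 up to bits 3-7 and fills bits 0-2 with
 * 0; the unsigned %shiftr/i0 moves bits 3-7 down to bits 0-4 and
 * zero-fills the top; of_SHIFTR_S_I0 instead fills the top with a
 * copy of the original sign bit (bit 7). If thread bit 4 (used here
 * as the shift-amount-undefined flag) is set, all three produce an
 * all-x result.
 */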

bool of_STORE_DAR_R(vthread_t thr, vvp_code_t cp)
{
      long adr = thr->words[3].w_int;

        // Pop the real value to be stored...
      double value = thr->pop_real();

      vvp_net_t*net = cp->net;
      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      vvp_darray*darray = obj->get_object().peek<vvp_darray>();
      assert(darray);

      darray->set_word(adr, value);
      return true;
}

/*
 * %store/dar/str <var>
 * In this case, <var> is the name of a dynamic array. Signed index
 * register 3 contains the index into the dynamic array.
 */
bool of_STORE_DAR_STR(vthread_t thr, vvp_code_t cp)
{
      long adr = thr->words[3].w_int;

        // Pop the string to be stored...
      string value = thr->pop_str();

      vvp_net_t*net = cp->net;
      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      vvp_darray*darray = obj->get_object().peek<vvp_darray>();
      assert(darray);

      darray->set_word(adr, value);
      return true;
}


bool of_STORE_OBJ(vthread_t thr, vvp_code_t cp)
{
        /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);

      vvp_object_t val;
      thr->pop_object(val);

      vvp_send_object(ptr, val, thr->wt_context);

      return true;
}

/*
 * %store/prop/r <id>
 *
 * Pop a real value from the real stack, and store the value into the
 * property of the object referenced by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_R(vthread_t thr, vvp_code_t cp)
{
      size_t pid = cp->number;
      double val = thr->pop_real();

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      cobj->set_real(pid, val);

      return true;
}

/*
 * %store/prop/str <id>
 *
 * Pop a string value from the string stack, and store the value into
 * the property of the object referenced by the top of the stack. Do
 * NOT pop the object stack.
 */
bool of_STORE_PROP_STR(vthread_t thr, vvp_code_t cp)
{
      size_t pid = cp->number;
      string val = thr->pop_str();

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      cobj->set_string(pid, val);

      return true;
}

/*
 * %store/prop/v <id> <base> <wid>
 *
 * Store vector value into property <id> of cobject in the top of the stack.
 */
bool of_STORE_PROP_V(vthread_t thr, vvp_code_t cp)
{
      size_t pid = cp->bit_idx[0];
      unsigned src = cp->bit_idx[1];
      unsigned wid = cp->number;

      vvp_vector4_t val = vthread_bits_to_vector(thr, src, wid);

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      cobj->set_vec4(pid, val);
      return true;
}

bool of_STORE_REAL(vthread_t thr, vvp_code_t cp)
{
      double val = thr->pop_real();
        /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);
      vvp_send_real(ptr, val, thr->wt_context);

      return true;
}

/*
 * %store/reala <var-label> <index>
 */
bool of_STORE_REALA(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;

      double val = thr->pop_real();
      array_set_word(cp->array, adr, val);

      return true;
}

bool of_STORE_STR(vthread_t thr, vvp_code_t cp)
{
        /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);

      string val = thr->pop_str();
      vvp_send_string(ptr, val, thr->wt_context);

      return true;
}

/*
 * %store/stra <array-label> <index>
 */
bool of_STORE_STRA(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;

      string val = thr->pop_str();
      array_set_word(cp->array, adr, val);

      return true;
}


bool of_SUB(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number);
      unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number);
      if (lva == 0 || lvb == 0)
            goto x_out;


      unsigned long carry;
      carry = 1;
      for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1)
            lva[idx] = add_with_carry(lva[idx], ~lvb[idx], carry);


        /* We know from the vector_to_array that the address is valid
           in the thr->bits4 vector, so just do the set bit. */

      thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
      delete[]lva;
      delete[]lvb;

      return true;

 x_out:
      delete[]lva;
      delete[]lvb;

      vvp_vector4_t tmp(cp->number, BIT4_X);
      thr->bits4.set_vec(cp->bit_idx[0], tmp);

      return true;
}
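
/*
 * of_SUB (and of_SUBI below) subtract in two's complement form:
 * a - b is computed word by word as a + ~b with an initial carry of
 * 1. For illustration, on a 4-bit value 5 - 3 becomes
 * 0101 + 1100 + 1 = 0010, i.e. 2, with the carry propagating across
 * CPU words for vectors wider than one machine word.
 */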

bool of_SUB_WR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();
      thr->push_real(l - r);
      return true;
}

bool of_SUBI(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned word_count = (cp->number+CPU_WORD_BITS-1)/CPU_WORD_BITS;
      unsigned long imm = cp->bit_idx[1];
      unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number);
      if (lva == 0)
            goto x_out;


      unsigned long carry;
      carry = 1;
      for (unsigned idx = 0 ; idx < word_count ; idx += 1) {
            lva[idx] = add_with_carry(lva[idx], ~imm, carry);
            imm = 0UL;
      }

        /* We know from the vector_to_array that the address is valid
           in the thr->bits4 vector, so just do the set bit. */

      thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);

      delete[]lva;

      return true;

 x_out:
      delete[]lva;

      vvp_vector4_t tmp(cp->number, BIT4_X);
      thr->bits4.set_vec(cp->bit_idx[0], tmp);

      return true;
}

/*
 * %substr <first>, <last>
 * Pop a string, take the substring (SystemVerilog style), and return
 * the result to the stack. This opcode actually works by editing the
 * string in place.
 */
bool of_SUBSTR(vthread_t thr, vvp_code_t cp)
{
      int32_t first = thr->words[cp->bit_idx[0]].w_int;
      int32_t last = thr->words[cp->bit_idx[1]].w_int;
      string&val = thr->peek_str(0);

      if (first < 0 || last < first || last >= (int32_t)val.size()) {
            val = string("");
            return true;
      }

      val = val.substr(first, last-first+1);
      return true;
}
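
/*
 * For illustration of the %substr bounds: with the string "monkeys"
 * on top of the stack, first==1 and last==3 yield "onk" (an
 * inclusive range, hence the last-first+1 length). Any out-of-range
 * request, such as a negative first or a last past the end of the
 * string, replaces the value with the empty string.
 */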

/*
 * %substr/v <bitl>, <index>, <wid>
 */
bool of_SUBSTR_V(vthread_t thr, vvp_code_t cp)
{
      string&val = thr->peek_str(0);
      uint32_t bitl = cp->bit_idx[0];
      uint32_t sel = cp->bit_idx[1];
      unsigned wid = cp->number;

      thr_check_addr(thr, bitl+wid);
      assert(bitl >= 4);

      int32_t use_sel = thr->words[sel].w_int;

      vvp_vector4_t tmp (8);
      unsigned char_count = wid/8;
      for (unsigned idx = 0 ; idx < char_count ; idx += 1) {
            unsigned long byte;
            if (use_sel < 0)
                  byte = 0x00;
            else if ((size_t)use_sel >= val.size())
                  byte = 0x00;
            else
                  byte = val[use_sel];

            thr->bits4.setarray(bitl, 8, &byte);
            bitl += 8;
            use_sel += 1;
      }

      return true;
}
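
/*
 * For illustration: %substr/v with a <wid> of 16 copies two
 * characters of the string on the stack, starting at the character
 * index held in the selected word register, into the thread bits at
 * <bitl>. Characters requested from before the start or past the end
 * of the string come back as 8'h00.
 */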

bool of_FILE_LINE(vthread_t, vvp_code_t cp)
{
      if (show_file_line) {
            vpiHandle handle = cp->handle;
            cerr << vpi_get_str(vpiFile, handle) << ":"
                 << vpi_get(vpiLineNo, handle) << ": ";
            cerr << vpi_get_str(_vpiDescription, handle);
            cerr << endl;
      }
      return true;
}

/*
 * %test_nul <var-label>;
 */
bool of_TEST_NUL(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;

      assert(net);
      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      if (obj->get_object().test_nil())
            thr_put_bit(thr, 4, BIT4_1);
      else
            thr_put_bit(thr, 4, BIT4_0);

      return true;
}

bool of_VPI_CALL(vthread_t thr, vvp_code_t cp)
{
      vpip_execute_vpi_call(thr, cp->handle);

      if (schedule_stopped()) {
            if (! schedule_finished())
                  schedule_vthread(thr, 0, false);

            return false;
      }

      return schedule_finished()? false : true;
}

/* %wait <label>;
 * Implement the wait by locating the vvp_net_t for the event, and
 * adding this thread to the threads list for the event. The <label>
 * argument is the reference to the functor to wait for. This must be
 * an event object of some sort.
 */
bool of_WAIT(vthread_t thr, vvp_code_t cp)
{
      assert(! thr->waiting_for_event);
      thr->waiting_for_event = 1;

        /* Add this thread to the list in the event. */
      waitable_hooks_s*ep = dynamic_cast<waitable_hooks_s*> (cp->net->fun);
      assert(ep);
      thr->wait_next = ep->add_waiting_thread(thr);

        /* Return false to suspend this thread. */
      return false;
}


bool of_XNOR(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {

            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);
            thr_put_bit(thr, idx1, ~(lb ^ rb));

            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}


bool of_XOR(vthread_t thr, vvp_code_t cp)
{
      assert(cp->bit_idx[0] >= 4);

      unsigned idx1 = cp->bit_idx[0];
      unsigned idx2 = cp->bit_idx[1];

      for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {

            vvp_bit4_t lb = thr_get_bit(thr, idx1);
            vvp_bit4_t rb = thr_get_bit(thr, idx2);

            if ((lb == BIT4_1) && (rb == BIT4_1)) {
                  thr_put_bit(thr, idx1, BIT4_0);

            } else if ((lb == BIT4_0) && (rb == BIT4_0)) {
                  thr_put_bit(thr, idx1, BIT4_0);

            } else if ((lb == BIT4_1) && (rb == BIT4_0)) {
                  thr_put_bit(thr, idx1, BIT4_1);

            } else if ((lb == BIT4_0) && (rb == BIT4_1)) {
                  thr_put_bit(thr, idx1, BIT4_1);

            } else {
                  thr_put_bit(thr, idx1, BIT4_X);
            }

            idx1 += 1;
            if (idx2 >= 4)
                  idx2 += 1;
      }

      return true;
}


bool of_ZOMBIE(vthread_t thr, vvp_code_t)
{
      thr->pc = codespace_null();
      if ((thr->parent == 0) && (thr->children.empty())) {
            if (thr->delay_delete)
                  schedule_del_thr(thr);
            else
                  vthread_delete(thr);
      }
      return false;
}

/*
 * This is a phantom opcode used to call user defined functions. It
 * is used in code generated by the .ufunc statement. It contains a
 * pointer to the executable code of the function and a pointer to
 * a ufunc_core object that has all the port information about the
 * function.
 */
bool of_EXEC_UFUNC(vthread_t thr, vvp_code_t cp)
{
      struct __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

      assert(thr->children.empty());

        /* We can take a number of shortcuts because we know that a
           continuous assignment can only occur in a static scope. */
      assert(thr->wt_context == 0);
      assert(thr->rd_context == 0);

        /* If an automatic function, allocate a context for this call. */
      vvp_context_t child_context = 0;
      if (child_scope->is_automatic) {
            child_context = vthread_alloc_context(child_scope);
            thr->wt_context = child_context;
            thr->rd_context = child_context;
      }
        /* Copy all the inputs to the ufunc object to the port
           variables of the function. This copies all the values
           atomically. */
      cp->ufunc_core_ptr->assign_bits_to_ports(child_context);

        /* Create a temporary thread and run it immediately. A function
           may not contain any blocking statements, so vthread_run() can
           only return when the %end opcode is reached. */
      vthread_t child = vthread_new(cp->cptr, child_scope);
      child->wt_context = child_context;
      child->rd_context = child_context;
      child->is_scheduled = 1;
      vthread_run(child);
      running_thread = thr;

        /* Now copy the output from the result variable to the output
           ports of the .ufunc device. */
      cp->ufunc_core_ptr->finish_thread();

        /* If an automatic function, free the context for this call. */
      if (child_scope->is_automatic) {
            vthread_free_context(child_context, child_scope);
            thr->wt_context = 0;
            thr->rd_context = 0;
      }

      return true;
}