/* * Copyright (c) 2001-2008 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU * General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ # include "config.h" # include "vthread.h" # include "codes.h" # include "schedule.h" # include "ufunc.h" # include "event.h" # include "vpi_priv.h" #ifdef HAVE_MALLOC_H # include #endif # include # include # include # include # include # include # include #include /* This is the size of an unsigned long in bits. This is just a convenience macro. */ # define CPU_WORD_BITS (8*sizeof(unsigned long)) # define TOP_BIT (1UL << (CPU_WORD_BITS-1)) /* * This vhtread_s structure describes all there is to know about a * thread, including its program counter, all the private bits it * holds, and its place in other lists. * * * ** Notes On The Interactions of %fork/%join/%end: * * The %fork instruction creates a new thread and pushes that onto the * stack of children for the thread. This new thread, then, becomes * the new direct descendant of the thread. This new thread is * therefore also the first thread to be reaped when the parent does a * %join. * * It is a programming error for a thread that created threads to not * %join as many as it created before it %ends. The linear stack for * tracking thread relationships will create a mess otherwise. 
For
 * example, if A creates B then C, the stack is:
 *
 *       A --> C --> B
 *
 * If C then %forks X, the stack is:
 *
 *       A --> C --> X --> B
 *
 * If C %ends without a join, then the stack is:
 *
 *       A --> C(zombie) --> X --> B
 *
 * If A then executes 2 %joins, it will reap C and X (when it ends)
 * leaving B in purgatory. What's worse, A will block on the schedules
 * of X and C instead of C and B, possibly creating incorrect timing.
 *
 * The schedule_parent_on_end flag is used by threads to tell their
 * children that they are waiting for it to end. It is set by a %join
 * instruction if the child is not already done. The thread that
 * executes a %join instruction sets the flag in its child.
 *
 * The i_have_ended flag, on the other hand, is used by threads to
 * tell their parents that they are already dead. A thread that
 * executes %end will set its own i_have_ended flag and let its parent
 * reap it when the parent does the %join. If a thread has its
 * schedule_parent_on_end flag set already when it %ends, then it
 * reaps itself and simply schedules its parent. If a child has its
 * i_have_ended flag set when a thread executes %join, then it is free
 * to reap the child immediately.
 */

struct vthread_s {
        /* This is the program counter. */
        /* vthread_run() reads the instruction at pc and advances pc by
           one before calling the opcode implementation. */
      vvp_code_t pc;
        /* These hold the private thread bits. */
        /* vthread_new() sizes this to 32 bits and stores the constants
           0, 1, x and z at addresses 0-3. thr_check_addr() grows the
           vector when a higher address is written. */
      vvp_vector4_t bits4;
        /* These are the word registers. */
        /* Sixteen registers; each one can be viewed as a signed
           integer, an unsigned integer or a real value. */
      union {
            int64_t  w_int;
            uint64_t w_uint;
            double   w_real;
      } words[16];

        /* My parent sets this when it wants me to wake it up. */
      unsigned schedule_parent_on_end :1;
        /* Set by the thread itself at %end so that the parent can
           reap it when it reaches the matching %join. */
      unsigned i_have_ended      :1;
        /* vthread_schedule_list() expects this to be set on every
           thread in an event wait list and clears it on wake-up. */
      unsigned waiting_for_event :1;
        /* Set by vthread_mark_scheduled(), cleared by vthread_run()
           just before the thread executes. */
      unsigned is_scheduled      :1;
        /* NOTE(review): presumably counts outstanding %fork children;
           vthread_reap() asserts it is 0 before deleting -- confirm
           against the %fork/%join implementations. */
      unsigned fork_count        :8;
        /* This points to the sole child of the thread. */
      struct vthread_s*child;
        /* This points to my parent, if I have one. */
      struct vthread_s*parent;
        /* This is used for keeping wait queues. */
        /* vthread_run() and vthread_mark_scheduled() follow this link
           to process a whole list of threads. */
      struct vthread_s*wait_next;
        /* These are used to keep the thread in a scope.
*/ struct vthread_s*scope_next, *scope_prev; }; // this table maps the thread special index bit addresses to // vvp_bit4_t bit values. static vvp_bit4_t thr_index_to_bit4[4] = { BIT4_0, BIT4_1, BIT4_X, BIT4_Z }; static inline void thr_check_addr(struct vthread_s*thr, unsigned addr) { if (thr->bits4.size() <= addr) thr->bits4.resize(addr+1); } static inline vvp_bit4_t thr_get_bit(struct vthread_s*thr, unsigned addr) { assert(addr < thr->bits4.size()); return thr->bits4.value(addr); } static inline void thr_put_bit(struct vthread_s*thr, unsigned addr, vvp_bit4_t val) { thr_check_addr(thr, addr); thr->bits4.set_bit(addr, val); } // REMOVE ME static inline void thr_clr_bit_(struct vthread_s*thr, unsigned addr) { thr->bits4.set_bit(addr, BIT4_0); } vvp_bit4_t vthread_get_bit(struct vthread_s*thr, unsigned addr) { return thr_get_bit(thr, addr); } void vthread_put_bit(struct vthread_s*thr, unsigned addr, vvp_bit4_t bit) { thr_put_bit(thr, addr, bit); } double vthread_get_real(struct vthread_s*thr, unsigned addr) { return thr->words[addr].w_real; } void vthread_put_real(struct vthread_s*thr, unsigned addr, double val) { thr->words[addr].w_real = val; } static unsigned long* vector_to_array(struct vthread_s*thr, unsigned addr, unsigned wid) { if (addr == 0) { unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = 0; return val; } if (addr == 1) { unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = -1UL; return val; } if (addr < 4) return 0; return thr->bits4.subarray(addr, wid); } /* * This function gets from the thread a vector of bits starting from * the addressed location and for the specified width. */ static vvp_vector4_t vthread_bits_to_vector(struct vthread_s*thr, unsigned bit, unsigned wid) { /* Make a vector of the desired width. 
*/ if (bit >= 4) { return vvp_vector4_t(thr->bits4, bit, wid); } else { vvp_vector4_t value(wid); vvp_bit4_t bit_val = thr_index_to_bit4[bit]; for (unsigned idx = 0; idx < wid; idx +=1) { value.set_bit(idx, bit_val); } return value; } } /* * Create a new thread with the given start address. */ vthread_t vthread_new(vvp_code_t pc, struct __vpiScope*scope) { vthread_t thr = new struct vthread_s; thr->pc = pc; thr->bits4 = vvp_vector4_t(32); thr->child = 0; thr->parent = 0; thr->wait_next = 0; /* If the target scope never held a thread, then create a header cell for it. This is a stub to make circular lists easier to work with. */ if (scope->threads == 0) { scope->threads = new struct vthread_s; scope->threads->pc = codespace_null(); scope->threads->bits4 = vvp_vector4_t(); scope->threads->child = 0; scope->threads->parent = 0; scope->threads->scope_prev = scope->threads; scope->threads->scope_next = scope->threads; } { vthread_t tmp = scope->threads; thr->scope_next = tmp->scope_next; thr->scope_prev = tmp; thr->scope_next->scope_prev = thr; thr->scope_prev->scope_next = thr; } thr->schedule_parent_on_end = 0; thr->is_scheduled = 0; thr->i_have_ended = 0; thr->waiting_for_event = 0; thr->is_scheduled = 0; thr->fork_count = 0; thr_put_bit(thr, 0, BIT4_0); thr_put_bit(thr, 1, BIT4_1); thr_put_bit(thr, 2, BIT4_X); thr_put_bit(thr, 3, BIT4_Z); return thr; } /* * Reaping pulls the thread out of the stack of threads. If I have a * child, then hand it over to my parent. */ static void vthread_reap(vthread_t thr) { if (thr->child) { assert(thr->child->parent == thr); thr->child->parent = thr->parent; } if (thr->parent) { assert(thr->parent->child == thr); thr->parent->child = thr->child; } thr->child = 0; thr->parent = 0; thr->scope_next->scope_prev = thr->scope_prev; thr->scope_prev->scope_next = thr->scope_next; thr->pc = codespace_null(); /* If this thread is not scheduled, then is it safe to delete it now. 
Otherwise, let the schedule event (which will execute the thread at of_ZOMBIE) delete the object. */ if ((thr->is_scheduled == 0) && (thr->waiting_for_event == 0)) { assert(thr->fork_count == 0); assert(thr->wait_next == 0); schedule_del_thr(thr); } } void vthread_delete(vthread_t thr) { thr->bits4 = vvp_vector4_t(); delete thr; } void vthread_mark_scheduled(vthread_t thr) { while (thr != 0) { assert(thr->is_scheduled == 0); thr->is_scheduled = 1; thr = thr->wait_next; } } /* * This function runs each thread by fetching an instruction, * incrementing the PC, and executing the instruction. The thread may * be the head of a list, so each thread is run so far as possible. */ void vthread_run(vthread_t thr) { while (thr != 0) { vthread_t tmp = thr->wait_next; thr->wait_next = 0; assert(thr->is_scheduled); thr->is_scheduled = 0; for (;;) { vvp_code_t cp = thr->pc; thr->pc += 1; /* Run the opcode implementation. If the execution of the opcode returns false, then the thread is meant to be paused, so break out of the loop. */ bool rc = (cp->opcode)(thr, cp); if (rc == false) break; } thr = tmp; } } /* * Unlink a ptr object from the driver. The input is the driver in the * form of a vvp_net_t pointer. The .out member of that object is the * driver. The dst_ptr argument is the receiver pin to be located and * removed from the fan-out list. */ static void unlink_from_driver(vvp_net_t*src, vvp_net_ptr_t dst_ptr) { vvp_net_t*net = dst_ptr.ptr(); unsigned net_port = dst_ptr.port(); if (src->out == dst_ptr) { /* If the drive fan-out list starts with this pointer, then the unlink is easy. Pull the list forward. */ src->out = net->port[net_port]; } else { /* Scan the linked list, looking for the net_ptr_t pointer *before* the one we wish to remove. 
*/ vvp_net_ptr_t cur = src->out; assert(!cur.nil()); vvp_net_t*cur_net = cur.ptr(); unsigned cur_port = cur.port(); while (cur_net->port[cur_port] != dst_ptr) { cur = cur_net->port[cur_port]; assert(!cur.nil()); cur_net = cur.ptr(); cur_port = cur.port(); } /* Unlink. */ cur_net->port[cur_port] = net->port[net_port]; } net->port[net_port] = vvp_net_ptr_t(0,0); } /* * The CHUNK_LINK instruction is a specla next pointer for linking * chunks of code space. It's like a simplified %jmp. */ bool of_CHUNK_LINK(vthread_t thr, vvp_code_t code) { assert(code->cptr); thr->pc = code->cptr; return true; } /* * This is called by an event functor to wake up all the threads on * its list. I in fact created that list in the %wait instruction, and * I also am certain that the waiting_for_event flag is set. */ void vthread_schedule_list(vthread_t thr) { for (vthread_t cur = thr ; cur ; cur = cur->wait_next) { assert(cur->waiting_for_event); cur->waiting_for_event = 0; } schedule_vthread(thr, 0); } bool of_ABS_WR(vthread_t thr, vvp_code_t cp) { unsigned dst = cp->bit_idx[0]; unsigned src = cp->bit_idx[1]; thr->words[dst].w_real = fabs(thr->words[src].w_real); return true; } bool of_AND(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { vvp_bit4_t lb = thr_get_bit(thr, idx1); vvp_bit4_t rb = thr_get_bit(thr, idx2); thr_put_bit(thr, idx1, lb & rb); idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_ADD(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number); if (lva == 0 || lvb == 0) goto x_out; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp < lvb[idx]) carry = 
1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } /* We know from the vector_to_array that the address is valid in the thr->bitr4 vector, so just do the set bit. */ thr->bits4.setarray(cp->bit_idx[0], cp->number, lva); delete[]lva; delete[]lvb; return true; x_out: delete[]lva; delete[]lvb; vvp_vector4_t tmp(cp->number, BIT4_X); thr->bits4.set_vec(cp->bit_idx[0], tmp); return true; } bool of_ADD_WR(vthread_t thr, vvp_code_t cp) { double l = thr->words[cp->bit_idx[0]].w_real; double r = thr->words[cp->bit_idx[1]].w_real; thr->words[cp->bit_idx[0]].w_real = l + r; return true; } /* * This is %addi, add-immediate. The first value is a vector, the * second value is the immediate value in the bin_idx[1] position. The * immediate value can be up to 16 bits, which are then padded to the * width of the vector with zero. */ bool of_ADDI(vthread_t thr, vvp_code_t cp) { // Collect arguments unsigned bit_addr = cp->bit_idx[0]; unsigned long imm_value = cp->bit_idx[1]; unsigned bit_width = cp->number; assert(bit_addr >= 4); unsigned word_count = (bit_width+CPU_WORD_BITS-1)/CPU_WORD_BITS; unsigned long*lva = vector_to_array(thr, bit_addr, bit_width); unsigned long*lvb = 0; if (lva == 0) goto x_out; lvb = new unsigned long[word_count]; lvb[0] = imm_value; for (unsigned idx = 1 ; idx < word_count ; idx += 1) lvb[idx] = 0; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < bit_width ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp < lvb[idx]) carry = 1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } /* We know from the vector_to_array that the address is valid in the thr->bitr4 vector, so just do the set bit. 
*/ thr->bits4.setarray(bit_addr, bit_width, lva); delete[]lva; delete[]lvb; return true; x_out: delete[]lva; vvp_vector4_t tmp (bit_width, BIT4_X); thr->bits4.set_vec(bit_addr, tmp); return true; } /* %assign/av , , * This generates an assignment event to an array. Index register 0 * contains the width of the vector (and the word) and index register * 3 contains the canonical address of the word in memory. */ bool of_ASSIGN_AV(vthread_t thr, vvp_code_t cp) { unsigned wid = thr->words[0].w_int; unsigned off = thr->words[1].w_int; unsigned adr = thr->words[3].w_int; assert(wid > 0); unsigned delay = cp->bit_idx[0]; unsigned bit = cp->bit_idx[1]; vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid); schedule_assign_array_word(cp->array, adr, off, value, delay); return true; } /* %assign/av/d , , * This generates an assignment event to an array. Index register 0 * contains the width of the vector (and the word) and index register * 3 contains the canonical address of the word in memory. The named * index register contains the delay. */ bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t cp) { unsigned wid = thr->words[0].w_int; unsigned off = thr->words[1].w_int; unsigned adr = thr->words[3].w_int; assert(wid > 0); unsigned long delay = thr->words[cp->bit_idx[0]].w_int; unsigned bit = cp->bit_idx[1]; vvp_vector4_t value = vthread_bits_to_vector(thr, bit, wid); schedule_assign_array_word(cp->array, adr, off, value, delay); return true; } /* * This is %assign/v0