/* * Copyright (c) 2001-2002 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU * General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #ifdef HAVE_CVS_IDENT #ident "$Id: vthread.cc,v 1.93 2002/11/08 04:59:58 steve Exp $" #endif # include "vthread.h" # include "codes.h" # include "debug.h" # include "schedule.h" # include "functor.h" # include "ufunc.h" # include "event.h" # include "vpi_priv.h" #ifdef HAVE_MALLOC_H # include #endif # include # include # include # include #include /* This is the size of an unsigned long in bits. This is just a convenience macro. */ # define CPU_WORD_BITS (8*sizeof(unsigned long)) # define TOP_BIT (1UL << (CPU_WORD_BITS-1)) /* * This vhtread_s structure describes all there is to know about a * thread, including its program counter, all the private bits it * holds, and its place in other lists. * * * ** Notes On The Interactions of %fork/%join/%end: * * The %fork instruction creates a new thread and pushes that onto the * stack of children for the thread. This new thread, then, becomes * the new direct descendent of the thread. This new thread is * therefore also the first thread to be reaped when the parent does a * %join. * * It is a programming error for a thread that created threads to not * %join as many as it created before it %ends. The linear stack for * tracking thread relationships will create a mess otherwise. For * example, if A creates B then C, the stack is: * * A --> C --> B * * If C then %forks X, the stack is: * * A --> C --> X --> B * * If C %ends without a join, then the stack is: * * A --> C(zombie) --> X --> B * * If A then executes 2 %joins, it will reap C and X (when it ends) * leaving B in purgatory. What's worse, A will block on the schedules * of X and C instead of C and B, possibly creating incorrect timing. * * The schedule_parent_on_end flag is used by threads to tell their * children that they are waiting for it to end. It is set by a %join * instruction if the child is not already done. The thread that * executes a %join instruction sets the flag in its child. * * The i_have_ended flag, on the other hand, is used by threads to * tell their parents that they are already dead. A thread that * executes %end will set its own i_have_ended flag and let its parent * reap it when the parent does the %join. If a thread has its * schedule_parent_on_end flag set already when it %ends, then it * reaps itself and simply schedules its parent. If a child has its * i_have_ended flag set when a thread executes %join, then it is free * to reap the child immediately. */ struct vthread_s { /* This is the program counter. */ unsigned long pc; /* These hold the private thread bits. */ unsigned long *bits; long index[4]; unsigned nbits :16; /* My parent sets this when it wants me to wake it up. */ unsigned schedule_parent_on_end :1; unsigned i_have_ended :1; unsigned waiting_for_event :1; unsigned is_scheduled :1; unsigned fork_count :8; /* This points to the sole child of the thread. */ struct vthread_s*child; /* This points to my parent, if I have one. */ struct vthread_s*parent; /* This is used for keeping wait queues. */ struct vthread_s*wait_next; /* These are used to keep the thread in a scope. */ struct vthread_s*scope_next, *scope_prev; }; #if SIZEOF_UNSIGNED_LONG == 8 # define THR_BITS_INIT 0xaaaaaaaaaaaaaaaaUL #else # define THR_BITS_INIT 0xaaaaaaaaUL #endif static void thr_check_addr(struct vthread_s*thr, unsigned addr) { assert(addr < 0x10000); while (thr->nbits <= addr) { unsigned word_cnt = thr->nbits/(CPU_WORD_BITS/2) + 1; thr->bits = (unsigned long*) realloc(thr->bits, word_cnt*sizeof(unsigned long)); thr->bits[word_cnt-1] = THR_BITS_INIT; thr->nbits = word_cnt * (CPU_WORD_BITS/2); } } static inline unsigned thr_get_bit(struct vthread_s*thr, unsigned addr) { assert(addr < thr->nbits); unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); return (thr->bits[addr] >> (idx*2)) & 3UL; } static inline void thr_put_bit(struct vthread_s*thr, unsigned addr, unsigned val) { if (addr >= thr->nbits) thr_check_addr(thr, addr); unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); unsigned long mask = 3UL << (idx*2); unsigned long tmp = val; thr->bits[addr] = (thr->bits[addr] & ~mask) | (tmp << (idx*2)); } static inline void thr_clr_bit_(struct vthread_s*thr, unsigned addr) { unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); unsigned long mask = 3UL << (idx*2); thr->bits[addr] &= ~mask; } unsigned vthread_get_bit(struct vthread_s*thr, unsigned addr) { return thr_get_bit(thr, addr); } void vthread_put_bit(struct vthread_s*thr, unsigned addr, unsigned bit) { thr_put_bit(thr, addr, bit); } static unsigned long* vector_to_array(struct vthread_s*thr, unsigned addr, unsigned wid) { unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = 0; for (unsigned idx = 0 ; idx < wid ; idx += 1) { unsigned long bit = thr_get_bit(thr, addr); if (bit & 2) goto x_out; val[idx/CPU_WORD_BITS] |= bit << (idx % CPU_WORD_BITS); if (addr >= 4) addr += 1; } return val; x_out: delete[]val; return 0; } /* * Create a new thread with the given start address. */ vthread_t vthread_new(unsigned long pc, struct __vpiScope*scope) { vthread_t thr = new struct vthread_s; thr->pc = pc; thr->bits = (unsigned long*)malloc(4 * sizeof(unsigned long)); thr->nbits = 4 * (CPU_WORD_BITS/2); thr->child = 0; thr->parent = 0; thr->wait_next = 0; /* If the target scope never held a thread, then create a header cell for it. This is a stub to make circular lists easier to work with. */ if (scope->threads == 0) { scope->threads = new struct vthread_s; scope->threads->pc = 0; scope->threads->bits = 0; scope->threads->nbits = 0; scope->threads->child = 0; scope->threads->parent = 0; scope->threads->scope_prev = scope->threads; scope->threads->scope_next = scope->threads; } { vthread_t tmp = scope->threads; thr->scope_next = tmp->scope_next; thr->scope_prev = tmp; thr->scope_next->scope_prev = thr; thr->scope_prev->scope_next = thr; } thr->schedule_parent_on_end = 0; thr->is_scheduled = 0; thr->i_have_ended = 0; thr->waiting_for_event = 0; thr->is_scheduled = 0; thr->fork_count = 0; thr_put_bit(thr, 0, 0); thr_put_bit(thr, 1, 1); thr_put_bit(thr, 2, 2); thr_put_bit(thr, 3, 3); return thr; } /* * Reaping pulls the thread out of the stack of threads. If I have a * child, then hand it over to my parent. */ static void vthread_reap(vthread_t thr) { free(thr->bits); thr->bits = 0; if (thr->child) { assert(thr->child->parent == thr); thr->child->parent = thr->parent; } if (thr->parent) { assert(thr->parent->child == thr); thr->parent->child = thr->child; } thr->child = 0; thr->parent = 0; thr->scope_next->scope_prev = thr->scope_prev; thr->scope_prev->scope_next = thr->scope_next; thr->pc = 0; /* If this thread is not scheduled, then is it safe to delete it now. Otherwise, let the schedule event (which will execute the thread at of_ZOMBIE) delete the object. */ if ((thr->is_scheduled == 0) && (thr->waiting_for_event == 0)) { assert(thr->fork_count == 0); assert(thr->wait_next == 0); delete thr; } } void vthread_mark_scheduled(vthread_t thr) { assert(thr->is_scheduled == 0); thr->is_scheduled = 1; } /* * This function runs a thread by fetching an instruction, * incrementing the PC, and executing the instruction. */ void vthread_run(vthread_t thr) { assert(thr->is_scheduled); thr->is_scheduled = 0; for (;;) { vvp_code_t cp = codespace_index(thr->pc); thr->pc += 1; assert(cp); assert(cp->opcode); /* Run the opcode implementation. If the execution of the opcode returns false, then the thread is meant to be paused, so break out of the loop. */ bool rc = (cp->opcode)(thr, cp); if (rc == false) return; } } /* * This is called by an event functor to wake up all the threads on * its list. I in fact created that list in the %wait instruction, and * I also am certain that the waiting_for_event flag is set. */ void vthread_schedule_list(vthread_t thr) { while (thr) { vthread_t tmp = thr; thr = thr->wait_next; assert(tmp->waiting_for_event); tmp->waiting_for_event = 0; tmp->wait_next = 0; schedule_vthread(tmp, 0); } } bool of_AND(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb == 0) || (rb == 0)) { thr_put_bit(thr, idx1, 0); } else if ((lb == 1) && (rb == 1)) { thr_put_bit(thr, idx1, 1); } else { thr_put_bit(thr, idx1, 2); } idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_ADD(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number); if (lva == 0 || lvb == 0) goto x_out; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp < lvb[idx]) carry = 1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS); thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? 1 : 0); } delete[]lva; delete[]lvb; return true; x_out: delete[]lva; delete[]lvb; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } /* * This is %addi, add-immediate. The first value is a vector, the * second value is the immediate value in the bin_idx[1] position. The * immediate value can be up to 16 bits, which are then padded to the * width of the vector with zero. */ bool of_ADDI(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned word_count = (cp->number+CPU_WORD_BITS-1)/CPU_WORD_BITS; unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb; if (lva == 0) goto x_out; lvb = new unsigned long[word_count]; lvb[0] = cp->bit_idx[1]; for (unsigned idx = 1 ; idx < word_count ; idx += 1) lvb[idx] = 0; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp < lvb[idx]) carry = 1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS); thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? 1 : 0); } delete[]lva; delete[]lvb; return true; x_out: delete[]lva; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } bool of_ASSIGN(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); schedule_assign(cp->iptr, bit_val, cp->bit_idx[0]); return true; } bool of_ASSIGN_D(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] < 4); unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); schedule_assign(cp->iptr, bit_val, thr->index[cp->bit_idx[0]]); return true; } /* * This is %assign/v0