/* * Copyright (c) 2001 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU * General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #ifdef HAVE_CVS_IDENT #ident "$Id: vthread.cc,v 1.82 2002/08/27 05:39:57 steve Exp $" #endif # include "vthread.h" # include "codes.h" # include "debug.h" # include "schedule.h" # include "functor.h" # include "ufunc.h" # include "event.h" # include "vpi_priv.h" #ifdef HAVE_MALLOC_H # include #endif # include # include # include # include #include /* This is the size of an unsigned long in bits. This is just a convenience macro. */ # define CPU_WORD_BITS (8*sizeof(unsigned long)) # define TOP_BIT (1UL << (CPU_WORD_BITS-1)) /* * This vhtread_s structure describes all there is to know about a * thread, including its program counter, all the private bits it * holds, and its place in other lists. * * * ** Notes On The Interactions of %fork/%join/%end: * * The %fork instruction creates a new thread and pushes that onto the * stack of children for the thread. This new thread, then, becomes * the new direct descendent of the thread. This new thread is * therefore also the first thread to be reaped when the parent does a * %join. * * It is a programming error for a thread that created threads to not * %join as many as it created before it %ends. 
The linear stack for * tracking thread relationships will create a mess otherwise. For * example, if A creates B then C, the stack is: * * A --> C --> B * * If C then %forks X, the stack is: * * A --> C --> X --> B * * If C %ends without a join, then the stack is: * * A --> C(zombie) --> X --> B * * If A then executes 2 %joins, it will read C and X (when it ends) * leaving B in purgatory. What's worse, A will block on the schedules * of X and C instead of C and B, possibly creating incorrect timing. * * The schedule_parent_on_end flag is used by threads to tell their * children that they are waiting for it to end. It is set by a %join * instruction if the child is not already done. The thread that * executes a %join instruction sets the flag in its child. * * The i_have_ended flag, on the other hand, is used by threads to * tell their parents that they are already dead. A thread that * executes %end will set its own i_have_ended flag and let its parent * reap it when the parent does the %join. If a thread has its * schedule_parent_on_end flag set already when it %ends, then it * reaps itself and simply schedules its parent. If a child has its * i_have_ended flag set when a thread executes %join, then it is free * to reap the child immediately. */ struct vthread_s { /* This is the program counter. */ unsigned long pc; /* These hold the private thread bits. */ unsigned long *bits; long index[4]; unsigned nbits :16; /* My parent sets this when it wants me to wake it up. */ unsigned schedule_parent_on_end :1; unsigned i_have_ended :1; unsigned waiting_for_event :1; unsigned is_scheduled :1; /* This points to the sole child of the thread. */ struct vthread_s*child; /* This points to my parent, if I have one. */ struct vthread_s*parent; /* This is used for keeping wait queues. */ struct vthread_s*wait_next; /* These are used to keep the thread in a scope. 
*/ struct vthread_s*scope_next, *scope_prev; }; #if SIZEOF_UNSIGNED_LONG == 8 # define THR_BITS_INIT 0xaaaaaaaaaaaaaaaaUL #else # define THR_BITS_INIT 0xaaaaaaaaUL #endif static void thr_check_addr(struct vthread_s*thr, unsigned addr) { assert(addr < 0x10000); while (thr->nbits <= addr) { unsigned word_cnt = thr->nbits/(CPU_WORD_BITS/2) + 1; thr->bits = (unsigned long*) realloc(thr->bits, word_cnt*sizeof(unsigned long)); thr->bits[word_cnt-1] = THR_BITS_INIT; thr->nbits = word_cnt * (CPU_WORD_BITS/2); } } static inline unsigned thr_get_bit(struct vthread_s*thr, unsigned addr) { assert(addr < thr->nbits); unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); return (thr->bits[addr] >> (idx*2)) & 3UL; } static inline void thr_put_bit(struct vthread_s*thr, unsigned addr, unsigned val) { if (addr >= thr->nbits) thr_check_addr(thr, addr); unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); unsigned long mask = 3UL << (idx*2); unsigned long tmp = val; thr->bits[addr] = (thr->bits[addr] & ~mask) | (tmp << (idx*2)); } static inline void thr_clr_bit_(struct vthread_s*thr, unsigned addr) { unsigned idx = addr % (CPU_WORD_BITS/2); addr /= (CPU_WORD_BITS/2); unsigned long mask = 3UL << (idx*2); thr->bits[addr] &= ~mask; } unsigned vthread_get_bit(struct vthread_s*thr, unsigned addr) { return thr_get_bit(thr, addr); } void vthread_put_bit(struct vthread_s*thr, unsigned addr, unsigned bit) { thr_put_bit(thr, addr, bit); } static unsigned long* vector_to_array(struct vthread_s*thr, unsigned addr, unsigned wid) { unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = 0; for (unsigned idx = 0 ; idx < wid ; idx += 1) { unsigned long bit = thr_get_bit(thr, addr); if (bit & 2) goto x_out; val[idx/CPU_WORD_BITS] |= bit << (idx % CPU_WORD_BITS); if (addr >= 4) addr += 1; } return val; x_out: delete[]val; return 0; } /* * Create a new thread with the 
given start address. */ vthread_t vthread_new(unsigned long pc, struct __vpiScope*scope) { vthread_t thr = new struct vthread_s; thr->pc = pc; thr->bits = (unsigned long*)malloc(4 * sizeof(unsigned long)); thr->nbits = 4 * (CPU_WORD_BITS/2); thr->child = 0; thr->parent = 0; thr->wait_next = 0; /* If the target scope never held a thread, then create a header cell for it. This is a stub to make circular lists easier to work with. */ if (scope->threads == 0) { scope->threads = new struct vthread_s; scope->threads->pc = 0; scope->threads->bits = 0; scope->threads->nbits = 0; scope->threads->child = 0; scope->threads->parent = 0; scope->threads->scope_prev = scope->threads; scope->threads->scope_next = scope->threads; } { vthread_t tmp = scope->threads; thr->scope_next = tmp->scope_next; thr->scope_prev = tmp; thr->scope_next->scope_prev = thr; thr->scope_prev->scope_next = thr; } thr->schedule_parent_on_end = 0; thr->is_scheduled = 0; thr->i_have_ended = 0; thr->waiting_for_event = 0; thr->is_scheduled = 0; thr_put_bit(thr, 0, 0); thr_put_bit(thr, 1, 1); thr_put_bit(thr, 2, 2); thr_put_bit(thr, 3, 3); return thr; } /* * Reaping pulls the thread out of the stack of threads. If I have a * child, then hand it over to my parent. */ static void vthread_reap(vthread_t thr) { assert(thr->wait_next == 0); free(thr->bits); thr->bits = 0; if (thr->child) thr->child->parent = thr->parent; if (thr->parent) thr->parent->child = thr->child; thr->child = 0; thr->parent = 0; thr->scope_next->scope_prev = thr->scope_prev; thr->scope_prev->scope_next = thr->scope_next; thr->pc = 0; /* If this thread is not scheduled, then is it safe to delete it now. Otherwise, let the schedule event (which will execute the thread at of_ZOMBIE) delete the object. 
*/ if (thr->is_scheduled == 0) delete thr; } void vthread_mark_scheduled(vthread_t thr) { assert(thr->is_scheduled == 0); thr->is_scheduled = 1; } /* * This function runs a thread by fetching an instruction, * incrementing the PC, and executing the instruction. */ void vthread_run(vthread_t thr) { assert(thr->is_scheduled); thr->is_scheduled = 0; for (;;) { vvp_code_t cp = codespace_index(thr->pc); thr->pc += 1; assert(cp); assert(cp->opcode); /* Run the opcode implementation. If the execution of the opcode returns false, then the thread is meant to be paused, so break out of the loop. */ bool rc = (cp->opcode)(thr, cp); if (rc == false) return; } } /* * This is called by an event functor to wake up all the threads on * its list. I in fact created that list in the %wait instruction, and * I also am certain that the waiting_for_event flag is set. */ void vthread_schedule_list(vthread_t thr) { while (thr) { vthread_t tmp = thr; thr = thr->wait_next; assert(tmp->waiting_for_event); tmp->waiting_for_event = 0; tmp->wait_next = 0; schedule_vthread(tmp, 0); } } bool of_AND(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb == 0) || (rb == 0)) { thr_put_bit(thr, idx1, 0); } else if ((lb == 1) && (rb == 1)) { thr_put_bit(thr, idx1, 1); } else { thr_put_bit(thr, idx1, 2); } idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_ADD(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number); if (lva == 0 || lvb == 0) goto x_out; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp 
< lvb[idx]) carry = 1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS); thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? 1 : 0); } delete[]lva; delete[]lvb; return true; x_out: delete[]lva; delete[]lvb; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } /* * This is %addi, add-immediate. The first value is a vector, the * second value is the immediate value in the bin_idx[1] position. The * immediate value can be up to 16 bits, which are then padded to the * width of the vector with zero. */ bool of_ADDI(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned word_count = (cp->number+CPU_WORD_BITS-1)/CPU_WORD_BITS; unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb; if (lva == 0) goto x_out; lvb = new unsigned long[word_count]; lvb[0] = cp->bit_idx[1]; for (unsigned idx = 1 ; idx < word_count ; idx += 1) lvb[idx] = 0; unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { unsigned long tmp = lvb[idx] + carry; unsigned long sum = lva[idx] + tmp; carry = 0; if (tmp < lvb[idx]) carry = 1; if (sum < tmp) carry = 1; if (sum < lva[idx]) carry = 1; lva[idx] = sum; } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS); thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? 
1 : 0); } delete[]lva; delete[]lvb; return true; x_out: delete[]lva; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } bool of_ASSIGN(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); schedule_assign(cp->iptr, bit_val, cp->bit_idx[0]); return true; } bool of_ASSIGN_D(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] < 4); unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); schedule_assign(cp->iptr, bit_val, thr->index[cp->bit_idx[0]]); return true; } bool of_ASSIGN_X0(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); vvp_ipoint_t itmp = ipoint_index(cp->iptr, thr->index[0]); schedule_assign(itmp, bit_val, cp->bit_idx[0]); return true; } bool of_ASSIGN_MEM(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[1]); schedule_memory(cp->mem, thr->index[3], bit_val, cp->bit_idx[0]); return true; } bool of_BLEND(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if (lb != rb) thr_put_bit(thr, idx1, 2); idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_BREAKPOINT(vthread_t thr, vvp_code_t cp) { #if defined(WITH_DEBUG) breakpoint(); #endif return true; } bool of_CMPS(vthread_t thr, vvp_code_t cp) { unsigned eq = 1; unsigned eeq = 1; unsigned lt = 0; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1; unsigned end2 = (idx2 < 4)? 
idx2 : idx2 + cp->number - 1; unsigned sig1 = thr_get_bit(thr, end1); unsigned sig2 = thr_get_bit(thr, end2); for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lv = thr_get_bit(thr, idx1); unsigned rv = thr_get_bit(thr, idx2); if (lv > rv) { lt = 0; eeq = 0; } else if (lv < rv) { lt = 1; eeq = 0; } if (eq != 2) { if ((lv == 0) && (rv != 0)) eq = 0; if ((lv == 1) && (rv != 1)) eq = 0; if ((lv | rv) >= 2) eq = 2; } if (idx1 >= 4) idx1 += 1; if (idx2 >= 4) idx2 += 1; } if (eq == 2) lt = 2; else if ((sig1 == 1) && (sig2 == 0)) lt = 1; else if ((sig1 == 0) && (sig2 == 1)) lt = 0; /* Correct the lt bit to account for the sign of the parameters. */ if (lt < 2) { sig1 = thr_get_bit(thr, end1); sig2 = thr_get_bit(thr, end2); /* If both numbers are negative, then switch the direction of the lt. */ if ((sig1 == 1) && (sig2 == 1) && (eq != 0)) lt ^= 1; /* If the first is negative and the last positive, then a < b for certain. */ if ((sig1 == 1) && (sig2 == 0)) lt = 1; /* If the first is positive and the last negative, then a > b for certain. 
*/ if ((sig1 == 0) && (sig2 == 1)) lt = 0; } thr_put_bit(thr, 4, eq); thr_put_bit(thr, 5, lt); thr_put_bit(thr, 6, eeq); return true; } bool of_CMPIU(vthread_t thr, vvp_code_t cp) { unsigned eq = 1; unsigned eeq = 1; unsigned lt = 0; unsigned idx1 = cp->bit_idx[0]; unsigned imm = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lv = thr_get_bit(thr, idx1); unsigned rv = imm & 1; imm >>= 1; if (lv > rv) { lt = 0; eeq = 0; } else if (lv < rv) { lt = 1; eeq = 0; } if (eq != 2) { if ((lv == 0) && (rv != 0)) eq = 0; if ((lv == 1) && (rv != 1)) eq = 0; if ((lv | rv) >= 2) eq = 2; } if (idx1 >= 4) idx1 += 1; } if (eq == 2) lt = 2; thr_put_bit(thr, 4, eq); thr_put_bit(thr, 5, lt); thr_put_bit(thr, 6, eeq); return true; } bool of_CMPU(vthread_t thr, vvp_code_t cp) { unsigned eq = 1; unsigned eeq = 1; unsigned lt = 0; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lv = thr_get_bit(thr, idx1); unsigned rv = thr_get_bit(thr, idx2); if (lv > rv) { lt = 0; eeq = 0; } else if (lv < rv) { lt = 1; eeq = 0; } if (eq != 2) { if ((lv == 0) && (rv != 0)) eq = 0; if ((lv == 1) && (rv != 1)) eq = 0; if ((lv | rv) >= 2) eq = 2; } if (idx1 >= 4) idx1 += 1; if (idx2 >= 4) idx2 += 1; } if (eq == 2) lt = 2; thr_put_bit(thr, 4, eq); thr_put_bit(thr, 5, lt); thr_put_bit(thr, 6, eeq); return true; } bool of_CMPX(vthread_t thr, vvp_code_t cp) { unsigned eq = 1; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lv = thr_get_bit(thr, idx1); unsigned rv = thr_get_bit(thr, idx2); if ((lv < 2) && (rv < 2) && (lv != rv)) { eq = 0; break; } if (idx1 >= 4) idx1 += 1; if (idx2 >= 4) idx2 += 1; } thr_put_bit(thr, 4, eq); return true; } bool of_CMPZ(vthread_t thr, vvp_code_t cp) { unsigned eq = 1; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { 
unsigned lv = thr_get_bit(thr, idx1); unsigned rv = thr_get_bit(thr, idx2); if ((lv < 3) && (rv < 3) && (lv != rv)) { eq = 0; break; } if (idx1 >= 4) idx1 += 1; if (idx2 >= 4) idx2 += 1; } thr_put_bit(thr, 4, eq); return true; } bool of_DELAY(vthread_t thr, vvp_code_t cp) { //printf("thread %p: %%delay %lu\n", thr, cp->number); schedule_vthread(thr, cp->number); return false; } bool of_DELAYX(vthread_t thr, vvp_code_t cp) { unsigned long delay; assert(cp->number < 4); delay = thr->index[cp->number]; schedule_vthread(thr, delay); return false; } /* * Implement the %disable instruction by scanning the target scope for * all the target threads. Kill the target threads and wake up a * parent that is attempting a %join. * * XXXX BUG BUG! * The scheduler probably still has a pointer to me, and this reaping * will destroy this object. The result: dangling pointer. */ bool of_DISABLE(vthread_t thr, vvp_code_t cp) { struct __vpiScope*scope = (struct __vpiScope*)cp->handle; if (scope->threads == 0) return true; struct vthread_s*head = scope->threads; bool disabled_myself_flag = false; while (head->scope_next != head) { vthread_t tmp = head->scope_next; /* Pull the target thread out of the scope. */ tmp->scope_next->scope_prev = tmp->scope_prev; tmp->scope_prev->scope_next = tmp->scope_next; /* XXXX I don't support disabling threads with children. */ assert(tmp->child == 0); /* XXXX Don't know how to disable waiting threads. */ assert(tmp->waiting_for_event == 0); /* If I am disabling myself, that remember that fact so that I can finish this statement differently. */ if (tmp == thr) disabled_myself_flag = true; tmp->pc = 0; tmp->i_have_ended = 1; if (tmp->schedule_parent_on_end) { /* If a parent is waiting in a %join, wake it up. */ assert(tmp->parent); schedule_vthread(tmp->parent, 0, true); vthread_reap(tmp); } else if (tmp->parent) { /* If the parent is yet to %join me, let its %join do the reaping. */ //assert(tmp->is_scheduled == 0); } else { /* No parent at all. Goodby. 
*/ vthread_reap(tmp); } } return ! disabled_myself_flag; } static void divide_bits(unsigned len, unsigned char*lbits, const unsigned char*rbits) { unsigned char *a, *b, *z, *t; a = new unsigned char[len+1]; b = new unsigned char[len+1]; z = new unsigned char[len+1]; t = new unsigned char[len+1]; unsigned char carry; unsigned char temp; int mxa = -1, mxz = -1; int i; int current, copylen; for (unsigned idx = 0 ; idx < len ; idx += 1) { unsigned lb = lbits[idx]; unsigned rb = rbits[idx]; z[idx]=lb; a[idx]=1-rb; // for 2s complement add.. } z[len]=0; a[len]=1; for(i=0;i<(int)len+1;i++) { b[i]=0; } for(i=len-1;i>=0;i--) { if(!a[i]) { mxa=i; break; } } for(i=len-1;i>=0;i--) { if(z[i]) { mxz=i; break; } } if((mxa>mxz)||(mxa==-1)) { if(mxa==-1) { fprintf(stderr, "Division By Zero error, exiting.\n"); exit(255); } goto tally; } copylen = mxa + 2; current = mxz - mxa; while(current > -1) { carry = 1; for(i=0;i>1); } if(carry) { for(i=0;ibit_idx[0] >= 4); if(cp->number <= 8*sizeof(unsigned long)) { unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; unsigned long lv = 0, rv = 0; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) goto x_out; lv |= lb << idx; rv |= rb << idx; idx1 += 1; if (idx2 >= 4) idx2 += 1; } if (rv == 0) goto x_out; lv /= rv; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1) ? 1 : 0); lv >>= 1; } return true; } else { /* Make a string of the bits of the numbers to be divided. Then divide them, and write the results into the thread. 
*/ unsigned char*lbits = new unsigned char[cp->number]; unsigned char*rbits = new unsigned char[cp->number]; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; bool rval_is_zero = true; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { lbits[idx] = thr_get_bit(thr, idx1); rbits[idx] = thr_get_bit(thr, idx2); if ((lbits[idx] | rbits[idx]) > 1) { delete[]lbits; delete[]rbits; goto x_out; } if (rbits[idx] != 0) rval_is_zero = false; idx1 += 1; if (idx2 >= 4) idx2 += 1; } /* Notice the special case of divide by 0. */ if (rval_is_zero) { delete[]lbits; delete[]rbits; goto x_out; } divide_bits(cp->number, lbits, rbits); for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, lbits[idx]); } delete[]lbits; delete[]rbits; return true; } x_out: for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } static void negate_bits(unsigned len, unsigned char*bits) { unsigned char carry = 1; for (unsigned idx = 0 ; idx < len ; idx += 1) { carry += bits[idx]? 0 : 1; bits[idx] = carry & 1; carry >>= 1; } } bool of_DIV_S(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); if(cp->number <= 8*sizeof(long)) { unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; long lv = 0, rv = 0; unsigned lb = 0; unsigned rb = 0; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { lb = thr_get_bit(thr, idx1); rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) goto x_out; lv |= (long)lb << idx; rv |= (long)rb << idx; idx1 += 1; if (idx2 >= 4) idx2 += 1; } /* Extend the sign to fill the native long. */ for (unsigned idx = cp->number; idx < (8*sizeof lv); idx += 1) { lv |= (long)lb << idx; rv |= (long)rb << idx; } if (rv == 0) goto x_out; lv /= rv; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1) ? 
1 : 0); lv >>= 1; } } else { unsigned char*lbits = new unsigned char[cp->number]; unsigned char*rbits = new unsigned char[cp->number]; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; bool rval_is_zero = true; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { lbits[idx] = thr_get_bit(thr, idx1); rbits[idx] = thr_get_bit(thr, idx2); if ((lbits[idx] | rbits[idx]) > 1) { delete[]lbits; delete[]rbits; goto x_out; } if (rbits[idx] != 0) rval_is_zero = false; idx1 += 1; if (idx2 >= 4) idx2 += 1; } /* Notice the special case of divide by 0. */ if (rval_is_zero) { delete[]lbits; delete[]rbits; goto x_out; } /* Signed division is unsigned division on the absolute values of the operands, then corrected for the number of signs. */ unsigned sign_flag = 0; if (lbits[cp->number-1]) { sign_flag += 1; negate_bits(cp->number, lbits); } if (rbits[cp->number-1]) { sign_flag += 1; negate_bits(cp->number, rbits); } divide_bits(cp->number, lbits, rbits); if (sign_flag & 1) { negate_bits(cp->number, lbits); } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, lbits[idx]); } delete[]lbits; delete[]rbits; } return true; x_out: for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } /* * This terminates the current thread. If there is a parent who is * waiting for me to die, then I schedule it. At any rate, I mark * myself as a zombie by setting my pc to 0. * * It is possible for this thread to have children at this %end. This * means that my child is really my sibling created by my parent, and * my parent will do the proper %joins in due course. For example: * * %fork child_1, test; * %fork child_2, test; * ... parent code ... * %join; * %join; * %end; * * child_1 ; * %end; * child_2 ; * %end; * * In this example, the main thread creates threads child_1 and * child_2. 
It is possible that this thread is child_2, so there is a
 * parent pointer and a child pointer, even though I did no
 * %forks or %joins. This means that I have a ->child pointer and a
 * ->parent pointer.
 *
 * If the main thread has executed the first %join, then it is waiting
 * for me, and I will be reaped right away.
 *
 * If the main thread has not executed a %join yet, then this thread
 * becomes a zombie. The main thread executes its %join eventually,
 * reaping me at that time.
 *
 * It does not matter the order that child_1 and child_2 threads call
 * %end -- child_2 will be reaped by the first %join, and child_1 will
 * be reaped by the second %join.
 */
bool of_END(vthread_t thr, vvp_code_t)
{
      assert(! thr->waiting_for_event);

        /* Whatever happens next, this thread is finished: park the
           program counter at 0 and flag myself as ended. */
      thr->pc = 0;
      thr->i_have_ended = 1;

      if (thr->schedule_parent_on_end) {
              /* My parent is already blocked in its %join. Wake it
                 up, and complete the join on its behalf by reaping
                 myself. */
            vthread_t waiter = thr->parent;
            assert(waiter);
            schedule_vthread(waiter, 0, true);
            vthread_reap(thr);

      } else if (thr->parent == 0) {
              /* Nobody can ever %join a thread without a parent
                 (an ``initial'' thread, for example), so there is
                 no point in lingering. Such a thread still having a
                 child means it did not execute enough %joins. */
            assert(thr->child == 0);
            vthread_reap(thr);
      }

        /* Otherwise I stay around as a zombie until my parent gets
           to its %join. In every case this thread stops running. */
      return false;
}

/*
 * The %fork instruction causes a new child to be created and pushed
 * in front of any existing child. This causes the new child to be the
 * parent of any previous children, and for me to be the parent of the
 * new child.
*/ bool of_FORK(vthread_t thr, vvp_code_t cp) { vthread_t child = vthread_new(cp->cptr2, cp->scope); child->child = thr->child; child->parent = thr; thr->child = child; if (child->child) { assert(child->child->parent == thr); child->child->parent = child; } schedule_vthread(child, 0, true); return true; } bool of_INV(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); for (unsigned idx = 0 ; idx < cp->bit_idx[1] ; idx += 1) { unsigned val = thr_get_bit(thr, cp->bit_idx[0]+idx); switch (val) { case 0: val = 1; break; case 1: val = 0; break; default: val = 2; break; } thr_put_bit(thr, cp->bit_idx[0]+idx, val); } return true; } /* ** Index registers, unsigned arithmetic. */ bool of_IX_ADD(vthread_t thr, vvp_code_t cp) { thr->index[cp->bit_idx[0] & 3] += cp->number; return true; } bool of_IX_SUB(vthread_t thr, vvp_code_t cp) { thr->index[cp->bit_idx[0] & 3] -= cp->number; return true; } bool of_IX_MUL(vthread_t thr, vvp_code_t cp) { thr->index[cp->bit_idx[0] & 3] *= cp->number; return true; } bool of_IX_LOAD(vthread_t thr, vvp_code_t cp) { thr->index[cp->bit_idx[0] & 3] = cp->number; return true; } /* * Load a vector into an index register. The format of the * opcode is: * * %ix/get , , * * where is the index register, is the base of the * vector and is the width in bits. * * Index registers only hold binary values, so if any of the * bits of the vector are x or z, then set the value to 0, * set bit[4] to 1, and give up. */ bool of_IX_GET(vthread_t thr, vvp_code_t cp) { unsigned long v = 0; bool unknown_flag = false; for (unsigned i = 0; inumber; i++) { unsigned char vv = thr_get_bit(thr, cp->bit_idx[1] + i); if (vv&2) { v = 0UL; unknown_flag = true; break; } v |= vv << i; } thr->index[cp->bit_idx[0] & 3] = v; /* Set bit 4 as a flag if the input is unknown. */ thr_put_bit(thr, 4, unknown_flag? 1 : 0); return true; } /* * The various JMP instruction work simply by pulling the new program * counter from the instruction and resuming. 
If the jump is * conditional, then test the bit for the expected value first. */ bool of_JMP(vthread_t thr, vvp_code_t cp) { thr->pc = cp->cptr; return true; } bool of_JMP0(vthread_t thr, vvp_code_t cp) { if (thr_get_bit(thr, cp->bit_idx[0]) == 0) thr->pc = cp->cptr; return true; } bool of_JMP0XZ(vthread_t thr, vvp_code_t cp) { if (thr_get_bit(thr, cp->bit_idx[0]) != 1) thr->pc = cp->cptr; return true; } bool of_JMP1(vthread_t thr, vvp_code_t cp) { if (thr_get_bit(thr, cp->bit_idx[0]) == 1) thr->pc = cp->cptr; return true; } /* * The %join instruction causes the thread to wait for the one and * only child to die. If it is already dead (and a zombie) then I * reap it and go on. Otherwise, I tell the child that I am ready for * it to die, and it will reschedule me when it does. */ bool of_JOIN(vthread_t thr, vvp_code_t cp) { assert(thr->child); assert(thr->child->parent == thr); /* If the child has already ended, reap it now. */ if (thr->child->i_have_ended) { vthread_reap(thr->child); return true; } /* Otherwise, I get to start waiting. 
*/ thr->child->schedule_parent_on_end = 1; return false; } bool of_LOAD(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); thr_put_bit(thr, cp->bit_idx[0], functor_get(cp->iptr)); return true; } bool of_LOAD_MEM(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned char val = memory_get(cp->mem, thr->index[3]); thr_put_bit(thr, cp->bit_idx[0], val); return true; } bool of_LOAD_X(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); assert(cp->bit_idx[1] < 4); vvp_ipoint_t ptr = ipoint_index(cp->iptr, thr->index[cp->bit_idx[1]]); thr_put_bit(thr, cp->bit_idx[0], functor_get(ptr)); return true; } bool of_MOD(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); if(cp->number <= 8*sizeof(unsigned long)) { unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; unsigned long lv = 0, rv = 0; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) goto x_out; lv |= lb << idx; rv |= rb << idx; idx1 += 1; if (idx2 >= 4) idx2 += 1; } if (rv == 0) goto x_out; lv %= rv; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1) ? 1 : 0); lv >>= 1; } return true; } else { int len=cp->number; unsigned char *a, *z, *t; a = new unsigned char[len+1]; z = new unsigned char[len+1]; t = new unsigned char[len+1]; unsigned char carry; unsigned char temp; int mxa = -1, mxz = -1; int i; int current, copylen; unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) { delete []t; delete []z; delete []a; goto x_out; } z[idx]=lb; a[idx]=1-rb; // for 2s complement add.. 
idx1 += 1; if (idx2 >= 4) idx2 += 1; } z[len]=0; a[len]=1; for(i=len-1;i>=0;i--) { if(!a[i]) { mxa=i; break; } } for(i=len-1;i>=0;i--) { if(z[i]) { mxz=i; break; } } if((mxa>mxz)||(mxa==-1)) { if(mxa==-1) { delete []t; delete []z; delete []a; goto x_out; } goto tally; } copylen = mxa + 2; current = mxz - mxa; while(current > -1) { carry = 1; for(i=0;i>1); } if(carry) { for(i=0;inumber ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, z[idx]); } delete []t; delete []z; delete []a; return true; } x_out: for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } /* * %mov , , * This instruction is implemented by the of_MOV function * below. However, durning runtime vvp might notice that the * parameters have certain properties that make it possible to * replace the of_MOV opcode with a more specific instruction that * more directly does the job. All the of_MOV*_ functions are * functions that of_MOV might use to replace itself. */ static bool of_MOV0_a_(vthread_t thr, vvp_code_t cp) { if ((cp->bit_idx[0]+cp->number) > thr->nbits) thr_check_addr(thr, cp->bit_idx[0]+cp->number-1); for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_clr_bit_(thr, cp->bit_idx[0]+idx); return true; } static bool of_MOV0_b_(vthread_t thr, vvp_code_t cp) { if (cp->bit_idx[1] >= thr->nbits) thr_check_addr(thr, cp->bit_idx[1]); thr->bits[cp->bit_idx[0]] &= cp->number; return true; } static bool of_MOV1XZ_(vthread_t thr, vvp_code_t cp) { for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, cp->bit_idx[1]); return true; } static bool of_MOV_(vthread_t thr, vvp_code_t cp) { for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, thr_get_bit(thr, cp->bit_idx[1]+idx)); return true; } bool of_MOV(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); if (cp->bit_idx[1] >= 4) { cp->opcode = &of_MOV_; return cp->opcode(thr, cp); } else if (cp->bit_idx[1] == 0) { /* 
Detect the special case where this is really just a large clear. Rewrite the instruction to skip this test next time around, and use a precoded opcode. */ unsigned test_addr = cp->bit_idx[0] + cp->number - 1; unsigned addr1 = cp->bit_idx[0] / (CPU_WORD_BITS/2); unsigned addr2 = (test_addr) / (CPU_WORD_BITS/2); if (addr1 == addr2) { unsigned sh1 = cp->bit_idx[0] % (CPU_WORD_BITS/2); unsigned sh2 = (test_addr+1) % (CPU_WORD_BITS/2); unsigned long mask = ULONG_MAX << ((sh2 - sh1) * 2UL); mask = (~mask) << sh1*2UL; cp->number = ~mask; cp->bit_idx[0] = addr1; cp->bit_idx[1] = test_addr; cp->opcode = &of_MOV0_b_; return cp->opcode(thr, cp); } else { cp->opcode = &of_MOV0_a_; return cp->opcode(thr, cp); } } else { cp->opcode = &of_MOV1XZ_; return cp->opcode(thr, cp); } return true; } bool of_MUL(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); if(cp->number <= 8*sizeof(unsigned long)) { unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; unsigned long lv = 0, rv = 0; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) goto x_out; lv |= lb << idx; rv |= rb << idx; idx1 += 1; if (idx2 >= 4) idx2 += 1; } lv *= rv; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1) ? 1 : 0); lv >>= 1; } return true; } else { unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; unsigned char *a, *b, *sum; a = new unsigned char[cp->number]; b = new unsigned char[cp->number]; sum = new unsigned char[cp->number]; int mxa = -1; int mxb = -1; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb | rb) & 2) { delete[]sum; delete[]b; delete[]a; goto x_out; } if((a[idx] = lb)) mxa=idx+1; if((b[idx] = rb)) mxb=idx; sum[idx]=0; idx1 += 1; if (idx2 >= 4) idx2 += 1; } // do "unsigned ZZ sum = a * b" the hard way.. 
for(int i=0;i<=mxb;i++) { if(b[i]) { unsigned char carry=0; unsigned char temp; for(int j=0;j<=mxa;j++) { if(i+j>=(int)cp->number) break; temp=sum[i+j]+a[j]+carry; sum[i+j]=(temp&1); carry=(temp>>1); } } } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, sum[idx]); } delete[]sum; delete[]b; delete[]a; return true; } x_out: for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } bool of_MULI(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); /* If the value fits into a native unsigned long, then make an unsigned long variable with the numbers, to a native multiply, and work with that. */ if(cp->number <= 8*sizeof(unsigned long)) { unsigned idx1 = cp->bit_idx[0]; unsigned long lv = 0, rv = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); if (lb & 2) goto x_out; lv |= lb << idx; idx1 += 1; } lv *= rv; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, (lv&1) ? 1 : 0); lv >>= 1; } return true; } /* number is too large for local long, so do bitwise multiply. */ unsigned idx1; idx1 = cp->bit_idx[0]; unsigned imm; imm = cp->bit_idx[1]; unsigned char *a, *b, *sum; a = new unsigned char[cp->number]; b = new unsigned char[cp->number]; sum = new unsigned char[cp->number]; int mxa; mxa = -1; int mxb; mxb = -1; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = imm & 1; imm >>= 1; if (lb & 2) { delete[]sum; delete[]b; delete[]a; goto x_out; } if((a[idx] = lb)) mxa=idx+1; if((b[idx] = rb)) mxb=idx; sum[idx]=0; idx1 += 1; } // do "unsigned ZZ sum = a * b" the hard way.. 
for(int i=0;i<=mxb;i++) { if(b[i]) { unsigned char carry=0; unsigned char temp; for(int j=0;j<=mxa;j++) { if(i+j>=(int)cp->number) break; temp=sum[i+j]+a[j]+carry; sum[i+j]=(temp&1); carry=(temp>>1); } } } for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { thr_put_bit(thr, cp->bit_idx[0]+idx, sum[idx]); } delete[]sum; delete[]b; delete[]a; return true; x_out: for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } bool of_NOOP(vthread_t thr, vvp_code_t cp) { return true; } bool of_NORR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 1; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 1) { lb = 0; break; } if (rb != 0) lb = 2; } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_ANDR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 1; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 0) { lb = 0; break; } if (rb != 1) lb = 2; } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_NANDR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 0; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 0) { lb = 1; break; } if (rb != 1) lb = 2; } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_ORR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 0; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 1) { lb = 1; break; } if (rb != 0) lb = 2; } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_XORR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 0; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < 
cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 1) lb ^= 1; else if (rb != 0) { lb = 2; break; } } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_XNORR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned lb = 1; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned rb = thr_get_bit(thr, idx2+idx); if (rb == 1) lb ^= 1; else if (rb != 0) { lb = 2; break; } } thr_put_bit(thr, cp->bit_idx[0], lb); return true; } bool of_OR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb == 1) || (rb == 1)) { thr_put_bit(thr, idx1, 1); } else if ((lb == 0) && (rb == 0)) { thr_put_bit(thr, idx1, 0); } else { thr_put_bit(thr, idx1, 2); } idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } static const unsigned char strong_values[4] = {St0, St1, StX, HiZ}; bool of_SET(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[0]); functor_set(cp->iptr, bit_val, strong_values[bit_val], true); return true; } bool of_SET_MEM(vthread_t thr, vvp_code_t cp) { unsigned char val = thr_get_bit(thr, cp->bit_idx[0]); memory_set(cp->mem, thr->index[3], val); return true; } /* * Implement the %set/x instruction: * * %set/x , , * * The single bit goes into the indexed functor. Abort the instruction * if the index is <0. */ bool of_SET_X(vthread_t thr, vvp_code_t cp) { unsigned char bit_val = thr_get_bit(thr, cp->bit_idx[0]); long idx = thr->index[cp->bit_idx[1]&3]; /* If idx < 0, then the index value is probably generated from an undefined value. At any rate, this is defined to have no effect so quit now. */ if (idx < 0) { return true; } /* Form the functor pointer from the base pointer and the index from the index register. 
*/ vvp_ipoint_t itmp = ipoint_index(cp->iptr, idx); /* Set the value. */ functor_set(itmp, bit_val, strong_values[bit_val], true); return true; } bool of_SHIFTL_I0(vthread_t thr, vvp_code_t cp) { unsigned base = cp->bit_idx[0]; unsigned wid = cp->number; unsigned long shift = thr->index[0]; if (shift >= wid) { for (unsigned idx = 0 ; idx < wid ; idx += 1) thr_put_bit(thr, base+idx, 0); } else if (shift > 0) { for (unsigned idx = wid ; idx > shift ; idx -= 1) { unsigned src = base+idx-shift-1; unsigned dst = base + idx - 1; thr_put_bit(thr, dst, thr_get_bit(thr, src)); } for (unsigned idx = 0 ; idx < shift ; idx += 1) thr_put_bit(thr, base+idx, 0); } return true; } /* * This is an unsigned right shift. */ bool of_SHIFTR_I0(vthread_t thr, vvp_code_t cp) { unsigned base = cp->bit_idx[0]; unsigned wid = cp->number; unsigned long shift = thr->index[0]; if (shift >= wid) { for (unsigned idx = 0 ; idx < wid ; idx += 1) thr_put_bit(thr, base+idx, 0); } else if (shift > 0) { for (unsigned idx = 0 ; idx < (wid-shift) ; idx += 1) { unsigned src = base + idx + shift; unsigned dst = base + idx; thr_put_bit(thr, dst, thr_get_bit(thr, src)); } for (unsigned idx = (wid-shift) ; idx < wid ; idx += 1) thr_put_bit(thr, base+idx, 0); } return true; } bool of_SUB(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned long*lva = vector_to_array(thr, cp->bit_idx[0], cp->number); unsigned long*lvb = vector_to_array(thr, cp->bit_idx[1], cp->number); if (lva == 0 || lvb == 0) goto x_out; unsigned carry; carry = 1; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned long tmp; unsigned sum = carry; tmp = lva[idx/CPU_WORD_BITS]; sum += 1 & (tmp >> (idx%CPU_WORD_BITS)); tmp = lvb[idx/CPU_WORD_BITS]; sum += 1 & ~(tmp >> (idx%CPU_WORD_BITS)); carry = sum / 2; thr_put_bit(thr, cp->bit_idx[0]+idx, (sum&1) ? 
1 : 0); } delete[]lva; delete[]lvb; return true; x_out: delete[]lva; delete[]lvb; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) thr_put_bit(thr, cp->bit_idx[0]+idx, 2); return true; } bool of_VPI_CALL(vthread_t thr, vvp_code_t cp) { // printf("thread %p: %%vpi_call\n", thr); vpip_execute_vpi_call(thr, cp->handle); return schedule_finished()? false : true; } /* * Implement the wait by locating the functor for the event, and * adding this thread to the threads list for the event. */ bool of_WAIT(vthread_t thr, vvp_code_t cp) { assert(! thr->waiting_for_event); thr->waiting_for_event = 1; waitable_hooks_s* ep = dynamic_cast(functor_index(cp->iptr)); assert(ep); thr->wait_next = ep->threads; ep->threads = thr; return false; } bool of_XNOR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb == 1) && (rb == 1)) { thr_put_bit(thr, idx1, 1); } else if ((lb == 0) && (rb == 0)) { thr_put_bit(thr, idx1, 1); } else if ((lb == 1) && (rb == 0)) { thr_put_bit(thr, idx1, 0); } else if ((lb == 0) && (rb == 1)) { thr_put_bit(thr, idx1, 0); } else { thr_put_bit(thr, idx1, 2); } idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_XOR(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); unsigned idx1 = cp->bit_idx[0]; unsigned idx2 = cp->bit_idx[1]; for (unsigned idx = 0 ; idx < cp->number ; idx += 1) { unsigned lb = thr_get_bit(thr, idx1); unsigned rb = thr_get_bit(thr, idx2); if ((lb == 1) && (rb == 1)) { thr_put_bit(thr, idx1, 0); } else if ((lb == 0) && (rb == 0)) { thr_put_bit(thr, idx1, 0); } else if ((lb == 1) && (rb == 0)) { thr_put_bit(thr, idx1, 1); } else if ((lb == 0) && (rb == 1)) { thr_put_bit(thr, idx1, 1); } else { thr_put_bit(thr, idx1, 2); } idx1 += 1; if (idx2 >= 4) idx2 += 1; } return true; } bool of_ZOMBIE(vthread_t thr, vvp_code_t) 
{ thr->pc = 0; if ((thr->parent == 0) && (thr->child == 0)) delete thr; return false; } /* * This is a phantom opcode used to call user defined functions. It is * used in code generated by the .ufunc statement. This instruction * contains a pointer to executable code of the function, and to a * ufunc_core object that has all the port information about the * function. */ bool of_CALL_UFUNC(vthread_t thr, vvp_code_t cp) { /* Copy all the inputs to the ufunc object to the port variables of the function. This copies all the values atomically. */ cp->ufunc_core_ptr->assign_bits_to_ports(); /* Create a temporary thread, and execute it manually. This is necessary so that the previous step (assign_bits_to_ports) and the execution of the function itself are an atomic unit. If it were not, then the inputs might change between setup and execution, causing really bad things to happen. */ vthread_t child = vthread_new(cp->cptr, cp->ufunc_core_ptr->scope()); vthread_mark_scheduled(child); vthread_run(child); /* Now copy the output from the result variable to the output ports of the .ufunc device. */ cp->ufunc_core_ptr->finish_thread(thr); return true; } /* * $Log: vthread.cc,v $ * Revision 1.82 2002/08/27 05:39:57 steve * Fix l-value indexing of memories and vectors so that * an unknown (x) index causes so cell to be addresses. * * Fix tangling of label identifiers in the fork-join * code generator. * * Revision 1.81 2002/08/22 03:38:40 steve * Fix behavioral eval of x?a:b expressions. * * Revision 1.80 2002/08/18 01:05:50 steve * x in index values leads to 0. * * Revision 1.79 2002/08/12 01:35:09 steve * conditional ident string using autoconfig. * * Revision 1.78 2002/06/02 18:55:58 steve * Add %cmpi/u instruction. * * Revision 1.77 2002/05/31 20:04:22 steve * Add the %muli instruction. * * Revision 1.76 2002/05/31 04:09:58 steve * Slight improvement in %mov performance. * * Revision 1.75 2002/05/31 00:05:49 steve * Word oriented bit storage. 
* * Revision 1.74 2002/05/29 16:29:34 steve * Add %addi, which is faster to simulate. * * Revision 1.73 2002/05/27 00:53:10 steve * Able to disable thread self. * * Revision 1.72 2002/05/24 04:55:13 steve * Detect long division by zero. * * Revision 1.71 2002/05/19 05:18:16 steve * Add callbacks for vpiNamedEvent objects. * * Revision 1.70 2002/05/12 23:44:41 steve * task calls and forks push the thread event in the queue. * * Revision 1.69 2002/04/21 22:29:49 steve * Add the assign/d instruction for computed delays. * * Revision 1.68 2002/04/14 18:41:34 steve * Support signed integer division. * * Revision 1.67 2002/03/18 00:19:34 steve * Add the .ufunc statement. * * Revision 1.66 2002/01/26 02:08:07 steve * Handle x in l-value of set/x * * Revision 1.65 2001/12/31 00:01:16 steve * Account for negatives in cmp/s * * Revision 1.64 2001/11/06 03:07:22 steve * Code rearrange. (Stephan Boettcher) * * Revision 1.63 2001/11/01 03:00:20 steve * Add force/cassign/release/deassign support. (Stephan Boettcher) * * Revision 1.62 2001/10/31 04:27:47 steve * Rewrite the functor type to have fewer functor modes, * and use objects to manage the different types. * (Stephan Boettcher) * * Revision 1.61 2001/10/25 04:19:53 steve * VPI support for callback to return values. * * Revision 1.60 2001/10/23 03:49:13 steve * Fix carry between works for %add instruction. * * Revision 1.59 2001/10/20 23:20:32 steve * Catch and X division by 0. * * Revision 1.58 2001/10/16 01:26:55 steve * Add %div support (Anthony Bybell) * * Revision 1.57 2001/10/14 17:36:19 steve * Forgot to propagate carry. * * Revision 1.56 2001/10/14 16:36:43 steve * Very wide multiplication (Anthony Bybell) * * Revision 1.55 2001/09/15 18:27:05 steve * Make configure detect malloc.h * * Revision 1.54 2001/09/07 23:29:28 steve * Redo of_SUBU in a more obvious algorithm, that * is not significantly slower. Also, clean up the * implementation of %mov from a constant. 
* * Fix initial clearing of vector by vector_to_array * * Revision 1.53 2001/08/26 22:59:32 steve * Add the assign/x0 and set/x opcodes. * * Revision 1.52 2001/08/08 00:53:50 steve * signed/unsigned warnings? * * Revision 1.51 2001/07/22 00:04:50 steve * Add the load/x instruction for bit selects. * * Revision 1.50 2001/07/20 04:57:00 steve * Fix of_END when a middle thread ends. * * Revision 1.49 2001/07/19 04:40:55 steve * Add support for the delayx opcode. * * Revision 1.48 2001/07/04 04:57:10 steve * Relax limit on behavioral subtraction. * * Revision 1.47 2001/06/30 21:07:26 steve * Support non-const right shift (unsigned). * * Revision 1.46 2001/06/23 18:26:26 steve * Add the %shiftl/i0 instruction. * * Revision 1.45 2001/06/22 00:03:05 steve * Infinitely wide behavioral add. * * Revision 1.44 2001/06/18 01:09:32 steve * More behavioral unary reduction operators. * (Stephan Boettcher) */