From e5381feb85b1d3a711b8205b5b2fff627b5a773d Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sun, 2 Dec 2007 08:47:06 -0800 Subject: [PATCH 1/5] Clean up functor counters The functor counters were left over from the v0.8 release. Rework the counters to be relevant to the current state of vvp. Signed-off-by: Stephen Williams --- vvp/logic.cc | 16 +++++--- vvp/main.cc | 95 ++--------------------------------------------- vvp/resolv.cc | 12 +----- vvp/statistics.cc | 29 ++------------- vvp/statistics.h | 27 ++------------ vvp/vvp_net.cc | 6 ++- 6 files changed, 25 insertions(+), 160 deletions(-) diff --git a/vvp/logic.cc b/vvp/logic.cc index 5ffece35b..585ce8e52 100644 --- a/vvp/logic.cc +++ b/vvp/logic.cc @@ -43,7 +43,7 @@ table_functor_s::table_functor_s(truth_t t) : table(t) { - count_functors_table += 1; + count_functors_logic += 1; } table_functor_s::~table_functor_s() @@ -125,6 +125,7 @@ void vvp_fun_boolean_::recv_vec4_pv(vvp_net_ptr_t ptr, const vvp_vector4_t&bit, vvp_fun_and::vvp_fun_and(unsigned wid, bool invert) : vvp_fun_boolean_(wid), invert_(invert) { + count_functors_logic += 1; } vvp_fun_and::~vvp_fun_and() @@ -160,6 +161,7 @@ void vvp_fun_and::run_run() vvp_fun_eeq::vvp_fun_eeq(unsigned wid, bool invert) : vvp_fun_boolean_(wid), invert_(invert) { + count_functors_logic += 1; } vvp_fun_eeq::~vvp_fun_eeq() @@ -188,7 +190,7 @@ void vvp_fun_eeq::run_run() vvp_fun_buf::vvp_fun_buf() { net_ = 0; - count_functors_table += 1; + count_functors_logic += 1; } vvp_fun_buf::~vvp_fun_buf() @@ -227,7 +229,7 @@ void vvp_fun_buf::run_run() vvp_fun_bufz::vvp_fun_bufz() { - count_functors_table += 1; + count_functors_logic += 1; } vvp_fun_bufz::~vvp_fun_bufz() @@ -257,7 +259,7 @@ void vvp_fun_bufz::recv_real(vvp_net_ptr_t ptr, double bit) vvp_fun_muxr::vvp_fun_muxr() : a_(0.0), b_(0.0) { - count_functors_table += 1; + count_functors_logic += 1; select_ = 2; } @@ -333,7 +335,7 @@ void vvp_fun_muxr::recv_real(vvp_net_ptr_t ptr, double bit) 
vvp_fun_muxz::vvp_fun_muxz(unsigned wid) : a_(wid), b_(wid) { - count_functors_table += 1; + count_functors_logic += 1; select_ = 2; for (unsigned idx = 0 ; idx < wid ; idx += 1) { a_.set_bit(idx, BIT4_X); @@ -408,7 +410,7 @@ void vvp_fun_muxz::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit) vvp_fun_not::vvp_fun_not() { net_ = 0; - count_functors_table += 1; + count_functors_logic += 1; } vvp_fun_not::~vvp_fun_not() @@ -452,6 +454,7 @@ void vvp_fun_not::run_run() vvp_fun_or::vvp_fun_or(unsigned wid, bool invert) : vvp_fun_boolean_(wid), invert_(invert) { + count_functors_logic += 1; } vvp_fun_or::~vvp_fun_or() @@ -487,6 +490,7 @@ void vvp_fun_or::run_run() vvp_fun_xor::vvp_fun_xor(unsigned wid, bool invert) : vvp_fun_boolean_(wid), invert_(invert) { + count_functors_logic += 1; } vvp_fun_xor::~vvp_fun_xor() diff --git a/vvp/main.cc b/vvp/main.cc index 5bf72a593..cecdd6990 100644 --- a/vvp/main.cc +++ b/vvp/main.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001 Stephen Williams (steve@icarus.com) + * Copyright (c) 2001-2007 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU @@ -16,9 +16,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ifdef HAVE_CVS_IDENT -#ident "$Id: main.cc,v 1.44 2007/02/16 23:30:14 steve Exp $" -#endif # include "config.h" # include "parse_misc.h" @@ -267,10 +264,10 @@ int main(int argc, char*argv[]) if (verbose_flag) { vpi_mcd_printf(1, " ... 
%8lu functors\n", count_functors); - vpi_mcd_printf(1, " %8lu table\n", count_functors_table); + vpi_mcd_printf(1, " %8lu logic\n", count_functors_logic); vpi_mcd_printf(1, " %8lu bufif\n", count_functors_bufif); vpi_mcd_printf(1, " %8lu resolv\n",count_functors_resolv); - vpi_mcd_printf(1, " %8lu variable\n", count_functors_var); + vpi_mcd_printf(1, " %8lu signals\n", count_functors_sig); vpi_mcd_printf(1, " ... %8lu opcodes (%lu bytes)\n", count_opcodes, (unsigned long)size_opcodes); vpi_mcd_printf(1, " ... %8lu nets\n", count_vpi_nets); @@ -306,89 +303,3 @@ int main(int argc, char*argv[]) return 0; } -/* - * $Log: main.cc,v $ - * Revision 1.44 2007/02/16 23:30:14 steve - * Get page size from sysconf. - * - * Revision 1.43 2006/04/28 15:44:37 steve - * Include math.h with lround implementation. - * - * Revision 1.42 2006/04/28 15:40:30 steve - * lround takes double, not float. - * - * Revision 1.41 2006/04/27 05:04:59 steve - * Detect missing lround function. - * - * Revision 1.40 2005/01/29 06:28:19 steve - * Add the -s flag to start up interactive. - * - * Revision 1.39 2004/10/04 01:10:59 steve - * Clean up spurious trailing white space. - * - * Revision 1.38 2003/06/25 04:04:19 steve - * Fix mingw portability problems. - * - * Revision 1.37 2003/06/13 19:51:08 steve - * Include verbose messages in log output. - * - * Revision 1.36 2003/05/15 16:51:09 steve - * Arrange for mcd id=00_00_00_01 to go to stdout - * as well as a user specified log file, set log - * file to buffer lines. - * - * Add vpi_flush function, and clear up some cunfused - * return codes from other vpi functions. - * - * Adjust $display and vcd/lxt messages to use the - * standard output/log file. - * - * Revision 1.35 2003/03/13 04:36:57 steve - * Remove the obsolete functor delete functions. - * - * Revision 1.34 2003/02/07 02:45:05 steve - * Mke getopt ignore options after the file name. - * - * Revision 1.33 2003/01/18 23:55:35 steve - * Add a means to clear the module search path. 
- * - * Revision 1.32 2003/01/06 23:57:26 steve - * Schedule wait lists of threads as a single event, - * to save on events. Also, improve efficiency of - * event_s allocation. Add some event statistics to - * get an idea where performance is really going. - * - * Revision 1.31 2002/09/18 03:34:07 steve - * printf size warning. - * - * Revision 1.30 2002/08/12 01:35:08 steve - * conditional ident string using autoconfig. - * - * Revision 1.29 2002/07/15 00:21:42 steve - * Fix initialization of symbol table string heap. - * - * Revision 1.28 2002/07/05 20:08:44 steve - * Count different types of functors. - * - * Revision 1.27 2002/07/05 17:14:15 steve - * Names of vpi objects allocated as vpip_strings. - * - * Revision 1.26 2002/07/05 03:47:06 steve - * Track opcode memory space. - * - * Revision 1.25 2002/07/05 02:50:58 steve - * Remove the vpi object symbol table after compile. - * - * Revision 1.24 2002/04/12 02:44:02 steve - * Formally define extended arguments to vvp. - * - * Revision 1.23 2002/03/01 05:43:14 steve - * Add cleanup to verbose messages. - * - * Revision 1.22 2002/01/09 03:15:23 steve - * Add vpi_get_vlog_info support. 
- * - * Revision 1.21 2001/10/20 01:03:42 steve - * Print memory usage information if requested (Stephan Boettcher) - */ - diff --git a/vvp/resolv.cc b/vvp/resolv.cc index 5bf36ff28..e375b5c41 100644 --- a/vvp/resolv.cc +++ b/vvp/resolv.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2004 Stephen Williams (steve@icarus.com) + * Copyright (c) 2001-2007 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU @@ -16,9 +16,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ifdef HAVE_CVS_IDENT -#ident "$Id: resolv.cc,v 1.26 2005/06/22 18:30:12 steve Exp $" -#endif # include "resolv.h" # include "schedule.h" @@ -31,6 +28,7 @@ resolv_functor::resolv_functor(vvp_scalar_t hiz_value, const char*debug_l) : hiz_(hiz_value), debug_label_(debug_l) { + count_functors_resolv += 1; } resolv_functor::~resolv_functor() @@ -95,9 +93,3 @@ void resolv_functor::recv_vec8(vvp_net_ptr_t port, vvp_vector8_t bit) vvp_send_vec8(ptr->out, out); } - - -/* - * $Log: resolv.cc,v $ - */ - diff --git a/vvp/statistics.cc b/vvp/statistics.cc index 024599f3c..79bb19d24 100644 --- a/vvp/statistics.cc +++ b/vvp/statistics.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Stephen Williams (steve@icarus.com) + * Copyright (c) 2002-2007 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU @@ -16,9 +16,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ifdef HAVE_CVS_IDENT -#ident "$Id: statistics.cc,v 1.5 2002/08/12 01:35:08 steve Exp $" -#endif # include "statistics.h" @@ -28,10 +25,10 @@ unsigned long count_opcodes = 0; unsigned long count_functors = 0; -unsigned long count_functors_table = 0; +unsigned 
long count_functors_logic = 0; unsigned long count_functors_bufif = 0; unsigned long count_functors_resolv= 0; -unsigned long count_functors_var = 0; +unsigned long count_functors_sig = 0; unsigned long count_vpi_nets = 0; @@ -41,23 +38,3 @@ unsigned long count_vpi_scopes = 0; size_t size_opcodes = 0; - -/* - * $Log: statistics.cc,v $ - * Revision 1.5 2002/08/12 01:35:08 steve - * conditional ident string using autoconfig. - * - * Revision 1.4 2002/07/05 20:08:44 steve - * Count different types of functors. - * - * Revision 1.3 2002/07/05 17:14:15 steve - * Names of vpi objects allocated as vpip_strings. - * - * Revision 1.2 2002/07/05 03:46:43 steve - * Track opcode memory space. - * - * Revision 1.1 2002/07/05 02:50:58 steve - * Remove the vpi object symbol table after compile. - * - */ - diff --git a/vvp/statistics.h b/vvp/statistics.h index 848b06699..1bd6e3bee 100644 --- a/vvp/statistics.h +++ b/vvp/statistics.h @@ -1,7 +1,7 @@ #ifndef __statistics_H #define __statistics_H /* - * Copyright (c) 2002 Stephen Williams (steve@icarus.com) + * Copyright (c) 2002-2007 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU @@ -18,40 +18,19 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ifdef HAVE_CVS_IDENT -#ident "$Id: statistics.h,v 1.5 2002/08/12 01:35:08 steve Exp $" -#endif # include extern unsigned long count_opcodes; extern unsigned long count_functors; -extern unsigned long count_functors_table; +extern unsigned long count_functors_logic; extern unsigned long count_functors_bufif; extern unsigned long count_functors_resolv; -extern unsigned long count_functors_var; +extern unsigned long count_functors_sig; extern unsigned long count_vpi_nets; extern unsigned long count_vpi_scopes; extern unsigned long count_vpi_memories; extern size_t size_opcodes; -/* - 
* $Log: statistics.h,v $ - * Revision 1.5 2002/08/12 01:35:08 steve - * conditional ident string using autoconfig. - * - * Revision 1.4 2002/07/05 20:08:44 steve - * Count different types of functors. - * - * Revision 1.3 2002/07/05 17:14:15 steve - * Names of vpi objects allocated as vpip_strings. - * - * Revision 1.2 2002/07/05 03:46:43 steve - * Track opcode memory space. - * - * Revision 1.1 2002/07/05 02:50:58 steve - * Remove the vpi object symbol table after compile. - * - */ #endif diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index 595f02443..e1eade218 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 Stephen Williams (steve@icarus.com) + * Copyright (c) 2004-2007 Stephen Williams (steve@icarus.com) * * This source code is free software; you can redistribute it * and/or modify it in source code form under the terms of the GNU @@ -16,11 +16,11 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ident "$Id: vvp_net.cc,v 1.64 2007/06/12 02:36:58 steve Exp $" # include "config.h" # include "vvp_net.h" # include "schedule.h" +# include "statistics.h" # include # include # include @@ -1406,6 +1406,7 @@ ostream& operator<<(ostream&out, const vvp_vector8_t&that) vvp_net_fun_t::vvp_net_fun_t() { + count_functors += 1; } vvp_net_fun_t::~vvp_net_fun_t() @@ -1477,6 +1478,7 @@ vvp_fun_signal_base::vvp_fun_signal_base() continuous_assign_active_ = false; force_link = 0; cassign_link = 0; + count_functors_sig += 1; } void vvp_fun_signal_base::deassign() From 68a9526fec3f3dc29dc40a5b79a9a5428efe159b Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sun, 2 Dec 2007 19:00:12 -0800 Subject: [PATCH 2/5] Minor performance tweak of vector_to_array function. 
--- vvp/vthread.cc | 3 ++- vvp/vvp_net.cc | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 878418e17..3fe8e53e6 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -170,16 +170,17 @@ void vthread_put_real(struct vthread_s*thr, unsigned addr, double val) static unsigned long* vector_to_array(struct vthread_s*thr, unsigned addr, unsigned wid) { - unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); if (addr == 0) { + unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = 0; return val; } if (addr == 1) { + unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; for (unsigned idx = 0 ; idx < awid ; idx += 1) val[idx] = -1UL; diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index e1eade218..aed907fe7 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -355,6 +355,9 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const unsigned long tmp = bits_ptr_[adr/BITS_PER_WORD]; tmp >>= 2UL * (adr%BITS_PER_WORD); + unsigned long mask1 = 1; + const unsigned long mask1_last = 1UL << (BIT2_PER_WORD-1); + unsigned long*val_ptr = val; for (unsigned idx = 0 ; idx < wid ; idx += 1) { /* Starting a new word? 
*/ if (adr%BITS_PER_WORD == 0) @@ -363,10 +366,17 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const if (tmp&2) goto x_out; if (tmp&1) - val[idx/BIT2_PER_WORD] |= 1UL << (idx % BIT2_PER_WORD); + *val_ptr |= mask1; adr += 1; tmp >>= 2UL; + + if (mask1 == mask1_last) { + val_ptr += 1; + mask1 = 1; + } else { + mask1 <<= 1; + } } } From 8f519531f30fec420bffc1495318fa091ec68519 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Tue, 4 Dec 2007 19:15:15 -0800 Subject: [PATCH 3/5] Optimize load-add with load/add instruction Where an expression is an immediate value added to a signal value, it is possible to optimize them to a single instruction that combines the load with an add at the same time. --- tgt-vvp/eval_expr.c | 64 ++++++++++++++++++++++++++++++++++++++++++--- vvp/codes.h | 1 + vvp/compile.cc | 1 + vvp/vthread.cc | 37 +++++++++++++++++++++++++- vvp/vvp_net.cc | 59 +++++++++++++++++++++++++++++++++++++++++ vvp/vvp_net.h | 5 ++++ 6 files changed, 163 insertions(+), 4 deletions(-) diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index c9a406e16..e01fbb97d 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -27,6 +27,8 @@ static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest, int ok_flags); +static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, + int add_index); int number_is_unknown(ivl_expr_t ex) { @@ -998,6 +1000,28 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid) return lv; } +static struct vector_info draw_load_add_immediate(ivl_expr_t le, + ivl_expr_t re, + unsigned wid) +{ + struct vector_info lv; + unsigned long imm; + + imm = get_number_immediate(re); + + /* Load the immediate value into word register 0 */ + fprintf(vvp_out, " %%ix/load 0, %lu;\n", imm); + + lv.base = allocate_vector(wid); + lv.wid = wid; + + /* Load the signal value with %loads that add the index + register to the value being loaded. 
*/ + draw_signal_dest(le, lv, 0); + + return lv; +} + static struct vector_info draw_add_immediate(ivl_expr_t le, ivl_expr_t re, unsigned wid) @@ -1098,6 +1122,26 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid) const char*sign_string = ivl_expr_signed(exp)? "/s" : ""; + if ((ivl_expr_opcode(exp) == '+') + && (ivl_expr_type(le) == IVL_EX_SIGNAL) + && (ivl_expr_type(re) == IVL_EX_ULONG)) + return draw_load_add_immediate(le, re, wid); + + if ((ivl_expr_opcode(exp) == '+') + && (ivl_expr_type(le) == IVL_EX_SIGNAL) + && (ivl_expr_type(re) == IVL_EX_NUMBER)) + return draw_load_add_immediate(le, re, wid); + + if ((ivl_expr_opcode(exp) == '+') + && (ivl_expr_type(re) == IVL_EX_SIGNAL) + && (ivl_expr_type(le) == IVL_EX_ULONG)) + return draw_load_add_immediate(re, le, wid); + + if ((ivl_expr_opcode(exp) == '+') + && (ivl_expr_type(re) == IVL_EX_SIGNAL) + && (ivl_expr_type(le) == IVL_EX_NUMBER)) + return draw_load_add_immediate(re, le, wid); + if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(re) == IVL_EX_ULONG)) return draw_add_immediate(le, re, wid); @@ -1663,8 +1707,12 @@ static void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned s * into the thread bits. Remember to account for the part select by * offsetting the read from the lsi (least significant index) of the * signal. + * + * If the add_index is >=0, then generate a %load/vp0 to add the + * word0 value to the loaded value before storing it into the destination. 
*/ -static void draw_signal_dest(ivl_expr_t exp, struct vector_info res) +static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, + int add_index) { unsigned swid = ivl_expr_width(exp); ivl_signal_t sig = ivl_expr_signal(exp); @@ -1679,6 +1727,7 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res) if (ivl_signal_array_count(sig) > 1) { ivl_expr_t ix = ivl_expr_oper1(exp); if (!number_is_immediate(ix, 8*sizeof(unsigned long))) { + assert(add_index < 0); draw_eval_expr_into_integer(ix, 3); fprintf(vvp_out, " %%load/av %u, v%p, %u;\n", res.base, sig, swid); @@ -1694,11 +1743,20 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res) if (ivl_signal_data_type(sig) == IVL_VT_REAL) { + assert(add_index < 0); int tmp = allocate_word(); fprintf(vvp_out, " %%load/wr %d, v%p_%u;\n", tmp, sig, word); fprintf(vvp_out, " %%cvt/vr %u, %d, %u;\n", res.base, tmp, res.wid); clr_word(tmp); + } else if (add_index >= 0) { + + assert(add_index == 0); + + /* If this is a REG (a variable) then I can do a vector read. */ + fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n", + res.base, sig, word, swid); + } else { /* If this is a REG (a variable) then I can do a vector read. 
*/ @@ -1730,7 +1788,7 @@ static struct vector_info draw_signal_expr(ivl_expr_t exp, unsigned wid, res.wid = wid; save_expression_lookaside(res.base, exp, wid); - draw_signal_dest(exp, res); + draw_signal_dest(exp, res, -1); return res; } @@ -2232,7 +2290,7 @@ static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest, switch (ivl_expr_type(exp)) { case IVL_EX_SIGNAL: - draw_signal_dest(exp, dest); + draw_signal_dest(exp, dest, -1); return; default: diff --git a/vvp/codes.h b/vvp/codes.h index c70093494..3e8bd4335 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -95,6 +95,7 @@ extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code); extern bool of_LOAD_MV(vthread_t thr, vvp_code_t code); extern bool of_LOAD_NX(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code); +extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code); extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code); extern bool of_LOAD_X(vthread_t thr, vvp_code_t code); extern bool of_LOAD_XP(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index 71ba609e0..6fae6ded4 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -141,6 +141,7 @@ const static struct opcode_table_s opcode_table[] = { { "%load/mv",of_LOAD_MV,3, {OA_BIT1, OA_MEM_PTR, OA_BIT2} }, { "%load/nx",of_LOAD_NX,3, {OA_BIT1, OA_VPI_PTR, OA_BIT2} }, { "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, + { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} }, { "%load/x", of_LOAD_X, 3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/x.p",of_LOAD_XP, 3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 3fe8e53e6..6bfbbe328 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -2188,13 +2188,14 @@ bool of_LOAD_NX(vthread_t thr, vvp_code_t cp) * The functor to read from is the vvp_net_t object pointed to by the * cp->net pointer. 
*/ -bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) +vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp) { assert(cp->bit_idx[0] >= 4); assert(cp->bit_idx[1] > 0); unsigned bit = cp->bit_idx[0]; unsigned wid = cp->bit_idx[1]; + int64_t addend = thr->words[0].w_int; vvp_net_t*net = cp->net; /* For the %load to work, the functor must actually be a @@ -2209,6 +2210,40 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) vvp_vector4_t sig_value = sig->vec4_value(); sig_value.resize(wid); + return sig_value; +} + +bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) +{ + unsigned bit = cp->bit_idx[0]; + unsigned wid = cp->bit_idx[1]; + + vvp_vector4_t sig_value = load_base(thr, cp); + + /* Check the address once, before we scan the vector. */ + thr_check_addr(thr, bit+wid-1); + + /* Copy the vector bits into the bits4 vector. Do the copy + directly to skip the excess calls to thr_check_addr. */ + thr->bits4.set_vec(bit, sig_value); + + return true; +} + +/* +* This is like of_LOAD_VEC, but includes an add of an integer value. +*/ +bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) +{ + unsigned bit = cp->bit_idx[0]; + unsigned wid = cp->bit_idx[1]; + int64_t addend = thr->words[0].w_int; + + vvp_vector4_t sig_value = load_base(thr, cp); + + /* Add the addend value */ + sig_value += addend; + /* Check the address once, before we scan the vector. 
*/ thr_check_addr(thr, bit+wid-1); diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index aed907fe7..6be770b1b 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -557,6 +557,31 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const return true; } +bool vvp_vector4_t::has_xz() const +{ + if (size_ < BITS_PER_WORD) { + unsigned long mask = WORD_X_BITS >> 2*(BITS_PER_WORD - size_); + return 0 != (bits_val_&mask); + } + + if (size_ == BITS_PER_WORD) { + return 0 != (bits_val_&WORD_X_BITS); + } + + unsigned words = size_ / BITS_PER_WORD; + for (unsigned idx = 0 ; idx < words ; idx += 1) { + if (bits_ptr_[idx] & WORD_X_BITS) + return true; + } + + unsigned long mask = size_%BITS_PER_WORD; + if (mask > 0) { + mask = WORD_X_BITS >> 2*(BITS_PER_WORD - mask); + return 0 != (bits_ptr_[words]&mask); + } + + return false; +} void vvp_vector4_t::change_z2x() { @@ -602,6 +627,40 @@ char* vvp_vector4_t::as_string(char*buf, size_t buf_len) return res; } +/* +* Add an integer to the vvp_vector4_t in place, bit by bit so that +* there are no size limitations. +*/ +vvp_vector4_t& vvp_vector4_t::operator += (int64_t that) +{ + vvp_bit4_t carry = BIT4_0; + unsigned idx; + + if (has_xz()) { + vvp_vector4_t xxx (size(), BIT4_X); + *this = xxx; + return *this; + } + + for (idx = 0 ; idx < size() ; idx += 1) { + if (that == 0 && carry==BIT4_0) + break; + + vvp_bit4_t that_bit = (that&1)? BIT4_1 : BIT4_0; + that >>= 1; + + if (that_bit==BIT4_0 && carry==BIT4_0) + continue; + + vvp_bit4_t bit = value(idx); + bit = add_with_carry(bit, that_bit, carry); + + set_bit(idx, bit); + } + + return *this; +} + ostream& operator<< (ostream&out, const vvp_vector4_t&that) { out << that.size() << "'b"; diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h index 50c272992..6bc809c5b 100644 --- a/vvp/vvp_net.h +++ b/vvp/vvp_net.h @@ -121,12 +121,17 @@ class vvp_vector4_t { // Test that the vectors are exactly equal bool eeq(const vvp_vector4_t&that) const; + // Return true if there is an X or Z anywhere in the vector. 
+ bool has_xz() const; + // Change all Z bits to X bits. void change_z2x(); // Display the value into the buf as a string. char*as_string(char*buf, size_t buf_len); + vvp_vector4_t& operator += (int64_t); + private: // Number of vvp_bit4_t bits that can be shoved into a word. enum { BITS_PER_WORD = 8*sizeof(unsigned long)/2 }; From 3b90a827e5a2a0d99b848b79e4770e0fe44af59c Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Tue, 4 Dec 2007 22:14:26 -0800 Subject: [PATCH 4/5] Fix simple compile warnings. --- vvp/vthread.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 6bfbbe328..abfd580b7 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -170,8 +170,6 @@ void vthread_put_real(struct vthread_s*thr, unsigned addr, double val) static unsigned long* vector_to_array(struct vthread_s*thr, unsigned addr, unsigned wid) { - - if (addr == 0) { unsigned awid = (wid + CPU_WORD_BITS - 1) / (CPU_WORD_BITS); unsigned long*val = new unsigned long[awid]; @@ -431,7 +429,6 @@ bool of_ADD(vthread_t thr, vvp_code_t cp) if (lva == 0 || lvb == 0) goto x_out; - unsigned long carry; carry = 0; for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) { @@ -2193,9 +2190,7 @@ vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp) assert(cp->bit_idx[0] >= 4); assert(cp->bit_idx[1] > 0); - unsigned bit = cp->bit_idx[0]; unsigned wid = cp->bit_idx[1]; - int64_t addend = thr->words[0].w_int; vvp_net_t*net = cp->net; /* For the %load to work, the functor must actually be a From 19e8c057883c4e6f488983e9ad675e44ed1ea25e Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Tue, 4 Dec 2007 22:16:31 -0800 Subject: [PATCH 5/5] Optimize X check in vector subarray Optimize check for X bits while doing vector4 subarray. In particular, do X checks a word at a time so that individual bits need not be tested. 
--- vvp/vvp_net.cc | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index 6be770b1b..cc66e6425 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -353,22 +353,42 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const /* Get the first word we are scanning. We may in fact be somewhere in the middle of that word. */ unsigned long tmp = bits_ptr_[adr/BITS_PER_WORD]; - tmp >>= 2UL * (adr%BITS_PER_WORD); + unsigned long off = adr%BITS_PER_WORD; + tmp >>= 2UL * off; + // Test for X bits but not beyond the desired wid. + unsigned long xmask = WORD_X_BITS; + if (wid < (BITS_PER_WORD-off)) + xmask &= ~(-1UL << 2*wid); + if (tmp & xmask) + goto x_out; + + // Where in the target array to write the next bit. unsigned long mask1 = 1; const unsigned long mask1_last = 1UL << (BIT2_PER_WORD-1); unsigned long*val_ptr = val; + // Track where the source bit is in the source word. + unsigned adr_bit = adr%BITS_PER_WORD; + // Scan... for (unsigned idx = 0 ; idx < wid ; idx += 1) { /* Starting a new word? */ - if (adr%BITS_PER_WORD == 0) + if (adr_bit == BITS_PER_WORD) { tmp = bits_ptr_[adr/BITS_PER_WORD]; + // If this is the last word, then only test + // for X in the valid bits. + xmask = WORD_X_BITS; + if ((wid-idx) < BITS_PER_WORD) + xmask &= ~(WORD_Z_BITS<<2*(wid-idx)); + if (tmp & xmask) + goto x_out; + adr_bit = 0; + } - if (tmp&2) - goto x_out; if (tmp&1) *val_ptr |= mask1; adr += 1; + adr_bit += 1; tmp >>= 2UL; if (mask1 == mask1_last) {