From 2b1393e7b9b80206dbf4f7ee47937062d6d711ae Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Wed, 3 Dec 2014 13:12:06 -0800 Subject: [PATCH] Reduce some vector copies in %load/vec4 and %concat/vec4 instructions. By clever stack manipulations, we can eliminate some vector copies, which can improve performance. --- vvp/vthread.cc | 60 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 11061e2e6..9480a2cfb 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -1983,14 +1983,22 @@ bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp) */ bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t) { - vvp_vector4_t lsb = thr->pop_vec4(); - vvp_vector4_t&msb = thr->peek_vec4(); + const vvp_vector4_t&lsb = thr->peek_vec4(0); + const vvp_vector4_t&msb = thr->peek_vec4(1); - vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X); + // The result size is the sum of the sizes of the top two vectors on the stack. + vvp_vector4_t res (msb.size() + lsb.size(), BIT4_X); + + // Build up the result. res.set_vec(0, lsb); res.set_vec(lsb.size(), msb); - msb = res; + // Rearrange the stack to pop the inputs and push the + // result. Do that by actually popping only 1 stack position + // and replacing the new top with the new value. + thr->pop_vec4(1); + thr->peek_vec4() = res; + return true; } @@ -3546,27 +3554,22 @@ bool of_LOAD_STRA(vthread_t thr, vvp_code_t cp) return true; } -/* %load/v ,