diff --git a/configure.ac b/configure.ac
index c06a240f2..84339138f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1267,45 +1267,8 @@ if test "x$enable_modsimd" = xyes; then
   fi
 fi
 
-# do we want to use SLEEF ?
-if test "x$enable_modsimd" = xyes; then
-  if test "x$with_sleef" = xyes; then
-    if test "x$nsimd" = x4; then
-      AC_CHECK_HEADERS([sleef.h])
-      AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
-        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
-        LIBS="$LIBS -lsleef"
-        have_sleef=yes
-        AC_MSG_RESULT([Use SLEEF])
-        ], [have_sleef=no]
-      )
-    fi
-    if test "x$nsimd" = x2; then
-      AC_CHECK_HEADERS([sleef.h])
-      AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
-        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
-        LIBS="$LIBS -lsleef"
-        have_sleef=yes
-        AC_MSG_RESULT([Use SLEEF])
-        ], [have_sleef=no]
-      )
-    fi
-    if test "x$nsimd" = x8; then
-      AC_CHECK_HEADERS([sleef.h])
-      AC_CHECK_LIB([sleef], [Sleef_logd8_u35],
-        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
-        LIBS="$LIBS -lsleef"
-        have_sleef=yes
-        AC_MSG_RESULT([Use SLEEF])
-        ], [have_sleef=no]
-      )
-    fi
-  fi
-fi
-
-# otherwise test if we can use libmvec or equivalent
+# test if we can use libmvec or equivalent
 if test "x$have_intrinsics" = xyes; then
-if test "x$have_sleef" != xyes; then
   AC_MSG_CHECKING([vector math library libmvec])
   have_libmvec=no
   if test "x$nsimd" = x4; then
@@ -1350,8 +1313,64 @@ if test "x$have_sleef" != xyes; then
       have_libmvec=no
     ])
   fi
-
 fi
+
+# do we want to use SLEEF ?
+if test "x$have_libmvec" = xno; then
+if test "x$with_sleef" != xno && test "x$with_sleef" != xyes; then
+  with_sleef=try;
+fi
+fi
+
+if test "x$enable_modsimd" = xyes; then
+  if test "x$with_sleef" = xyes || test "x$with_sleef" = xtry; then
+    if test "x$nsimd" = x4; then
+      AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
+        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
+        LIBS="$LIBS -lsleef"
+        have_sleef=yes
+        AC_MSG_RESULT([Use SLEEF])
+        ], [have_sleef=no]
+      )
+    fi
+    if test "x$nsimd" = x2; then
+      AC_CHECK_HEADERS([sleef.h])
+      AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
+        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
+        LIBS="$LIBS -lsleef"
+        have_sleef=yes
+        AC_MSG_RESULT([Use SLEEF])
+        ], [have_sleef=no]
+      )
+    fi
+    if test "x$nsimd" = x8; then
+      AC_CHECK_HEADERS([sleef.h])
+      AC_CHECK_LIB([sleef], [Sleef_logd8_u35])
+      AC_MSG_CHECKING([vector math library sleef with vector width 8])
+      AC_LINK_IFELSE([
+        AC_LANG_PROGRAM(
+          [[#include <sleef.h>]],
+          [[__m512d x; x=Sleef_logd8_u35(x);]]
+        )] ,[
+        AC_MSG_RESULT([yes])
+        have_sleef=yes
+      ],[
+        AC_MSG_RESULT([no])
+        have_sleef=no
+      ] )
+    fi
+  fi
+fi
+
+if test "x$have_sleef" = xno; then
+if test "x$with_sleef" = xyes; then
+  AC_MSG_ERROR([SLEEF requested but can't be used, try --without-sleef])
+fi
+fi
+
+#disable libmvec if specified to use sleef
+if test "x$have_sleef" = xyes; then
+  have_libmvec=no
 fi
 
 # adjust compiler flags for MODSIMD
@@ -1403,8 +1422,13 @@ if test "x$enable_modsimd" = xyes; then
     AC_MSG_RESULT([Use libmvec])
   fi
 
-  if test "x$have_libmvec" = xno; then
-    if test "x$have_sleef" = xno; then
+  if test "x$have_sleef" = xyes; then
+    AC_DEFINE([USE_LIBSLEEF],[1], [use SLEEF for vector math])
+    AC_MSG_RESULT([Use SLEEF])
+  fi
+
+  if test "x$have_libmvec" != xyes; then
+    if test "x$have_sleef" != xyes; then
       AC_MSG_WARN([No math vector library detected (sleef or libmvec). Rely on compiler's auto-vectorization of math functions.])
     fi
   fi
diff --git a/src/include/ngspice/SIMD/simdop2.h b/src/include/ngspice/SIMD/simdop2.h
new file mode 100644
index 000000000..af450db73
--- /dev/null
+++ b/src/include/ngspice/SIMD/simdop2.h
@@ -0,0 +1,131 @@
+/*******************************************************************************
+ * Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
+ *******************************************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+
+/*
+ * Two-lane (Vec2d / Vec2m) SIMD helpers.
+ * Maps the vec2_* names and their vecN_* aliases onto x86 intrinsics,
+ * SLEEF or libmvec, selected by USEX86INTRINSICS, USE_LIBSLEEF and
+ * HAS_LIBMVEC.  Vec2d and Vec2m are not declared here; the including
+ * file has to provide them first.
+ */
+
+#if USEX86INTRINSICS==1
+#include <immintrin.h>
+#define vec2_MAX(a,b) _mm_max_pd(a,b)
+#define vecN_MAX vec2_MAX
+#define vec2_sqrt(a) _mm_sqrt_pd(a)
+#define vecN_sqrt vec2_sqrt
+/* Per lane: yields tr where the sign bit of mask is set, fa otherwise. */
+static inline Vec2d vec2_blend(Vec2d fa, Vec2d tr, Vec2m mask)
+{
+    return _mm_blendv_pd(fa,tr, (Vec2d) mask);
+}
+#define vecN_blend vec2_blend
+#else
+#define vec2_blend vecN_blend
+#endif
+
+#ifdef USE_LIBSLEEF
+#include <sleef.h>
+#define vec2_exp(a) Sleef_expd2_u10(a)
+#define vecN_exp vec2_exp
+#define vec2_log(a) Sleef_logd2_u35(a)
+#define vecN_log vec2_log
+#if USEX86INTRINSICS!=1 /* exact complement of the intrinsics guard above */
+#define vec2_MAX(a,b) Sleef_fmaxd2(a,b)
+#define vecN_MAX vec2_MAX
+#define vec2_sqrt(a) Sleef_sqrtd2_u35(a)
+#define vecN_sqrt vec2_sqrt
+#endif
+#define vec2_fabs(a) Sleef_fabsd2(a)
+#define vecN_fabs vec2_fabs
+#define vec2_pow(a,b) Sleef_powd2_u10(a,vec2_SIMDTOVECTOR(b))
+#define vecN_pow vec2_pow
+
+#else
+
+#ifdef HAS_LIBMVEC
+Vec2d _ZGVbN2v_exp(Vec2d);
+Vec2d _ZGVbN2v_log(Vec2d);
+Vec2d _ZGVbN2vv_pow(Vec2d, Vec2d);
+
+#define vec2_exp(a) _ZGVbN2v_exp(a)
+#define vecN_exp vec2_exp
+#define vec2_log(a) _ZGVbN2v_log(a)
+#define vecN_log vec2_log
+#define vec2_pow(a,b) _ZGVbN2vv_pow(a,b)
+#define vecN_pow vec2_pow
+#define vec2_fabs vecN_fabs
+
+#endif /* HAS_LIBMVEC */
+#endif /* not USE_LIBSLEEF */
+
+#ifdef USE_SERIAL_FORM
+
+#define vec2_SIMDTOVECTOR vecN_SIMDTOVECTOR
+#define vec2_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
+#define vec2_StateAccess vecN_StateAccess
+#define vec2_SIMDCOUNT vecN_SIMDCOUNT
+
+#else
+
+/* Broadcasts val into both lanes. */
+static inline Vec2d vec2_SIMDTOVECTOR(double val)
+{
+    return (Vec2d) {val,val};
+}
+/* Broadcasts the integer val into both mask lanes. */
+static inline Vec2m vec2_SIMDTOVECTORMASK(int val)
+{
+    return (Vec2m) {val,val};
+}
+/* Gathers cktstate[stateindexes[i]] into lane i. */
+static inline Vec2d vec2_StateAccess(double* cktstate, Vec2m stateindexes)
+{
+    return (Vec2d) {
+        cktstate[stateindexes[0]],
+        cktstate[stateindexes[1]]
+    };
+}
+/* Returns the number of nonzero lanes of mask. */
+static inline int vec2_SIMDCOUNT(Vec2m mask) {
+    return (mask[0] ? 1 : 0) + (mask[1] ? 1 : 0) ;
+}
+#define vecN_SIMDTOVECTOR vec2_SIMDTOVECTOR
+#define vecN_SIMDTOVECTORMASK vec2_SIMDTOVECTORMASK
+#define vecN_StateAccess vec2_StateAccess
+#define vecN_SIMDCOUNT vec2_SIMDCOUNT
+
+#endif
+
+#define vec2_StateStore vecN_StateStore
+#define vec2_StateAdd vecN_StateAdd
+#define vec2_StateSub vecN_StateSub
+
diff --git a/src/include/ngspice/SIMD/simdop4.h b/src/include/ngspice/SIMD/simdop4.h
index e91c91d5f..71234efd4 100644
--- a/src/include/ngspice/SIMD/simdop4.h
+++ b/src/include/ngspice/SIMD/simdop4.h
@@ -43,7 +43,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
 #define vec4_blend vecN_blend
 #endif
 
-#ifdef HAVE_LIBSLEEF
+#ifdef USE_LIBSLEEF
 #include <sleef.h>
 #define vec4_exp(a) Sleef_expd4_u10(a)
 #define vecN_exp vec4_exp
@@ -76,7 +76,7 @@ Vec4d _ZGVdN4vv_pow(Vec4d, Vec4d);
 #define vec4_fabs vecN_fabs
 
 #endif /* HAS_LIBMVEC */
-#endif /* not HAVE_LIBSLEEF */
+#endif /* not USE_LIBSLEEF */
 
 #ifdef USE_SERIAL_FORM
 
diff --git a/src/include/ngspice/SIMD/simdop8.h b/src/include/ngspice/SIMD/simdop8.h
new file mode 100644
index 000000000..da54c57b3
--- /dev/null
+++ b/src/include/ngspice/SIMD/simdop8.h
@@ -0,0 +1,124 @@
+/*******************************************************************************
+ * Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
+ *******************************************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+
+/*
+ * Eight-lane (Vec8d / Vec8m) SIMD helpers.
+ * Maps the vec8_* names and their vecN_* aliases onto SLEEF when
+ * USE_LIBSLEEF is defined; otherwise the math names fall back to vecN_*.
+ * Vec8d and Vec8m are not declared here; the including file has to
+ * provide them first.
+ */
+
+#ifdef USE_LIBSLEEF
+#include <sleef.h>
+#define vec8_exp(a) Sleef_expd8_u10(a)
+#define vecN_exp vec8_exp
+#define vec8_log(a) Sleef_logd8_u35(a)
+#define vecN_log vec8_log
+/* NOTE(review): this header has no intrinsics block, so vec8_MAX and
+ * vec8_sqrt stay undefined here when USEX86INTRINSICS is defined -
+ * confirm they are provided elsewhere. */
+#ifndef USEX86INTRINSICS
+#define vec8_MAX(a,b) Sleef_fmaxd8(a,b)
+#define vecN_MAX vec8_MAX
+#define vec8_sqrt(a) Sleef_sqrtd8_u35(a)
+#define vecN_sqrt vec8_sqrt
+#endif
+#define vec8_fabs(a) Sleef_fabsd8(a)
+#define vecN_fabs vec8_fabs
+#define vec8_pow(a,b) Sleef_powd8_u10(a,vec8_SIMDTOVECTOR(b))
+#define vecN_pow vec8_pow
+
+#else
+
+#define vec8_exp vecN_exp
+#define vec8_log vecN_log
+#define vec8_MAX vecN_MAX
+#define vec8_sqrt vecN_sqrt
+#define vec8_fabs vecN_fabs
+#define vec8_pow vecN_pow
+
+#endif /* USE_LIBSLEEF */
+
+#ifdef USE_SERIAL_FORM
+
+#define vec8_SIMDTOVECTOR vecN_SIMDTOVECTOR
+#define vec8_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
+#define vec8_StateAccess vecN_StateAccess
+#define vec8_SIMDCOUNT vecN_SIMDCOUNT
+
+#else
+
+/* Broadcasts val into all eight lanes. */
+static inline Vec8d vec8_SIMDTOVECTOR(double val)
+{
+    return (Vec8d) {val,val,val,val,val,val,val,val};
+}
+/* Broadcasts the integer val into all eight mask lanes. */
+static inline Vec8m vec8_SIMDTOVECTORMASK(int val)
+{
+    return (Vec8m) {val,val,val,val,val,val,val,val};
+}
+/* Gathers cktstate[stateindexes[i]] into lane i. */
+static inline Vec8d vec8_StateAccess(double* cktstate, Vec8m stateindexes)
+{
+    return (Vec8d) {
+        cktstate[stateindexes[0]],
+        cktstate[stateindexes[1]],
+        cktstate[stateindexes[2]],
+        cktstate[stateindexes[3]],
+        cktstate[stateindexes[4]],
+        cktstate[stateindexes[5]],
+        cktstate[stateindexes[6]],
+        cktstate[stateindexes[7]],
+    };
+}
+/* Returns the number of nonzero lanes of mask. */
+static inline int vec8_SIMDCOUNT(Vec8m mask) {
+    return ( mask[0] ? 1 : 0) +
+        (mask[1] ? 1 : 0) +
+        (mask[2] ? 1 : 0) +
+        (mask[3] ? 1 : 0) +
+        (mask[4] ? 1 : 0) +
+        (mask[5] ? 1 : 0) +
+        (mask[6] ? 1 : 0) +
+        (mask[7] ? 1 : 0);
+}
+#define vecN_SIMDTOVECTOR vec8_SIMDTOVECTOR
+#define vecN_SIMDTOVECTORMASK vec8_SIMDTOVECTORMASK
+#define vecN_StateAccess vec8_StateAccess
+#define vecN_SIMDCOUNT vec8_SIMDCOUNT
+
+#endif
+
+#define vec8_StateStore vecN_StateStore
+#define vec8_StateAdd vecN_StateAdd
+#define vec8_StateSub vecN_StateSub
+
diff --git a/src/spicelib/devices/bsim3v32simd/b3v32ldsimd4d.c b/src/spicelib/devices/bsim3v32simd/b3v32ldsimd4d.c
index 886af5a73..e9e747e4d 100644
--- a/src/spicelib/devices/bsim3v32simd/b3v32ldsimd4d.c
+++ b/src/spicelib/devices/bsim3v32simd/b3v32ldsimd4d.c
@@ -39,7 +39,7 @@
 
 #define vec4_powMJSWG(x,p) vec4_pow(x,p)
 
-#ifdef HAVE_LIBSLEEF
+#ifdef USE_LIBSLEEF
 #include <sleef.h>
 #define vec4_exp(a) Sleef_expd4_u10(a)
 #define vec4_log(a) Sleef_logd4_u35(a)
@@ -50,7 +50,7 @@
 #endif
 
 
-/* HAS_LIBMVEC and/or HAVE_LIBSLEEF defined from configure.ac */
+/* HAS_LIBMVEC and/or USE_LIBSLEEF defined from configure.ac */
 /* USE_SERIAL_FORM can be defined but has no performance influence */
 
 
@@ -73,7 +73,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
 }
 #endif
 
-#ifndef HAVE_LIBSLEEF
+#ifndef USE_LIBSLEEF
 /******* vec4_exp, vec4_log *******/
 #ifdef HAS_LIBMVEC
 Vec4d _ZGVdN4v_exp(Vec4d x);
@@ -161,7 +161,7 @@ static inline Vec4d vec4_pow(Vec4d x, double p)
     return vec4_exp(vec4_log(x)*p);
 }
 
-#endif /* HAVE_LIBSLEEF */
+#endif /* USE_LIBSLEEF */
 
 /******* vec4_SIMDTOVECTOR, vec4_SIMDTOVECTORMASK *******/
 #ifdef USE_SERIAL_FORM