working on NSIMD=2/4/8 options

This commit is contained in:
Florian Ballenegger 2020-08-08 00:22:32 +02:00
parent 09d192eed2
commit 606cd59108
5 changed files with 298 additions and 47 deletions

View File

@ -1267,45 +1267,8 @@ if test "x$enable_modsimd" = xyes; then
fi fi
fi fi
# do we want to use SLEEF ? # test if we can use libmvec or equivalent
if test "x$enable_modsimd" = xyes; then
if test "x$with_sleef" = xyes; then
if test "x$nsimd" = x4; then
AC_CHECK_HEADERS([sleef.h])
AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
LIBS="$LIBS -lsleef"
have_sleef=yes
AC_MSG_RESULT([Use SLEEF])
], [have_sleef=no]
)
fi
if test "x$nsimd" = x2; then
AC_CHECK_HEADERS([sleef.h])
AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
LIBS="$LIBS -lsleef"
have_sleef=yes
AC_MSG_RESULT([Use SLEEF])
], [have_sleef=no]
)
fi
if test "x$nsimd" = x8; then
AC_CHECK_HEADERS([sleef.h])
AC_CHECK_LIB([sleef], [Sleef_logd8_u35],
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
LIBS="$LIBS -lsleef"
have_sleef=yes
AC_MSG_RESULT([Use SLEEF])
], [have_sleef=no]
)
fi
fi
fi
# otherwise test if we can use libmvec or equivalent
if test "x$have_intrinsics" = xyes; then if test "x$have_intrinsics" = xyes; then
if test "x$have_sleef" != xyes; then
AC_MSG_CHECKING([vector math library libmvec]) AC_MSG_CHECKING([vector math library libmvec])
have_libmvec=no have_libmvec=no
if test "x$nsimd" = x4; then if test "x$nsimd" = x4; then
@ -1350,8 +1313,64 @@ if test "x$have_sleef" != xyes; then
have_libmvec=no have_libmvec=no
]) ])
fi fi
fi fi
# do we want to use SLEEF ?
# When libmvec was not detected and the user made no explicit yes or no
# choice for SLEEF, switch to try mode: SLEEF is probed but stays optional.
if test "x$have_libmvec" = xno; then
  if test "x$with_sleef" != xno && test "x$with_sleef" != xyes; then
    with_sleef=try;
  fi
fi
# Probe libsleef for the log routine that matches the selected vector width.
# The header check is run for every width so that all three branches behave
# alike, as they did before the probe was moved here.
if test "x$enable_modsimd" = xyes; then
  if test "x$with_sleef" = xyes || test "x$with_sleef" = xtry; then
    if test "x$nsimd" = x4; then
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
         LIBS="$LIBS -lsleef"
         have_sleef=yes
         AC_MSG_RESULT([Use SLEEF])
        ], [have_sleef=no]
      )
    fi
    if test "x$nsimd" = x2; then
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
         LIBS="$LIBS -lsleef"
         have_sleef=yes
         AC_MSG_RESULT([Use SLEEF])
        ], [have_sleef=no]
      )
    fi
    if test "x$nsimd" = x8; then
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd8_u35])
      # For width 8 the decision is taken by an explicit link test of a
      # program that calls the 8-lane routine on a __m512d value.
      AC_MSG_CHECKING([vector math library sleef with vector width 8])
      AC_LINK_IFELSE([
        AC_LANG_PROGRAM(
          [[#include <sleef.h>]],
          [[__m512d x; x=Sleef_logd8_u35(x);]]
        )] ,[
          AC_MSG_RESULT([yes])
          have_sleef=yes
        ],[
          AC_MSG_RESULT([no])
          have_sleef=no
        ] )
    fi
  fi
fi
# An explicit request for SLEEF that cannot be honoured is fatal; in try
# mode the failure is silent and configure carries on without it.
if test "x$have_sleef" = xno; then
  if test "x$with_sleef" = xyes; then
    AC_MSG_ERROR([SLEEF requested but can't be used, try --without-sleef])
  fi
fi
#disable libmvec if specified to use sleef
if test "x$have_sleef" = xyes; then
have_libmvec=no
fi fi
# adjust compiler flags for MODSIMD # adjust compiler flags for MODSIMD
@ -1403,8 +1422,13 @@ if test "x$enable_modsimd" = xyes; then
AC_MSG_RESULT([Use libmvec]) AC_MSG_RESULT([Use libmvec])
fi fi
if test "x$have_libmvec" = xno; then if test "x$have_sleef" = xyes; then
if test "x$have_sleef" = xno; then AC_DEFINE([USE_LIBSLEEF],[1], [use SLEEF for vector math])
AC_MSG_RESULT([Use SLEEF])
fi
if test "x$have_libmvec" != xyes; then
if test "x$have_sleef" != xyes; then
AC_MSG_WARN([No math vector library detected (sleef or libmvec). Rely on compiler's auto-vectorization of math functions.]) AC_MSG_WARN([No math vector library detected (sleef or libmvec). Rely on compiler's auto-vectorization of math functions.])
fi fi
fi fi

View File

@ -0,0 +1,118 @@
/*******************************************************************************
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
*******************************************************************************
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
 * x86 intrinsic back end for the 2-lane primitives. It is active only when
 * USEX86INTRINSICS evaluates to 1; otherwise vec2_blend is just an alias for
 * vecN_blend, which this header does not define itself.
 */
#if USEX86INTRINSICS==1
#include <x86intrin.h>
#define vec2_MAX(a,b) _mm_max_pd(a,b)
#define vecN_MAX vec2_MAX
#define vec2_sqrt(a) _mm_sqrt_pd(a)
#define vecN_sqrt vec2_sqrt
/*
 * Per-lane select between fa and tr, steered by mask.
 *
 * The mask is cast to Vec2d and handed to _mm_blendv_pd as its selector
 * operand. Per the Intel intrinsics reference, that instruction takes a
 * lane from tr when the selector lane has its sign bit set and from fa
 * otherwise.
 */
static inline Vec2d vec2_blend(Vec2d fa, Vec2d tr, Vec2m mask)
{
    const Vec2d selector = (Vec2d) mask;

    return _mm_blendv_pd(fa, tr, selector);
}
#define vecN_blend vec2_blend
#else
#define vec2_blend vecN_blend
#endif
/*
 * Vector math back end for the 2-lane build: SLEEF when USE_LIBSLEEF is
 * defined, otherwise libmvec when HAS_LIBMVEC is defined. When neither is
 * defined this section defines nothing.
 */
#ifdef USE_LIBSLEEF
#include <sleef.h>
#define vec2_exp(a) Sleef_expd2_u10(a)
#define vecN_exp vec2_exp
#define vec2_log(a) Sleef_logd2_u35(a)
#define vecN_log vec2_log
/*
 * MAX and sqrt come from SLEEF only when USEX86INTRINSICS is not defined.
 * NOTE(review): the intrinsics section earlier in this header tests
 * USEX86INTRINSICS==1, so a build that defines the macro to 0 would get
 * neither definition - confirm configure never does that.
 */
#ifndef USEX86INTRINSICS
#define vec2_MAX(a,b) Sleef_fmaxd2(a,b)
#define vecN_MAX vec2_MAX
#define vec2_sqrt(a) Sleef_sqrtd2_u35(a)
#define vecN_sqrt vec2_sqrt
#endif
#define vec2_fabs(a) Sleef_fabsd2(a)
#define vecN_fabs vec2_fabs
/* The exponent b is broadcast with vec2_SIMDTOVECTOR before the call. */
#define vec2_pow(a,b) Sleef_powd2_u10(a,vec2_SIMDTOVECTOR(b))
#define vecN_pow vec2_pow
#else
#ifdef HAS_LIBMVEC
/*
 * Local prototypes for the 2-lane exp, log and pow entry points used below
 * (presumably the glibc libmvec vector-ABI symbols - TODO confirm).
 */
Vec2d _ZGVbN2v_exp(Vec2d);
Vec2d _ZGVbN2v_log(Vec2d);
Vec2d _ZGVbN2vv_pow(Vec2d, Vec2d);
#define vec2_exp(a) _ZGVbN2v_exp(a)
#define vecN_exp vec2_exp
#define vec2_log(a) _ZGVbN2v_log(a)
#define vecN_log vec2_log
/*
 * NOTE(review): unlike the SLEEF variant above, b is passed through without
 * vec2_SIMDTOVECTOR although the prototype takes a Vec2d - confirm what
 * callers pass as the exponent.
 */
#define vec2_pow(a,b) _ZGVbN2vv_pow(a,b)
#define vecN_pow vec2_pow
/* fabs has no libmvec mapping here; the 2-lane name aliases vecN_fabs. */
#define vec2_fabs vecN_fabs
#endif /* HAS_LIBMVEC */
#endif /* not USE_LIBSLEEF */
/*
 * Lane helpers. With USE_SERIAL_FORM the vec2_ names are plain aliases for
 * the vecN_ names, which are not defined in this header (presumably supplied
 * by the including file - TODO confirm). Otherwise the 2-lane versions are
 * defined right below and exported under the vecN_ names.
 */
#ifdef USE_SERIAL_FORM
#define vec2_SIMDTOVECTOR vecN_SIMDTOVECTOR
#define vec2_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
#define vec2_StateAccess vecN_StateAccess
#define vec2_SIMDCOUNT vecN_SIMDCOUNT
#else
/* Broadcast the scalar val into both lanes of a Vec2d. */
static inline Vec2d vec2_SIMDTOVECTOR(double val)
{
    const Vec2d lanes = { val, val };

    return lanes;
}
/* Broadcast the integer val into both lanes of a Vec2m. */
static inline Vec2m vec2_SIMDTOVECTORMASK(int val)
{
    const Vec2m lanes = { val, val };

    return lanes;
}
/*
 * Gather two doubles from cktstate.
 *
 * Lane i of the result is cktstate[stateindexes[i]]. The indexes are used
 * as given; no range check is performed here.
 */
static inline Vec2d vec2_StateAccess(double* cktstate, Vec2m stateindexes)
{
    const double first = cktstate[stateindexes[0]];
    const double second = cktstate[stateindexes[1]];
    const Vec2d gathered = { first, second };

    return gathered;
}
/* Count the lanes of mask that are non-zero (result is 0, 1 or 2). */
static inline int vec2_SIMDCOUNT(Vec2m mask) {
    int active = 0;

    if (mask[0]) {
        active++;
    }
    if (mask[1]) {
        active++;
    }
    return active;
}
/* Export the 2-lane helpers under the width-neutral vecN_ names. */
#define vecN_SIMDTOVECTOR vec2_SIMDTOVECTOR
#define vecN_SIMDTOVECTORMASK vec2_SIMDTOVECTORMASK
#define vecN_StateAccess vec2_StateAccess
#define vecN_SIMDCOUNT vec2_SIMDCOUNT
#endif
/*
 * State store/add/sub have no 2-lane specialisation in this header; the
 * vec2_ names always alias the vecN_ versions.
 */
#define vec2_StateStore vecN_StateStore
#define vec2_StateAdd vecN_StateAdd
#define vec2_StateSub vecN_StateSub

View File

@ -43,7 +43,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
#define vec4_blend vecN_blend #define vec4_blend vecN_blend
#endif #endif
#ifdef HAVE_LIBSLEEF #ifdef USE_LIBSLEEF
#include <sleef.h> #include <sleef.h>
#define vec4_exp(a) Sleef_expd4_u10(a) #define vec4_exp(a) Sleef_expd4_u10(a)
#define vecN_exp vec4_exp #define vecN_exp vec4_exp
@ -76,7 +76,7 @@ Vec4d _ZGVdN4vv_pow(Vec4d, Vec4d);
#define vec4_fabs vecN_fabs #define vec4_fabs vecN_fabs
#endif /* HAS_LIBMVEC */ #endif /* HAS_LIBMVEC */
#endif /* not HAVE_LIBSLEEF */ #endif /* not USE_LIBSLEEF */
#ifdef USE_SERIAL_FORM #ifdef USE_SERIAL_FORM

View File

@ -0,0 +1,109 @@
/*******************************************************************************
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
*******************************************************************************
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
 * Vector math back end for the 8-lane build: SLEEF when USE_LIBSLEEF is
 * defined, otherwise every vec8_ math name aliases the matching vecN_ name.
 */
#ifdef USE_LIBSLEEF
#include <sleef.h>
#define vec8_exp(a) Sleef_expd8_u10(a)
#define vecN_exp vec8_exp
#define vec8_log(a) Sleef_logd8_u35(a)
#define vecN_log vec8_log
/*
 * NOTE(review): when USE_LIBSLEEF and USEX86INTRINSICS are both defined,
 * the visible part of this header defines neither vec8_MAX nor vec8_sqrt -
 * confirm they are supplied elsewhere.
 */
#ifndef USEX86INTRINSICS
#define vec8_MAX(a,b) Sleef_fmaxd8(a,b)
#define vecN_MAX vec8_MAX
#define vec8_sqrt(a) Sleef_sqrtd8_u35(a)
#define vecN_sqrt vec8_sqrt
#endif
#define vec8_fabs(a) Sleef_fabsd8(a)
#define vecN_fabs vec8_fabs
/* The exponent b is broadcast with vec8_SIMDTOVECTOR before the call. */
#define vec8_pow(a,b) Sleef_powd8_u10(a,vec8_SIMDTOVECTOR(b))
#define vecN_pow vec8_pow
#else
/* No SLEEF: fall back to whatever the vecN_ names resolve to. */
#define vec8_exp vecN_exp
#define vec8_log vecN_log
#define vec8_MAX vecN_MAX
#define vec8_sqrt vecN_sqrt
#define vec8_fabs vecN_fabs
#define vec8_pow vecN_pow
#endif /* USE_LIBSLEEF */
/*
 * Lane helpers. With USE_SERIAL_FORM the vec8_ names are plain aliases for
 * the vecN_ names, which are not defined in this header (presumably supplied
 * by the including file - TODO confirm). Otherwise the 8-lane versions are
 * defined right below and exported under the vecN_ names.
 */
#ifdef USE_SERIAL_FORM
#define vec8_SIMDTOVECTOR vecN_SIMDTOVECTOR
#define vec8_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
#define vec8_StateAccess vecN_StateAccess
#define vec8_SIMDCOUNT vecN_SIMDCOUNT
#else
/* Broadcast the scalar val into all eight lanes of a Vec8d. */
static inline Vec8d vec8_SIMDTOVECTOR(double val)
{
    const Vec8d lanes = {
        val, val, val, val,
        val, val, val, val
    };

    return lanes;
}
/* Broadcast the integer val into all eight lanes of a Vec8m. */
static inline Vec8m vec8_SIMDTOVECTORMASK(int val)
{
    const Vec8m lanes = {
        val, val, val, val,
        val, val, val, val
    };

    return lanes;
}
/*
 * Gather eight doubles from cktstate.
 *
 * Lane i of the result is cktstate[stateindexes[i]]. The indexes are used
 * as given; no range check is performed here.
 */
static inline Vec8d vec8_StateAccess(double* cktstate, Vec8m stateindexes)
{
    const Vec8d gathered = {
        cktstate[stateindexes[0]], cktstate[stateindexes[1]],
        cktstate[stateindexes[2]], cktstate[stateindexes[3]],
        cktstate[stateindexes[4]], cktstate[stateindexes[5]],
        cktstate[stateindexes[6]], cktstate[stateindexes[7]]
    };

    return gathered;
}
/* Count the lanes of mask that are non-zero (result is in 0..8). */
static inline int vec8_SIMDCOUNT(Vec8m mask) {
    int active = 0;
    int lane;

    for (lane = 0; lane < 8; lane++) {
        if (mask[lane]) {
            active++;
        }
    }
    return active;
}
/* Export the 8-lane helpers under the width-neutral vecN_ names. */
#define vecN_SIMDTOVECTOR vec8_SIMDTOVECTOR
#define vecN_SIMDTOVECTORMASK vec8_SIMDTOVECTORMASK
#define vecN_StateAccess vec8_StateAccess
#define vecN_SIMDCOUNT vec8_SIMDCOUNT
#endif
/*
 * State store/add/sub have no 8-lane specialisation in this header; the
 * vec8_ names always alias the vecN_ versions.
 */
#define vec8_StateStore vecN_StateStore
#define vec8_StateAdd vecN_StateAdd
#define vec8_StateSub vecN_StateSub

View File

@ -39,7 +39,7 @@
#define vec4_powMJSWG(x,p) vec4_pow(x,p) #define vec4_powMJSWG(x,p) vec4_pow(x,p)
#ifdef HAVE_LIBSLEEF #ifdef USE_LIBSLEEF
#include <sleef.h> #include <sleef.h>
#define vec4_exp(a) Sleef_expd4_u10(a) #define vec4_exp(a) Sleef_expd4_u10(a)
#define vec4_log(a) Sleef_logd4_u35(a) #define vec4_log(a) Sleef_logd4_u35(a)
@ -50,7 +50,7 @@
#endif #endif
/* HAS_LIBMVEC and/or HAVE_LIBSLEEF defined from configure.ac */ /* HAS_LIBMVEC and/or USE_LIBSLEEF defined from configure.ac */
/* USE_SERIAL_FORM can be defined but has no performance influence */ /* USE_SERIAL_FORM can be defined but has no performance influence */
@ -73,7 +73,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
} }
#endif #endif
#ifndef HAVE_LIBSLEEF #ifndef USE_LIBSLEEF
/******* vec4_exp, vec4_log *******/ /******* vec4_exp, vec4_log *******/
#ifdef HAS_LIBMVEC #ifdef HAS_LIBMVEC
Vec4d _ZGVdN4v_exp(Vec4d x); Vec4d _ZGVdN4v_exp(Vec4d x);
@ -161,7 +161,7 @@ static inline Vec4d vec4_pow(Vec4d x, double p)
return vec4_exp(vec4_log(x)*p); return vec4_exp(vec4_log(x)*p);
} }
#endif /* HAVE_LIBSLEEF */ #endif /* USE_LIBSLEEF */
/******* vec4_SIMDTOVECTOR, vec4_SIMDTOVECTORMASK *******/ /******* vec4_SIMDTOVECTOR, vec4_SIMDTOVECTORMASK *******/
#ifdef USE_SERIAL_FORM #ifdef USE_SERIAL_FORM