working on NSIMD=2/4/8 options
This commit is contained in:
parent
09d192eed2
commit
606cd59108
106
configure.ac
106
configure.ac
|
|
@ -1267,45 +1267,8 @@ if test "x$enable_modsimd" = xyes; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# do we want to use SLEEF ?
|
||||
if test "x$enable_modsimd" = xyes; then
|
||||
if test "x$with_sleef" = xyes; then
|
||||
if test "x$nsimd" = x4; then
|
||||
AC_CHECK_HEADERS([sleef.h])
|
||||
AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
|
||||
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
|
||||
LIBS="$LIBS -lsleef"
|
||||
have_sleef=yes
|
||||
AC_MSG_RESULT([Use SLEEF])
|
||||
], [have_sleef=no]
|
||||
)
|
||||
fi
|
||||
if test "x$nsimd" = x2; then
|
||||
AC_CHECK_HEADERS([sleef.h])
|
||||
AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
|
||||
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
|
||||
LIBS="$LIBS -lsleef"
|
||||
have_sleef=yes
|
||||
AC_MSG_RESULT([Use SLEEF])
|
||||
], [have_sleef=no]
|
||||
)
|
||||
fi
|
||||
if test "x$nsimd" = x8; then
|
||||
AC_CHECK_HEADERS([sleef.h])
|
||||
AC_CHECK_LIB([sleef], [Sleef_logd8_u35],
|
||||
[AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
|
||||
LIBS="$LIBS -lsleef"
|
||||
have_sleef=yes
|
||||
AC_MSG_RESULT([Use SLEEF])
|
||||
], [have_sleef=no]
|
||||
)
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# otherwise test if we can use libmvec or equivalent
|
||||
# test if we can use libmvec or equivalent
|
||||
if test "x$have_intrinsics" = xyes; then
|
||||
if test "x$have_sleef" != xyes; then
|
||||
AC_MSG_CHECKING([vector math library libmvec])
|
||||
have_libmvec=no
|
||||
if test "x$nsimd" = x4; then
|
||||
|
|
@ -1350,8 +1313,64 @@ if test "x$have_sleef" != xyes; then
|
|||
have_libmvec=no
|
||||
])
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
# do we want to use SLEEF ?
# When have_libmvec is "no" and with_sleef is neither "yes" nor "no",
# switch with_sleef to "try" so that SLEEF is probed without being mandatory.
if test "x$have_libmvec" = xno; then
  if test "x$with_sleef" != xno && test "x$with_sleef" != xyes; then
    with_sleef=try;
  fi
fi

# Probe libsleef for the log entry point that matches the selected vector
# width (nsimd = 2, 4 or 8).  have_sleef ends up "yes" or "no" for those
# widths and is left untouched otherwise.
if test "x$enable_modsimd" = xyes; then
  if test "x$with_sleef" = xyes || test "x$with_sleef" = xtry; then
    if test "x$nsimd" = x4; then
      # Header check added so that this branch matches the nsimd=2 and
      # nsimd=8 branches below.
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd4_u35],
        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
         LIBS="$LIBS -lsleef"
         have_sleef=yes
         AC_MSG_RESULT([Use SLEEF])
        ], [have_sleef=no]
      )
    fi
    if test "x$nsimd" = x2; then
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd2_u35],
        [AC_DEFINE([HAVE_LIBSLEEF], [], [Have SLEEF vector math library])
         LIBS="$LIBS -lsleef"
         have_sleef=yes
         AC_MSG_RESULT([Use SLEEF])
        ], [have_sleef=no]
      )
    fi
    if test "x$nsimd" = x8; then
      AC_CHECK_HEADERS([sleef.h])
      AC_CHECK_LIB([sleef], [Sleef_logd8_u35])
      # For width 8 the verdict comes from the link test below, which
      # sets have_sleef in both outcomes.
      AC_MSG_CHECKING([vector math library sleef with vector width 8])
      AC_LINK_IFELSE([
        AC_LANG_PROGRAM(
          [[#include <sleef.h>]],
          [[__m512d x; x=Sleef_logd8_u35(x);]]
        )] ,[
          AC_MSG_RESULT([yes])
          have_sleef=yes
        ],[
          AC_MSG_RESULT([no])
          have_sleef=no
        ] )
    fi
  fi
fi

# An explicit with_sleef=yes that could not be honoured is a hard error.
if test "x$have_sleef" = xno; then
  if test "x$with_sleef" = xyes; then
    AC_MSG_ERROR([SLEEF requested but can't be used, try without-sleef])
  fi
fi

#disable libmvec if specified to use sleef
if test "x$have_sleef" = xyes; then
  have_libmvec=no
fi
|
||||
|
||||
# adjust compiler flags for MODSIMD
|
||||
|
|
@ -1403,8 +1422,13 @@ if test "x$enable_modsimd" = xyes; then
|
|||
AC_MSG_RESULT([Use libmvec])
|
||||
fi
|
||||
|
||||
if test "x$have_libmvec" = xno; then
|
||||
if test "x$have_sleef" = xno; then
|
||||
if test "x$have_sleef" = xyes; then
|
||||
AC_DEFINE([USE_LIBSLEEF],[1], [use SLEEF for vector math])
|
||||
AC_MSG_RESULT([Use SLEEF])
|
||||
fi
|
||||
|
||||
if test "x$have_libmvec" != xyes; then
|
||||
if test "x$have_sleef" != xyes; then
|
||||
AC_MSG_WARN([No math vector library detected (sleef or libmvec). Rely on compiler's auto-vectorization of math functions.])
|
||||
fi
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -0,0 +1,118 @@
|
|||
/*******************************************************************************
|
||||
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
|
||||
*******************************************************************************
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************************/
|
||||
|
||||
/*
 * x86 intrinsic code paths for the 2-lane double vectors.  They are compiled
 * only when USEX86INTRINSICS evaluates to 1; otherwise vec2_blend is simply
 * another name for vecN_blend.
 */
#if USEX86INTRINSICS == 1

#include <x86intrin.h>

/* Lane-wise maximum and square root, forwarded to the intrinsics. */
#define vec2_sqrt(a) _mm_sqrt_pd(a)
#define vec2_MAX(a,b) _mm_max_pd(a,b)

/*
 * Blend two vectors through _mm_blendv_pd, using the integer mask
 * reinterpreted as a Vec2d for the selector operand.
 * NOTE(review): presumably `fa` supplies the lanes where the mask is false
 * and `tr` the lanes where it is true -- confirm against the callers.
 */
static inline Vec2d vec2_blend(Vec2d fa, Vec2d tr, Vec2m mask)
{
    const Vec2d selector = (Vec2d) mask;

    return _mm_blendv_pd(fa, tr, selector);
}

/* Publish the 2-lane implementations under the width-neutral names. */
#define vecN_MAX vec2_MAX
#define vecN_sqrt vec2_sqrt
#define vecN_blend vec2_blend

#else /* USEX86INTRINSICS != 1 */

#define vec2_blend vecN_blend

#endif
|
||||
|
||||
/*
 * Math backend for the 2-lane double vectors.
 *
 *  - USE_LIBSLEEF: exp/log/fabs/pow expand to SLEEF calls and the vecN_*
 *    names alias them.
 *  - otherwise, with HAS_LIBMVEC: exp/log/pow expand to the _ZGVbN2* vector
 *    entry points declared below, and vec2_fabs aliases vecN_fabs.
 *  - otherwise nothing is defined here.
 */
#ifdef USE_LIBSLEEF

#include <sleef.h>
#define vec2_exp(a) Sleef_expd2_u10(a)
#define vecN_exp vec2_exp
#define vec2_log(a) Sleef_logd2_u35(a)
#define vecN_log vec2_log
/*
 * MAX and sqrt come from SLEEF whenever the intrinsics section did not
 * provide them.  That section is guarded by `#if USEX86INTRINSICS==1`, so the
 * complementary value test is used here rather than `#ifndef`: a build that
 * defines USEX86INTRINSICS to something other than 1 would otherwise end up
 * with neither definition.
 */
#if USEX86INTRINSICS != 1
#define vec2_MAX(a,b) Sleef_fmaxd2(a,b)
#define vecN_MAX vec2_MAX
#define vec2_sqrt(a) Sleef_sqrtd2_u35(a)
#define vecN_sqrt vec2_sqrt
#endif
#define vec2_fabs(a) Sleef_fabsd2(a)
#define vecN_fabs vec2_fabs
/* The exponent b is passed through vec2_SIMDTOVECTOR before the call. */
#define vec2_pow(a,b) Sleef_powd2_u10(a,vec2_SIMDTOVECTOR(b))
#define vecN_pow vec2_pow

#else

#ifdef HAS_LIBMVEC
/* Prototypes of the 2-lane vector math entry points used below. */
Vec2d _ZGVbN2v_exp(Vec2d);
Vec2d _ZGVbN2v_log(Vec2d);
Vec2d _ZGVbN2vv_pow(Vec2d, Vec2d);

#define vec2_exp(a) _ZGVbN2v_exp(a)
#define vecN_exp vec2_exp
#define vec2_log(a) _ZGVbN2v_log(a)
#define vecN_log vec2_log
/*
 * NOTE(review): b is forwarded unchanged here, whereas the SLEEF variant
 * passes it through vec2_SIMDTOVECTOR first -- confirm that callers of this
 * variant hand in a Vec2d exponent.
 */
#define vec2_pow(a,b) _ZGVbN2vv_pow(a,b)
#define vecN_pow vec2_pow
#define vec2_fabs vecN_fabs

#endif /* HAS_LIBMVEC */
#endif /* not USE_LIBSLEEF */
|
||||
|
||||
#ifdef USE_SERIAL_FORM
|
||||
|
||||
#define vec2_SIMDTOVECTOR vecN_SIMDTOVECTOR
|
||||
#define vec2_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
|
||||
#define vec2_StateAccess vecN_StateAccess
|
||||
#define vec2_SIMDCOUNT vecN_SIMDCOUNT
|
||||
|
||||
#else
|
||||
|
||||
static inline Vec2d vec2_SIMDTOVECTOR(double val)
|
||||
{
|
||||
return (Vec2d) {val,val};
|
||||
}
|
||||
static inline Vec2m vec2_SIMDTOVECTORMASK(int val)
|
||||
{
|
||||
return (Vec2m) {val,val};
|
||||
}
|
||||
static inline Vec2d vec2_StateAccess(double* cktstate, Vec2m stateindexes)
|
||||
{
|
||||
return (Vec2d) {
|
||||
cktstate[stateindexes[0]],
|
||||
cktstate[stateindexes[1]]
|
||||
};
|
||||
}
|
||||
static inline int vec2_SIMDCOUNT(Vec2m mask) {
|
||||
return (mask[0] ? 1 : 0) + (mask[1] ? 1 : 0) ;
|
||||
}
|
||||
#define vecN_SIMDTOVECTOR vec2_SIMDTOVECTOR
|
||||
#define vecN_SIMDTOVECTORMASK vec2_SIMDTOVECTORMASK
|
||||
#define vecN_StateAccess vec2_StateAccess
|
||||
#define vecN_SIMDCOUNT vec2_SIMDCOUNT
|
||||
|
||||
#endif
|
||||
|
||||
#define vec2_StateStore vecN_StateStore
|
||||
#define vec2_StateAdd vecN_StateAdd
|
||||
#define vec2_StateSub vecN_StateSub
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
|
|||
#define vec4_blend vecN_blend
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBSLEEF
|
||||
#ifdef USE_LIBSLEEF
|
||||
#include <sleef.h>
|
||||
#define vec4_exp(a) Sleef_expd4_u10(a)
|
||||
#define vecN_exp vec4_exp
|
||||
|
|
@ -76,7 +76,7 @@ Vec4d _ZGVdN4vv_pow(Vec4d, Vec4d);
|
|||
#define vec4_fabs vecN_fabs
|
||||
|
||||
#endif /* HAS_LIBMVEC */
|
||||
#endif /* not HAVE_LIBSLEEF */
|
||||
#endif /* not USE_LIBSLEEF */
|
||||
|
||||
#ifdef USE_SERIAL_FORM
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,109 @@
|
|||
/*******************************************************************************
|
||||
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
|
||||
*******************************************************************************
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************************/
|
||||
|
||||
/*
 * Math backend for the 8-lane double vectors.
 * With USE_LIBSLEEF the vec8_* operations expand to SLEEF calls and the
 * vecN_* names alias them; without it the vec8_* names alias vecN_*.
 */
#if defined(USE_LIBSLEEF)

#include <sleef.h>

#define vec8_exp(a) Sleef_expd8_u10(a)
#define vec8_log(a) Sleef_logd8_u35(a)
#define vec8_fabs(a) Sleef_fabsd8(a)
/* The exponent b is passed through vec8_SIMDTOVECTOR before the call. */
#define vec8_pow(a,b) Sleef_powd8_u10(a,vec8_SIMDTOVECTOR(b))

#define vecN_exp vec8_exp
#define vecN_log vec8_log
#define vecN_fabs vec8_fabs
#define vecN_pow vec8_pow

/*
 * NOTE(review): when USEX86INTRINSICS is defined, nothing in the visible part
 * of this file defines vec8_MAX or vec8_sqrt for the SLEEF configuration --
 * confirm that another header provides them.
 */
#if !defined(USEX86INTRINSICS)
#define vec8_MAX(a,b) Sleef_fmaxd8(a,b)
#define vec8_sqrt(a) Sleef_sqrtd8_u35(a)
#define vecN_MAX vec8_MAX
#define vecN_sqrt vec8_sqrt
#endif

#else /* !USE_LIBSLEEF */

#define vec8_exp vecN_exp
#define vec8_log vecN_log
#define vec8_MAX vecN_MAX
#define vec8_sqrt vecN_sqrt
#define vec8_fabs vecN_fabs
#define vec8_pow vecN_pow

#endif /* USE_LIBSLEEF */
|
||||
|
||||
#ifdef USE_SERIAL_FORM
|
||||
|
||||
#define vec8_SIMDTOVECTOR vecN_SIMDTOVECTOR
|
||||
#define vec8_SIMDTOVECTORMASK vecN_SIMDTOVECTORMASK
|
||||
#define vec8_StateAccess vecN_StateAccess
|
||||
#define vec8_SIMDCOUNT vecN_SIMDCOUNT
|
||||
|
||||
#else
|
||||
|
||||
static inline Vec8d vec8_SIMDTOVECTOR(double val)
|
||||
{
|
||||
return (Vec8d) {val,val,val,val,val,val,val,val};
|
||||
}
|
||||
static inline Vec8m vec8_SIMDTOVECTORMASK(int val)
|
||||
{
|
||||
return (Vec8m) {val,val,val,val,val,val,val,val};
|
||||
}
|
||||
static inline Vec8d vec8_StateAccess(double* cktstate, Vec8m stateindexes)
|
||||
{
|
||||
return (Vec8d) {
|
||||
cktstate[stateindexes[0]],
|
||||
cktstate[stateindexes[1]],
|
||||
cktstate[stateindexes[2]],
|
||||
cktstate[stateindexes[3]],
|
||||
cktstate[stateindexes[4]],
|
||||
cktstate[stateindexes[5]],
|
||||
cktstate[stateindexes[6]],
|
||||
cktstate[stateindexes[7]],
|
||||
};
|
||||
}
|
||||
static inline int vec8_SIMDCOUNT(Vec8m mask) {
|
||||
return ( mask[0] ? 1 : 0)
|
||||
+ (mask[1] ? 1 : 0)
|
||||
+ (mask[2] ? 1 : 0)
|
||||
+ (mask[3] ? 1 : 0)
|
||||
+ (mask[4] ? 1 : 0)
|
||||
+ (mask[5] ? 1 : 0)
|
||||
+ (mask[6] ? 1 : 0)
|
||||
+ (mask[7] ? 1 : 0);
|
||||
}
|
||||
#define vecN_SIMDTOVECTOR vec8_SIMDTOVECTOR
|
||||
#define vecN_SIMDTOVECTORMASK vec8_SIMDTOVECTORMASK
|
||||
#define vecN_StateAccess vec8_StateAccess
|
||||
#define vecN_SIMDCOUNT vec8_SIMDCOUNT
|
||||
|
||||
#endif
|
||||
|
||||
#define vec8_StateStore vecN_StateStore
|
||||
#define vec8_StateAdd vecN_StateAdd
|
||||
#define vec8_StateSub vecN_StateSub
|
||||
|
||||
|
|
@ -39,7 +39,7 @@
|
|||
#define vec4_powMJSWG(x,p) vec4_pow(x,p)
|
||||
|
||||
|
||||
#ifdef HAVE_LIBSLEEF
|
||||
#ifdef USE_LIBSLEEF
|
||||
#include <sleef.h>
|
||||
#define vec4_exp(a) Sleef_expd4_u10(a)
|
||||
#define vec4_log(a) Sleef_logd4_u35(a)
|
||||
|
|
@ -50,7 +50,7 @@
|
|||
#endif
|
||||
|
||||
|
||||
/* HAS_LIBMVEC and/or HAVE_LIBSLEEF defined from configure.ac */
|
||||
/* HAS_LIBMVEC and/or USE_LIBSLEEF defined from configure.ac */
|
||||
|
||||
/* USE_SERIAL_FORM can be defined but has no performance influence */
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_LIBSLEEF
|
||||
#ifndef USE_LIBSLEEF
|
||||
/******* vec4_exp, vec4_log *******/
|
||||
#ifdef HAS_LIBMVEC
|
||||
Vec4d _ZGVdN4v_exp(Vec4d x);
|
||||
|
|
@ -161,7 +161,7 @@ static inline Vec4d vec4_pow(Vec4d x, double p)
|
|||
return vec4_exp(vec4_log(x)*p);
|
||||
}
|
||||
|
||||
#endif /* HAVE_LIBSLEEF */
|
||||
#endif /* USE_LIBSLEEF */
|
||||
|
||||
/******* vec4_SIMDTOVECTOR, vec4_SIMDTOVECTORMASK *******/
|
||||
#ifdef USE_SERIAL_FORM
|
||||
|
|
|
|||
Loading…
Reference in New Issue