enable simd

This commit is contained in:
Holger Vogt 2020-07-12 10:24:26 +02:00
parent 0d34bb52cc
commit f12d2a3f0d
13 changed files with 25205 additions and 2 deletions

View File

@ -60,7 +60,7 @@ else
echo "configuring for 64 bit release"
echo
# You may add --enable-adms to the following command for adding adms generated devices
../configure --with-wingui --enable-xspice --enable-cider --enable-openmp --disable-debug prefix="C:/Spice64" CFLAGS="-m64 -O3 -march=native " LDFLAGS="-m64 -s"
../configure --with-wingui --enable-xspice --enable-cider --enable-openmp --disable-debug prefix="C:/Spice64" CFLAGS="-m64 -O3 -mtune=native " LDFLAGS="-m64 -s"
fi
if [ $? -ne 0 ]; then echo "../configure failed"; exit 1 ; fi

View File

@ -0,0 +1,104 @@
/*
 * Compute the drain and source junction saturation currents of one
 * BSIM3v32 instance by forwarding the model's ACM settings and the
 * instance geometry to the generic ACM_saturationCurrents() routine.
 *
 *   model            - model whose ACM parameters are used
 *   here             - instance supplying width and area/perimeter data
 *   DrainSatCurrent  - passed through as the drain-side output pointer
 *   SourceSatCurrent - passed through as the source-side output pointer
 *
 * Returns the status reported by ACM_saturationCurrents().
 */
inline int BSIM3v32_ACM_saturationCurrents(BSIM3v32model *model,
                                           BSIM3v32instance *here,
                                           double *DrainSatCurrent,
                                           double *SourceSatCurrent)
{
    const int status = ACM_saturationCurrents(
        /* ACM selection and common geometry */
        model->BSIM3v32acmMod,
        model->BSIM3v32calcacm,
        here->BSIM3v32geo,
        model->BSIM3v32hdif,
        model->BSIM3v32wmlt,
        here->BSIM3v32w,
        model->BSIM3v32xw,
        /* saturation current densities taken from the model */
        model->BSIM3v32jctTempSatCurDensity,
        model->BSIM3v32jctSidewallTempSatCurDensity,
        /* drain side */
        here->BSIM3v32drainAreaGiven,
        here->BSIM3v32drainArea,
        here->BSIM3v32drainPerimeterGiven,
        here->BSIM3v32drainPerimeter,
        /* source side */
        here->BSIM3v32sourceAreaGiven,
        here->BSIM3v32sourceArea,
        here->BSIM3v32sourcePerimeterGiven,
        here->BSIM3v32sourcePerimeter,
        /* outputs */
        DrainSatCurrent,
        SourceSatCurrent);

    return status;
}
/*
 * Compute the area, perimeter and gate-side junction capacitances of one
 * BSIM3v32 instance through the generic ACM_junctionCapacitances() routine.
 *
 * Model versions BSIM3v32V324 and BSIM3v32V323 pass the "Temp" unit
 * capacitances (unitAreaTempJctCap, ...); every other version, including
 * BSIM3v32V322 and BSIM3v32V32, passes the plain ones (unitAreaJctCap, ...).
 *
 *   model - model supplying the ACM parameters and unit capacitances
 *   here  - instance supplying width and area/perimeter data
 *   the six double pointers are forwarded unchanged as output locations
 *
 * Returns the status reported by ACM_junctionCapacitances().
 */
inline int BSIM3v32_ACM_junctionCapacitances(BSIM3v32model *model,
                                             BSIM3v32instance *here,
                                             double *areaDrainBulkCapacitance,
                                             double *periDrainBulkCapacitance,
                                             double *gateDrainBulkCapacitance,
                                             double *areaSourceBulkCapacitance,
                                             double *periSourceBulkCapacitance,
                                             double *gateSourceBulkCapacitance)
{
    /* Only the three unit capacitances differ between model versions. */
    const int useTempCaps =
        (model->BSIM3v32intVersion == BSIM3v32V324) ||
        (model->BSIM3v32intVersion == BSIM3v32V323);

    return ACM_junctionCapacitances(
        model->BSIM3v32acmMod,
        model->BSIM3v32calcacm,
        here->BSIM3v32geo,
        model->BSIM3v32hdif,
        model->BSIM3v32wmlt,
        here->BSIM3v32w,
        model->BSIM3v32xw,
        here->BSIM3v32drainAreaGiven,
        here->BSIM3v32drainArea,
        here->BSIM3v32drainPerimeterGiven,
        here->BSIM3v32drainPerimeter,
        here->BSIM3v32sourceAreaGiven,
        here->BSIM3v32sourceArea,
        here->BSIM3v32sourcePerimeterGiven,
        here->BSIM3v32sourcePerimeter,
        useTempCaps ? model->BSIM3v32unitAreaTempJctCap
                    : model->BSIM3v32unitAreaJctCap,
        useTempCaps ? model->BSIM3v32unitLengthSidewallTempJctCap
                    : model->BSIM3v32unitLengthSidewallJctCap,
        useTempCaps ? model->BSIM3v32unitLengthGateSidewallTempJctCap
                    : model->BSIM3v32unitLengthGateSidewallJctCap,
        areaDrainBulkCapacitance,
        periDrainBulkCapacitance,
        gateDrainBulkCapacitance,
        areaSourceBulkCapacitance,
        periSourceBulkCapacitance,
        gateSourceBulkCapacitance);
}

View File

@ -0,0 +1,25 @@
#ifndef BSIM3v32ACM_H
#define BSIM3v32ACM_H
/*
 * Prototypes of the BSIM3v32 ACM helper wrappers.
 * NOTE: this header uses BSIM3v32model and BSIM3v32instance but does not
 * include their declarations itself; include the BSIM3v32 definitions first.
 */

/*
 * Junction saturation currents of one instance.
 * The results are written through DrainSatCurrent and SourceSatCurrent;
 * the return value is an int status code.
 */
int BSIM3v32_ACM_saturationCurrents
(
BSIM3v32model *model,
BSIM3v32instance *here,
double *DrainSatCurrent,
double *SourceSatCurrent
);
/*
 * Junction capacitances of one instance.
 * The six double pointers receive the area, perimeter and gate-side
 * capacitances for the drain-bulk and source-bulk junctions;
 * the return value is an int status code.
 */
int BSIM3v32_ACM_junctionCapacitances
(
BSIM3v32model *model,
BSIM3v32instance *here,
double *areaDrainBulkCapacitance,
double *periDrainBulkCapacitance,
double *gateDrainBulkCapacitance,
double *areaSourceBulkCapacitance,
double *periSourceBulkCapacitance,
double *gateSourceBulkCapacitance
);
#endif

View File

@ -0,0 +1,262 @@
/*******************************************************************************
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
*******************************************************************************
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
#include "ngspice/ngspice.h"
#include "bsim3v32def.h"
/* NSIMD = number of instances evaluated in parallel by one SIMD call (=4 for AVX2) */
/* NDATASIMD = number of rows of the per-batch scratch array data[NDATASIMD][NSIMD] */
#define NDATASIMD 7
/* DEBUG: set to non-zero to enable the if(DEBUG) printf traces in this file */
#define DEBUG 0
#ifndef USE_OMP
/* Without OpenMP: the sequential pass hands per-instance values to the SIMD
 * pass through `data` (one column per instance, rows `stride` doubles apart).
 * NOTE(review): the literal 7 below must stay equal to NDATASIMD. */
extern int BSIM3v32LoadSeq(BSIM3v32instance *here, CKTcircuit *ckt, double* data, int stride);
extern int BSIM3v32LoadSIMD(BSIM3v32instance **heres, CKTcircuit *ckt, double data[7][NSIMD]);
#else
/* With OpenMP: BSIM3v32LoadSeq takes an int mode (the callers in this file
 * pass 0, 1 or 2 - meaning defined by the implementation, not visible here)
 * and BSIM3v32LoadRhsMat is called once after all instances were evaluated. */
extern void BSIM3v32LoadRhsMat(GENmodel *inModel, CKTcircuit *ckt);
extern int BSIM3v32LoadSeq(BSIM3v32instance *here, CKTcircuit *ckt, int);
extern int BSIM3v32LoadSIMD(BSIM3v32instance **heres, CKTcircuit *ckt);
#endif
#ifndef USE_OMP
/*
 * Load all BSIM3v32 instances (build without OpenMP).
 *
 * Instances are visited group by group (a group holds instances of the same
 * model, same pParam, same nqsMode, same geo and same off).  Each instance
 * is first passed to BSIM3v32LoadSeq() together with one column of the
 * scratch array; data[0][column] is preset to NAN and an instance only
 * joins the current batch if that cell is no longer NAN afterwards.  A
 * full batch of NSIMD instances is evaluated by BSIM3v32LoadSIMD(); an
 * incomplete batch and the group's tail are evaluated one by one with
 * BSIM3v32LoadSeq(..., NULL, 0).
 *
 * Returns 0 on success or the first non-zero code reported by a loader.
 */
int
BSIM3v32loadSel (GENmodel *inModel, CKTcircuit *ckt)
{
    double data[NDATASIMD][NSIMD];
    BSIM3v32instance *heres[NSIMD];
    BSIM3v32model *model;
    BSIM3v32group *group;

    for (model = (BSIM3v32model*)inModel; model != NULL; model = BSIM3v32nextModel(model)) {
        for (group = model->groupHead; group != NULL; group = group->next) {
            int idx = 0;

            /* batches are only started while a full one is still possible */
            while (idx + NSIMD <= group->InstCount) {
                int count = 0;
                int rc;

                while (count < NSIMD && idx < group->InstCount) {
                    BSIM3v32instance *inst = group->InstArray[idx++];

                    data[0][count] = NAN;
                    heres[count] = inst;
                    rc = BSIM3v32LoadSeq(inst, ckt, &data[0][count], NSIMD);
                    if (rc)
                        return rc;
                    /* cell still NAN: the instance does not join the batch */
                    if (!isnan(data[0][count]))
                        count++;
                }

                if (count == NSIMD) {
                    /* process NSIMD instances at once */
                    rc = BSIM3v32LoadSIMD(heres, ckt, data);
                    if (rc)
                        return rc;
                } else {
                    /* the group ran out before the batch was full */
                    for (int i = 0; i < count; i++) {
                        rc = BSIM3v32LoadSeq(heres[i], ckt, NULL, 0);
                        if (rc)
                            return rc;
                    }
                }
            }

            /* remaining instances are evaluated sequentially */
            while (idx < group->InstCount) {
                int rc = BSIM3v32LoadSeq(group->InstArray[idx], ckt, NULL, 0);
                if (rc)
                    return rc;
                idx++;
            }
        }
    }
    return 0; /* no error */
}
#endif
#ifdef USE_OMP
/*
 * Load all BSIM3v32 instances (OpenMP build).
 *
 * This version runs in parallel only inside a group: every OpenMP iteration
 * handles one batch of NSIMD consecutive instances.  Each instance of the
 * batch gets BSIM3v32SIMDCheck = -1 and is passed to BSIM3v32LoadSeq(.., 1);
 * if at least one instance no longer has BSIM3v32SIMDCheck == -1 afterwards,
 * the whole batch is evaluated by BSIM3v32LoadSIMD(), otherwise it is skipped.
 * The last InstCount % NSIMD instances of a group are evaluated serially with
 * BSIM3v32LoadSeq(.., 2).  BSIM3v32LoadRhsMat() is called once at the end.
 *
 * Returns 0, or one of the non-zero codes reported by the loaders (when
 * several instances fail it is unspecified which code is reported).
 *
 * The shared `error` variable is only written inside a critical section
 * while in the parallel region; unsynchronised writes from several threads
 * are a data race.
 */
int
BSIM3v32loadSel (GENmodel *inModel, CKTcircuit *ckt)
{
    BSIM3v32group *group;
    BSIM3v32model *model = (BSIM3v32model*)inModel;
    int error = 0;
    int idx = 0;

    for (; model != NULL; model = BSIM3v32nextModel(model))
        for (group = model->groupHead; group != NULL; group = group->next)
        {
#pragma omp parallel for
            for (idx = 0; idx <= group->InstCount-NSIMD; idx += NSIMD)
            {
                int local_error;
                int i;
                int needeval = 0;

                for (i = 0; i < NSIMD; i++)
                {
                    group->InstArray[idx+i]->BSIM3v32SIMDCheck = -1;
                    local_error = BSIM3v32LoadSeq(group->InstArray[idx+i], ckt, 1);
                    if (local_error)
                    {
#pragma omp critical (BSIM3v32loadSel_error)
                        error = local_error;
                    }
                    if (group->InstArray[idx+i]->BSIM3v32SIMDCheck != -1)
                        needeval = 1;
                }
                if (!needeval)
                    continue; /* all NSIMD instances are bypassed */
                local_error = BSIM3v32LoadSIMD(&group->InstArray[idx], ckt);
                if (local_error)
                {
#pragma omp critical (BSIM3v32loadSel_error)
                    error = local_error;
                }
            }

            /* idx is private inside the parallel loop, so its value after
               the loop cannot be relied upon: recompute the tail start */
            idx = NSIMD*(group->InstCount/NSIMD);
            for (; idx < group->InstCount; idx++) {
                int local_error = BSIM3v32LoadSeq(group->InstArray[idx], ckt, 2);
                if (local_error) error = local_error;
            }
        }

    BSIM3v32LoadRhsMat(inModel, ckt);
    return error;
}
#if 0
/*
 * Alternative OpenMP loader, currently compiled out by the surrounding #if 0.
 *
 * Phases:
 *   1. pre-load every instance of the first model's BSIM3v32InstanceArray in
 *      parallel with BSIM3v32LoadSeq(.., 1);
 *   2. per group, move instances whose BSIM3v32SIMDCheck is still -1 to the
 *      end of InstArray and record EvalCount / SimdCount;
 *   3. evaluate all SIMD batches of all groups in one parallel loop;
 *   4. evaluate the remaining instances in one parallel loop;
 *   5. call BSIM3v32LoadRhsMat() once.
 *
 * NOTE(review): `error` is written from inside the parallel loops without
 * synchronisation - fix before enabling this code.
 */
int
BSIM3v32loadSelVrai (GENmodel *inModel, CKTcircuit *ckt)
{
/*
This version runs the omp parallel loops over most instances of all models combined
*/
BSIM3v32group *group;
BSIM3v32model *model = (BSIM3v32model*)inModel;
int idx;
int error = 0;
int nsimd,nsisd;
/* pre load all instances */
if(DEBUG) printf("loadomp %d\n",model->BSIM3v32InstCount);
#pragma omp parallel for
for (idx = 0; idx < model->BSIM3v32InstCount; idx++) {
BSIM3v32instance *here = model->BSIM3v32InstanceArray[idx];
if(DEBUG) printf("loadomp preload seq %d\n",idx);
here->BSIM3v32SIMDCheck=-1;
int local_error = BSIM3v32LoadSeq(here,ckt,1);
if (local_error) error=local_error;
}
if (error) printf("load error\n");
if (error) return error;
/* sort instances to run in SIMD */
nsimd=0; /* total number of SIMD batches over all groups */
nsisd=0; /* total number of instances left for one-by-one evaluation */
for (model = (BSIM3v32model*)inModel; model != NULL; model = BSIM3v32nextModel(model))
for (group=model->groupHead; group!=NULL; group=group->next)
{
int rev=group->InstCount;
group->SimdCount = 0;
/* NOTE(review): the loop runs up to InstCount rather than rev, and the
   instance swapped into position idx is not re-checked - confirm that the
   partition is correct before enabling this code */
for(idx=0;idx<group->InstCount;idx++)
{
BSIM3v32instance *here = group->InstArray[idx];
if(here->BSIM3v32SIMDCheck==-1)
{
/* bypassed, swap current inst to the end */
rev--;
group->InstArray[idx] = group->InstArray[rev];
group->InstArray[rev] = here;
}
}
group->EvalCount = rev;
group->SimdCount = rev/NSIMD;
nsimd += group->SimdCount;
nsisd += rev - NSIMD*group->SimdCount;
}
if(DEBUG) printf("nsimd=%d nsisd=%d\n",nsimd,nsisd);
/* run SIMD in parallel */
#pragma omp parallel for
for(idx=0;idx<nsimd;idx++)
{
if(DEBUG) printf("Search SIMD index %d\n", idx);
/* map the global batch index to (group, batch within group): search is
   reduced by each group's SimdCount until it falls inside a group */
int search=idx;
BSIM3v32model* mod;
BSIM3v32group* grp;
for (mod = (BSIM3v32model*)inModel; mod != NULL; mod = BSIM3v32nextModel(mod))
for (grp=mod->groupHead; grp!=NULL; grp=grp->next)
{
if(search>=0 && search < grp->SimdCount)
{
BSIM3v32instance** heres = &grp->InstArray[search*NSIMD];
if(DEBUG) printf("Call Simd index %d of %d\n", search*NSIMD, grp->InstCount);
int local_error = BSIM3v32LoadSIMD(heres, ckt);
if(DEBUG) printf("Call ended\n");
if(local_error) error=local_error;
}
search -= grp->SimdCount;
}
}
if(error) return error;
if(DEBUG) printf("now switch to sisd\n");
/* run remaining SISD in parallel */
#pragma omp parallel for
for(idx=0;idx<nsisd;idx++)
{
/* same index mapping as above, over the per-group remainders */
int search=idx;
BSIM3v32model* mod;
BSIM3v32group* grp;
for (mod = (BSIM3v32model*)inModel; mod != NULL; mod = BSIM3v32nextModel(mod))
for (grp=mod->groupHead; grp!=NULL; grp=grp->next)
{
/* n = instances of this group that need evaluation but fill no batch */
int n = grp->EvalCount - grp->SimdCount*NSIMD;
if(search>=0 && search < n)
{
if(DEBUG) printf("Call seq index %d of %d\n", search + grp->SimdCount*NSIMD,grp->InstCount);
int local_error = BSIM3v32LoadSeq(grp->InstArray[search + grp->SimdCount*NSIMD], ckt, 0);
if(DEBUG) printf("Call ended\n");
if(local_error) error=local_error;
}
search -= n;
}
}
if(DEBUG) printf("Now write the matrix\n");
/* Write in matrix sequentially */
BSIM3v32LoadRhsMat(inModel, ckt);
return error;
}
#endif
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,347 @@
/*******************************************************************************
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
*******************************************************************************
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
#include <math.h>
#include <x86intrin.h>
#include <signal.h>
#include "ngspice/ngspice.h"
#include "ngspice/cktdefs.h"
#include "bsim3v32def.h"
#include "b3v32acm.h"
#include "ngspice/trandefs.h"
#include "ngspice/const.h"
#include "ngspice/sperror.h"
#include "ngspice/devdefs.h"
#include "ngspice/suffix.h"
/* Numerical constants for the code included into BSIM3v32LoadSIMD() below. */
/* MAX_EXP is exp(EXP_THRESHOLD) and MIN_EXP is exp(-EXP_THRESHOLD) */
#define MAX_EXP 5.834617425e14
#define MIN_EXP 1.713908431e-15
#define EXP_THRESHOLD 34.0
/* presumably oxide and silicon permittivity and the elementary charge in SI
   units - TODO confirm against the sequential BSIM3v32 load code */
#define EPSOX 3.453133e-11
#define EPSSI 1.03594e-10
#define Charge_q 1.60219e-19
/* DELTA_1..DELTA_4 are not used in this file outside the included code */
#define DELTA_1 0.02
#define DELTA_2 0.02
#define DELTA_3 0.02
#define DELTA_4 0.02
/* 1 selects the x86 intrinsics/libmvec helpers, any other value the portable ones */
#define USEX86INTRINSICS 1
/* GCC vector types sized for NSIMD doubles: Vec4d holds the values, Vec4m
   holds integer lane masks and state indexes.
   NOTE(review): Vec4m only has as many lanes as Vec4d where long int is as
   wide as double (LP64); confirm for targets where long int is 32 bits. */
typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD)));
typedef long int Vec4m __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD)));
/* True when the error code is non-zero.  The argument is parenthesised so
   that an expression such as SIMDANY(a & b) is tested as a whole. */
#define SIMDANY(err) ((err) != 0)
/* Both hooks expand to nothing in this build. */
#define SIMDIFYCMD(cmd) /* empty */
#define SIMDifySaveScope(sc) /* empty */
/* All specialised power helpers currently map to the generic
   exp(p*log(x)) implementation vec4_mypow(). */
#define vec4_pow0p7(x,p) vec4_mypow(x,p)
#define vec4_powMJ(x,p) vec4_mypow(x,p)
#define vec4_powMJSW(x,p) vec4_mypow(x,p)
#define vec4_powMJSWG(x,p) vec4_mypow(x,p)
#if USEX86INTRINSICS==1
/* libmvec prototypes */
/* Caution: those libmvec functions are not as precise as std libm */
/* The mangled names are declared by hand here; presumably the 4-lane
   double (AVX2) variants of glibc's vector math library - confirm that the
   link line provides them. */
__m256d _ZGVdN4v_exp(__m256d x);
__m256d _ZGVdN4v_log(__m256d x);
/* lane-wise maximum, exponential, natural logarithm and square root */
#define vec4_MAX(a,b) _mm256_max_pd(a,b)
#define vec4_exp(a) _ZGVdN4v_exp(a)
#define vec4_log(a) _ZGVdN4v_log(a)
#define vec4_sqrt(a) _mm256_sqrt_pd(a)
/*
 * Lane-wise select between two vectors with _mm256_blendv_pd().
 *   fa   - value used where the mask lane is "false"
 *   tr   - value used where the mask lane is "true"
 *   mask - integer lane mask, reinterpreted as doubles for the intrinsic
 *          (the instruction looks at the top bit of each lane, so "true"
 *          must be encoded as -1 as produced by vector comparisons)
 */
static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
{
    const Vec4d selector = (Vec4d) mask;

    return _mm256_blendv_pd(fa, tr, selector);
}
/*
 * Lane-wise absolute value.
 * The sign bit of every lane is cleared, which matches the scalar fabs():
 * in particular -0.0 becomes +0.0.  (A compare-and-negate implementation
 * leaves -0.0 untouched because -0.0 < 0 is false.)
 */
static inline Vec4d vec4_fabs(Vec4d x)
{
    /* -0.0 has only the sign bit set; andnot computes (~signbit) & x */
    const Vec4d signbit = _mm256_set1_pd(-0.0);

    return _mm256_andnot_pd(signbit, x);
}
#else
/* vector-libm prototypes */
Vec4d vec4_exp_vectorlibm(Vec4d x); /* defined in vec4_exp.c */
Vec4d vec4_log_vectorlibm(Vec4d x); /* defined in vec4_log.c */
#define vec4_exp(a) vec4_exp_vectorlibm(a)
#define vec4_log(a) vec4_log_vectorlibm(a)
/*
 * Portable lane-wise select: res[i] = mask[i] ? tr[i] : fa[i].
 * Defined before vec4_MAX(), which uses it, so that no call precedes the
 * declaration.
 */
static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask)
{
    /* hope for good vectorization by the compiler ! */
    Vec4d res;
#pragma omp simd
    for (int i = 0; i < 4; i++)
    {
        res[i] = mask[i] ? tr[i] : fa[i];
    }
    return res;
}
/* Portable lane-wise maximum: picks b where a < b, otherwise a. */
static inline Vec4d vec4_MAX(Vec4d a, Vec4d b)
{
    return vec4_blend(a, b, a < b);
}
/*
 * Portable lane-wise absolute value.
 * Uses fabs() per lane so that the result matches the scalar code path,
 * including -0.0 -> +0.0 (a `x < 0 ? -x : x` test leaves -0.0 unchanged).
 */
static inline Vec4d vec4_fabs(Vec4d x)
{
    /* hope for good vectorization by the compiler ! */
    Vec4d res;
#pragma omp simd
    for (int i = 0; i < 4; i++)
    {
        res[i] = fabs(x[i]);
    }
    return res;
}
/* Portable lane-wise square root: applies sqrt() to each of the 4 lanes. */
static inline Vec4d vec4_sqrt(Vec4d x)
{
    /* written as a plain loop so that the compiler can vectorise it */
    Vec4d roots;
#pragma omp simd
    for (int lane = 0; lane < 4; ++lane)
    {
        const double v = x[lane];
        roots[lane] = sqrt(v);
    }
    return roots;
}
#endif
/*
 * Lane-wise power x^p with a scalar exponent, computed as exp(p * log(x)).
 * Accuracy and the handling of non-positive x follow vec4_log()/vec4_exp().
 */
static inline Vec4d vec4_mypow(Vec4d x, double p)
{
    const Vec4d scaledLog = vec4_log(x) * p;

    return vec4_exp(scaledLog);
}
/* some debug utils functions */
/* Debug helper: print a message, a name and the 4 lanes of a double vector. */
void vec4_printd(const char* msg, const char* name, Vec4d vecd)
{
    const double lane0 = vecd[0];
    const double lane1 = vecd[1];
    const double lane2 = vecd[2];
    const double lane3 = vecd[3];

    printf("%s %s %g %g %g %g\n", msg, name, lane0, lane1, lane2, lane3);
}
/* Debug helper: print a message, a name and the 4 lanes of a mask/index vector. */
void vec4_printm(const char* msg, const char* name, Vec4m vecm)
{
    const long int lane0 = vecm[0];
    const long int lane1 = vecm[1];
    const long int lane2 = vecm[2];
    const long int lane3 = vecm[3];

    printf("%s %s %ld %ld %ld %ld\n", msg, name, lane0, lane1, lane2, lane3);
}
/*
 * Debug helper: report lanes that carry the same state index.
 * For every ordered pair of distinct lanes with equal indexes the four
 * indexes are printed and SIGINT is raised.
 */
void vec4_CheckCollisions(Vec4m stateindexes, const char* msg)
{
    for (int a = 0; a < NSIMD; a++)
    {
        for (int b = 0; b < NSIMD; b++)
        {
            if (a == b || stateindexes[a] != stateindexes[b])
                continue;
            printf("%s, collisions %ld %ld %ld %ld!\n",msg,stateindexes[0],stateindexes[1],stateindexes[2],stateindexes[3]);
            raise(SIGINT);
        }
    }
}
/* useful vectorized functions */
/* Build a vector from the first four entries of row `idx` of the scratch array. */
static inline Vec4d SIMDLOADDATA(int idx, double data[7][NSIMD])
{
    const double *row = data[idx];

    return (Vec4d) {row[0], row[1], row[2], row[3]};
}
/* Gather: read cktstate[] at the four positions given by stateindexes. */
static inline Vec4d vec4_BSIM3v32_StateAccess(double* cktstate, Vec4m stateindexes)
{
    const double s0 = cktstate[stateindexes[0]];
    const double s1 = cktstate[stateindexes[1]];
    const double s2 = cktstate[stateindexes[2]];
    const double s3 = cktstate[stateindexes[3]];

    return (Vec4d) {s0, s1, s2, s3};
}
/* Scatter: write values[lane] to cktstate[stateindexes[lane]] for every lane. */
static inline void vec4_BSIM3v32_StateStore(double* cktstate, Vec4m stateindexes, Vec4d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec4_CheckCollisions(stateindexes,"SateStore");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot = values[lane];
    }
}
/* Scatter-add: cktstate[stateindexes[lane]] += values[lane], lane by lane in order. */
static inline void vec4_BSIM3v32_StateAdd(double* cktstate, Vec4m stateindexes, Vec4d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec4_CheckCollisions(stateindexes,"StateAdd");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot += values[lane];
    }
}
/* Scatter-subtract: cktstate[stateindexes[lane]] -= values[lane], lane by lane in order. */
static inline void vec4_BSIM3v32_StateSub(double* cktstate, Vec4m stateindexes, Vec4d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec4_CheckCollisions(stateindexes,"StateSub");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot -= values[lane];
    }
}
/* Reference implementation: libm exp() applied to each of the 4 lanes. */
static inline Vec4d vec4_exp_seq(Vec4d val)
{
    const double e0 = exp(val[0]);
    const double e1 = exp(val[1]);
    const double e2 = exp(val[2]);
    const double e3 = exp(val[3]);

    return (Vec4d) {e0, e1, e2, e3};
}
/* Reference implementation: libm log() applied to each of the 4 lanes. */
static inline Vec4d vec4_log_seq(Vec4d val)
{
    const double l0 = log(val[0]);
    const double l1 = log(val[1]);
    const double l2 = log(val[2]);
    const double l3 = log(val[3]);

    return (Vec4d) {l0, l1, l2, l3};
}
/* Reference implementation: libm sqrt() applied to each of the 4 lanes. */
static inline Vec4d vec4_sqrt_seq(Vec4d val)
{
    const double r0 = sqrt(val[0]);
    const double r1 = sqrt(val[1]);
    const double r2 = sqrt(val[2]);
    const double r3 = sqrt(val[3]);

    return (Vec4d) {r0, r1, r2, r3};
}
/* Reference implementation: the scalar MAX() macro applied to each lane pair. */
static inline Vec4d vec4_MAX_seq(Vec4d a, Vec4d b)
{
    const double m0 = MAX(a[0],b[0]);
    const double m1 = MAX(a[1],b[1]);
    const double m2 = MAX(a[2],b[2]);
    const double m3 = MAX(a[3],b[3]);

    return (Vec4d) {m0, m1, m2, m3};
}
/*
 * Vector front end of BSIM3v32_ACM_saturationCurrents(): calls the scalar
 * routine once per instance in heres[0..NSIMD-1] and stores the two results
 * in the matching lane of the output vectors.
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec4_BSIM3v32_ACM_saturationCurrents(BSIM3v32model *model,
                                                       BSIM3v32instance **heres,
                                                       Vec4d *DrainSatCurrent,
                                                       Vec4d *SourceSatCurrent)
{
    int status;
    double drainSat, sourceSat;

    for (int lane = 0; lane < NSIMD; lane++)
    {
        status = BSIM3v32_ACM_saturationCurrents(model, heres[lane],
                                                 &drainSat, &sourceSat);
        /* lanes are written before the status is examined */
        (*DrainSatCurrent)[lane] = drainSat;
        (*SourceSatCurrent)[lane] = sourceSat;
        if (status)
            break;
    }
    return status;
}
/*
 * Vector front end of BSIM3v32_ACM_junctionCapacitances(): calls the scalar
 * routine once per instance in heres[0..NSIMD-1] and stores the six results
 * in the matching lane of the output vectors.
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec4_BSIM3v32_ACM_junctionCapacitances(
    BSIM3v32model *model,
    BSIM3v32instance **heres,
    Vec4d *areaDrainBulkCapacitance,
    Vec4d *periDrainBulkCapacitance,
    Vec4d *gateDrainBulkCapacitance,
    Vec4d *areaSourceBulkCapacitance,
    Vec4d *periSourceBulkCapacitance,
    Vec4d *gateSourceBulkCapacitance)
{
    int status;
    double drainArea, drainPeri, drainGate;
    double sourceArea, sourcePeri, sourceGate;

    for (int lane = 0; lane < NSIMD; lane++)
    {
        status = BSIM3v32_ACM_junctionCapacitances(model, heres[lane],
                                                   &drainArea, &drainPeri, &drainGate,
                                                   &sourceArea, &sourcePeri, &sourceGate);
        /* lanes are written before the status is examined */
        (*areaDrainBulkCapacitance)[lane] = drainArea;
        (*periDrainBulkCapacitance)[lane] = drainPeri;
        (*gateDrainBulkCapacitance)[lane] = drainGate;
        (*areaSourceBulkCapacitance)[lane] = sourceArea;
        (*periSourceBulkCapacitance)[lane] = sourcePeri;
        (*gateSourceBulkCapacitance)[lane] = sourceGate;
        if (status)
            break;
    }
    return status;
}
/*
 * Run NIintegrate() once per lane of chargestate, in lane order.
 * geq, ceq and zero are not translated to vectors because they are unused
 * by the callers; the same scalars are passed to every call.
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec4_NIintegrate(CKTcircuit* ckt, double* geq, double *ceq, double zero, Vec4m chargestate)
{
    int status;

    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec4_CheckCollisions(chargestate, "NIIntegrate");

    for (int lane = 0; lane < NSIMD; lane++)
    {
        status = NIintegrate(ckt, geq, ceq, zero, chargestate[lane]);
        if (status)
            break;
    }
    return status;
}
/* Number of non-zero lanes among the first four lanes of mask. */
static inline int vec4_SIMDCOUNT(Vec4m mask) {
    int set = 0;

    for (int lane = 0; lane < 4; lane++)
    {
        if (mask[lane])
            set++;
    }
    return set;
}
/* Broadcast a scalar double into four lanes. */
static inline Vec4d vec4_SIMDTOVECTOR(double val)
{
    const Vec4d splat = {val, val, val, val};

    return splat;
}
/* Broadcast an int into four lanes of a mask vector. */
static inline Vec4m vec4_SIMDTOVECTORMASK(int val)
{
    const Vec4m splat = {val, val, val, val};

    return splat;
}
/*
 * Evaluate NSIMD instances at once.
 *
 *   heres - array of NSIMD instances; the model and pParam of heres[0] are
 *           used for the whole batch
 *   ckt   - circuit being loaded
 *   data  - (builds without USE_OMP only) scratch values for the batch
 *
 * The function body is a source file included in place, selected by NSIMD
 * (4 or 8) and by USE_OMP; any other NSIMD value is a compile-time error.
 * Returns OK when the included code falls through to the end.
 */
int BSIM3v32LoadSIMD(BSIM3v32instance **heres, CKTcircuit *ckt
#ifndef USE_OMP
, double data[7][NSIMD]
#endif
)
{
BSIM3v32model *model = BSIM3v32modPtr(heres[0]);
struct bsim3v32SizeDependParam *pParam;
pParam = heres[0]->pParam; /* same for all NSIMD instances */
/* the included file is the function body and uses the locals declared above */
#if NSIMD==4
#ifdef USE_OMP
#pragma message "Use OMP SIMD4 version"
#include "b3v32ldseq_simd4_omp.c"
#else
#include "b3v32ldseq_simd4.c"
#endif
#elif NSIMD==8
#ifdef USE_OMP
#pragma message "Use OMP SIMD8 version"
#include "b3v32ldseq_simd8_omp.c"
#else
#include "b3v32ldseq_simd8.c"
#endif
#else
#error Unsupported value for NSIMD
#endif
return(OK);
}

View File

@ -0,0 +1,274 @@
/*******************************************************************************
* Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design
*******************************************************************************
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/* draft version, not tested, not even compiled */
#include <math.h>
#include <x86intrin.h>
#include <signal.h>
#include "ngspice/ngspice.h"
#include "ngspice/cktdefs.h"
#include "bsim3v32def.h"
#include "b3v32acm.h"
#include "ngspice/trandefs.h"
#include "ngspice/const.h"
#include "ngspice/sperror.h"
#include "ngspice/devdefs.h"
#include "ngspice/suffix.h"
/* Numerical constants for the code included into BSIM3v32LoadSIMD8() below. */
/* MAX_EXP is exp(EXP_THRESHOLD) and MIN_EXP is exp(-EXP_THRESHOLD) */
#define MAX_EXP 5.834617425e14
#define MIN_EXP 1.713908431e-15
#define EXP_THRESHOLD 34.0
/* presumably oxide and silicon permittivity and the elementary charge in SI
   units - TODO confirm against the sequential BSIM3v32 load code */
#define EPSOX 3.453133e-11
#define EPSSI 1.03594e-10
#define Charge_q 1.60219e-19
/* DELTA_1..DELTA_4 are not used in this file outside the included code */
#define DELTA_1 0.02
#define DELTA_2 0.02
#define DELTA_3 0.02
#define DELTA_4 0.02
/* NOTE(review): NSIMD is defined unconditionally here; confirm it does not
   clash with a definition coming from the included headers or the build */
#define NSIMD 8
#define USEX86INTRINSICS 1
/* GCC vector types sized for NSIMD doubles: Vec8d holds the values, Vec8m
   holds integer lane masks and state indexes.
   NOTE(review): Vec8m only has as many lanes as Vec8d where long int is as
   wide as double (LP64); confirm for targets where long int is 32 bits. */
typedef double Vec8d __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD)));
typedef long int Vec8m __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD)));
/* True when the error code is non-zero.  The argument is parenthesised so
   that an expression such as SIMDANY(a & b) is tested as a whole. */
#define SIMDANY(err) ((err) != 0)
#define SIMDIFYCMD(cmd) /* empty */
#define SIMDifySaveScope(sc) /* empty */
/* Broadcast val into all 8 lanes of a Vec8d (a 4-element initialiser would
   leave lanes 4..7 zero).  val is evaluated once per lane. */
#define SIMDVECTORMACRO(val) ((Vec8d) {(val),(val),(val),(val),(val),(val),(val),(val)})
#define SIMDVECTOR(val) vec8_SIMDTOVECTOR(val)
#define SIMDVECTORMASK(val) vec8_SIMDTOVECTORMASK(val)
#ifdef USEX86INTRINSICS
#define vec8_MAX(a,b) _mm512_max_pd(a,b)
#define vec8_exp(a) _mm512_exp_pd(a)
#define vec8_log(a) _mm512_log_pd(a)
#define vec8_sqrt(a) _mm512_sqrt_pd(a)
/*
 * Lane-wise select: returns tr where the mask lane is non-zero, fa elsewhere.
 *
 * Masks follow the GCC vector-comparison convention (false = 0, true = -1),
 * but any non-zero lane counts as true here.  The integer mask is first
 * turned into an AVX-512 k-mask by comparing it with zero; a shift-based
 * sign test would treat a lane holding 1 as false.
 */
static inline Vec8d vec8_blend(Vec8d fa, Vec8d tr, Vec8m mask)
{
    const __mmask8 pick =
        _mm512_cmpneq_epi64_mask((__m512i) mask, _mm512_setzero_si512());

    /* _mm512_mask_blend_pd takes the second operand where the k bit is set */
    return (Vec8d) _mm512_mask_blend_pd(pick, (__m512d) fa, (__m512d) tr);
}
/* Lane-wise absolute value of 8 doubles via _mm512_abs_pd(). */
static inline Vec8d vec8_fabs(Vec8d x)
{
    const Vec8d magnitude = (Vec8d) _mm512_abs_pd(x);

    return magnitude;
}
#else
#error X86 AVX512 instrinsics required for using SIMD8 version
#endif
/* some debug utils functions */
/*
 * Debug helper: print a message, a name and all NSIMD lanes of a double
 * vector on one line (the lanes are separated by single spaces).
 */
void vec8_printd(const char* msg, const char* name, Vec8d vecd)
{
    printf("%s %s", msg, name);
    for (int lane = 0; lane < NSIMD; lane++)
        printf(" %g", vecd[lane]);
    printf("\n");
}
/*
 * Debug helper: print a message, a name and all NSIMD lanes of a mask/index
 * vector on one line (the lanes are separated by single spaces).
 */
void vec8_printm(const char* msg, const char* name, Vec8m vecm)
{
    printf("%s %s", msg, name);
    for (int lane = 0; lane < NSIMD; lane++)
        printf(" %ld", vecm[lane]);
    printf("\n");
}
/*
 * Debug helper: report lanes that carry the same state index.
 * For every ordered pair of distinct lanes with equal indexes, all NSIMD
 * indexes are printed and SIGINT is raised.
 */
void vec8_CheckCollisions(Vec8m stateindexes, const char* msg)
{
    for (int i = 0; i < NSIMD; i++)
        for (int j = 0; j < NSIMD; j++)
        {
            if (i == j || stateindexes[i] != stateindexes[j])
                continue;
            printf("%s, collisions", msg);
            for (int lane = 0; lane < NSIMD; lane++)
                printf(" %ld", stateindexes[lane]);
            printf("!\n");
            raise(SIGINT);
        }
}
/* useful vectorized functions */
/* Build a vector from the eight entries of row `idx` of the scratch array. */
static inline Vec8d SIMDLOADDATA(int idx, double data[7][NSIMD])
{
    const double *row = data[idx];

    return (Vec8d) {row[0], row[1], row[2], row[3],
                    row[4], row[5], row[6], row[7]};
}
/* Gather: read cktstate[] at the eight positions given by stateindexes. */
static inline Vec8d vec8_BSIM3v32_StateAccess(double* cktstate, Vec8m stateindexes)
{
    const double s0 = cktstate[stateindexes[0]];
    const double s1 = cktstate[stateindexes[1]];
    const double s2 = cktstate[stateindexes[2]];
    const double s3 = cktstate[stateindexes[3]];
    const double s4 = cktstate[stateindexes[4]];
    const double s5 = cktstate[stateindexes[5]];
    const double s6 = cktstate[stateindexes[6]];
    const double s7 = cktstate[stateindexes[7]];

    return (Vec8d) {s0, s1, s2, s3, s4, s5, s6, s7};
}
/* Scatter: write values[lane] to cktstate[stateindexes[lane]] for every lane. */
static inline void vec8_BSIM3v32_StateStore(double* cktstate, Vec8m stateindexes, Vec8d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec8_CheckCollisions(stateindexes,"SateStore");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot = values[lane];
    }
}
/* Scatter-add: cktstate[stateindexes[lane]] += values[lane], lane by lane in order. */
static inline void vec8_BSIM3v32_StateAdd(double* cktstate, Vec8m stateindexes, Vec8d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec8_CheckCollisions(stateindexes,"StateAdd");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot += values[lane];
    }
}
/* Scatter-subtract: cktstate[stateindexes[lane]] -= values[lane], lane by lane in order. */
static inline void vec8_BSIM3v32_StateSub(double* cktstate, Vec8m stateindexes, Vec8d values)
{
    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec8_CheckCollisions(stateindexes,"StateSub");

    for (int lane = 0; lane < NSIMD; ++lane)
    {
        double *slot = &cktstate[stateindexes[lane]];
        *slot -= values[lane];
    }
}
/*
 * Vector front end of BSIM3v32_ACM_saturationCurrents(): calls the scalar
 * routine once per instance in heres[0..NSIMD-1] and stores the two results
 * in the matching lane of the output vectors.
 *
 * The scalar routine writes into local doubles which are then copied into
 * the lanes, as in the 4-lane version; taking the address of a vector lane
 * is not portable across compilers.
 *
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec8_BSIM3v32_ACM_saturationCurrents
(
    BSIM3v32model *model,
    BSIM3v32instance **heres,
    Vec8d *DrainSatCurrent,
    Vec8d *SourceSatCurrent
)
{
    int error;
    double dsat, ssat;

    for (int idx = 0; idx < NSIMD; idx++)
    {
        error = BSIM3v32_ACM_saturationCurrents(
            model, heres[idx],
            &dsat,
            &ssat
        );
        (*DrainSatCurrent)[idx] = dsat;
        (*SourceSatCurrent)[idx] = ssat;
        if (error) return error;
    }
    return error;
}
/*
 * Vector front end of BSIM3v32_ACM_junctionCapacitances(): calls the scalar
 * routine once per instance in heres[0..NSIMD-1] and stores the six results
 * in the matching lane of the output vectors.
 *
 * The scalar routine writes into local doubles which are then copied into
 * the lanes, as in the 4-lane version; taking the address of a vector lane
 * is not portable across compilers.
 *
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec8_BSIM3v32_ACM_junctionCapacitances(
    BSIM3v32model *model,
    BSIM3v32instance **heres,
    Vec8d *areaDrainBulkCapacitance,
    Vec8d *periDrainBulkCapacitance,
    Vec8d *gateDrainBulkCapacitance,
    Vec8d *areaSourceBulkCapacitance,
    Vec8d *periSourceBulkCapacitance,
    Vec8d *gateSourceBulkCapacitance
)
{
    int error;
    double areaDB, periDB, gateDB, areaSB, periSB, gateSB;

    for (int idx = 0; idx < NSIMD; idx++)
    {
        error = BSIM3v32_ACM_junctionCapacitances(
            model, heres[idx],
            &areaDB,
            &periDB,
            &gateDB,
            &areaSB,
            &periSB,
            &gateSB
        );
        (*areaDrainBulkCapacitance)[idx] = areaDB;
        (*periDrainBulkCapacitance)[idx] = periDB;
        (*gateDrainBulkCapacitance)[idx] = gateDB;
        (*areaSourceBulkCapacitance)[idx] = areaSB;
        (*periSourceBulkCapacitance)[idx] = periSB;
        (*gateSourceBulkCapacitance)[idx] = gateSB;
        if (error) return error;
    }
    return error;
}
/*
 * Run NIintegrate() once per lane of chargestate, in lane order.
 * geq, ceq and zero are not vectors because they are unused by the callers;
 * the same scalars are passed to every call.
 * Stops at, and returns, the first non-zero status; otherwise returns the
 * status of the last call.
 */
static inline int vec8_NIintegrate(CKTcircuit* ckt, double* geq, double *ceq, double zero, Vec8m chargestate)
{
    int status;

    /* collision check is disabled; switch the 0 to 1 while debugging */
    if (0)
        vec8_CheckCollisions(chargestate, "NIIntegrate");

    for (int lane = 0; lane < NSIMD; lane++)
    {
        status = NIintegrate(ckt, geq, ceq, zero, chargestate[lane]);
        if (status)
            break;
    }
    return status;
}
/* Number of non-zero lanes among the eight lanes of mask. */
static inline int vec8_SIMDCOUNT(Vec8m mask) {
    int set = 0;

    for (int lane = 0; lane < 8; lane++)
    {
        if (mask[lane])
            set++;
    }
    return set;
}
/* Broadcast a scalar double into eight lanes. */
static inline Vec8d vec8_SIMDTOVECTOR(double val)
{
    const Vec8d splat = {val, val, val, val, val, val, val, val};

    return splat;
}
/* Broadcast an int into eight lanes of a mask vector. */
static inline Vec8m vec8_SIMDTOVECTORMASK(int val)
{
    const Vec8m splat = {val, val, val, val, val, val, val, val};

    return splat;
}
/*
 * Evaluate NSIMD (= 8) instances at once.
 *
 *   heres - array of NSIMD instances; the model and pParam of heres[0] are
 *           used for the whole batch
 *   ckt   - circuit being loaded
 *   data  - scratch values for the batch
 *
 * The function body is the source file b3v32ldseq_simd8.c, included in place.
 * Returns OK when the included code falls through to the end.
 */
int BSIM3v32LoadSIMD8(BSIM3v32instance **heres, CKTcircuit *ckt, double data[7][NSIMD]) {
BSIM3v32model *model = BSIM3v32modPtr(heres[0]);
/* disabled trace; switch the 0 to 1 while debugging */
if(0) printf("BSIM3v32LoadSIMD %s model %s\n", heres[0]->gen.GENname, model->gen.GENmodName);
struct bsim3v32SizeDependParam *pParam;
pParam = heres[0]->pParam; /* same for all NSIMD instances */
/* the included file is the function body and uses the locals declared above */
#if 1
#include "b3v32ldseq_simd8.c"
#endif
return(OK);
}

View File

@ -0,0 +1,654 @@
/* This program implements a show-case vector (vectorizable) double
precision exponential with a 4 ulp error bound.
Author: Christoph Lauter,
Sorbonne Université - LIP6 - PEQUAN team.
This program uses code generated using Sollya and Metalibm; see the
licences and exception texts below.
This program is
Copyright 2014-2018 Christoph Lauter Sorbonne Université
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This code was generated using non-trivial code generation commands
of the Metalibm software program.
Before using, modifying and/or integrating this code into other
software, review the copyright and license status of this
generated code. In particular, see the exception below.
This generated program is partly or entirely based on a program
generated using non-trivial code generation commands of the Sollya
software program. See the copyright notice and exception text
referring to that Sollya-generated part of this program generated
with Metalibm below.
Metalibm is
Copyright 2008-2013 by
Laboratoire de l'Informatique du Parallélisme,
UMR CNRS - ENS Lyon - UCB Lyon 1 - INRIA 5668
and by
Laboratoire d'Informatique de Paris 6, equipe PEQUAN,
UPMC Universite Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France.
Contributors: Christoph Quirin Lauter
(UPMC LIP6 PEQUAN formerly LIP/ENS Lyon)
christoph.lauter@lip6.fr
and
Olga Kupriianova
(UPMC LIP6 PEQUAN)
olga.kupriianova@lip6.fr
Metalibm was formerly developed by the Arenaire project at Ecole
Normale Superieure de Lyon and is now developed by Equipe PEQUAN
at Universite Pierre et Marie Curie Paris 6.
The Metalibm software program is free software; you can
redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option)
any later version.
Metalibm is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the Metalibm program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
This generated program is distributed WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
As a special exception, you may create a larger work that contains
part or all of this software generated using Metalibm and
distribute that work under terms of your choice, so long as that
work isn't itself a numerical code generator using the skeleton of
this code or a modified version thereof as a code skeleton.
Alternatively, if you modify or redistribute this generated code
itself, or its skeleton, you may (at your option) remove this
special exception, which will cause this generated code and its
skeleton and the resulting Metalibm output files to be licensed
under the General Public licence (version 2) without this special
exception.
This special exception was added by the Metalibm copyright holders
on November 20th 2013.
*/
/*
This code was generated using non-trivial code generation commands of
the Sollya software program.
Before using, modifying and/or integrating this code into other
software, review the copyright and license status of this generated
code. In particular, see the exception below.
Sollya is
Copyright 2006-2013 by
Laboratoire de l'Informatique du Parallelisme, UMR CNRS - ENS Lyon -
UCB Lyon 1 - INRIA 5668,
Laboratoire d'Informatique de Paris 6, equipe PEQUAN, UPMC Universite
Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France
and by
Centre de recherche INRIA Sophia-Antipolis Mediterranee, equipe APICS,
Sophia Antipolis, France.
Contributors Ch. Lauter, S. Chevillard, M. Joldes
christoph.lauter@ens-lyon.org
sylvain.chevillard@ens-lyon.org
joldes@lass.fr
The Sollya software is a computer program whose purpose is to provide
an environment for safe floating-point code development. It is
particularily targeted to the automatized implementation of
mathematical floating-point libraries (libm). Amongst other features,
it offers a certified infinity norm, an automatic polynomial
implementer and a fast Remez algorithm.
The Sollya software is governed by the CeCILL-C license under French
law and abiding by the rules of distribution of free software. You
can use, modify and/ or redistribute the software under the terms of
the CeCILL-C license as circulated by CEA, CNRS and INRIA at the
following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided
only with a limited warranty and the software's author, the holder of
the economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards
their requirements in conditions enabling the security of their
systems and/or data to be ensured and, more generally, to use and
operate it in the same conditions as regards security.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
The Sollya program is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.
This generated program is distributed WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
As a special exception, you may create a larger work that contains
part or all of this software generated using Sollya and distribute
that work under terms of your choice, so long as that work isn't
itself a numerical code generator using the skeleton of this code or a
modified version thereof as a code skeleton. Alternatively, if you
modify or redistribute this generated code itself, or its skeleton,
you may (at your option) remove this special exception, which will
cause this generated code and its skeleton and the resulting Sollya
output files to be licensed under the CeCILL-C licence without this
special exception.
This special exception was added by the Sollya copyright holders in
version 4.1 of Sollya.
*/
#include <stdint.h>
/* Number of double lanes in one vector. */
#define VECTOR_LENGTH 4
/* Spelling aliases for the C qualifiers used by the code below. */
#define RESTRICT restrict
#define CONST const
/* Compiler vector-extension type: VECTOR_LENGTH doubles, aligned to the
   size of the whole vector. */
typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*VECTOR_LENGTH),
aligned (sizeof(double)*VECTOR_LENGTH)));
/* Two caster types */
/* dblcast: access the bits of a double as an unsigned 64-bit integer. */
typedef union _dblcast {
double d;
uint64_t i;
} dblcast;
/* db_number: access the bits of a double as a signed 64-bit integer. */
typedef union {
int64_t l;
double d;
} db_number;
/* Macro implementations of some double-double operations */

/* Add12(s, r, a, b)
   Stores the rounded sum a + b in the lvalue s and, in the lvalue r, the
   correction term b - (s - a), i.e. the part of b that was lost when s was
   rounded.  a and b are evaluated exactly once; s is read back after being
   written.
   NOTE(review): the correction is presumably only exact under the usual
   fast-two-sum precondition on the magnitudes of a and b -- confirm against
   the call sites before reusing elsewhere.
   Wrapped in do { } while (0) so that `Add12(...);` is a single statement
   (safe in an unbraced if/else); temporaries carry a macro-specific name so
   they cannot capture caller identifiers. */
#define Add12(s, r, a, b)                          \
  do {                                             \
    double add12_a_ = (a), add12_b_ = (b);         \
    double add12_z_;                               \
    (s) = add12_a_ + add12_b_;                     \
    add12_z_ = (s) - add12_a_;                     \
    (r) = add12_b_ - add12_z_;                     \
  } while (0)
/* Mul12(rh, rl, u, v)
   rh and rl are pointers to double.  Stores the rounded product u * v in
   *rh and, in *rl, a correction term rebuilt from the split halves of u and
   v (each operand is split with the constant 2^27 + 1 into a high part u1/v1
   and a low part u2/v2).  u and v are evaluated exactly once.
   The order of the floating-point operations is significant and is kept
   exactly as generated.
   Wrapped in do { } while (0) so the macro is one statement; the pointer
   arguments are parenthesised so expressions such as `p + 1` work, and the
   temporaries carry a macro-specific name to avoid capturing caller names. */
#define Mul12(rh, rl, u, v)                                              \
  do {                                                                   \
    const double mul12_c_ = 134217729.; /* 2^27 +1 */                    \
    double mul12_up_, mul12_u1_, mul12_u2_;                              \
    double mul12_vp_, mul12_v1_, mul12_v2_;                              \
    double mul12_u_ = (u), mul12_v_ = (v);                               \
                                                                         \
    mul12_up_ = mul12_u_*mul12_c_;                                       \
    mul12_vp_ = mul12_v_*mul12_c_;                                       \
    mul12_u1_ = (mul12_u_-mul12_up_)+mul12_up_;                          \
    mul12_v1_ = (mul12_v_-mul12_vp_)+mul12_vp_;                          \
    mul12_u2_ = mul12_u_-mul12_u1_;                                      \
    mul12_v2_ = mul12_v_-mul12_v1_;                                      \
                                                                         \
    *(rh) = mul12_u_*mul12_v_;                                           \
    *(rl) = (((mul12_u1_*mul12_v1_-*(rh))+(mul12_u1_*mul12_v2_))+(mul12_u2_*mul12_v1_))+(mul12_u2_*mul12_v2_); \
  } while (0)
/* Mul122(resh, resl, a, bh, bl)
   resh and resl are pointers to double.  Multiplies the double a by the
   two-part value (bh, bl): Mul12 forms a * bh as a high/low pair, a * bl is
   added to the low part, and Add12 renormalises the pair into *resh / *resl.
   a is evaluated exactly once (the original form evaluated it twice).
   Relies on the Mul12 and Add12 macros being defined at the point of use.
   Wrapped in do { } while (0) so the macro is one statement; temporaries
   carry a macro-specific name to avoid capturing caller identifiers. */
#define Mul122(resh, resl, a, bh, bl)                                    \
  do {                                                                   \
    double mul122_a_ = (a);                                              \
    double mul122_t1_, mul122_t2_, mul122_t3_, mul122_t4_;               \
                                                                         \
    Mul12(&mul122_t1_,&mul122_t2_,mul122_a_,(bh));                       \
    mul122_t3_ = mul122_a_ * (bl);                                       \
    mul122_t4_ = mul122_t2_ + mul122_t3_;                                \
    Add12((*(resh)),(*(resl)),mul122_t1_,mul122_t4_);                    \
  } while (0)
/* Mul22(zh, zl, xh, xl, yh, yl)
   zh and zl are pointers to double.  Multiplies the two-part values
   (xh, xl) and (yh, yl): xh * yh is formed as a high/low pair using the
   2^27 + 1 splitting constant, the cross terms xh*yl + xl*yh are added to
   the low part, and the pair is renormalised into *zh / *zl.
   Each value argument is evaluated exactly once.  The order of the
   floating-point operations is significant and is kept as generated.
   Wrapped in do { } while (0) so the macro is one statement; the pointer
   arguments are parenthesised and the temporaries carry a macro-specific
   name so they cannot capture caller identifiers such as `c` or `mh`. */
#define Mul22(zh, zl, xh, xl, yh, yl)                                    \
  do {                                                                   \
    const double mul22_c_ = 134217729.; /* 2^27 + 1 */                   \
    double mul22_xh_ = (xh), mul22_xl_ = (xl);                           \
    double mul22_yh_ = (yh), mul22_yl_ = (yl);                           \
    double mul22_mh_, mul22_ml_;                                         \
    double mul22_up_, mul22_u1_, mul22_u2_;                              \
    double mul22_vp_, mul22_v1_, mul22_v2_;                              \
                                                                         \
    mul22_up_ = mul22_xh_*mul22_c_;                                      \
    mul22_vp_ = mul22_yh_*mul22_c_;                                      \
    mul22_u1_ = (mul22_xh_-mul22_up_)+mul22_up_;                         \
    mul22_v1_ = (mul22_yh_-mul22_vp_)+mul22_vp_;                         \
    mul22_u2_ = mul22_xh_-mul22_u1_;                                     \
    mul22_v2_ = mul22_yh_-mul22_v1_;                                     \
                                                                         \
    mul22_mh_ = mul22_xh_*mul22_yh_;                                     \
    mul22_ml_ = (((mul22_u1_*mul22_v1_-mul22_mh_)+(mul22_u1_*mul22_v2_))+(mul22_u2_*mul22_v1_))+(mul22_u2_*mul22_v2_); \
                                                                         \
    mul22_ml_ += mul22_xh_*mul22_yl_ + mul22_xl_*mul22_yh_;              \
    *(zh) = mul22_mh_+mul22_ml_;                                         \
    *(zl) = mul22_mh_ - (*(zh)) + mul22_ml_;                             \
  } while (0)
/* Need fabs */
/* Declared by hand here rather than by including <math.h>. */
double fabs(double);
/* Some constants */
/* 1 / ln(2), used to turn x into a multiple of ln(2). */
#define LOG2_E 1.442695040888963407359924681001892137426645954153
/* ln(2) split in two parts: LOG_2_HI + LOG_2_LO approximates ln(2); the
   main loop subtracts eDouble * LOG_2_HI first and eDouble * LOG_2_LO
   second. */
#define LOG_2_HI 0.693147180559890330187045037746429443359375
#define LOG_2_LO 5.4979230187083711552420206887059365096458163346682e-14
/* 1.5 * 2^52: added to and then subtracted from a value to obtain its
   rounding to an integer. */
#define SHIFTER 6755399441055744.0
/* A metalibm generated function for the callout */
/* Coefficients of the degree-6 polynomial evaluated by
   f_approx_exp_arg_red(); coeff_k multiplies x^k.  The values are close to
   1/k!. */
#define f_approx_exp_arg_red_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
#define f_approx_exp_arg_red_coeff_1h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
#define f_approx_exp_arg_red_coeff_2h 5.00000000000032307490016592055326327681541442871093750000000000000000000000000000e-01
#define f_approx_exp_arg_red_coeff_3h 1.66666666664336909908783468381443526595830917358398437500000000000000000000000000e-01
#define f_approx_exp_arg_red_coeff_4h 4.16666661063678778198493546369718387722969055175781250000000000000000000000000000e-02
#define f_approx_exp_arg_red_coeff_5h 8.33337739276391979703628720699271070770919322967529296875000000000000000000000000e-03
#define f_approx_exp_arg_red_coeff_6h 1.39156772666044516173489142829566844739019870758056640625000000000000000000000000e-03
/* Evaluate the degree-6 polynomial with coefficients
   f_approx_exp_arg_red_coeff_0h .. _6h at x by Horner's scheme.
   The last addition (coeff_0h + ...) goes through Add12, so the result is
   returned as a pair: the rounded sum in *f_approx_exp_arg_red_resh and the
   Add12 correction term in *f_approx_exp_arg_red_resm.
   Every multiplication and every addition is kept as its own statement,
   mirroring the generated original step for step. */
static inline void f_approx_exp_arg_red(double * RESTRICT f_approx_exp_arg_red_resh, double * RESTRICT f_approx_exp_arg_red_resm, double x) {
  double acc;             /* running Horner value */
  double prod;            /* acc * x of the current step */
  double sum_hi, sum_lo;  /* result pair produced by Add12 */

  acc = f_approx_exp_arg_red_coeff_6h;
  prod = acc * x;
  acc = f_approx_exp_arg_red_coeff_5h + prod;
  prod = acc * x;
  acc = f_approx_exp_arg_red_coeff_4h + prod;
  prod = acc * x;
  acc = f_approx_exp_arg_red_coeff_3h + prod;
  prod = acc * x;
  acc = f_approx_exp_arg_red_coeff_2h + prod;
  prod = acc * x;
  acc = f_approx_exp_arg_red_coeff_1h + prod;
  prod = acc * x;

  Add12(sum_hi,sum_lo,f_approx_exp_arg_red_coeff_0h,prod);

  *f_approx_exp_arg_red_resh = sum_hi;
  *f_approx_exp_arg_red_resm = sum_lo;
}
/* High parts of the 32-entry table used by scalar_exp_callout_inner().
   Entry k is close to 2^(k/32) (entry 0 is 1, entry 16 is sqrt(2)); the
   table is indexed with the low 5 bits of the reduced integer. */
static CONST double f_approx_twoPower_Index_Hi[32] = {
1,
1.021897148654116627,
1.0442737824274137548,
1.067140400676823697,
1.0905077326652576897,
1.114386742595892432,
1.1387886347566915646,
1.1637248587775774755,
1.1892071150027210269,
1.2152473599804689552,
1.241857812073484002,
1.2690509571917332199,
1.2968395546510096406,
1.3252366431597413232,
1.3542555469368926513,
1.3839098819638320226,
1.4142135623730951455,
1.4451808069770466503,
1.4768261459394993462,
1.5091644275934228414,
1.542210825407940744,
1.5759808451078864966,
1.6104903319492542835,
1.6457554781539649458,
1.681792830507429004,
1.718619298122477934,
1.7562521603732994535,
1.794709075003107168,
1.8340080864093424307,
1.8741676341102999626,
1.9152065613971474,
1.9571441241754001794
};
/* Low parts matching f_approx_twoPower_Index_Hi entry for entry: small
   correction terms (magnitude around 1e-16 or below) that are passed to
   Mul22 together with the high part in scalar_exp_callout_inner(). */
static CONST double f_approx_twoPower_Index_Mi[32] = {
0,
5.109225028973443893e-17,
8.551889705537964892e-17,
-7.899853966841582122e-17,
-3.046782079812471147e-17,
1.0410278456845570955e-16,
8.912812676025407777e-17,
3.8292048369240934987e-17,
3.982015231465646111e-17,
-7.71263069268148813e-17,
4.658027591836936791e-17,
2.667932131342186095e-18,
2.5382502794888314959e-17,
-2.858731210038861373e-17,
7.700948379802989461e-17,
-6.770511658794786287e-17,
-9.66729331345291345e-17,
-3.023758134993987319e-17,
-3.4839945568927957958e-17,
-1.016455327754295039e-16,
7.949834809697620856e-17,
-1.013691647127830398e-17,
2.470719256979788785e-17,
-1.0125679913674772604e-16,
8.19901002058149652e-17,
-1.851380418263110988e-17,
2.960140695448873307e-17,
1.822745842791208677e-17,
3.283107224245627203e-17,
-6.122763413004142561e-17,
-1.0619946056195962638e-16,
8.960767791036667767e-17
};
/* Constants for the argument reduction in scalar_exp_callout_inner(). */
/* 2^w / ln(2) with w = 5, i.e. 32 / ln(2). */
#define f_approx_argred_log2_of_base_times_two_to_w 4.616624130844682838e1
/* -ln(2) / 2^w as a high part plus a small correction part. */
#define f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_hi -2.1660849392498290195e-2
#define f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_mi -7.24702129326968612e-19
/* 1.5 * 2^52: adding it places the rounded integer in the low bits of the
   double's bit pattern. */
#define f_approx_argred_shifter 6755399441055744.0
/* Number of index bits: the table has 2^w = 32 entries. */
#define f_approx_argred_w 5
/* Mask selecting the w low bits (the table index). */
#define f_approx_argred_idx_mask 31ull
/* Mask selecting the low 32 bits of the shifted value's bit pattern. */
#define f_approx_argred_lower_32_bits 0xffffffffull
/* Table-driven evaluation used by scalar_exp_callout() for inputs that
   passed its range checks.  The result is returned as a pair
   (*res_resh, *res_resm).

   Steps:
     1. m  = xh * 32/ln(2) rounded to an integer via the shifter constant;
        its low 5 bits select a table entry, the remaining bits form the
        power-of-two exponent E.
     2. zh = xh - m * ln(2)/32, computed with the two-part constant.
     3. The polynomial f_approx_exp_arg_red(zh) is multiplied by the table
        entry (both as hi/mi pairs) with Mul22.
     4. The product is scaled by 2^E in two steps, 2^E1 then 2^E2 with
        E1 + E2 = E.

   Local names are kept as generated because the Mul122 / Mul22 macros
   declare their own temporaries at the expansion site. */
static inline void scalar_exp_callout_inner(double * RESTRICT res_resh, double * RESTRICT res_resm, double xh) {
  db_number argRedCaster, twoE1, twoE2;
  double rescaled_m_hi, rescaled_m_mi;
  double poly_resh, poly_resm;
  double tableTimesPoly_hi, tableTimesPoly_mi;

  /* Step 1: round xh * 2^w / ln(2) to an integer, as a double and as bits. */
  const double t = xh * f_approx_argred_log2_of_base_times_two_to_w;
  const double shifted_t = t + f_approx_argred_shifter;
  const double mAsDouble = shifted_t - f_approx_argred_shifter;

  argRedCaster.d = shifted_t;
  const int mAsInt = (int) (argRedCaster.l & f_approx_argred_lower_32_bits);

  /* Split the integer: exponent E (further split as E1 + E2) and index. */
  const int E = mAsInt >> f_approx_argred_w;
  const int E1 = E >> 1;
  const int E2 = E - E1;
  const int idx = mAsInt & f_approx_argred_idx_mask;

  /* Step 2: reduced argument zh = xh + m * (-ln(2) / 2^w). */
  Mul122(&rescaled_m_hi, &rescaled_m_mi, mAsDouble, f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_hi, f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_mi);
  const double zh = (xh + rescaled_m_hi) + rescaled_m_mi;

  /* Step 3: polynomial times table entry. */
  f_approx_exp_arg_red(&poly_resh, &poly_resm, zh);
  const double table_hi = f_approx_twoPower_Index_Hi[idx];
  const double table_mi = f_approx_twoPower_Index_Mi[idx];
  Mul22(&tableTimesPoly_hi,&tableTimesPoly_mi,table_hi,table_mi,poly_resh,poly_resm);

  /* Step 4: build 2^E1 and 2^E2 by writing the biased exponent field. */
  twoE1.l = E1 + 1023ll;
  twoE1.l <<= 52;
  twoE2.l = E2 + 1023ll;
  twoE2.l <<= 52;

  const double twoE1tablePoly_hi = twoE1.d * tableTimesPoly_hi;
  const double twoE1tablePoly_mi = twoE1.d * tableTimesPoly_mi;

  *res_resh = twoE2.d * twoE1tablePoly_hi;
  *res_resm = twoE2.d * twoE1tablePoly_mi;
}
/* A scalar exponential for the callout.

   Handles every double input:
     - NaN            -> 1.0 + x
     - -Inf           -> 0.0
     - +Inf           -> x
     - x <= -746.0    -> product of two tiny constants (complete underflow)
     - x >= 711.0     -> product of two huge constants (complete overflow)
     - |x| <= 0.75 * 2^-26 -> 1.0 + x
     - otherwise      -> scalar_exp_callout_inner(), returning hi + lo. */
static inline double scalar_exp_callout(double x) {
  dblcast xdb;
  xdb.d = x;

  /* Bit pattern of |x|: everything except the sign bit. */
  const uint64_t absBits = xdb.i & 0x7fffffffffffffffull;

  /* Above the Inf pattern: NaN. */
  if (absBits > 0x7ff0000000000000ull) {
    return 1.0 + x;
  }

  /* Exactly the Inf pattern: the sign bit decides between 0 and +Inf. */
  if (absBits == 0x7ff0000000000000ull) {
    return (xdb.i >> 63) ? 0.0 : x;
  }

  /* Finite input from here on.  Evident underflow first. */
  if (x <= -746.0) {
    double twoM600 = 2.4099198651028841177407500347125089364310049545099e-181;
    return twoM600 * twoM600;
  }

  /* Evident overflow. */
  if (x >= 711.0) {
    double two600 = 4.1495155688809929585124078636911611510124462322424e180;
    return two600 * two600;
  }

  /* For very small |x|, 1 + x is already a good enough approximation. */
  if (fabs(x) <= 1.11758708953857421875e-8) {
    return 1.0 + x;
  }

  /* General case; slight under- or overflow of the result is still
     possible here. */
  double yh, yl;
  scalar_exp_callout_inner(&yh, &yl, x);
  return yh + yl;
}
/* A vector exponential callout: applies scalar_exp_callout() to each lane
   of x independently and returns the per-lane results. */
static inline Vec4d vector_exp_callout(Vec4d x) {
  Vec4d result;
  for (int lane = 0; lane < VECTOR_LENGTH; ++lane) {
    result[lane] = scalar_exp_callout(x[lane]);
  }
  return result;
}
/* Generated polynomial for vector exponential */
/* Coefficients of the degree-10 polynomial evaluated by vector_exp_poly();
   coeff_k multiplies x^k.  The values are close to 1/k!. */
#define vector_exp_poly_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
#define vector_exp_poly_coeff_1h 1.00000000000000643929354282590793445706367492675781250000000000000000000000000000e+00
#define vector_exp_poly_coeff_2h 4.99999999999983513188084316425374709069728851318359375000000000000000000000000000e-01
#define vector_exp_poly_coeff_3h 1.66666666665578222517041240280377678573131561279296875000000000000000000000000000e-01
#define vector_exp_poly_coeff_4h 4.16666666679390979011188278491317760199308395385742187500000000000000000000000000e-02
#define vector_exp_poly_coeff_5h 8.33333338463836288678709962596258264966309070587158203125000000000000000000000000e-03
#define vector_exp_poly_coeff_6h 1.38888885906261988316401367882235717843286693096160888671875000000000000000000000e-03
#define vector_exp_poly_coeff_7h 1.98411714150174687447750199176255136990221217274665832519531250000000000000000000e-04
#define vector_exp_poly_coeff_8h 2.48018422092243362301385717350044046725088264793157577514648437500000000000000000e-05
#define vector_exp_poly_coeff_9h 2.76397570196414793205260534980638453816936817020177841186523437500000000000000000e-06
#define vector_exp_poly_coeff_10h 2.75111392508451855531313940197990497438240709016099572181701660156250000000000000e-07
/* Evaluate the degree-10 polynomial with coefficients
   vector_exp_poly_coeff_0h .. _10h at x by Horner's scheme and store the
   value in *vector_exp_poly_resh.
   Each multiplication and each addition is kept as its own statement,
   mirroring the generated original step for step. */
static inline void vector_exp_poly(double * RESTRICT vector_exp_poly_resh, double x) {
  double acc;   /* running Horner value */
  double prod;  /* acc * x of the current step */

  acc = vector_exp_poly_coeff_10h;
  prod = acc * x;
  acc = vector_exp_poly_coeff_9h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_8h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_7h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_6h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_5h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_4h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_3h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_2h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_1h + prod;
  prod = acc * x;
  acc = vector_exp_poly_coeff_0h + prod;

  *vector_exp_poly_resh = acc;
}
/* A vector exponential.

   Returns exp(x[i]) in lane i for each of the VECTOR_LENGTH lanes.
   If any lane fails the test |x[i]| < 700.0 (which includes NaN lanes),
   the whole vector is handed to vector_exp_callout().  Otherwise every lane
   goes through the same straight-line sequence:
     E = x / ln(2) rounded to an integer (via SHIFTER),
     r = x - E * ln(2), with ln(2) applied as LOG_2_HI then LOG_2_LO,
     result = 2^E * vector_exp_poly(r). */
Vec4d vec4_exp_vectorlibm(Vec4d x) {
  Vec4d y;
  int lane;
  int allInRange = 1;

  /* Branch-free range check over all lanes. */
  for (lane = 0; lane < VECTOR_LENGTH; lane++) {
    allInRange &= (fabs(x[lane]) < 700.0);
  }

  /* One lane out of range is enough to take the slow path for all lanes. */
  if (!allInRange) {
    return vector_exp_callout(x);
  }

  /* All inputs are finite and the results neither under- nor overflow. */
  for (lane = 0; lane < VECTOR_LENGTH; lane++) {
    const double xi = x[lane];
    const double shiftedXTLog2e = xi * LOG2_E + SHIFTER;
    const double eDouble = shiftedXTLog2e - SHIFTER;
    const int E = (int) eDouble;
    const double t = xi - eDouble * LOG_2_HI; /* exact: trailing bits of constant 0, Sterbenz */
    const double r = t - eDouble * LOG_2_LO;
    double p;
    dblcast twoE;

    vector_exp_poly(&p,r);

    /* Build 2^E by writing the biased exponent into the exponent field. */
    twoE.i = (uint64_t) (E + 1023) << 52;
    y[lane] = twoE.d * p;
  }
  return y;
}

View File

@ -0,0 +1,769 @@
/* This program implements a show-case vector (vectorizable) double
precision logarithm with a 4 ulp error bound.
Author: Christoph Lauter,
Sorbonne Université - LIP6 - PEQUAN team.
This program uses code generated using Sollya and Metalibm; see the
licences and exception texts below.
This program is
Copyright 2014-2018 Christoph Lauter Sorbonne Université
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This code was generated using non-trivial code generation commands
of the Metalibm software program.
Before using, modifying and/or integrating this code into other
software, review the copyright and license status of this
generated code. In particular, see the exception below.
This generated program is partly or entirely based on a program
generated using non-trivial code generation commands of the Sollya
software program. See the copyright notice and exception text
referring to that Sollya-generated part of this program generated
with Metalibm below.
Metalibm is
Copyright 2008-2013 by
Laboratoire de l'Informatique du Parallélisme,
UMR CNRS - ENS Lyon - UCB Lyon 1 - INRIA 5668
and by
Laboratoire d'Informatique de Paris 6, equipe PEQUAN,
UPMC Universite Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France.
Contributors: Christoph Quirin Lauter
(UPMC LIP6 PEQUAN formerly LIP/ENS Lyon)
christoph.lauter@lip6.fr
and
Olga Kupriianova
(UPMC LIP6 PEQUAN)
olga.kupriianova@lip6.fr
Metalibm was formerly developed by the Arenaire project at Ecole
Normale Superieure de Lyon and is now developed by Equipe PEQUAN
at Universite Pierre et Marie Curie Paris 6.
The Metalibm software program is free software; you can
redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option)
any later version.
Metalibm is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the Metalibm program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
This generated program is distributed WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
As a special exception, you may create a larger work that contains
part or all of this software generated using Metalibm and
distribute that work under terms of your choice, so long as that
work isn't itself a numerical code generator using the skeleton of
this code or a modified version thereof as a code skeleton.
Alternatively, if you modify or redistribute this generated code
itself, or its skeleton, you may (at your option) remove this
special exception, which will cause this generated code and its
skeleton and the resulting Metalibm output files to be licensed
under the General Public licence (version 2) without this special
exception.
This special exception was added by the Metalibm copyright holders
on November 20th 2013.
*/
/*
This code was generated using non-trivial code generation commands of
the Sollya software program.
Before using, modifying and/or integrating this code into other
software, review the copyright and license status of this generated
code. In particular, see the exception below.
Sollya is
Copyright 2006-2013 by
Laboratoire de l'Informatique du Parallelisme, UMR CNRS - ENS Lyon -
UCB Lyon 1 - INRIA 5668,
Laboratoire d'Informatique de Paris 6, equipe PEQUAN, UPMC Universite
Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France
and by
Centre de recherche INRIA Sophia-Antipolis Mediterranee, equipe APICS,
Sophia Antipolis, France.
Contributors Ch. Lauter, S. Chevillard, M. Joldes
christoph.lauter@ens-lyon.org
sylvain.chevillard@ens-lyon.org
joldes@lass.fr
The Sollya software is a computer program whose purpose is to provide
an environment for safe floating-point code development. It is
particularily targeted to the automatized implementation of
mathematical floating-point libraries (libm). Amongst other features,
it offers a certified infinity norm, an automatic polynomial
implementer and a fast Remez algorithm.
The Sollya software is governed by the CeCILL-C license under French
law and abiding by the rules of distribution of free software. You
can use, modify and/ or redistribute the software under the terms of
the CeCILL-C license as circulated by CEA, CNRS and INRIA at the
following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided
only with a limited warranty and the software's author, the holder of
the economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards
their requirements in conditions enabling the security of their
systems and/or data to be ensured and, more generally, to use and
operate it in the same conditions as regards security.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
The Sollya program is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.
This generated program is distributed WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
As a special exception, you may create a larger work that contains
part or all of this software generated using Sollya and distribute
that work under terms of your choice, so long as that work isn't
itself a numerical code generator using the skeleton of this code or a
modified version thereof as a code skeleton. Alternatively, if you
modify or redistribute this generated code itself, or its skeleton,
you may (at your option) remove this special exception, which will
cause this generated code and its skeleton and the resulting Sollya
output files to be licensed under the CeCILL-C licence without this
special exception.
This special exception was added by the Sollya copyright holders in
version 4.1 of Sollya.
*/
/* Modified by Florian Ballenegger, Anamosic Ballenegger Design, 2020.
Use of gcc vector extensions instead of pointers to double */
#include <stdint.h>
/* Two caster types */
/* dblcast: access the bits of a double as an unsigned 64-bit integer. */
typedef union _dblcast {
double d;
uint64_t i;
} dblcast;
/* db_number: access the bits of a double as a signed 64-bit integer. */
typedef union {
int64_t l;
double d;
} db_number;
/* Compiler tricks and hints */
/* Spelling aliases for the C keywords used by the code below. */
#define INLINE inline
#define RESTRICT restrict
#define STATIC static
#define CONST const
/* Vector length */
/* Number of double lanes in one vector. */
#define VECTOR_LENGTH 4
/* Compiler vector-extension type: VECTOR_LENGTH doubles, aligned to the
   size of the whole vector. */
typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*VECTOR_LENGTH),
aligned (sizeof(double)*VECTOR_LENGTH)));
/* Macro implementations of some double-double operations */
/* Add12(s, r, a, b)
   Stores the rounded sum a + b in the lvalue s and, in the lvalue r, the
   correction term b - (s - a).  a and b are copied into locals first, so
   each is evaluated once; s is read back after being written.
   NOTE(review): expands to a bare { } block, not do { } while (0), so
   `Add12(...);` followed by `else` will not compile; the arguments are also
   not parenthesised. */
#define Add12(s, r, a, b) \
{double _z, _a=a, _b=b; \
s = _a + _b; \
_z = s - _a; \
r = _b - _z; }
/* Mul22(zh, zl, xh, xl, yh, yl)
   zh and zl are pointers to double.  Multiplies the two-part values
   (xh, xl) and (yh, yl): xh * yh is formed as a high/low pair using the
   splitting constant c, the cross terms xh*yl + xl*yh are added to the low
   part, and the pair is renormalised into *zh / *zl.
   NOTE(review): xh and yh are expanded several times, and the temporaries
   (mh, ml, c, up, u1, ...) can capture caller variables of the same name;
   pass only simple, side-effect-free expressions. */
#define Mul22(zh,zl,xh,xl,yh,yl) \
{ \
double mh, ml; \
\
const double c = 134217729.; \
double up, u1, u2, vp, v1, v2; \
\
up = (xh)*c; vp = (yh)*c; \
u1 = ((xh)-up)+up; v1 = ((yh)-vp)+vp; \
u2 = (xh)-u1; v2 = (yh)-v1; \
\
mh = (xh)*(yh); \
ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2); \
\
ml += (xh)*(yl) + (xl)*(yh); \
*zh = mh+ml; \
*zl = mh - (*zh) + ml; \
}
/* Mul122(resh, resl, a, bh, bl)
   resh and resl are pointers to double.  Multiplies the double a by the
   two-part value (bh, bl): Mul12 forms a * bh as a high/low pair, a * bl is
   added to the low part, and Add12 renormalises the pair into
   *resh / *resl.
   Uses Mul12, which is defined further down; that is fine because macros
   are only expanded where they are used.
   NOTE(review): a is expanded twice, so it must be free of side effects. */
#define Mul122(resh,resl,a,bh,bl) \
{ \
double _t1, _t2, _t3, _t4; \
\
Mul12(&_t1,&_t2,(a),(bh)); \
_t3 = (a) * (bl); \
_t4 = _t2 + _t3; \
Add12((*(resh)),(*(resl)),_t1,_t4); \
}
/* Add22(zh, zl, xh, xl, yh, yl)
   zh and zl are pointers to double.  Adds the two-part values (xh, xl) and
   (yh, yl): _r is the rounded sum of the high parts, _s collects what is
   left of xh and yh after that rounding together with both low parts, and
   the pair (_r, _s) is renormalised into *zh / *zl.
   This is the only macro of the group written as do { } while (0).
   NOTE(review): xh and yh are expanded twice; pass side-effect-free
   expressions. */
#define Add22(zh,zl,xh,xl,yh,yl) \
do { \
double _r,_s; \
_r = (xh)+(yh); \
_s = ((((xh)-_r) +(yh)) + (yl)) + (xl); \
*zh = _r+_s; \
*zl = (_r - (*zh)) + _s; \
} while(0)
/* Mul12(rh, rl, u, v)
   rh and rl are pointers to double.  Stores the rounded product u * v in
   *rh and, in *rl, a correction term rebuilt from the split halves of u and
   v (each operand is split with the constant 2^27 + 1 into u1/u2 and
   v1/v2).  u and v are copied into locals first, so each is evaluated once.
   NOTE(review): expands to a bare { } block and leaves rh / rl
   unparenthesised, so pass plain `&var` style arguments and do not use it
   as the unbraced body of an if that has an else. */
#define Mul12(rh,rl,u,v) \
{ \
const double c = 134217729.; /* 2^27 +1 */ \
double up, u1, u2, vp, v1, v2; \
double _u =u, _v=v; \
\
up = _u*c; vp = _v*c; \
u1 = (_u-up)+up; v1 = (_v-vp)+vp; \
u2 = _u-u1; v2 = _v-v1; \
\
*rh = _u*_v; \
*rl = (((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2); \
}
/* Need fabs */
/* Declared by hand here rather than by including <math.h>. */
double fabs(double);
/* Some constants */
/* ln(2) split in two parts: LOG_TWO_HI + LOG_TWO_LO approximates ln(2). */
#define LOG_TWO_HI 0.693147180559890330187045037746429443359375
#define LOG_TWO_LO 5.4979230187083711552420206887059365096458163346682e-14
/* A metalibm generated function for the callout */
/* Coefficients of the degree-8 polynomial evaluated by
   f_approx_log_arg_red(); coeff_k multiplies x^k and there is no constant
   term.  The values are close to (-1)^(k+1) / k. */
#define f_approx_log_arg_red_coeff_1h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
#define f_approx_log_arg_red_coeff_2h -4.99999999999998390176614293523016385734081268310546875000000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_3h 3.33333333333923731434111914495588280260562896728515625000000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_4h -2.50000000052116866378071335930144414305686950683593750000000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_5h 1.99999988486698782041983690760389436036348342895507812500000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_6h -1.66666258081627438603078417145297862589359283447265625000000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_7h 1.42921894210221167575980416586389765143394470214843750000000000000000000000000000e-01
#define f_approx_log_arg_red_coeff_8h -1.25915254741829296669664017827017232775688171386718750000000000000000000000000000e-01
/*
 * Evaluate the reduced-argument logarithm polynomial at the two-word point
 * xh + xm.
 *
 *   f_approx_log_arg_red_resh : receives the high word of the result
 *   f_approx_log_arg_red_resm : receives the low word of the result
 *   xh, xm                    : high and low word of the reduced argument
 *
 * Coefficients 8 down to 2 are folded in with a Horner recurrence in plain
 * double arithmetic that uses xh only.  The degree-1 coefficient is then
 * added through Add12, which yields a two-word sum, and that sum is
 * multiplied by (xh, xm) with Mul22.
 */
STATIC INLINE void f_approx_log_arg_red(double * RESTRICT f_approx_log_arg_red_resh, double * RESTRICT f_approx_log_arg_red_resm, double xh, double xm) {
    double horner;
    double sumHi, sumMi;
    double prodHi, prodMi;

    /* Horner steps, one floating-point operation per statement as in the
       generated code. */
    horner = f_approx_log_arg_red_coeff_8h;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_7h + horner;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_6h + horner;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_5h + horner;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_4h + horner;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_3h + horner;
    horner = horner * xh;
    horner = f_approx_log_arg_red_coeff_2h + horner;
    horner = horner * xh;

    /* Last two steps carried out on two-word values. */
    Add12(sumHi, sumMi, f_approx_log_arg_red_coeff_1h, horner);
    Mul22(&prodHi, &prodMi, sumHi, sumMi, xh, xm);

    *f_approx_log_arg_red_resh = prodHi;
    *f_approx_log_arg_red_resm = prodMi;
}
/* Parameters of the table-driven argument reduction in
   scalar_log_callout_inner:
   - f_approx_tablewidth: number of mantissa bits in the table index; the
     code extracts one bit more and rounds, so the index runs from 0 to
     2^f_approx_tablewidth inclusive (33 table entries).
   - f_approx_maxindex: first index for which the exponent is incremented.
   - f_approx_rcpr_log_two_of_base_hi/_mi: two-word constant multiplied by the
     exponent via Mul122; numerically hi + mi is ln(2). */
#define f_approx_tablewidth 5
#define f_approx_maxindex 14
#define f_approx_rcpr_log_two_of_base_hi 0.69314718055994528622676398299518041312694549560546875
#define f_approx_rcpr_log_two_of_base_mi 2.3190468138462995584177710797133615750739959242786823734316925538223586045205593109130859375e-17
/* High words of the logarithm table read by scalar_log_callout_inner, using
   the same index as f_approx_rcpr_tbl.  Numerically the entries appear to be
   -log(f_approx_rcpr_tbl[i]) for i < f_approx_maxindex and
   -log(2 * f_approx_rcpr_tbl[i]) from f_approx_maxindex on, i.e. for the
   indices where scalar_log_callout_inner also increments the exponent.
   Generated data: do not edit by hand. */
static const double f_approx_log_rcpr_tbl_hi[33] = {
0,
3.17486983145802981187699742804397828876972198486328125e-2,
6.453852113757117814341057737692608498036861419677734375e-2,
8.985632912186104770402295116582536138594150543212890625e-2,
0.1158318155251217007606356901305844075977802276611328125,
0.142500062607283040083672176479012705385684967041015625,
0.169899036795397473387225772967212833464145660400390625,
0.1980699137620937910764240541539038531482219696044921875,
0.2270574506353460753071971112149185501039028167724609375,
0.2468600779315257842672082233548280782997608184814453125,
0.2670627852490452536216025691828690469264984130859375,
0.2981533723190763485177967595518566668033599853515625,
0.319430770766361227241958431477542035281658172607421875,
0.34117075740276714412857472780160605907440185546875,
-0.329753286372467979692402195723843760788440704345703125,
-0.30702503529491187439504074063734151422977447509765625,
-0.28376817313064461867355703361681662499904632568359375,
-0.27193371548364175804834985683555714786052703857421875,
-0.2478361639045812692128123444490483961999416351318359375,
-0.223143551314209764857565687634632922708988189697265625,
-0.2105647691073496419189581274622469209134578704833984375,
-0.1849223384940119896402421773018431849777698516845703125,
-0.17185025692665922836255276706651784479618072509765625,
-0.1451820098444978890395162807180895470082759857177734375,
-0.1315763577887192614657152489598956890404224395751953125,
-0.1177830356563834557359626842298894189298152923583984375,
-0.10379679368164355934833764649738441221415996551513671875,
-7.522342123758753162920953627690323628485202789306640625e-2,
-6.062462181643483993820353816772694699466228485107421875e-2,
-4.58095360312942012637194011404062621295452117919921875e-2,
-3.077165866675368732785500469617545604705810546875e-2,
-1.5504186535965254478686148331689764745533466339111328125e-2,
0
};
/* Low words paired element-by-element with f_approx_log_rcpr_tbl_hi.
   scalar_log_callout_inner passes tbl_hi[index] and tbl_mi[index] to Add22 as
   the high and low word of one two-word table value.
   Generated data: do not edit by hand. */
static const double f_approx_log_rcpr_tbl_mi[33] = {
0,
3.03822630846808578765259986229142635550407126467467068542394059704747633077204227447509765625e-18,
-6.4704866616929329974161813916713618427728286285169519154170103547585313208401203155517578125e-18,
-6.2737601636895940223772151595043522169967894903434509935868934604741298244334757328033447265625e-19,
4.33848436980809595557198228135728192959103146527353490891076859270469867624342441558837890625e-18,
-9.9263882342257491397106905651454915981827472977916566876377402195430477149784564971923828125e-18,
-4.8680087644390707941393631766999763543363602831990049994714819803220962057821452617645263671875e-19,
3.74284348246143901356926696786621497402944711010920782190414257684096810407936573028564453125e-18,
9.551415762738488431492098722158984238118586922020904206309666051311069168150424957275390625e-18,
1.3617433717483680171009009478499574446783469284919833308666881066528731025755405426025390625e-17,
-7.3289153273201694886198949831953541788954485227476805253576941368010011501610279083251953125e-18,
-1.72069586744586603715170366469832022772114935873187524517646806998527608811855316162109375e-17,
1.3542568572648110745997524461078410815028703905694095442624469427528310916386544704437255859375e-18,
-1.936679006260286699473802044740827141118261398825169117277056329839979298412799835205078125e-17,
-2.122020616196946023332814001844389995179410458238009572207172226399052306078374385833740234375e-18,
1.231991620010196428468632499036271595368677926845939196720536301654647104442119598388671875e-17,
2.0326655811266561230291019136542876238402571524729010865595313362064189277589321136474609375e-17,
-7.8331963769744201243220009945333356568337002449775477268267831476578066940419375896453857421875e-19,
1.24322095787025231818185093190325423423584424116919953939852661051190807484090328216552734375e-17,
9.091270597324799048711045191818233254271755021066504787174977764152572490274906158447265625e-18,
4.24940531472989532850360049655226441340213720053550945643383585093033616431057453155517578125e-18,
-3.023661415357406426577090417003710240867302228907377570354952922571101225912570953369140625e-18,
6.0224538210113704760318352588172818979944380808860641962620974254605243913829326629638671875e-18,
-8.2424187830224753896228153425798328521705177161500548155270706729425000958144664764404296875e-18,
-1.112300087972958802991298461231701795529693224825161512736571012283093295991420745849609375e-17,
1.197168574759367729935408317875380291366461975031726568119427867031845380552113056182861328125e-18,
-5.47772415726659012592706002045618002605660904524354816447218041730593540705740451812744140625e-18,
5.93060419629324071708218111258442537327230935598090626192924190718258614651858806610107421875e-18,
-2.642402593872693418157455274069099088532417945381102798718675472855466068722307682037353515625e-18,
-1.90295986647425706325531188416869176372485943199669260195161513138373265974223613739013671875e-18,
-1.0431732029005967805059792190367890366163673586242621564579291515428849379532039165496826171875e-18,
3.27832102289242912962985506573138544887782756899054594813824881072150674299336969852447509765625e-19,
0
};
/* Multipliers r for the argument reduction in scalar_log_callout_inner: the
   normalized mantissa m is multiplied by f_approx_rcpr_tbl[index] and 1.0 is
   subtracted from the product before the polynomial is evaluated.  The 33
   entries decrease from 1.0 (index 0) to 0.5 (index 32).
   Generated data: do not edit by hand. */
static const double f_approx_rcpr_tbl[33] = {
1.0,
0.96875,
0.9375,
0.9140625,
0.890625,
0.8671875,
0.84375,
0.8203125,
0.796875,
0.78125,
0.765625,
0.7421875,
0.7265625,
0.7109375,
0.6953125,
0.6796875,
0.6640625,
0.65625,
0.640625,
0.625,
0.6171875,
0.6015625,
0.59375,
0.578125,
0.5703125,
0.5625,
0.5546875,
0.5390625,
0.53125,
0.5234375,
0.515625,
0.5078125,
0.5
};
/*
 * Table-driven core of the scalar logarithm.
 *
 *   res_resh, res_resm : receive the high and low word of the result
 *   xh                 : argument; scalar_log_callout only calls this for
 *                        positive subnormal or normal inputs
 *
 * Steps: split xh into a binary exponent E and a mantissa m, multiply m by a
 * tabulated factor r so that m * r - 1 is small, evaluate the polynomial
 * f_approx_log_arg_red on that two-word value, then add the tabulated
 * logarithm term and E times the two-word log(2) constant.
 *
 * NOTE(review): db_number is declared outside this part of the file; the
 * code assumes its members d (double) and l (64-bit integer) overlay the
 * same storage -- confirm against its definition.
 */
STATIC INLINE void scalar_log_callout_inner(double * RESTRICT res_resh, double * RESTRICT res_resm, double xh) {
db_number argRedCaster;
int E;
int index;
double ed;
double m;
double r;
double zh;
double zm;
double mrh, mrl;
double temp;
double polyHi;
double polyMi;
double tableHi;
double tableMi;
double scaledExpoHi;
double scaledExpoMi;
double logMHi;
double logMMi;
argRedCaster.d = xh;
E = 0;
/* Bit pattern below that of the smallest normal number: scale by 2^53 and
   compensate in the exponent. */
if (argRedCaster.l < 0x0010000000000000) {
argRedCaster.d *= 9007199254740992.0;
E -= 53;
}
/* Unbiased binary exponent taken from the exponent field. */
E += (int) ((argRedCaster.l >> 52) - 1023ll);
/* Leading f_approx_tablewidth + 1 mantissa bits, then rounded to
   f_approx_tablewidth bits: index ranges over 0 .. 32. */
index = (int) ((argRedCaster.l & 0x000fffffffffffffull) >> (52 - f_approx_tablewidth - 1));
index = (index + 1) >> 1;
/* From f_approx_maxindex on the exponent is bumped by one; the logarithm
   table entries for those indices are negative accordingly. */
if (index >= f_approx_maxindex) E++;
ed = (double) E;
/* Replace the exponent field by that of 1.0, keeping sign and mantissa. */
argRedCaster.l = (argRedCaster.l & 0x800fffffffffffffull) | 0x3ff0000000000000ull;
m = argRedCaster.d;
r = f_approx_rcpr_tbl[index];
/* zh + zm = m * r - 1, built from the two-word product mrh + mrl. */
Mul12(&mrh,&mrl,m,r);
temp = mrh - 1.0;
Add12(zh,zm,temp,mrl);
f_approx_log_arg_red(&polyHi, &polyMi, zh, zm);
tableHi = f_approx_log_rcpr_tbl_hi[index];
tableMi = f_approx_log_rcpr_tbl_mi[index];
/* Exponent contribution: ed times the two-word log(2) constant. */
Mul122(&scaledExpoHi,&scaledExpoMi,ed,f_approx_rcpr_log_two_of_base_hi,f_approx_rcpr_log_two_of_base_mi);
/* Table term + polynomial, then exponent term + that sum. */
Add22(&logMHi,&logMMi,tableHi,tableMi,polyHi,polyMi);
Add22(res_resh,res_resm,scaledExpoHi,scaledExpoMi,logMHi,logMMi);
}
/* A scalar logarithm for the callout.

   Classifies x by its bit pattern and returns:
     - the sum of the two result words of scalar_log_callout_inner for
       positive subnormal and normal x,
     - 1.0 + x (the quietized NaN) for a NaN input,
     - x itself for +Inf,
     - -Inf, raising division-by-zero, for +0 and -0,
     - NaN, raising invalid, for -Inf and negative numbers.
*/
STATIC INLINE double scalar_log_callout(double x) {
    dblcast bits;
    double resHi, resLo;
    double zero;

    bits.d = x;

    /* Regular case first: a nonzero pattern below that of +Inf is a
       positive subnormal or normal number. */
    if ((bits.i != 0x0ull) && (bits.i < 0x7ff0000000000000ull)) {
        scalar_log_callout_inner(&resHi, &resLo, x);
        return resHi + resLo;
    }

    /* From here on: +/-0, negative, +/-Inf or NaN. */

    if ((bits.i & 0x7fffffffffffffffull) > 0x7ff0000000000000ull) {
        /* NaN of either sign: the addition quietizes it. */
        return 1.0 + x;
    }

    if (bits.i == 0x7ff0000000000000ull) {
        /* +Inf: log(+Inf) = +Inf. */
        return x;
    }

    if (x == 0.0) {
        /* +0 or -0: produce -Inf through an actual division so the
           division-by-zero exception is raised. */
        zero = 1.0 - 1.0;   /* +0.0 or -0.0 */
        zero = zero * zero; /* +0.0 */
        return -1.0 / zero;
    }

    /* -Inf or a negative real number: 0/0 gives NaN and raises invalid. */
    zero = 0.0;
    return zero / zero;
}
/* A vector logarithm callout: applies scalar_log_callout to every one of the
   VECTOR_LENGTH lanes of x and returns the per-lane results. */
STATIC INLINE Vec4d vector_log_callout(Vec4d x) {
    Vec4d result;
    int lane = 0;

    while (lane < VECTOR_LENGTH) {
        result[lane] = scalar_log_callout(x[lane]);
        lane++;
    }
    return result;
}
/* Generated polynomial for vector logarithm */
/* Coefficients used by vector_log_poly below; coeff_<k>h is the coefficient
   of the degree-k term.  The low-order values are close to the alternating
   series 1, -1/2, 1/3, -1/4, ... of log(1 + x); the high-order ones deviate
   from it.  Generated constants: do not edit by hand. */
#define vector_log_poly_coeff_1h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
#define vector_log_poly_coeff_2h -5.00000000000000999200722162640886381268501281738281250000000000000000000000000000e-01
#define vector_log_poly_coeff_3h 3.33333333333384995711412557284347712993621826171875000000000000000000000000000000e-01
#define vector_log_poly_coeff_4h -2.49999999999541949735615276040334720164537429809570312500000000000000000000000000e-01
#define vector_log_poly_coeff_5h 1.99999999982921977670358160139585379511117935180664062500000000000000000000000000e-01
#define vector_log_poly_coeff_6h -1.66666666708135652319455743963771965354681015014648437500000000000000000000000000e-01
#define vector_log_poly_coeff_7h 1.42857144801517760290821001945005264133214950561523437500000000000000000000000000e-01
#define vector_log_poly_coeff_8h -1.25000000676456918258239170427259523421525955200195312500000000000000000000000000e-01
#define vector_log_poly_coeff_9h 1.11111007470194977919675238808849826455116271972656250000000000000000000000000000e-01
#define vector_log_poly_coeff_10h -9.99997732686361273657382753299316391348838806152343750000000000000000000000000000e-02
#define vector_log_poly_coeff_11h 9.09118368248343633464259028187370859086513519287109375000000000000000000000000000e-02
#define vector_log_poly_coeff_12h -8.33440688797140172283661740948446094989776611328125000000000000000000000000000000e-02
#define vector_log_poly_coeff_13h 7.68928106123701327057062826497713103890419006347656250000000000000000000000000000e-02
#define vector_log_poly_coeff_14h -7.12109533797148086531336730331531725823879241943359375000000000000000000000000000e-02
#define vector_log_poly_coeff_15h 6.65850051807088672006784690893255174160003662109375000000000000000000000000000000e-02
#define vector_log_poly_coeff_16h -6.43233317758114681028658310424361843615770339965820312500000000000000000000000000e-02
#define vector_log_poly_coeff_17h 6.31209736682013661246415381356200668960809707641601562500000000000000000000000000e-02
#define vector_log_poly_coeff_18h -5.44324247927492413379191305011772783473134040832519531250000000000000000000000000e-02
#define vector_log_poly_coeff_19h 3.23620871610351343306000160282565047964453697204589843750000000000000000000000000e-02
#define vector_log_poly_coeff_20h -9.16877113215055876416226254832508857361972332000732421875000000000000000000000000e-03
/*
 * Evaluate the degree-20 polynomial with coefficients
 * vector_log_poly_coeff_1h .. vector_log_poly_coeff_20h (no constant term)
 * at x, in plain double arithmetic, and store the value in
 * *vector_log_poly_resh.
 *
 * The evaluation is a Horner recurrence starting from the degree-20
 * coefficient; every multiplication and every addition is kept as its own
 * statement, in the order of the generated code.
 */
STATIC void vector_log_poly(double * RESTRICT vector_log_poly_resh, double x) {
    double acc;

    acc = vector_log_poly_coeff_20h;
    acc = acc * x;
    acc = vector_log_poly_coeff_19h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_18h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_17h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_16h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_15h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_14h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_13h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_12h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_11h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_10h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_9h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_8h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_7h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_6h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_5h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_4h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_3h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_2h + acc;
    acc = acc * x;
    acc = vector_log_poly_coeff_1h + acc;
    acc = acc * x;

    *vector_log_poly_resh = acc;
}
/* A vector logarithm.

   Returns the natural logarithm of every lane of x.  If any lane holds a
   value whose bit pattern is below 0x0020000000000000 (zero, subnormals and
   the smallest normals) or at/above that of +Inf (Inf, NaN, anything with
   the sign bit set), the whole vector is handed to vector_log_callout.
   Otherwise each lane is split into 2^E * m with 0.75 <= m < 1.5, the
   polynomial vector_log_poly is evaluated at m - 1, and E * log(2) is added
   using the two-word constant LOG_TWO_HI + LOG_TWO_LO.
*/
Vec4d vec4_log_vectorlibm(Vec4d x) {
    Vec4d result;
    dblcast bits;
    uint64_t raw, expBiased, expRounded, carry;
    int lane;
    int expo;
    double expoD, mant, red, poly;
    double eLogHi, eLogLo;
    double sumHi, sumLo, diff, tail;

    /* Perform a callout as soon as one slot cannot be handled here */
    for (lane = 0; lane < VECTOR_LENGTH; lane++) {
        bits.d = x[lane];
        if ((bits.i < 0x0020000000000000ull) || (bits.i >= 0x7ff0000000000000ull)) {
            return vector_log_callout(x);
        }
    }

    /* Every lane is real, positive and far enough from the subnormal range */
    for (lane = 0; lane < VECTOR_LENGTH; lane++) {
        bits.d = x[lane];
        raw = bits.i;

        /* Adding half of the mantissa range carries into the exponent field
           exactly when the mantissa is 1.5 or larger. */
        expRounded = (raw + 0x0008000000000000ull) >> 52;
        expBiased = raw >> 52;
        carry = (expRounded - expBiased) << 52; /* 0 or one exponent step */

        expo = ((int) expRounded) - 1023;
        expoD = (double) expo;

        /* Exponent field of 1.0, lowered by one step when the carry occurred */
        bits.i = ((bits.i & 0x000fffffffffffffull) | 0x3ff0000000000000ull) - carry;
        mant = bits.d;    /* 2^expo * mant = x exactly, 0.75 <= mant < 1.5 */
        red = mant - 1.0; /* exact: Sterbenz */

        vector_log_poly(&poly, red);

        eLogHi = expoD * LOG_TWO_HI; /* exact: trailing zeros */
        eLogLo = expoD * LOG_TWO_LO;

        /* Two-word sum of eLogHi and poly, then the low-order terms */
        sumHi = eLogHi + poly;
        diff = sumHi - eLogHi;
        sumLo = poly - diff; /* exact: sumHi + sumLo = eLogHi + poly */
        tail = eLogLo + sumLo;
        result[lane] = sumHi + tail;
    }
    return result;
}

View File

@ -667,13 +667,15 @@
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<RuntimeTypeInfo>true</RuntimeTypeInfo>
<OpenMPSupport>true</OpenMPSupport>
<OpenMPSupport>
</OpenMPSupport>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>CompileAsC</CompileAs>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/openmp:experimental %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -1111,6 +1113,7 @@
<ClInclude Include="..\src\spicelib\devices\bsim3v1\bsim3v1ext.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v1\bsim3v1init.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v1\bsim3v1itf.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v32\b3v32acm.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v32\bsim3v32def.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v32\bsim3v32ext.h" />
<ClInclude Include="..\src\spicelib\devices\bsim3v32\bsim3v32init.h" />
@ -1846,11 +1849,19 @@
<ClCompile Include="..\src\spicelib\devices\bsim3v1\bsim3v1init.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32acld.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32acm.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ask.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32check.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32cvtest.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32getic.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ld.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldsel.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldseq.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldseq_simd4.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldseq_simd4_omp.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldseq_simd8.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldsimd.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32ldsimd8.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32mask.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32mdel.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32mpar.c" />
@ -1862,6 +1873,8 @@
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32temp.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\b3v32trunc.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\bsim3v32init.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\vec4_exp.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3v32\vec4_log.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3\b3.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3\b3acld.c" />
<ClCompile Include="..\src\spicelib\devices\bsim3\b3ask.c" />