From 8b1c9dde7f10e9fd1ab02a9461bc2503610886dc Mon Sep 17 00:00:00 2001 From: h_vogt Date: Thu, 24 Jun 2010 10:25:05 +0000 Subject: [PATCH] OpenMP for BSIM3 ver 3.3.0 --- ChangeLog | 4 + configure.in | 26 +++- src/spicelib/devices/bsim3/b3ld.c | 170 ++++++++++++++++++++++++-- src/spicelib/devices/bsim3/b3set.c | 57 ++++++++- src/spicelib/devices/bsim3/bsim3def.h | 54 ++++++++ src/spinit.in | 3 + 6 files changed, 300 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 826431720..4150ab7f4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2010-06-23 Holger Vogt + * configure.in, bsim3/b3ld.c, bsim3/b3set.c, bsim3/bsim3def.f, + spinit.in: Multi-core support with OpenMP for BSIM3 vers. 3.3.0 + 2010-06-23 Robert Larice * src/ngmultidec.c, * src/frontend/inpcom.c, diff --git a/configure.in b/configure.in index 9bdf1a434..78aa87294 100644 --- a/configure.in +++ b/configure.in @@ -167,7 +167,7 @@ AC_ARG_ENABLE(xgraph, dnl --enable-x: Compile software using x libraries. A GUI will be provided then. AC_ARG_ENABLE(x, AS_HELP_STRING([--enable-x],[Enable X11 gui])) - + dnl --with-readline: Includes GNU readline support into CLI. Default is "no". dnl Including readline into ngspice is a violation of GPL license. It's use dnl is discouraged. @@ -182,7 +182,9 @@ dnl --with-tcl: define TCL_MODULE in the code. This is for tcl support AC_ARG_WITH(tcl, [ --with-tcl[=tcldir] Compiles the tcl module instead, experimental, see README.Tcl]) - +dnl --enable-openmp: Use OpenMP on multi-core processors +AC_ARG_ENABLE(openmp, + AS_HELP_STRING([--enable-openmp],[Use OpenMP parallel processing])) dnl readline and editline cannot both be enabled if test "$with_editline" = "yes"; then @@ -612,9 +614,12 @@ AC_STRUCT_TM AC_STRUCT_TIMEZONE AC_CHECK_FUNCS(localtime) -AC_CHECK_FUNCS(ftime) -AC_CHECK_FUNCS(gettimeofday time) -AC_CHECK_FUNCS(getrusage utimes) +AC_CHECK_FUNCS(ftime gettimeofday) +dnl Do not use time or getrusage function for CPU time measurement under OpenMP +if test "$enable_openmp" = "no"; then + AC_CHECK_FUNCS(time getrusage) +fi +AC_CHECK_FUNCS(utimes) AC_CHECK_FUNCS(getrlimit ulimit, break) AC_CHECK_FUNCS([endpwent gethostbyname memset select socket strdup strerror strncasecmp strstr strtol]) @@ -1017,6 +1022,17 @@ fi dnl Use AC_CHECK_HEADERS so the HAVE_*_H symbol gets defined AC_CHECK_HEADERS(ncurses/termcap.h termcap.h) +dnl Add OpenMP to ngspice. +if test "$enable_openmp" = "yes"; then +dnl test for header + AC_CHECK_HEADERS([omp.h], + [AC_DEFINE(USE_OMP,[1],[OpenMP parallel processing])], + [AC_MSG_ERROR(Couldn't find OpenMP headers.)]) + CFLAGS="$CFLAGS -fopenmp" +dnl test if function is o.k. + AC_CHECK_FUNC(omp_get_wtime, [AC_MSG_RESULT(OpenMP features enabled)], + [AC_MSG_ERROR(OpenMP not working correctly.)]) +fi dnl Output Files dnl ------------ diff --git a/src/spicelib/devices/bsim3/b3ld.c b/src/spicelib/devices/bsim3/b3ld.c index 844f11303..d9585f92b 100644 --- a/src/spicelib/devices/bsim3/b3ld.c +++ b/src/spicelib/devices/bsim3/b3ld.c @@ -31,14 +31,44 @@ #define DELTA_3 0.02 #define DELTA_4 0.02 +#ifdef USE_OMP +int BSIM3LoadOMP(BSIM3instance *here, CKTcircuit *ckt); +void BSIM3LoadRhsMat(GENmodel *inModel, CKTcircuit *ckt); +extern int nthreads; +#endif + int BSIM3load( GENmodel *inModel, CKTcircuit *ckt) { +#ifdef USE_OMP + int idx; + BSIM3model *model = (BSIM3model*)inModel; + int good = 0; + BSIM3instance *here; + BSIM3instance **InstArray; + InstArray = model->BSIM3InstanceArray; + +#pragma omp parallel for num_threads(nthreads) private(here) + for (idx = 0; idx < model->BSIM3InstCount; idx++) { + here = InstArray[idx]; + good = BSIM3LoadOMP(here, ckt); + } + + BSIM3LoadRhsMat(inModel, ckt); + + return good; +} + + +int BSIM3LoadOMP(BSIM3instance *here, CKTcircuit *ckt) { +BSIM3model *model; +#else BSIM3model *model = (BSIM3model*)inModel; BSIM3instance *here; +#endif double SourceSatCurrent, DrainSatCurrent; double ag0, qgd, qgs, qgb, von, cbhat, VgstNVt, ExpVgst; double cdrain, cdhat, cdreq, ceqbd, ceqbs, ceqqb, ceqqd, ceqqg, ceq, geq; @@ -136,18 +166,23 @@ struct bsim3SizeDependParam *pParam; int ByPass, Check, ChargeComputationNeeded, error; /* double junk[50]; */ +#ifdef USE_OMP +model = here->BSIM3modPtr; +#endif + ScalingFactor = 1.0e-9; ChargeComputationNeeded = ((ckt->CKTmode & (MODEAC | MODETRAN | MODEINITSMSIG)) || ((ckt->CKTmode & MODETRANOP) && (ckt->CKTmode & MODEUIC))) ? 1 : 0; - +#ifndef USE_OMP for (; model != NULL; model = model->BSIM3nextModel) { for (here = model->BSIM3instances; here != NULL; here = here->BSIM3nextInstance) - { + { if (here->BSIM3owner != ARCHme) continue; +#endif Check = 1; ByPass = 0; pParam = here->pParam; @@ -2890,7 +2925,15 @@ line900: } m = here->BSIM3m; - +#ifdef USE_OMP + here->BSIM3rhsG = m * ceqqg; + here->BSIM3rhsB = m * (ceqbs + ceqbd + ceqqb); + here->BSIM3rhsD = m * (ceqbd - cdreq - ceqqd); + here->BSIM3rhsS = m * (cdreq + ceqbs + ceqqg + + ceqqb + ceqqd); + if (here->BSIM3nqsMod) + here->BSIM3rhsQ = m * (cqcheq - cqdef); +#else (*(ckt->CKTrhs + here->BSIM3gNode) -= m * ceqqg); (*(ckt->CKTrhs + here->BSIM3bNode) -= m * (ceqbs + ceqbd + ceqqb)); (*(ckt->CKTrhs + here->BSIM3dNodePrime) += m * (ceqbd - cdreq - ceqqd)); @@ -2898,12 +2941,64 @@ line900: + ceqqb + ceqqd)); if (here->BSIM3nqsMod) *(ckt->CKTrhs + here->BSIM3qNode) += m * (cqcheq - cqdef); - +#endif /* * load y matrix */ T1 = qdef * here->BSIM3gtau; +#ifdef USE_OMP + here->BSIM3DdPt = m * here->BSIM3drainConductance; + here->BSIM3GgPt = m * (gcggb - ggtg); + here->BSIM3SsPt = m * here->BSIM3sourceConductance; + here->BSIM3BbPt = m * (here->BSIM3gbd + here->BSIM3gbs + - gcbgb - gcbdb - gcbsb - here->BSIM3gbbs); + here->BSIM3DPdpPt = m * (here->BSIM3drainConductance + + here->BSIM3gds + here->BSIM3gbd + + RevSum + gcddb + dxpart * ggtd + + T1 * ddxpart_dVd + gbdpdp); + here->BSIM3SPspPt = m * (here->BSIM3sourceConductance + + here->BSIM3gds + here->BSIM3gbs + + FwdSum + gcssb + sxpart * ggts + + T1 * dsxpart_dVs + gbspsp); + here->BSIM3DdpPt = m * here->BSIM3drainConductance; + here->BSIM3GbPt = m * (gcggb + gcgdb + gcgsb + ggtb); + here->BSIM3GdpPt = m * (gcgdb - ggtd); + here->BSIM3GspPt = m * (gcgsb - ggts); + here->BSIM3SspPt = m * here->BSIM3sourceConductance; + here->BSIM3BgPt = m * (gcbgb - here->BSIM3gbgs); + here->BSIM3BdpPt = m * (gcbdb - here->BSIM3gbd + gbbdp); + here->BSIM3BspPt = m * (gcbsb - here->BSIM3gbs + gbbsp); + here->BSIM3DPdPt = m * here->BSIM3drainConductance; + here->BSIM3DPgPt = m * (Gm + gcdgb + dxpart * ggtg + + T1 * ddxpart_dVg + gbdpg); + here->BSIM3DPbPt = m * (here->BSIM3gbd - Gmbs + gcdgb + gcddb + + gcdsb - dxpart * ggtb + - T1 * ddxpart_dVb - gbdpb); + here->BSIM3DPspPt = m * (here->BSIM3gds + FwdSum - gcdsb + - dxpart * ggts - T1 * ddxpart_dVs - gbdpsp); + here->BSIM3SPgPt = m * (gcsgb - Gm + sxpart * ggtg + + T1 * dsxpart_dVg + gbspg); + here->BSIM3SPsPt = m * here->BSIM3sourceConductance; + here->BSIM3SPbPt = m * (here->BSIM3gbs + Gmbs + gcsgb + gcsdb + + gcssb - sxpart * ggtb + - T1 * dsxpart_dVb - gbspb); + here->BSIM3SPdpPt = m * (here->BSIM3gds + RevSum - gcsdb + - sxpart * ggtd - T1 * dsxpart_dVd - gbspdp); + + if (here->BSIM3nqsMod) + { here->BSIM3QqPt = m * (gqdef + here->BSIM3gtau); + + here->BSIM3DPqPt = m * (dxpart * here->BSIM3gtau); + here->BSIM3SPqPt = m * (sxpart * here->BSIM3gtau); + here->BSIM3GqPt = m * here->BSIM3gtau; + + here->BSIM3QgPt = m * (ggtg - gcqgb); + here->BSIM3QdpPt = m * (ggtd - gcqdb); + here->BSIM3QspPt = m * (ggts - gcqsb); + here->BSIM3QbPt = m * (ggtb - gcqbb); + } +#else (*(here->BSIM3DdPtr) += m * here->BSIM3drainConductance); (*(here->BSIM3GgPtr) += m * (gcggb - ggtg)); (*(here->BSIM3SsPtr) += m * here->BSIM3sourceConductance); @@ -2954,12 +3049,73 @@ line900: *(here->BSIM3QspPtr) += m * (ggts - gcqsb); *(here->BSIM3QbPtr) += m * (ggtb - gcqbb); } - +#endif line1000: ; - +#ifndef USE_OMP } /* End of Mosfet Instance */ } /* End of Model Instance */ - +#endif return(OK); } +#ifdef USE_OMP +void BSIM3LoadRhsMat(GENmodel *inModel, CKTcircuit *ckt) +{ + unsigned int InstCount, idx; + BSIM3instance **InstArray; + BSIM3instance *here; + BSIM3model *model = (BSIM3model*)inModel; + + InstArray = model->BSIM3InstanceArray; + InstCount = model->BSIM3InstCount; + + for(idx = 0; idx < InstCount; idx++) { + here = InstArray[idx]; + /* Update b for Ax = b */ + (*(ckt->CKTrhs + here->BSIM3gNode) -= here->BSIM3rhsG); + (*(ckt->CKTrhs + here->BSIM3bNode) -= here->BSIM3rhsB); + (*(ckt->CKTrhs + here->BSIM3dNodePrime) += here->BSIM3rhsD); + (*(ckt->CKTrhs + here->BSIM3sNodePrime) += here->BSIM3rhsS); + if (here->BSIM3nqsMod) + (*(ckt->CKTrhs + here->BSIM3qNode) += here->BSIM3rhsQ); + + /* Update A for Ax = b */ + (*(here->BSIM3DdPtr) += here->BSIM3DdPt); + (*(here->BSIM3GgPtr) += here->BSIM3GgPt); + (*(here->BSIM3SsPtr) += here->BSIM3SsPt); + (*(here->BSIM3BbPtr) += here->BSIM3BbPt); + (*(here->BSIM3DPdpPtr) += here->BSIM3DPdpPt); + (*(here->BSIM3SPspPtr) += here->BSIM3SPspPt); + (*(here->BSIM3DdpPtr) -= here->BSIM3DdpPt); + (*(here->BSIM3GbPtr) -= here->BSIM3GbPt); + (*(here->BSIM3GdpPtr) += here->BSIM3GdpPt); + (*(here->BSIM3GspPtr) += here->BSIM3GspPt); + (*(here->BSIM3SspPtr) -= here->BSIM3SspPt); + (*(here->BSIM3BgPtr) += here->BSIM3BgPt); + (*(here->BSIM3BdpPtr) += here->BSIM3BdpPt); + (*(here->BSIM3BspPtr) += here->BSIM3BspPt); + (*(here->BSIM3DPdPtr) -= here->BSIM3DPdPt); + (*(here->BSIM3DPgPtr) += here->BSIM3DPgPt); + (*(here->BSIM3DPbPtr) -= here->BSIM3DPbPt); + (*(here->BSIM3DPspPtr) -= here->BSIM3DPspPt); + (*(here->BSIM3SPgPtr) += here->BSIM3SPgPt); + (*(here->BSIM3SPsPtr) -= here->BSIM3SPsPt); + (*(here->BSIM3SPbPtr) -= here->BSIM3SPbPt); + (*(here->BSIM3SPdpPtr) -= here->BSIM3SPdpPt); + + if (here->BSIM3nqsMod) + { *(here->BSIM3QqPtr) += here->BSIM3QqPt; + + *(here->BSIM3DPqPtr) += here->BSIM3DPqPt; + *(here->BSIM3SPqPtr) += here->BSIM3SPqPt; + *(here->BSIM3GqPtr) -= here->BSIM3GqPt; + + *(here->BSIM3QgPtr) += here->BSIM3QgPt; + *(here->BSIM3QdpPtr) += here->BSIM3QdpPt; + *(here->BSIM3QspPtr) += here->BSIM3QspPt; + *(here->BSIM3QbPtr) += here->BSIM3QbPt; + } + + } +} +#endif diff --git a/src/spicelib/devices/bsim3/b3set.c b/src/spicelib/devices/bsim3/b3set.c index b733d45de..3c8665f84 100644 --- a/src/spicelib/devices/bsim3/b3set.c +++ b/src/spicelib/devices/bsim3/b3set.c @@ -15,6 +15,11 @@ #include "const.h" #include "sperror.h" #include "suffix.h" +#ifdef USE_OMP +int nthreads; +extern bool cp_getvar(char *name, int type, void *retval); +#define VT_NUM 1 +#endif #define MAX_EXP 5.834617425e14 #define MIN_EXP 1.713908431e-15 @@ -37,9 +42,15 @@ BSIM3model *model = (BSIM3model*)inModel; BSIM3instance *here; int error; CKTnode *tmp; - CKTnode *tmpNode; -IFuid tmpName; +IFuid tmpName; + +#ifdef USE_OMP +unsigned int idx, InstCount; +BSIM3instance **InstArray; +int nthreads; +#endif + /* loop through all the BSIM3 device models */ for( ; model != NULL; model = model->BSIM3nextModel ) @@ -955,6 +966,7 @@ IFuid tmpName; /* set Sparse Matrix Pointers */ + /* macro to make elements with built in test for out of memory */ #define TSTALLOC(ptr,first,second) \ if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NULL){\ @@ -997,6 +1009,47 @@ if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NULL){\ } } +#ifdef USE_OMP + if (!cp_getvar("num_threads", VT_NUM, (char *) &nthreads)) + nthreads = 4; + + omp_set_num_threads(nthreads); + if (nthreads == 1) + printf("OpenMP: %d thread is requested in BSIM3\n", nthreads); + else + printf("OpenMP: %d threads are requested in BSIM3\n", nthreads); + InstCount = 0; + model = (BSIM3model*)inModel; + /* loop through all the BSIM3 device models + to count the number of instances */ + + for( ; model != NULL; model = model->BSIM3nextModel ) + { + /* loop through all the instances of the model */ + for (here = model->BSIM3instances; here != NULL ; + here=here->BSIM3nextInstance) + { + InstCount++; + } + } + InstArray = (BSIM3instance**)tmalloc(InstCount*sizeof(BSIM3instance**)); + model = (BSIM3model*)inModel; + idx = 0; + for( ; model != NULL; model = model->BSIM3nextModel ) + { + /* loop through all the instances of the model */ + for (here = model->BSIM3instances; here != NULL ; + here=here->BSIM3nextInstance) + { + InstArray[idx] = here; + idx++; + } + /* set the array pointer and instance count into each model */ + model->BSIM3InstCount = InstCount; + model->BSIM3InstanceArray = InstArray; + } + +#endif return(OK); } diff --git a/src/spicelib/devices/bsim3/bsim3def.h b/src/spicelib/devices/bsim3/bsim3def.h index bb20275e2..fa7912ff2 100644 --- a/src/spicelib/devices/bsim3/bsim3def.h +++ b/src/spicelib/devices/bsim3/bsim3def.h @@ -15,6 +15,12 @@ File: bsim3def.h #include "complex.h" #include "noisedef.h" +//#define USE_OMP + +#ifdef USE_OMP +#include +#endif + typedef struct sBSIM3instance { struct sBSIM3model *BSIM3modPtr; @@ -163,6 +169,48 @@ typedef struct sBSIM3instance double *BSIM3SPqPtr; double *BSIM3BqPtr; +#ifdef USE_OMP + /* per instance storage of results, to update matrix at a later stge */ + double BSIM3rhsG; + double BSIM3rhsB; + double BSIM3rhsD; + double BSIM3rhsS; + double BSIM3rhsQ; + + double BSIM3DdPt; + double BSIM3GgPt; + double BSIM3SsPt; + double BSIM3BbPt; + double BSIM3DPdpPt; + double BSIM3SPspPt; + double BSIM3DdpPt; + double BSIM3GbPt; + double BSIM3GdpPt; + double BSIM3GspPt; + double BSIM3SspPt; + double BSIM3BdpPt; + double BSIM3BspPt; + double BSIM3DPspPt; + double BSIM3DPdPt; + double BSIM3BgPt; + double BSIM3DPgPt; + double BSIM3SPgPt; + double BSIM3SPsPt; + double BSIM3DPbPt; + double BSIM3SPbPt; + double BSIM3SPdpPt; + + double BSIM3QqPt; + double BSIM3QdpPt; + double BSIM3QgPt; + double BSIM3QspPt; + double BSIM3QbPt; + double BSIM3DPqPt; + double BSIM3GqPt; + double BSIM3SPqPt; + double BSIM3BqPt; +#endif + #define BSIM3vbd BSIM3states+ 0 #define BSIM3vbs BSIM3states+ 1 #define BSIM3vgs BSIM3states+ 2 @@ -795,6 +843,12 @@ typedef struct sBSIM3model struct bsim3SizeDependParam *pSizeDependParamKnot; + +#ifdef USE_OMP + int BSIM3InstCount; + struct sBSIM3instance **BSIM3InstanceArray; +#endif + /* Flags */ unsigned BSIM3mobModGiven :1; unsigned BSIM3binUnitGiven :1; diff --git a/src/spinit.in b/src/spinit.in index d9cd5707b..12a0c9477 100644 --- a/src/spinit.in +++ b/src/spinit.in @@ -11,6 +11,9 @@ set x11lineararcs set noaskquit ** set the compatibility mode to allow some hspice constructs set ngbehavior=all +** set the number of threads in openmp +** default (if compiled with --enable-openmp) is: 2 +*set num_threads=4 strcmp __flag $program "ngspice" if $__flag = 0