From f12d2a3f0dbd378ba06aabd31eeec0f71d1cf40f Mon Sep 17 00:00:00 2001 From: Holger Vogt Date: Sun, 12 Jul 2020 10:24:26 +0200 Subject: [PATCH] enable simd --- compile_min_simd.sh | 2 +- src/spicelib/devices/bsim3v32/b3v32acm.c | 104 + src/spicelib/devices/bsim3v32/b3v32acm.h | 25 + src/spicelib/devices/bsim3v32/b3v32ldsel.c | 262 + src/spicelib/devices/bsim3v32/b3v32ldseq.c | 3799 +++++++++ .../devices/bsim3v32/b3v32ldseq_simd4.c | 5689 +++++++++++++ .../devices/bsim3v32/b3v32ldseq_simd4_omp.c | 5726 +++++++++++++ .../devices/bsim3v32/b3v32ldseq_simd8.c | 7541 +++++++++++++++++ src/spicelib/devices/bsim3v32/b3v32ldsimd.c | 347 + src/spicelib/devices/bsim3v32/b3v32ldsimd8.c | 274 + src/spicelib/devices/bsim3v32/vec4_exp.c | 654 ++ src/spicelib/devices/bsim3v32/vec4_log.c | 769 ++ visualc/vngspice.vcxproj | 15 +- 13 files changed, 25205 insertions(+), 2 deletions(-) create mode 100644 src/spicelib/devices/bsim3v32/b3v32acm.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32acm.h create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldsel.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldseq.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldseq_simd4.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldseq_simd4_omp.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldseq_simd8.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldsimd.c create mode 100644 src/spicelib/devices/bsim3v32/b3v32ldsimd8.c create mode 100644 src/spicelib/devices/bsim3v32/vec4_exp.c create mode 100644 src/spicelib/devices/bsim3v32/vec4_log.c diff --git a/compile_min_simd.sh b/compile_min_simd.sh index 41dc78287..6ba53e8ba 100644 --- a/compile_min_simd.sh +++ b/compile_min_simd.sh @@ -60,7 +60,7 @@ else echo "configuring for 64 bit release" echo # You may add --enable-adms to the following command for adding adms generated devices - ../configure --with-wingui --enable-xspice --enable-cider --enable-openmp --disable-debug prefix="C:/Spice64" CFLAGS="-m64 -O3 
-march=native " LDFLAGS="-m64 -s" + ../configure --with-wingui --enable-xspice --enable-cider --enable-openmp --disable-debug prefix="C:/Spice64" CFLAGS="-m64 -O3 -mtune=native " LDFLAGS="-m64 -s" fi if [ $? -ne 0 ]; then echo "../configure failed"; exit 1 ; fi diff --git a/src/spicelib/devices/bsim3v32/b3v32acm.c b/src/spicelib/devices/bsim3v32/b3v32acm.c new file mode 100644 index 000000000..79b1d2c86 --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32acm.c @@ -0,0 +1,104 @@ +inline int BSIM3v32_ACM_saturationCurrents +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *DrainSatCurrent, + double *SourceSatCurrent +) +{ + return ACM_saturationCurrents( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + model->BSIM3v32jctTempSatCurDensity, + model->BSIM3v32jctSidewallTempSatCurDensity, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + DrainSatCurrent, + SourceSatCurrent + ); +} + + +inline int BSIM3v32_ACM_junctionCapacitances +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *areaDrainBulkCapacitance, + double *periDrainBulkCapacitance, + double *gateDrainBulkCapacitance, + double *areaSourceBulkCapacitance, + double *periSourceBulkCapacitance, + double *gateSourceBulkCapacitance +) +{ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + return ACM_junctionCapacitances( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + 
here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + model->BSIM3v32unitAreaTempJctCap, + model->BSIM3v32unitLengthSidewallTempJctCap, + model->BSIM3v32unitLengthGateSidewallTempJctCap, + areaDrainBulkCapacitance, + periDrainBulkCapacitance, + gateDrainBulkCapacitance, + areaSourceBulkCapacitance, + periSourceBulkCapacitance, + gateSourceBulkCapacitance + ); + case BSIM3v32V322: + case BSIM3v32V32: + default: + return ACM_junctionCapacitances( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + model->BSIM3v32unitAreaJctCap, + model->BSIM3v32unitLengthSidewallJctCap, + model->BSIM3v32unitLengthGateSidewallJctCap, + areaDrainBulkCapacitance, + periDrainBulkCapacitance, + gateDrainBulkCapacitance, + areaSourceBulkCapacitance, + periSourceBulkCapacitance, + gateSourceBulkCapacitance + ); + } +} diff --git a/src/spicelib/devices/bsim3v32/b3v32acm.h b/src/spicelib/devices/bsim3v32/b3v32acm.h new file mode 100644 index 000000000..adf1717bd --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32acm.h @@ -0,0 +1,25 @@ +#ifndef BSIM3v32ACM_H +#define BSIM3v32ACM_H + +int BSIM3v32_ACM_saturationCurrents +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *DrainSatCurrent, + double *SourceSatCurrent +); + + +int BSIM3v32_ACM_junctionCapacitances +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *areaDrainBulkCapacitance, + double *periDrainBulkCapacitance, + double *gateDrainBulkCapacitance, + double *areaSourceBulkCapacitance, + double *periSourceBulkCapacitance, + double *gateSourceBulkCapacitance +); + +#endif diff --git 
a/src/spicelib/devices/bsim3v32/b3v32ldsel.c b/src/spicelib/devices/bsim3v32/b3v32ldsel.c new file mode 100644 index 000000000..2081c6e7b --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldsel.c @@ -0,0 +1,262 @@ +/******************************************************************************* + * Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design + ******************************************************************************* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ ******************************************************************************/ + +#include "ngspice/ngspice.h" +#include "bsim3v32def.h" + +/* NSIMD = number of // instances evaluated (=4 for AVX2) */ +#define NDATASIMD 7 + +#define DEBUG 0 + +#ifndef USE_OMP +extern int BSIM3v32LoadSeq(BSIM3v32instance *here, CKTcircuit *ckt, double* data, int stride); +extern int BSIM3v32LoadSIMD(BSIM3v32instance **heres, CKTcircuit *ckt, double data[7][NSIMD]); +#else +extern void BSIM3v32LoadRhsMat(GENmodel *inModel, CKTcircuit *ckt); +extern int BSIM3v32LoadSeq(BSIM3v32instance *here, CKTcircuit *ckt, int); +extern int BSIM3v32LoadSIMD(BSIM3v32instance **heres, CKTcircuit *ckt); +#endif + + +#ifndef USE_OMP +int +BSIM3v32loadSel (GENmodel *inModel, CKTcircuit *ckt) +{ + #ifndef USE_OMP + double data[NDATASIMD][NSIMD]; + #endif + BSIM3v32group *group; /* a group of instance of same model, same pParam, same nqsMode, same geo and same off */ + BSIM3v32model *model = (BSIM3v32model*)inModel; + BSIM3v32instance* heres[NSIMD]; + + for (; model != NULL; model = BSIM3v32nextModel(model)) + for (group=model->groupHead; group!=NULL; group=group->next) + { + int idx=0; + while(idx+NSIMD <= group->InstCount) + { + int count=0; + while((count<NSIMD) && (idx<group->InstCount)) + { + data[0][count]=NAN; + heres[count] = group->InstArray[idx]; + int local_error = BSIM3v32LoadSeq(group->InstArray[idx++],ckt, + &data[0][count],NSIMD + ); + if (local_error) return local_error; + if(!isnan(data[0][count])) + { + count++; + } + } + if(count==NSIMD) + { + int local_error; + /* process NSIMD instances at once */ + local_error = BSIM3v32LoadSIMD(heres, ckt, data); + if (local_error) return local_error; + } + else for(int i=0;i<count;i++) + { + int local_error = BSIM3v32LoadSeq(heres[i], ckt, NULL, 0); + if (local_error) return local_error; + } + } + for (; idx < group->InstCount; idx++) { + int local_error = BSIM3v32LoadSeq(group->InstArray[idx], ckt, + NULL, 0); + if (local_error) return local_error; + } + } + + return 0; /* no error */ +} +#endif + + +#ifdef USE_OMP +int +BSIM3v32loadSel (GENmodel *inModel, CKTcircuit *ckt) +{ + /* + This version do omp
parallel only inside groups + */ + BSIM3v32group *group; + BSIM3v32model *model = (BSIM3v32model*)inModel; + int error=0; + int idx=0; + for (; model != NULL; model = BSIM3v32nextModel(model)) + for (group=model->groupHead; group!=NULL; group=group->next) + { + + #pragma omp parallel for + for (idx=0; idx <= group->InstCount-NSIMD; idx+=NSIMD) + { + int local_error; + int i; + int needeval=0; + for(i=0;i<NSIMD;i++) + { + group->InstArray[idx+i]->BSIM3v32SIMDCheck=-1; + local_error = BSIM3v32LoadSeq(group->InstArray[idx+i], ckt, 1); + if (local_error) error = local_error; + + if(group->InstArray[idx+i]->BSIM3v32SIMDCheck!=-1) + needeval=1; + } + if(!needeval) + continue; /* all NSIMD instances are bypassed */ + local_error = BSIM3v32LoadSIMD(&group->InstArray[idx], ckt); + if (local_error) error = local_error; + } + /* omp mess with idx val after the for loop above, so we recalc it */ + idx = NSIMD*(group->InstCount/NSIMD); + for (; idx < group->InstCount; idx++) { + int local_error = BSIM3v32LoadSeq(group->InstArray[idx], ckt, 2); + if (local_error) error = local_error; + } + } + + BSIM3v32LoadRhsMat(inModel, ckt); + return error; +} + +#if 0 +int +BSIM3v32loadSelVrai (GENmodel *inModel, CKTcircuit *ckt) +{ + /* + This version do omp parallel for most instances of all models combined + */ + BSIM3v32group *group; + BSIM3v32model *model = (BSIM3v32model*)inModel; + + int idx; + int error = 0; + int nsimd,nsisd; + /* pre load all instances */ + if(DEBUG) printf("loadomp %d\n",model->BSIM3v32InstCount); + #pragma omp parallel for + for (idx = 0; idx < model->BSIM3v32InstCount; idx++) { + BSIM3v32instance *here = model->BSIM3v32InstanceArray[idx]; + if(DEBUG) printf("loadomp preload seq %d\n",idx); + here->BSIM3v32SIMDCheck=-1; + int local_error = BSIM3v32LoadSeq(here,ckt,1); + if (local_error) error=local_error; + } + if (error) printf("load error\n"); + if (error) return error; + + /* sort instances to run in SIMD */ + nsimd=0; + nsisd=0; + for (model = (BSIM3v32model*)inModel; model != NULL;
model = BSIM3v32nextModel(model)) + for (group=model->groupHead; group!=NULL; group=group->next) + { + int rev=group->InstCount; + group->SimdCount = 0; + for(idx=0;idx<group->InstCount;idx++) + { + BSIM3v32instance *here = group->InstArray[idx]; + if(here->BSIM3v32SIMDCheck==-1) + { + /* bypassed, swap current inst to the end */ + rev--; + group->InstArray[idx] = group->InstArray[rev]; + group->InstArray[rev] = here; + } + } + group->EvalCount = rev; + group->SimdCount = rev/NSIMD; + nsimd += group->SimdCount; + nsisd += rev - NSIMD*group->SimdCount; + } + + if(DEBUG) printf("nsimd=%d nsisd=%d\n",nsimd,nsisd); + /* run SIMD in parallel */ + #pragma omp parallel for + for(idx=0;idx<nsimd;idx++) + { + int search = idx; + BSIM3v32model *mdl; + BSIM3v32group *grp; + for (mdl = (BSIM3v32model*)inModel; mdl != NULL; mdl = BSIM3v32nextModel(mdl)) + for (grp=mdl->groupHead; grp!=NULL; grp=grp->next) + { + if(search>=0 && search < grp->SimdCount) + { + BSIM3v32instance** heres = &grp->InstArray[search*NSIMD]; + if(DEBUG) printf("Call Simd index %d of %d\n", search*NSIMD, grp->InstCount); + int local_error = BSIM3v32LoadSIMD(heres, ckt); + if(DEBUG) printf("Call ended\n"); + if(local_error) error=local_error; + } + search -= grp->SimdCount; + } + } + if(error) return error; + + if(DEBUG) printf("now switch to sisd\n"); + /* run remaining SISD in parallel */ + #pragma omp parallel for + for(idx=0;idx<nsisd;idx++) + { + int search = idx; + BSIM3v32model *mdl; + BSIM3v32group *grp; + for (mdl = (BSIM3v32model*)inModel; mdl != NULL; mdl = BSIM3v32nextModel(mdl)) + for (grp=mdl->groupHead; grp!=NULL; grp=grp->next) + { + int n = grp->EvalCount - grp->SimdCount*NSIMD; + if(search>=0 && search < n) + { + if(DEBUG) printf("Call seq index %d of %d\n", search + grp->SimdCount*NSIMD,grp->InstCount); + int local_error = BSIM3v32LoadSeq(grp->InstArray[search + grp->SimdCount*NSIMD], ckt, 0); + if(DEBUG) printf("Call ended\n"); + if(local_error) error=local_error; + } + search -= n; + } + } + if(DEBUG) printf("Now write the matrix\n"); + /* Write in matrix sequentially */ + BSIM3v32LoadRhsMat(inModel, ckt); + + return error; +} +#endif + +#endif + + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldseq.c b/src/spicelib/devices/bsim3v32/b3v32ldseq.c new file mode 100644 index 000000000..b7669a3a2 --- /dev/null +++
b/src/spicelib/devices/bsim3v32/b3v32ldseq.c @@ -0,0 +1,3799 @@ +/**** BSIM3v3.2.4, Released by Xuemei Xi 12/21/2001 ****/ + +/********** + * Copyright 2001 Regents of the University of California. All rights reserved. + * Original File: b3ld.c of BSIM3v3.2.4 + * Author: 1991 JianHui Huang and Min-Chie Jeng. + * Modified by Mansun Chan (1995). + * Author: 1997-1999 Weidong Liu. + * Author: 2001 Xuemei Xi + * Modified by Xuemei Xi, 10/05, 12/21, 2001. + * Modified by Paolo Nenzi 2002 and Dietmar Warning 2003 + * Modified by Florian Ballenegger 2020 for SIMD version generation + **********/ + + /********** + * Modified 2020 by Florian Ballenegger, Anamosic Ballenegger Design + * Distributed under the same license terms as the original code, + * see file "B3TERMS_OF_USE" + **********/ + +#ifdef SIMDIFYCPP + +/* simdify parser need to know that those names are C types + (however the actual type don't matter for this tool) */ +typedef int GENmodel; +typedef int CKTcircuit; +typedef int BSIM3v32instance; +typedef int BSIM3v32model; +#define SEQCHECK(val) SIMDCHECK((val)); +#define SEQCHECK2(a,b) SIMDCHECK2((a),(b)); +#define SEQCHECK3(a,b,c) SIMDCHECK3((a),(b),(c)); +#define SEQCHECK4(a,b,c,d) SIMDCHECK4((a),(b),(c),(d)); + +#if 0 +#define mypow(x,p) exp(p*log(x)) +#define pow0p7(x,p) mypow(x,p) +#define powMJ(x,p) mypow(x,p) +#define powMJSW(x,p) mypow(x,p) +#define powMJSWG(x,p) mypow(x,p) +#endif + +#else + +#define SIMDIFYCMD(cmd) +#define SIMDANY(err) err /* normal error handling when not SIMD */ +#define SIMDCOUNT(val) (val) +#define SEQCHECK(val) if(stride<0) { data[-stride-1] = val; return OK; } +#define SEQCHECK2(a,b) if(stride<0) { data[-stride-1] = a; data[-stride-1+4] = b; return OK; } +#define SEQCHECK3(a,b,c) if(stride<0) { data[-stride-1] = a; data[-stride-1+4] = b; data[-stride-1+8] = c; return OK; } +#define SEQCHECK4(a,b,c,d) if(stride<0) { data[-stride-1] = a; data[-stride-1+4] = b; data[-stride-1+8] = c; data[-stride-1+12] = d; return OK; } + 
+#include "ngspice/ngspice.h" +#include "ngspice/cktdefs.h" +#include "bsim3v32def.h" +#include "b3v32acm.h" +#include "ngspice/trandefs.h" +#include "ngspice/const.h" +#include "ngspice/sperror.h" +#include "ngspice/devdefs.h" +#include "ngspice/suffix.h" + +#define MAX_EXP 5.834617425e14 +#define MIN_EXP 1.713908431e-15 +#define EXP_THRESHOLD 34.0 +#define EPSOX 3.453133e-11 +#define EPSSI 1.03594e-10 +#define Charge_q 1.60219e-19 +#define DELTA_1 0.02 +#define DELTA_2 0.02 +#define DELTA_3 0.02 +#define DELTA_4 0.02 + +#define mypow_unused(x,p) exp(p*log(x)) +#define mypow(x,p) pow(x,p) + +#define pow0p7(x,p) mypow(x,p) +#define powMJ(x,p) mypow(x,p) +#define powMJSW(x,p) mypow(x,p) +#define powMJSWG(x,p) mypow(x,p) + +#if 0 +#ifdef NEWCONV +#pragma message "NEWCONV" +#else +#pragma message "no NEWCONV" +#endif + +#ifdef NOBYPASS +#pragma message "NOBYPASS" +#else +#pragma message "no NOBYPASS" +#endif +#ifdef PREDICTOR +#pragma message "PREDICTOR" +#else +#pragma message "no PREDICTOR" +#endif +#endif + +int BSIM3v32_ACM_saturationCurrents +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *DrainSatCurrent, + double *SourceSatCurrent +) +{ + return ACM_saturationCurrents( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + model->BSIM3v32jctTempSatCurDensity, + model->BSIM3v32jctSidewallTempSatCurDensity, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + DrainSatCurrent, + SourceSatCurrent + ); +} + + +int BSIM3v32_ACM_junctionCapacitances +( + BSIM3v32model *model, + BSIM3v32instance *here, + double *areaDrainBulkCapacitance, + double *periDrainBulkCapacitance, + double *gateDrainBulkCapacitance, + double *areaSourceBulkCapacitance, + 
double *periSourceBulkCapacitance, + double *gateSourceBulkCapacitance +) +{ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + return ACM_junctionCapacitances( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + model->BSIM3v32unitAreaTempJctCap, + model->BSIM3v32unitLengthSidewallTempJctCap, + model->BSIM3v32unitLengthGateSidewallTempJctCap, + areaDrainBulkCapacitance, + periDrainBulkCapacitance, + gateDrainBulkCapacitance, + areaSourceBulkCapacitance, + periSourceBulkCapacitance, + gateSourceBulkCapacitance + ); + case BSIM3v32V322: + case BSIM3v32V32: + default: + return ACM_junctionCapacitances( + model->BSIM3v32acmMod, + model->BSIM3v32calcacm, + here->BSIM3v32geo, + model->BSIM3v32hdif, + model->BSIM3v32wmlt, + here->BSIM3v32w, + model->BSIM3v32xw, + here->BSIM3v32drainAreaGiven, + here->BSIM3v32drainArea, + here->BSIM3v32drainPerimeterGiven, + here->BSIM3v32drainPerimeter, + here->BSIM3v32sourceAreaGiven, + here->BSIM3v32sourceArea, + here->BSIM3v32sourcePerimeterGiven, + here->BSIM3v32sourcePerimeter, + model->BSIM3v32unitAreaJctCap, + model->BSIM3v32unitLengthSidewallJctCap, + model->BSIM3v32unitLengthGateSidewallJctCap, + areaDrainBulkCapacitance, + periDrainBulkCapacitance, + gateDrainBulkCapacitance, + areaSourceBulkCapacitance, + periSourceBulkCapacitance, + gateSourceBulkCapacitance + ); + } +} + +#endif + +/*F.B: if data is not NULL, return to do calc in SIMD except if bypass + if data is NULL, just perform the whole model without SIMD */ +#ifdef USE_OMP +#pragma message "Use OMP" +#else +#pragma message "Dont use OMP" +#endif + +/* +prelim argument: +1: 
check if bypass, complete eval if bypass, otherwise store prelim data and abort +0: skip prelim calc, load prelim data and finish eval +2: complete eval +*/ + +int BSIM3v32LoadSeq( + BSIM3v32instance *here, + CKTcircuit *ckt, + #ifndef USE_OMP + double* data, + int stride + #else + int prelim + #endif + ) +{ +#ifndef SIMDIFYCPP + BSIM3v32model *model; +#endif +double SourceSatCurrent, DrainSatCurrent; +double ag0, qgd, qgs, qgb, cbhat, VgstNVt, ExpVgst; /* F.B: removed von */ +double cdrain, cdhat, cdreq, ceqbd, ceqbs, ceqqb, ceqqd, ceqqg, ceq, geq; +double czbd, czbdsw, czbdswg, czbs, czbssw, czbsswg, evbd, evbs, arg, sarg; +/* double delvbd, delvbs, delvds, delvgd, delvgs; FB: moved decl into sub block */ +double Vfbeff, dVfbeff_dVg, dVfbeff_dVd = 0.0, dVfbeff_dVb, V3, V4; +double gcbdb, gcbgb, gcbsb, gcddb, gcdgb, gcdsb, gcgdb, gcggb, gcgsb, gcsdb; +/* FB: moved double to decl into NEWCONV block */ + +double gcsgb, gcssb, MJ, MJSW, MJSWG; +double vbd, vbs, vds, vgb, vgd, vgs; /* FB: vgdo decl moved */ +/* FB: xfact decl moved inside PREDICTOR block */ +double qgate = 0.0, qbulk = 0.0, qdrn = 0.0, qsrc; +double qinoi, cqgate, cqbulk, cqdrn; +double Vds, Vgs, Vbs, Gmbs, FwdSum, RevSum; +double Vgs_eff, Vfb, dVfb_dVb = 0.0, dVfb_dVd = 0.0; +double Phis, dPhis_dVb, sqrtPhis, dsqrtPhis_dVb, Vth, dVth_dVb, dVth_dVd; +double Vgst, dVgst_dVg, dVgst_dVb, dVgs_eff_dVg, Nvtm; +double Vtm; +double n, dn_dVb, dn_dVd, voffcv, noff, dnoff_dVd, dnoff_dVb; +double ExpArg, V0, CoxWLcen, QovCox, LINK; +double DeltaPhi, dDeltaPhi_dVg, dDeltaPhi_dVd, dDeltaPhi_dVb; +double Cox, Tox, Tcen, dTcen_dVg, dTcen_dVd, dTcen_dVb; +double Ccen, Coxeff, dCoxeff_dVg, dCoxeff_dVd, dCoxeff_dVb; +double Denomi, dDenomi_dVg, dDenomi_dVd, dDenomi_dVb; +double ueff, dueff_dVg, dueff_dVd, dueff_dVb; +double Esat, Vdsat; +double EsatL, dEsatL_dVg, dEsatL_dVd, dEsatL_dVb; +double dVdsat_dVg, dVdsat_dVb, dVdsat_dVd, Vasat, dAlphaz_dVg, dAlphaz_dVb; +double dVasat_dVg, dVasat_dVb, dVasat_dVd, Va, dVa_dVd, 
dVa_dVg, dVa_dVb; +double Vbseff, dVbseff_dVb, VbseffCV, dVbseffCV_dVb; +double Arg1, One_Third_CoxWL, Two_Third_CoxWL, Alphaz, CoxWL; +double T0, dT0_dVg, dT0_dVd, dT0_dVb; +double T1, dT1_dVg, dT1_dVd, dT1_dVb; +double T2, dT2_dVg, dT2_dVd, dT2_dVb; +double T3, dT3_dVg, dT3_dVd, dT3_dVb; +double T4; +double T5; +double T6; +double T7; +double T8; +double T9; +double T10; +double T11, T12; +double tmp, Abulk, dAbulk_dVb, Abulk0, dAbulk0_dVb; +double tmpuni; /*F.B.*/ +double VACLM, dVACLM_dVg, dVACLM_dVd, dVACLM_dVb; +double VADIBL, dVADIBL_dVg, dVADIBL_dVd, dVADIBL_dVb; +double Xdep, dXdep_dVb, lt1, dlt1_dVb, ltw, dltw_dVb, Delt_vth, dDelt_vth_dVb; +double Theta0, dTheta0_dVb; +double TempRatio, tmp1, tmp2, tmp3, tmp4; +double DIBL_Sft, dDIBL_Sft_dVd, Lambda, dLambda_dVg; +/* double Idtot, Ibtot; FB: moved Idtot and Ibtot decl into sub blocks */ + +/* double tempv; FB: moved decl */ +double a1, ScalingFactor; + +double Vgsteff, dVgsteff_dVg, dVgsteff_dVd, dVgsteff_dVb; +double Vdseff, dVdseff_dVg, dVdseff_dVd, dVdseff_dVb; +double VdseffCV, dVdseffCV_dVg, dVdseffCV_dVd, dVdseffCV_dVb; +double diffVds, dAbulk_dVg; +double beta, dbeta_dVg, dbeta_dVd, dbeta_dVb; +double gche, dgche_dVg, dgche_dVd, dgche_dVb; +double fgche1, dfgche1_dVg, dfgche1_dVd, dfgche1_dVb; +double fgche2, dfgche2_dVg, dfgche2_dVd, dfgche2_dVb; +double Idl, dIdl_dVg, dIdl_dVd, dIdl_dVb; +double Idsa, dIdsa_dVg, dIdsa_dVd, dIdsa_dVb; +double Ids, Gm, Gds, Gmb; +double Isub, Gbd, Gbg, Gbb; +double VASCBE, dVASCBE_dVg, dVASCBE_dVd, dVASCBE_dVb; +double CoxWovL; +double Rds, dRds_dVg, dRds_dVb, WVCox, WVCoxRds; +double Vgst2Vtm, VdsatCV, dVdsatCV_dVg, dVdsatCV_dVb; +double Leff, Weff, dWeff_dVg, dWeff_dVb; +double AbulkCV, dAbulkCV_dVb; +double qgdo, qgso, cgdo, cgso; + +double qcheq = 0.0, qdef, gqdef = 0.0, cqdef, cqcheq, gtau_diff, gtau_drift; +double gcqdb = 0.0,gcqsb = 0.0, gcqgb = 0.0,gcqbb = 0.0; +double dxpart, sxpart, ggtg, ggtd, ggts, ggtb; +double ddxpart_dVd, ddxpart_dVg, ddxpart_dVb, 
ddxpart_dVs; +double dsxpart_dVd, dsxpart_dVg, dsxpart_dVb, dsxpart_dVs; + +double gbspsp, gbbdp, gbbsp, gbspg, gbspb, gbspdp; +double gbdpdp, gbdpg, gbdpb, gbdpsp; +double Cgg, Cgd, Cgb, Cdg, Cdd, Cds; +double Csg, Csd, Css, Csb, Cbg, Cbd, Cbb; +double Cgg1, Cgb1, Cgd1, Cbg1, Cbb1, Cbd1, Qac0, Qsub0; +double dQac0_dVg, dQac0_dVd = 0.0, dQac0_dVb, dQsub0_dVg; +double dQsub0_dVd, dQsub0_dVb; + +double m; + +int BSIM3v32mode; /* F.B. */ +#ifndef SIMDIFYCPP +struct bsim3v32SizeDependParam *pParam; +int ByPass; +#endif + +int Check, ChargeComputationNeeded, error; + + ScalingFactor = 1.0e-9; + ChargeComputationNeeded = + ((ckt->CKTmode & (MODEDCTRANCURVE | MODEAC | MODETRAN | MODEINITSMSIG)) || + ((ckt->CKTmode & MODETRANOP) && (ckt->CKTmode & MODEUIC))) + ? 1 : 0; +#ifndef SIMDIFYCPP + model = BSIM3v32modPtr(here); + Check = 1; + ByPass = 0; + pParam = here->pParam; + BSIM3v32mode = here->BSIM3v32mode; + cdhat=0.0; + cbhat=0.0; + /* F.B.: move CoxWL def here */ + CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + + #ifdef USE_OMP + if(prelim==0) + goto prelimskip; + #endif + if ((ckt->CKTmode & MODEINITSMSIG)) + { vbs = *(ckt->CKTstate0 + here->BSIM3v32vbs); + vgs = *(ckt->CKTstate0 + here->BSIM3v32vgs); + vds = *(ckt->CKTstate0 + here->BSIM3v32vds); + qdef = *(ckt->CKTstate0 + here->BSIM3v32qdef); + } + else if ((ckt->CKTmode & MODEINITTRAN)) + { vbs = *(ckt->CKTstate1 + here->BSIM3v32vbs); + vgs = *(ckt->CKTstate1 + here->BSIM3v32vgs); + vds = *(ckt->CKTstate1 + here->BSIM3v32vds); + qdef = *(ckt->CKTstate1 + here->BSIM3v32qdef); + } + else if ((ckt->CKTmode & MODEINITJCT) && !here->BSIM3v32off) + { vds = model->BSIM3v32type * here->BSIM3v32icVDS; + vgs = model->BSIM3v32type * here->BSIM3v32icVGS; + vbs = model->BSIM3v32type * here->BSIM3v32icVBS; + qdef = 0.0; + + if ((vds == 0.0) && (vgs == 0.0) && (vbs == 0.0) && + ((ckt->CKTmode & (MODETRAN | 
MODEAC|MODEDCOP | + MODEDCTRANCURVE)) || (!(ckt->CKTmode & MODEUIC)))) + { vbs = 0.0; + vgs = model->BSIM3v32type * here->BSIM3v32vth0 + 0.1; + vds = 0.1; + } + } + else if ((ckt->CKTmode & (MODEINITJCT | MODEINITFIX)) && + (here->BSIM3v32off)) + { qdef = vbs = vgs = vds = 0.0; + } + else + { + double Idtot, Ibtot; + double delvbd, delvbs, delvds, delvgd, delvgs; + double vgdo; +#ifndef PREDICTOR + if ((ckt->CKTmode & MODEINITPRED)) + { double xfact; + xfact = ckt->CKTdelta / ckt->CKTdeltaOld[1]; + if(0) printf("Write state in precalc\n"); + *(ckt->CKTstate0 + here->BSIM3v32vbs) = + *(ckt->CKTstate1 + here->BSIM3v32vbs); + vbs = (1.0 + xfact)* (*(ckt->CKTstate1 + here->BSIM3v32vbs)) + - (xfact * (*(ckt->CKTstate2 + here->BSIM3v32vbs))); + *(ckt->CKTstate0 + here->BSIM3v32vgs) = + *(ckt->CKTstate1 + here->BSIM3v32vgs); + vgs = (1.0 + xfact)* (*(ckt->CKTstate1 + here->BSIM3v32vgs)) + - (xfact * (*(ckt->CKTstate2 + here->BSIM3v32vgs))); + *(ckt->CKTstate0 + here->BSIM3v32vds) = + *(ckt->CKTstate1 + here->BSIM3v32vds); + vds = (1.0 + xfact)* (*(ckt->CKTstate1 + here->BSIM3v32vds)) + - (xfact * (*(ckt->CKTstate2 + here->BSIM3v32vds))); + *(ckt->CKTstate0 + here->BSIM3v32vbd) = + *(ckt->CKTstate0 + here->BSIM3v32vbs) + - *(ckt->CKTstate0 + here->BSIM3v32vds); + *(ckt->CKTstate0 + here->BSIM3v32qdef) = + *(ckt->CKTstate1 + here->BSIM3v32qdef); + qdef = (1.0 + xfact)* (*(ckt->CKTstate1 + here->BSIM3v32qdef)) + -(xfact * (*(ckt->CKTstate2 + here->BSIM3v32qdef))); + } + else + { +#endif /* PREDICTOR */ + vbs = model->BSIM3v32type + * (*(ckt->CKTrhsOld + here->BSIM3v32bNode) + - *(ckt->CKTrhsOld + here->BSIM3v32sNodePrime)); + vgs = model->BSIM3v32type + * (*(ckt->CKTrhsOld + here->BSIM3v32gNode) + - *(ckt->CKTrhsOld + here->BSIM3v32sNodePrime)); + vds = model->BSIM3v32type + * (*(ckt->CKTrhsOld + here->BSIM3v32dNodePrime) + - *(ckt->CKTrhsOld + here->BSIM3v32sNodePrime)); + qdef = model->BSIM3v32type + * (*(ckt->CKTrhsOld + here->BSIM3v32qNode)); +#ifndef PREDICTOR + } 
+#endif /* PREDICTOR */ + + vbd = vbs - vds; + vgd = vgs - vds; + vgdo = *(ckt->CKTstate0 + here->BSIM3v32vgs) + - *(ckt->CKTstate0 + here->BSIM3v32vds); + delvbs = vbs - *(ckt->CKTstate0 + here->BSIM3v32vbs); + delvbd = vbd - *(ckt->CKTstate0 + here->BSIM3v32vbd); + delvgs = vgs - *(ckt->CKTstate0 + here->BSIM3v32vgs); + delvds = vds - *(ckt->CKTstate0 + here->BSIM3v32vds); + delvgd = vgd - vgdo; + + if (here->BSIM3v32mode > 0) /* F.B. >= 0 to > 0 */ + { Idtot = here->BSIM3v32cd + here->BSIM3v32csub - here->BSIM3v32cbd; + cdhat = Idtot - here->BSIM3v32gbd * delvbd + + (here->BSIM3v32gmbs + here->BSIM3v32gbbs) * delvbs + + (here->BSIM3v32gm + here->BSIM3v32gbgs) * delvgs + + (here->BSIM3v32gds + here->BSIM3v32gbds) * delvds; + Ibtot = here->BSIM3v32cbs + here->BSIM3v32cbd - here->BSIM3v32csub; + cbhat = Ibtot + here->BSIM3v32gbd * delvbd + + (here->BSIM3v32gbs - here->BSIM3v32gbbs) * delvbs + - here->BSIM3v32gbgs * delvgs + - here->BSIM3v32gbds * delvds; + } + else + { Idtot = here->BSIM3v32cd - here->BSIM3v32cbd; + cdhat = Idtot - (here->BSIM3v32gbd - here->BSIM3v32gmbs) * delvbd + + here->BSIM3v32gm * delvgd + - here->BSIM3v32gds * delvds; + Ibtot = here->BSIM3v32cbs + here->BSIM3v32cbd - here->BSIM3v32csub; + cbhat = Ibtot + here->BSIM3v32gbs * delvbs + + (here->BSIM3v32gbd - here->BSIM3v32gbbs) * delvbd + - here->BSIM3v32gbgs * delvgd + + here->BSIM3v32gbds * delvds; + } + +#ifndef NOBYPASS + /* following should be one big if connected by && all over + * the place, but some C compilers can't handle that, so + * we split it up here to let them digest it in stages + */ + + if ((!(ckt->CKTmode & MODEINITPRED)) && (ckt->CKTbypass)) + if ((fabs(delvbs) < (ckt->CKTreltol * MAX(fabs(vbs), + fabs(*(ckt->CKTstate0+here->BSIM3v32vbs))) + ckt->CKTvoltTol))) + if ((fabs(delvbd) < (ckt->CKTreltol * MAX(fabs(vbd), + fabs(*(ckt->CKTstate0+here->BSIM3v32vbd))) + ckt->CKTvoltTol))) + if ((fabs(delvgs) < (ckt->CKTreltol * MAX(fabs(vgs), + 
fabs(*(ckt->CKTstate0+here->BSIM3v32vgs))) + ckt->CKTvoltTol))) + if ((fabs(delvds) < (ckt->CKTreltol * MAX(fabs(vds), + fabs(*(ckt->CKTstate0+here->BSIM3v32vds))) + ckt->CKTvoltTol))) + if ((fabs(cdhat - Idtot) < ckt->CKTreltol + * MAX(fabs(cdhat),fabs(Idtot)) + ckt->CKTabstol)) + { + double tempv; + tempv = MAX(fabs(cbhat),fabs(Ibtot)) + ckt->CKTabstol; + if ((fabs(cbhat - Ibtot)) < ckt->CKTreltol * tempv) + { /* bypass code */ + vbs = *(ckt->CKTstate0 + here->BSIM3v32vbs); + vbd = *(ckt->CKTstate0 + here->BSIM3v32vbd); + vgs = *(ckt->CKTstate0 + here->BSIM3v32vgs); + vds = *(ckt->CKTstate0 + here->BSIM3v32vds); + qdef = *(ckt->CKTstate0 + here->BSIM3v32qdef); + + vgd = vgs - vds; + vgb = vgs - vbs; + + cdrain = here->BSIM3v32cd; + if ((ckt->CKTmode & (MODETRAN | MODEAC)) || + ((ckt->CKTmode & MODETRANOP) && + (ckt->CKTmode & MODEUIC))) + { ByPass = 1; + qgate = here->BSIM3v32qgate; + qbulk = here->BSIM3v32qbulk; + qdrn = here->BSIM3v32qdrn; + goto line755; + } + else + { goto line850; + } + } + } + +#endif /*NOBYPASS*/ + /* von = here->BSIM3v32von; F.B: expanded von to here->BSIM3v32von below */ + if (*(ckt->CKTstate0 + here->BSIM3v32vds) >= 0.0) + { vgs = DEVfetlim(vgs, *(ckt->CKTstate0+here->BSIM3v32vgs), here->BSIM3v32von); + vds = vgs - vgd; + vds = DEVlimvds(vds, *(ckt->CKTstate0 + here->BSIM3v32vds)); + vgd = vgs - vds; + + } + else + { vgd = DEVfetlim(vgd, vgdo, here->BSIM3v32von); + vds = vgs - vgd; + vds = -DEVlimvds(-vds, -(*(ckt->CKTstate0+here->BSIM3v32vds))); + vgs = vgd + vds; + } + + if (vds >= 0.0) + { vbs = DEVpnjlim(vbs, *(ckt->CKTstate0 + here->BSIM3v32vbs), + CONSTvt0, model->BSIM3v32vcrit, &Check); + vbd = vbs - vds; + + } + else + { vbd = DEVpnjlim(vbd, *(ckt->CKTstate0 + here->BSIM3v32vbd), + CONSTvt0, model->BSIM3v32vcrit, &Check); + vbs = vbd + vds; + } + } + /* FB: store calculated data so far and return for SIMD processing */ + #ifndef USE_OMP + if(data && stride>0) + { + data[0*stride] = vbs; + data[1*stride] = vgs; + data[2*stride] = 
vds; + data[3*stride] = qdef; + data[4*stride] = cdhat; + data[5*stride] = cbhat; + data[6*stride] = Check ? 1.0 : 0.0; + if(0) + printf("ldseq return for SIMD !, %g %g %g %g %g %g, check=%d\n",vbs,vgs,vds,qdef,cdhat,cbhat,Check); + return OK; + } + if(data && (stride==0)) { + if( (fabs(data[0*4] - vbs) > 1e-30) + || (fabs(data[1*4] - vgs) > 1e-30) + || (fabs(data[2*4] - vds) > 1e-30) + || (fabs(data[3*4] - qdef) > 1e-30) ) + { + printf("error! precalc data differ to recalculated values !\n"); + printf("vbs: %g %g\n",data[0*4],vbs); + printf("vgs: %g %g\n",data[1*4],vgs); + printf("vds: %g %g\n",data[2*4],vds); + printf("qdef: %g %g\n",data[3*4],qdef); + } + return OK; + } + #else + prelimskip: + if(prelim==1) + { + here->BSIM3v32SIMDvbs = vbs; + here->BSIM3v32SIMDvgs = vgs; + here->BSIM3v32SIMDvds = vds; + here->BSIM3v32SIMDqdef = qdef; + here->BSIM3v32SIMDcdhat = cdhat; + here->BSIM3v32SIMDcbhat = cbhat; + here->BSIM3v32SIMDCheck = Check; + return OK; + } + else if (prelim==0) + { + if(here->BSIM3v32SIMDCheck==-1) + printf("error load unitilized data\n"); + vbs = here->BSIM3v32SIMDvbs; + vgs = here->BSIM3v32SIMDvgs; + vds = here->BSIM3v32SIMDvds; + qdef = here->BSIM3v32SIMDqdef; + cdhat = here->BSIM3v32SIMDcdhat; + cbhat = here->BSIM3v32SIMDcbhat; + Check = here->BSIM3v32SIMDCheck; + } + #endif + + +#endif /* ifndef SIMDIFYCPP */ + +#ifdef SIMDIFYCPP +#if 0 +/* use this to silent compiler warning about use of uninitialized variables */ + evbs = T0 = evbd = Vds = Vgs = Vbs = Phis = dPhis_dVb = sqrtPhis = dsqrtPhis_dVb + = (0.0); + T4 = Theta0 = dT1_dVb = dTheta0_dVb = dT2_dVb = T9 = n = dn_dVb = dn_dVd = T7 = T6 = T5 + = (0.0); + dVgsteff_dVg = dVgsteff_dVd = dVgsteff_dVb = ExpVgst = dT1_dVg = dT1_dVd = dT2_dVg = dT2_dVd + = (0.0); + dRds_dVg = dRds_dVb = Denomi = dT0_dVg = dT0_dVd = dVdsat_dVg = dVdsat_dVd = dVdsat_dVb + = (0.0); + dT3_dVg = dT3_dVd = dT3_dVb = VACLM = dVACLM_dVg = dVACLM_dVb = VASCBE = dVASCBE_dVg + = (0.0); + dVASCBE_dVd = dVASCBE_dVb = AbulkCV 
= dAbulkCV_dVb = Alphaz = T11 = dAlphaz_dVg = dAlphaz_dVb + = (0.0); + T12 = tmp = VbseffCV = dVbseffCV_dVb = Cgb = dVdsatCV_dVg = dVdsatCV_dVb = Cgg1 = Cgb1 = Cgd1 + = (0.0); + dT0_dVb = dVACLM_dVd = Cbg1 = Cbb1 = Cbd1 = qsrc = Csg = Csb = Csd = Tcen = dTcen_dVg = dTcen_dVb = arg = sarg = qgdo = qgso = gcggb = gcgdb = gcgsb = gcdgb = gcddb = gcdsb = gcsgb = gcsdb = gcssb = gcbgb = gcbdb = gcbsb = qgd = qgs = qgb = sxpart = dxpart = ddxpart_dVd = dsxpart_dVd = Cdd = Cdg = Cds = Css = dsxpart_dVg = dsxpart_dVs = dsxpart_dVb = Gmbs = FwdSum = RevSum = cdreq = ceqbd = ceqbs = gbbdp = gbbsp = gbdpg = gbdpdp = gbdpb = gbdpsp = gbspg = gbspdp = gbspb = gbspsp + = (0.0); +#endif +#ifndef USE_OMP + vbs = SIMDLOADDATA(0,data); + vgs = SIMDLOADDATA(1,data); + vds = SIMDLOADDATA(2,data); + qdef = SIMDLOADDATA(3,data); + cdhat = SIMDLOADDATA(4,data); + cbhat = SIMDLOADDATA(5,data); + Check = ((SIMDLOADDATA(6,data))>0.5); +#else + vbs = here->BSIM3v32SIMDvbs; + vgs = here->BSIM3v32SIMDvgs; + vds = here->BSIM3v32SIMDvds; + qdef = here->BSIM3v32SIMDqdef; + cdhat = here->BSIM3v32SIMDcdhat; + cbhat = here->BSIM3v32SIMDcbhat; + Check = here->BSIM3v32SIMDCheck; +#endif /* USE_OMP */ +#endif /* SIMDIFYCPP */ + + /* FB: SIMD start here */ + SIMDIFYCMD(start); + /* determine DC current and derivatives */ + vbd = vbs - vds; + vgd = vgs - vds; + vgb = vgs - vbs; + + /* Source/drain junction diode DC model begins */ + Nvtm = model->BSIM3v32vtm * model->BSIM3v32jctEmissionCoeff; + /* acm model */ + if (model->BSIM3v32acmMod == 0) + { + SourceSatCurrent = 1.0e-14; + if ((here->BSIM3v32sourceArea <= 0.0) + & (here->BSIM3v32sourcePerimeter <= 0.0)) + { + ; /* F.B. 
SourceSatCurrent = 1.0e-14; moved above */ + } + else + { + SourceSatCurrent = here->BSIM3v32sourceArea + * model->BSIM3v32jctTempSatCurDensity + + here->BSIM3v32sourcePerimeter + * model->BSIM3v32jctSidewallTempSatCurDensity; + } + DrainSatCurrent = 1.0e-14; + if ((here->BSIM3v32drainArea <= 0.0) & (here->BSIM3v32drainPerimeter <= 0.0)) + { ; /*F.B. DrainSatCurrent = 1.0e-14; moved above */ + } + else + { DrainSatCurrent = here->BSIM3v32drainArea + * model->BSIM3v32jctTempSatCurDensity + + here->BSIM3v32drainPerimeter + * model->BSIM3v32jctSidewallTempSatCurDensity; + } + } + else + { + /* F.B. made inline helper function easier to vectorize */ + error = BSIM3v32_ACM_saturationCurrents( + model, + here, + &DrainSatCurrent, + &SourceSatCurrent + ); + if (SIMDANY(error)) + return(error); + } + + if (SourceSatCurrent <= 0.0) + { here->BSIM3v32gbs = ckt->CKTgmin; + here->BSIM3v32cbs = here->BSIM3v32gbs * vbs; + } + else + { if (model->BSIM3v32ijth == 0.0) + { evbs = exp(vbs / Nvtm); + here->BSIM3v32gbs = SourceSatCurrent * evbs / Nvtm + ckt->CKTgmin; + here->BSIM3v32cbs = SourceSatCurrent * (evbs - 1.0) + + ckt->CKTgmin * vbs; + } + else + { if (vbs < here->BSIM3v32vjsm) + { evbs = exp(vbs / Nvtm); + here->BSIM3v32gbs = SourceSatCurrent * evbs / Nvtm + ckt->CKTgmin; + here->BSIM3v32cbs = SourceSatCurrent * (evbs - 1.0) + + ckt->CKTgmin * vbs; + } + else + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + T0 = here->BSIM3v32IsEvjsm / Nvtm; + here->BSIM3v32gbs = T0 + (ckt->CKTgmin); + here->BSIM3v32cbs = here->BSIM3v32IsEvjsm - SourceSatCurrent + + T0 * (vbs - here->BSIM3v32vjsm) + (ckt->CKTgmin) * vbs; + break; + case BSIM3v32V32: + default: + T0 = (SourceSatCurrent + model->BSIM3v32ijth) / Nvtm; + here->BSIM3v32gbs = T0 + (ckt->CKTgmin); + here->BSIM3v32cbs = model->BSIM3v32ijth + (ckt->CKTgmin) * vbs + + T0 * (vbs - here->BSIM3v32vjsm); + } + } + } + } + + if 
(DrainSatCurrent <= 0.0) + { here->BSIM3v32gbd = ckt->CKTgmin; + here->BSIM3v32cbd = here->BSIM3v32gbd * vbd; + } + else + { if (model->BSIM3v32ijth == 0.0) + { evbd = exp(vbd / Nvtm); + here->BSIM3v32gbd = DrainSatCurrent * evbd / Nvtm + ckt->CKTgmin; + here->BSIM3v32cbd = DrainSatCurrent * (evbd - 1.0) + + ckt->CKTgmin * vbd; + } + else + { if (vbd < here->BSIM3v32vjdm) + { evbd = exp(vbd / Nvtm); + here->BSIM3v32gbd = DrainSatCurrent * evbd / Nvtm + ckt->CKTgmin; + here->BSIM3v32cbd = DrainSatCurrent * (evbd - 1.0) + + ckt->CKTgmin * vbd; + } + else + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + T0 = here->BSIM3v32IsEvjdm / Nvtm; + here->BSIM3v32gbd = T0 + (ckt->CKTgmin); + here->BSIM3v32cbd = here->BSIM3v32IsEvjdm - DrainSatCurrent + + T0 * (vbd - here->BSIM3v32vjdm) + (ckt->CKTgmin) * vbd; + break; + case BSIM3v32V32: + default: + T0 = (DrainSatCurrent + model->BSIM3v32ijth) / Nvtm; + here->BSIM3v32gbd = T0 + (ckt->CKTgmin); + here->BSIM3v32cbd = model->BSIM3v32ijth + (ckt->CKTgmin) * vbd + + T0 * (vbd - here->BSIM3v32vjdm); + } + } + } + } + /* End of diode DC model */ + BSIM3v32mode = (vds >= 0.0); /*F.B. BSIM3v32mode = +(seq)/-(simd) 1 or 0. (was +1/-1 originally) */ + if (BSIM3v32mode) + { /* normal mode */ + Vds = vds; + Vgs = vgs; + Vbs = vbs; + } + else + { /* inverse mode */ + Vds = -vds; + Vgs = vgd; + Vbs = vbd; + } + + { + int modesym; + modesym = 2*(BSIM3v32mode&0x1)-1; + here->BSIM3v32mode = modesym; /* restore +/- 1 */ + /* F.B. 
all subsequents here->BSIM3v32mode are replaced by local BSIM3v32mode +1/0 */ + } + T0 = Vbs - pParam->BSIM3v32vbsc - 0.001; + T1 = sqrt(T0 * T0 - 0.004 * pParam->BSIM3v32vbsc); + Vbseff = pParam->BSIM3v32vbsc + 0.5 * (T0 + T1); + dVbseff_dVb = 0.5 * (1.0 + T0 / T1); + if (Vbseff < Vbs) + { Vbseff = Vbs; + } + + + if (Vbseff > 0.0) + { T0 = pParam->BSIM3v32phi / (pParam->BSIM3v32phi + Vbseff); + Phis = pParam->BSIM3v32phi * T0; + dPhis_dVb = -T0 * T0; + sqrtPhis = pParam->BSIM3v32phis3 / (pParam->BSIM3v32phi + 0.5 * Vbseff); + dsqrtPhis_dVb = -0.5 * sqrtPhis * sqrtPhis / pParam->BSIM3v32phis3; + } + else + { Phis = pParam->BSIM3v32phi - Vbseff; + dPhis_dVb = -1.0; + sqrtPhis = sqrt(Phis); + dsqrtPhis_dVb = -0.5 / sqrtPhis; + } + Xdep = pParam->BSIM3v32Xdep0 * sqrtPhis / pParam->BSIM3v32sqrtPhi; + dXdep_dVb = (pParam->BSIM3v32Xdep0 / pParam->BSIM3v32sqrtPhi) + * dsqrtPhis_dVb; + + Leff = pParam->BSIM3v32leff; + Vtm = model->BSIM3v32vtm; +/* Vth Calculation */ + T3 = sqrt(Xdep); + V0 = pParam->BSIM3v32vbi - pParam->BSIM3v32phi; + + T0 = pParam->BSIM3v32dvt2 * Vbseff; + T2 = pParam->BSIM3v32dvt2; /* F.B. */ + if (T0 >= - 0.5) + { T1 = 1.0 + T0; + /* T2 = pParam->BSIM3v32dvt2; F.B. */ + } + else /* Added to avoid any discontinuity problems caused by dvt2 */ + { T4 = 1.0 / (3.0 + 8.0 * T0); + T1 = (1.0 + 3.0 * T0) * T4; + T2 = T2 * T4 * T4; /* F.B. 
*/ + } + lt1 = model->BSIM3v32factor1 * T3 * T1; + dlt1_dVb = model->BSIM3v32factor1 * (0.5 / T3 * T1 * dXdep_dVb + T3 * T2); + + T0 = pParam->BSIM3v32dvt2w * Vbseff; + if (T0 >= - 0.5) + { T1 = 1.0 + T0; + T2 = pParam->BSIM3v32dvt2w; + } + else /* Added to avoid any discontinuity problems caused by dvt2w */ + { T4 = 1.0 / (3.0 + 8.0 * T0); + T1 = (1.0 + 3.0 * T0) * T4; + T2 = pParam->BSIM3v32dvt2w * T4 * T4; + } + ltw = model->BSIM3v32factor1 * T3 * T1; + dltw_dVb = model->BSIM3v32factor1 * (0.5 / T3 * T1 * dXdep_dVb + T3 * T2); + + T0 = -0.5 * pParam->BSIM3v32dvt1 * Leff / lt1; + if (T0 > -EXP_THRESHOLD) + { T1 = exp(T0); + Theta0 = T1 * (1.0 + 2.0 * T1); + dT1_dVb = -T0 / lt1 * T1 * dlt1_dVb; + dTheta0_dVb = (1.0 + 4.0 * T1) * dT1_dVb; + } + else + { T1 = MIN_EXP; + Theta0 = T1 * (1.0 + 2.0 * T1); + dTheta0_dVb = 0.0; + } + + + /* F.B: expanded thetavth directly in following expression. + here->BSIM3v32thetavth was not used in any file except here. + Note: it should be removed from def.h file as well. 
+ */ + Delt_vth = pParam->BSIM3v32dvt0 * Theta0 * V0; + + dDelt_vth_dVb = pParam->BSIM3v32dvt0 * dTheta0_dVb * V0; + + T0 = -0.5 * pParam->BSIM3v32dvt1w * pParam->BSIM3v32weff * Leff / ltw; + if (T0 > -EXP_THRESHOLD) + { T1 = exp(T0); + T2 = T1 * (1.0 + 2.0 * T1); + dT1_dVb = -T0 / ltw * T1 * dltw_dVb; + dT2_dVb = (1.0 + 4.0 * T1) * dT1_dVb; + } + else + { T1 = MIN_EXP; + T2 = T1 * (1.0 + 2.0 * T1); + dT2_dVb = 0.0; + } + + T0 = pParam->BSIM3v32dvt0w * T2; + T2 = T0 * V0; + dT2_dVb = pParam->BSIM3v32dvt0w * dT2_dVb * V0; + + TempRatio = ckt->CKTtemp / model->BSIM3v32tnom - 1.0; + T0 = sqrt(1.0 + pParam->BSIM3v32nlx / Leff); + T1 = pParam->BSIM3v32k1ox * (T0 - 1.0) * pParam->BSIM3v32sqrtPhi + + (pParam->BSIM3v32kt1 + pParam->BSIM3v32kt1l / Leff + + pParam->BSIM3v32kt2 * Vbseff) * TempRatio; + tmp2 = model->BSIM3v32tox * pParam->BSIM3v32phi + / (pParam->BSIM3v32weff + pParam->BSIM3v32w0); + + T3 = pParam->BSIM3v32eta0 + pParam->BSIM3v32etab * Vbseff; + if (T3 < 1.0e-4) /* avoid discontinuity problems caused by etab */ + { T9 = 1.0 / (3.0 - 2.0e4 * T3); + T3 = (2.0e-4 - T3) * T9; + T4 = T9 * T9; + } + else + { T4 = 1.0; + } + dDIBL_Sft_dVd = T3 * pParam->BSIM3v32theta0vb0; + DIBL_Sft = dDIBL_Sft_dVd * Vds; + + Vth = model->BSIM3v32type * here->BSIM3v32vth0 - pParam->BSIM3v32k1 + * pParam->BSIM3v32sqrtPhi + pParam->BSIM3v32k1ox * sqrtPhis + - pParam->BSIM3v32k2ox * Vbseff - Delt_vth - T2 + (pParam->BSIM3v32k3 + + pParam->BSIM3v32k3b * Vbseff) * tmp2 + T1 - DIBL_Sft; + + here->BSIM3v32von = Vth; + + dVth_dVb = pParam->BSIM3v32k1ox * dsqrtPhis_dVb - pParam->BSIM3v32k2ox + - dDelt_vth_dVb - dT2_dVb + pParam->BSIM3v32k3b * tmp2 + - pParam->BSIM3v32etab * Vds * pParam->BSIM3v32theta0vb0 * T4 + + pParam->BSIM3v32kt2 * TempRatio; + dVth_dVd = -dDIBL_Sft_dVd; + +/* Calculate n */ + tmp2 = pParam->BSIM3v32nfactor * EPSSI / Xdep; + tmp3 = pParam->BSIM3v32cdsc + pParam->BSIM3v32cdscb * Vbseff + + pParam->BSIM3v32cdscd * Vds; + tmp4 = (tmp2 + tmp3 * Theta0 + pParam->BSIM3v32cit) / 
model->BSIM3v32cox; + if (tmp4 >= -0.5) + { n = 1.0 + tmp4; + dn_dVb = (-tmp2 / Xdep * dXdep_dVb + tmp3 * dTheta0_dVb + + pParam->BSIM3v32cdscb * Theta0) / model->BSIM3v32cox; + dn_dVd = pParam->BSIM3v32cdscd * Theta0 / model->BSIM3v32cox; + } + else + /* avoid discontinuity problems caused by tmp4 */ + { T0 = 1.0 / (3.0 + 8.0 * tmp4); + n = (1.0 + 3.0 * tmp4) * T0; + T0 *= T0; + dn_dVb = (-tmp2 / Xdep * dXdep_dVb + tmp3 * dTheta0_dVb + + pParam->BSIM3v32cdscb * Theta0) / model->BSIM3v32cox * T0; + dn_dVd = pParam->BSIM3v32cdscd * Theta0 / model->BSIM3v32cox * T0; + } + +/* Poly Gate Si Depletion Effect */ + T0 = here->BSIM3v32vfb + pParam->BSIM3v32phi; + Vgs_eff = Vgs; + dVgs_eff_dVg = 1.0; + if ((pParam->BSIM3v32ngate > 1.e18) && (pParam->BSIM3v32ngate < 1.e25)) + if((Vgs > T0)) + /* added to avoid the problem caused by ngate */ + { T1 = 1.0e6 * Charge_q * EPSSI * pParam->BSIM3v32ngate + / (model->BSIM3v32cox * model->BSIM3v32cox); + T4 = sqrt(1.0 + 2.0 * (Vgs - T0) / T1); + T2 = T1 * (T4 - 1.0); + T3 = 0.5 * T2 * T2 / T1; /* T3 = Vpoly */ + T7 = 1.12 - T3 - 0.05; + T6 = sqrt(T7 * T7 + 0.224); + T5 = 1.12 - 0.5 * (T7 + T6); + Vgs_eff = Vgs - T5; + dVgs_eff_dVg = 1.0 - (0.5 - 0.5 / T4) * (1.0 + T7 / T6); + } + Vgst = Vgs_eff - Vth; + +/* Effective Vgst (Vgsteff) Calculation */ + + T10 = 2.0 * n * Vtm; + VgstNVt = Vgst / T10; + ExpArg = (2.0 * pParam->BSIM3v32voff - Vgst) / T10; + + #ifdef SIMDIFYCPP + /* F.B. 
Simd version use only exp function */ + T0 = VgstNVt; + if (ExpArg > EXP_THRESHOLD) + T0 = (Vgst - pParam->BSIM3v32voff) / (n * Vtm); + ExpVgst = exp(T0); + #endif + + /* MCJ: Very small Vgst */ + if (VgstNVt > EXP_THRESHOLD) + { Vgsteff = Vgst; + dVgsteff_dVg = dVgs_eff_dVg; + dVgsteff_dVd = -dVth_dVd; + dVgsteff_dVb = -dVth_dVb; + } + else if (ExpArg > EXP_THRESHOLD) + { + #ifndef SIMDIFYCPP + T0 = (Vgst - pParam->BSIM3v32voff) / (n * Vtm); + ExpVgst = exp(T0); + #endif + Vgsteff = Vtm * pParam->BSIM3v32cdep0 / model->BSIM3v32cox * ExpVgst; + dVgsteff_dVg = Vgsteff / (n * Vtm); + dVgsteff_dVd = -dVgsteff_dVg * (dVth_dVd + T0 * Vtm * dn_dVd); + dVgsteff_dVb = -dVgsteff_dVg * (dVth_dVb + T0 * Vtm * dn_dVb); + dVgsteff_dVg *= dVgs_eff_dVg; + } + else + { + #ifndef SIMDIFYCPP + ExpVgst = exp(VgstNVt); + #endif + T1 = T10 * log(1.0 + ExpVgst); + dT1_dVg = ExpVgst / (1.0 + ExpVgst); + dT1_dVb = -dT1_dVg * (dVth_dVb + Vgst / n * dn_dVb) + + T1 / n * dn_dVb; + dT1_dVd = -dT1_dVg * (dVth_dVd + Vgst / n * dn_dVd) + + T1 / n * dn_dVd; + + dT2_dVg = -model->BSIM3v32cox / (Vtm * pParam->BSIM3v32cdep0) + * exp(ExpArg); + T2 = 1.0 - T10 * dT2_dVg; + dT2_dVd = -dT2_dVg * (dVth_dVd - 2.0 * Vtm * ExpArg * dn_dVd) + + (T2 - 1.0) / n * dn_dVd; + dT2_dVb = -dT2_dVg * (dVth_dVb - 2.0 * Vtm * ExpArg * dn_dVb) + + (T2 - 1.0) / n * dn_dVb; + + Vgsteff = T1 / T2; + T3 = T2 * T2; + dVgsteff_dVg = (T2 * dT1_dVg - T1 * dT2_dVg) / T3 * dVgs_eff_dVg; + dVgsteff_dVd = (T2 * dT1_dVd - T1 * dT2_dVd) / T3; + dVgsteff_dVb = (T2 * dT1_dVb - T1 * dT2_dVb) / T3; + } + + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > BSIM3v32V323) { + here->BSIM3v32Vgsteff = Vgsteff; + } + +/* Calculate Effective Channel Geometry */ + T9 = sqrtPhis - pParam->BSIM3v32sqrtPhi; + Weff = pParam->BSIM3v32weff - 2.0 * (pParam->BSIM3v32dwg * Vgsteff + + pParam->BSIM3v32dwb * T9); + dWeff_dVg = -2.0 * pParam->BSIM3v32dwg; + dWeff_dVb = -2.0 * pParam->BSIM3v32dwb * dsqrtPhis_dVb; + + if (Weff < 2.0e-8) /* 
to avoid the discontinuity problem due to Weff*/ + { T0 = 1.0 / (6.0e-8 - 2.0 * Weff); + Weff = 2.0e-8 * (4.0e-8 - Weff) * T0; + T0 *= T0 * 4.0e-16; + dWeff_dVg *= T0; + dWeff_dVb *= T0; + } + + T0 = pParam->BSIM3v32prwg * Vgsteff + pParam->BSIM3v32prwb * T9; + if (T0 >= -0.9) + { Rds = pParam->BSIM3v32rds0 * (1.0 + T0); + dRds_dVg = pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg; + dRds_dVb = pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb * dsqrtPhis_dVb; + } + else + /* to avoid the discontinuity problem due to prwg and prwb*/ + { T1 = 1.0 / (17.0 + 20.0 * T0); + Rds = pParam->BSIM3v32rds0 * (0.8 + T0) * T1; + T1 *= T1; + dRds_dVg = pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg * T1; + dRds_dVb = pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb * dsqrtPhis_dVb + * T1; + } + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > BSIM3v32V323) { + here->BSIM3v32rds = Rds; /* Noise Bugfix */ + } + +/* Calculate Abulk */ + T1 = 0.5 * pParam->BSIM3v32k1ox / sqrtPhis; + dT1_dVb = -T1 / sqrtPhis * dsqrtPhis_dVb; + + T9 = sqrt(pParam->BSIM3v32xj * Xdep); + tmp1 = Leff + 2.0 * T9; + T5 = Leff / tmp1; + tmp2 = pParam->BSIM3v32a0 * T5; + tmp3 = pParam->BSIM3v32weff + pParam->BSIM3v32b1; + tmp4 = pParam->BSIM3v32b0 / tmp3; + T2 = tmp2 + tmp4; + dT2_dVb = -T9 / tmp1 / Xdep * dXdep_dVb; + T6 = T5 * T5; + T7 = T5 * T6; + + Abulk0 = 1.0 + T1 * T2; + dAbulk0_dVb = T1 * tmp2 * dT2_dVb + T2 * dT1_dVb; + + T8 = pParam->BSIM3v32ags * pParam->BSIM3v32a0 * T7; + dAbulk_dVg = -T1 * T8; + Abulk = Abulk0 + dAbulk_dVg * Vgsteff; + dAbulk_dVb = dAbulk0_dVb - T8 * Vgsteff * (dT1_dVb + + 3.0 * T1 * dT2_dVb); + + if (Abulk0 < 0.1) /* added to avoid the problems caused by Abulk0 */ + { T9 = 1.0 / (3.0 - 20.0 * Abulk0); + Abulk0 = (0.2 - Abulk0) * T9; + dAbulk0_dVb *= T9 * T9; + } + + if (Abulk < 0.1) + /* added to avoid the problems caused by Abulk */ + { T9 = 1.0 / (3.0 - 20.0 * Abulk); + Abulk = (0.2 - Abulk) * T9; + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > 
BSIM3v32V32) { + T10 = T9 * T9; + dAbulk_dVb *= T10; + dAbulk_dVg *= T10; + } else { + dAbulk_dVb *= T9 * T9; + } + } + + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > BSIM3v32V323) { + here->BSIM3v32Abulk = Abulk; + } + + T2 = pParam->BSIM3v32keta * Vbseff; + if (T2 >= -0.9) + { T0 = 1.0 / (1.0 + T2); + dT0_dVb = -pParam->BSIM3v32keta * T0 * T0; + } + else + /* added to avoid the problems caused by Keta */ + { T1 = 1.0 / (0.8 + T2); + T0 = (17.0 + 20.0 * T2) * T1; + dT0_dVb = -pParam->BSIM3v32keta * T1 * T1; + } + dAbulk_dVg *= T0; + dAbulk_dVb = dAbulk_dVb * T0 + Abulk * dT0_dVb; + dAbulk0_dVb = dAbulk0_dVb * T0 + Abulk0 * dT0_dVb; + Abulk *= T0; + Abulk0 *= T0; + + +/* Mobility calculation */ + if (model->BSIM3v32mobMod == 1) + { T0 = Vgsteff + Vth + Vth; + T2 = pParam->BSIM3v32ua + pParam->BSIM3v32uc * Vbseff; + T3 = T0 / model->BSIM3v32tox; + T5 = T3 * (T2 + pParam->BSIM3v32ub * T3); + dDenomi_dVg = (T2 + 2.0 * pParam->BSIM3v32ub * T3) / model->BSIM3v32tox; + dDenomi_dVd = dDenomi_dVg * 2.0 * dVth_dVd; + dDenomi_dVb = dDenomi_dVg * 2.0 * dVth_dVb + pParam->BSIM3v32uc * T3; + } + else if (model->BSIM3v32mobMod == 2) + { T5 = Vgsteff / model->BSIM3v32tox * (pParam->BSIM3v32ua + + pParam->BSIM3v32uc * Vbseff + pParam->BSIM3v32ub * Vgsteff + / model->BSIM3v32tox); + dDenomi_dVg = (pParam->BSIM3v32ua + pParam->BSIM3v32uc * Vbseff + + 2.0 * pParam->BSIM3v32ub * Vgsteff / model->BSIM3v32tox) + / model->BSIM3v32tox; + dDenomi_dVd = 0.0; + dDenomi_dVb = Vgsteff * pParam->BSIM3v32uc / model->BSIM3v32tox; + } + else + { T0 = Vgsteff + Vth + Vth; + T2 = 1.0 + pParam->BSIM3v32uc * Vbseff; + T3 = T0 / model->BSIM3v32tox; + T4 = T3 * (pParam->BSIM3v32ua + pParam->BSIM3v32ub * T3); + T5 = T4 * T2; + dDenomi_dVg = (pParam->BSIM3v32ua + 2.0 * pParam->BSIM3v32ub * T3) * T2 + / model->BSIM3v32tox; + dDenomi_dVd = dDenomi_dVg * 2.0 * dVth_dVd; + dDenomi_dVb = dDenomi_dVg * 2.0 * dVth_dVb + pParam->BSIM3v32uc * T4; + } + + if (T5 >= -0.8) + { Denomi = 1.0 + 
T5; + } + else /* Added to avoid the discontinuity problem caused by ua and ub*/ + { T9 = 1.0 / (7.0 + 10.0 * T5); + Denomi = (0.6 + T5) * T9; + T9 *= T9; + dDenomi_dVg *= T9; + dDenomi_dVd *= T9; + dDenomi_dVb *= T9; + } + + here->BSIM3v32ueff = ueff = here->BSIM3v32u0temp / Denomi; + T9 = -ueff / Denomi; + dueff_dVg = T9 * dDenomi_dVg; + dueff_dVd = T9 * dDenomi_dVd; + dueff_dVb = T9 * dDenomi_dVb; + +/* Saturation Drain Voltage Vdsat */ + WVCox = Weff * pParam->BSIM3v32vsattemp * model->BSIM3v32cox; + WVCoxRds = WVCox * Rds; + + Esat = 2.0 * pParam->BSIM3v32vsattemp / ueff; + EsatL = Esat * Leff; + T0 = -EsatL /ueff; + dEsatL_dVg = T0 * dueff_dVg; + dEsatL_dVd = T0 * dueff_dVd; + dEsatL_dVb = T0 * dueff_dVb; + + /* Sqrt() */ + a1 = pParam->BSIM3v32a1; + if (a1 == 0.0) + { Lambda = pParam->BSIM3v32a2; + dLambda_dVg = 0.0; + } + else if (a1 > 0.0) +/* Added to avoid the discontinuity problem + caused by a1 and a2 (Lambda) */ + { T0 = 1.0 - pParam->BSIM3v32a2; + T1 = T0 - pParam->BSIM3v32a1 * Vgsteff - 0.0001; + T2 = sqrt(T1 * T1 + 0.0004 * T0); + Lambda = pParam->BSIM3v32a2 + T0 - 0.5 * (T1 + T2); + dLambda_dVg = 0.5 * pParam->BSIM3v32a1 * (1.0 + T1 / T2); + } + else + { T1 = pParam->BSIM3v32a2 + pParam->BSIM3v32a1 * Vgsteff - 0.0001; + T2 = sqrt(T1 * T1 + 0.0004 * pParam->BSIM3v32a2); + Lambda = 0.5 * (T1 + T2); + dLambda_dVg = 0.5 * pParam->BSIM3v32a1 * (1.0 + T1 / T2); + } + + Vgst2Vtm = Vgsteff + 2.0 * Vtm; + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > BSIM3v32V323) { + here->BSIM3v32AbovVgst2Vtm = Abulk / Vgst2Vtm; + } + + if (Rds > 0) + { tmp2 = dRds_dVg / Rds + dWeff_dVg / Weff; + tmp3 = dRds_dVb / Rds + dWeff_dVb / Weff; + } + else + { tmp2 = dWeff_dVg / Weff; + tmp3 = dWeff_dVb / Weff; + } + if ((Rds == 0.0) & (Lambda == 1.0)) + { T0 = 1.0 / (Abulk * EsatL + Vgst2Vtm); + tmp1 = 0.0; + T1 = T0 * T0; + T2 = Vgst2Vtm * T0; + T3 = EsatL * Vgst2Vtm; + Vdsat = T3 * T0; + + dT0_dVg = -(Abulk * dEsatL_dVg + EsatL * dAbulk_dVg + 1.0) * 
T1; + dT0_dVd = -(Abulk * dEsatL_dVd) * T1; + dT0_dVb = -(Abulk * dEsatL_dVb + dAbulk_dVb * EsatL) * T1; + + dVdsat_dVg = T3 * dT0_dVg + T2 * dEsatL_dVg + EsatL * T0; + dVdsat_dVd = T3 * dT0_dVd + T2 * dEsatL_dVd; + dVdsat_dVb = T3 * dT0_dVb + T2 * dEsatL_dVb; + } + else + { tmp1 = dLambda_dVg / (Lambda * Lambda); + T9 = Abulk * WVCoxRds; + T8 = Abulk * T9; + T7 = Vgst2Vtm * T9; + T6 = Vgst2Vtm * WVCoxRds; + T0 = 2.0 * Abulk * (T9 - 1.0 + 1.0 / Lambda); + dT0_dVg = 2.0 * (T8 * tmp2 - Abulk * tmp1 + + (2.0 * T9 + 1.0 / Lambda - 1.0) * dAbulk_dVg); + + dT0_dVb = 2.0 * (T8 * (2.0 / Abulk * dAbulk_dVb + tmp3) + + (1.0 / Lambda - 1.0) * dAbulk_dVb); + dT0_dVd = 0.0; + T1 = Vgst2Vtm * (2.0 / Lambda - 1.0) + Abulk * EsatL + 3.0 * T7; + + dT1_dVg = (2.0 / Lambda - 1.0) - 2.0 * Vgst2Vtm * tmp1 + + Abulk * dEsatL_dVg + EsatL * dAbulk_dVg + 3.0 * (T9 + + T7 * tmp2 + T6 * dAbulk_dVg); + dT1_dVb = Abulk * dEsatL_dVb + EsatL * dAbulk_dVb + + 3.0 * (T6 * dAbulk_dVb + T7 * tmp3); + dT1_dVd = Abulk * dEsatL_dVd; + + T2 = Vgst2Vtm * (EsatL + 2.0 * T6); + dT2_dVg = EsatL + Vgst2Vtm * dEsatL_dVg + + T6 * (4.0 + 2.0 * Vgst2Vtm * tmp2); + dT2_dVb = Vgst2Vtm * (dEsatL_dVb + 2.0 * T6 * tmp3); + dT2_dVd = Vgst2Vtm * dEsatL_dVd; + + T3 = sqrt(T1 * T1 - 2.0 * T0 * T2); + Vdsat = (T1 - T3) / T0; + + dT3_dVg = (T1 * dT1_dVg - 2.0 * (T0 * dT2_dVg + T2 * dT0_dVg)) + / T3; + dT3_dVd = (T1 * dT1_dVd - 2.0 * (T0 * dT2_dVd + T2 * dT0_dVd)) + / T3; + dT3_dVb = (T1 * dT1_dVb - 2.0 * (T0 * dT2_dVb + T2 * dT0_dVb)) + / T3; + + dVdsat_dVg = (dT1_dVg - (T1 * dT1_dVg - dT0_dVg * T2 + - T0 * dT2_dVg) / T3 - Vdsat * dT0_dVg) / T0; + dVdsat_dVb = (dT1_dVb - (T1 * dT1_dVb - dT0_dVb * T2 + - T0 * dT2_dVb) / T3 - Vdsat * dT0_dVb) / T0; + dVdsat_dVd = (dT1_dVd - (T1 * dT1_dVd - T0 * dT2_dVd) / T3) / T0; + } + here->BSIM3v32vdsat = Vdsat; + +/* Effective Vds (Vdseff) Calculation */ + T1 = Vdsat - Vds - pParam->BSIM3v32delta; + dT1_dVg = dVdsat_dVg; + dT1_dVd = dVdsat_dVd - 1.0; + dT1_dVb = dVdsat_dVb; + + T2 = 
sqrt(T1 * T1 + 4.0 * pParam->BSIM3v32delta * Vdsat); + T0 = T1 / T2; + T3 = 2.0 * pParam->BSIM3v32delta / T2; + dT2_dVg = T0 * dT1_dVg + T3 * dVdsat_dVg; + dT2_dVd = T0 * dT1_dVd + T3 * dVdsat_dVd; + dT2_dVb = T0 * dT1_dVb + T3 * dVdsat_dVb; + + Vdseff = Vdsat - 0.5 * (T1 + T2); + dVdseff_dVg = dVdsat_dVg - 0.5 * (dT1_dVg + dT2_dVg); + dVdseff_dVd = dVdsat_dVd - 0.5 * (dT1_dVd + dT2_dVd); + dVdseff_dVb = dVdsat_dVb - 0.5 * (dT1_dVb + dT2_dVb); + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + /* Added to eliminate non-zero Vdseff at Vds=0.0 */ + if (Vds == 0.0) + { + Vdseff = 0.0; + dVdseff_dVg = 0.0; + dVdseff_dVb = 0.0; + } + break; + case BSIM3v32V32: + default: + /* Do nothing */ + break; + } + +/* Calculate VAsat */ + tmp4 = 1.0 - 0.5 * Abulk * Vdsat / Vgst2Vtm; + T9 = WVCoxRds * Vgsteff; + T8 = T9 / Vgst2Vtm; + T0 = EsatL + Vdsat + 2.0 * T9 * tmp4; + + T7 = 2.0 * WVCoxRds * tmp4; + dT0_dVg = dEsatL_dVg + dVdsat_dVg + T7 * (1.0 + tmp2 * Vgsteff) + - T8 * (Abulk * dVdsat_dVg - Abulk * Vdsat / Vgst2Vtm + + Vdsat * dAbulk_dVg); + + dT0_dVb = dEsatL_dVb + dVdsat_dVb + T7 * tmp3 * Vgsteff + - T8 * (dAbulk_dVb * Vdsat + Abulk * dVdsat_dVb); + dT0_dVd = dEsatL_dVd + dVdsat_dVd - T8 * Abulk * dVdsat_dVd; + + T9 = WVCoxRds * Abulk; + T1 = 2.0 / Lambda - 1.0 + T9; + dT1_dVg = -2.0 * tmp1 + WVCoxRds * (Abulk * tmp2 + dAbulk_dVg); + dT1_dVb = dAbulk_dVb * WVCoxRds + T9 * tmp3; + + Vasat = T0 / T1; + dVasat_dVg = (dT0_dVg - Vasat * dT1_dVg) / T1; + dVasat_dVb = (dT0_dVb - Vasat * dT1_dVb) / T1; + dVasat_dVd = dT0_dVd / T1; + + if (Vdseff > Vds) + Vdseff = Vds; + diffVds = Vds - Vdseff; + /* Added revision dependent code */ + if (model->BSIM3v32intVersion > BSIM3v32V323) { + here->BSIM3v32Vdseff = Vdseff; + } + +/* Calculate VACLM */ + VACLM = MAX_EXP; /* F.B. 
moved else clause into default values */ + dVACLM_dVd = dVACLM_dVg = dVACLM_dVb = 0.0; + if (pParam->BSIM3v32pclm > 0.0) + if (diffVds > 1.0e-10) + { T0 = 1.0 / (pParam->BSIM3v32pclm * Abulk * pParam->BSIM3v32litl); + dT0_dVb = -T0 / Abulk * dAbulk_dVb; + dT0_dVg = -T0 / Abulk * dAbulk_dVg; + + T2 = Vgsteff / EsatL; + T1 = Leff * (Abulk + T2); + dT1_dVg = Leff * ((1.0 - T2 * dEsatL_dVg) / EsatL + dAbulk_dVg); + dT1_dVb = Leff * (dAbulk_dVb - T2 * dEsatL_dVb / EsatL); + dT1_dVd = -T2 * dEsatL_dVd / Esat; + + T9 = T0 * T1; + VACLM = T9 * diffVds; + dVACLM_dVg = T0 * dT1_dVg * diffVds - T9 * dVdseff_dVg + + T1 * diffVds * dT0_dVg; + dVACLM_dVb = (dT0_dVb * T1 + T0 * dT1_dVb) * diffVds + - T9 * dVdseff_dVb; + dVACLM_dVd = T0 * dT1_dVd * diffVds + T9 * (1.0 - dVdseff_dVd); + } + +/* Calculate VADIBL */ + if (pParam->BSIM3v32thetaRout > 0.0) + { T8 = Abulk * Vdsat; + T0 = Vgst2Vtm * T8; + dT0_dVg = Vgst2Vtm * Abulk * dVdsat_dVg + T8 + + Vgst2Vtm * Vdsat * dAbulk_dVg; + dT0_dVb = Vgst2Vtm * (dAbulk_dVb * Vdsat + Abulk * dVdsat_dVb); + dT0_dVd = Vgst2Vtm * Abulk * dVdsat_dVd; + + T1 = Vgst2Vtm + T8; + dT1_dVg = 1.0 + Abulk * dVdsat_dVg + Vdsat * dAbulk_dVg; + dT1_dVb = Abulk * dVdsat_dVb + dAbulk_dVb * Vdsat; + dT1_dVd = Abulk * dVdsat_dVd; + + T9 = T1 * T1; + T2 = pParam->BSIM3v32thetaRout; + VADIBL = (Vgst2Vtm - T0 / T1) / T2; + dVADIBL_dVg = (1.0 - dT0_dVg / T1 + T0 * dT1_dVg / T9) / T2; + dVADIBL_dVb = (-dT0_dVb / T1 + T0 * dT1_dVb / T9) / T2; + dVADIBL_dVd = (-dT0_dVd / T1 + T0 * dT1_dVd / T9) / T2; + + T7 = pParam->BSIM3v32pdiblb * Vbseff; + if (T7 >= -0.9) + { T3 = 1.0 / (1.0 + T7); + VADIBL *= T3; + dVADIBL_dVg *= T3; + dVADIBL_dVb = (dVADIBL_dVb - VADIBL * pParam->BSIM3v32pdiblb) + * T3; + dVADIBL_dVd *= T3; + } + else +/* Added to avoid the discontinuity problem caused by pdiblcb */ + { T4 = 1.0 / (0.8 + T7); + T3 = (17.0 + 20.0 * T7) * T4; + dVADIBL_dVg *= T3; + dVADIBL_dVb = dVADIBL_dVb * T3 + - VADIBL * pParam->BSIM3v32pdiblb * T4 * T4; + dVADIBL_dVd *= T3; + 
VADIBL *= T3; + } + } + else + { VADIBL = MAX_EXP; + dVADIBL_dVd = dVADIBL_dVg = dVADIBL_dVb = 0.0; + } + +/* Calculate VA */ + + T8 = pParam->BSIM3v32pvag / EsatL; + T9 = T8 * Vgsteff; + if (T9 > -0.9) + { T0 = 1.0 + T9; + dT0_dVg = T8 * (1.0 - Vgsteff * dEsatL_dVg / EsatL); + dT0_dVb = -T9 * dEsatL_dVb / EsatL; + dT0_dVd = -T9 * dEsatL_dVd / EsatL; + } + else /* Added to avoid the discontinuity problems caused by pvag */ + { T1 = 1.0 / (17.0 + 20.0 * T9); + T0 = (0.8 + T9) * T1; + T1 *= T1; + dT0_dVg = T8 * (1.0 - Vgsteff * dEsatL_dVg / EsatL) * T1; + + T9 *= T1 / EsatL; + dT0_dVb = -T9 * dEsatL_dVb; + dT0_dVd = -T9 * dEsatL_dVd; + } + + tmp1 = VACLM * VACLM; + tmp2 = VADIBL * VADIBL; + tmp3 = VACLM + VADIBL; + + T1 = VACLM * VADIBL / tmp3; + tmp3 *= tmp3; + dT1_dVg = (tmp1 * dVADIBL_dVg + tmp2 * dVACLM_dVg) / tmp3; + dT1_dVd = (tmp1 * dVADIBL_dVd + tmp2 * dVACLM_dVd) / tmp3; + dT1_dVb = (tmp1 * dVADIBL_dVb + tmp2 * dVACLM_dVb) / tmp3; + + Va = Vasat + T0 * T1; + + dVa_dVg = dVasat_dVg + T1 * dT0_dVg + T0 * dT1_dVg; + dVa_dVd = dVasat_dVd + T1 * dT0_dVd + T0 * dT1_dVd; + dVa_dVb = dVasat_dVb + T1 * dT0_dVb + T0 * dT1_dVb; + +/* Calculate VASCBE */ + dVASCBE_dVg = dVASCBE_dVd = dVASCBE_dVb = 0.0; /* F.B. 
moved from else clauses to default val*/ + if (pParam->BSIM3v32pscbe2 > 0.0) + { if (diffVds > pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl + / EXP_THRESHOLD) + { T0 = pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl / diffVds; + VASCBE = Leff * exp(T0) / pParam->BSIM3v32pscbe2; + T1 = T0 * VASCBE / diffVds; + dVASCBE_dVg = T1 * dVdseff_dVg; + dVASCBE_dVd = -T1 * (1.0 - dVdseff_dVd); + dVASCBE_dVb = T1 * dVdseff_dVb; + } + else + { VASCBE = MAX_EXP * Leff/pParam->BSIM3v32pscbe2; + } + } + else + { VASCBE = MAX_EXP; + } + +/* Calculate Ids */ + CoxWovL = model->BSIM3v32cox * Weff / Leff; + beta = ueff * CoxWovL; + dbeta_dVg = CoxWovL * dueff_dVg + beta * dWeff_dVg / Weff; + dbeta_dVd = CoxWovL * dueff_dVd; + dbeta_dVb = CoxWovL * dueff_dVb + beta * dWeff_dVb / Weff; + + T0 = 1.0 - 0.5 * Abulk * Vdseff / Vgst2Vtm; + dT0_dVg = -0.5 * (Abulk * dVdseff_dVg + - Abulk * Vdseff / Vgst2Vtm + Vdseff * dAbulk_dVg) / Vgst2Vtm; + dT0_dVd = -0.5 * Abulk * dVdseff_dVd / Vgst2Vtm; + dT0_dVb = -0.5 * (Abulk * dVdseff_dVb + dAbulk_dVb * Vdseff) + / Vgst2Vtm; + + fgche1 = Vgsteff * T0; + dfgche1_dVg = Vgsteff * dT0_dVg + T0; + dfgche1_dVd = Vgsteff * dT0_dVd; + dfgche1_dVb = Vgsteff * dT0_dVb; + + T9 = Vdseff / EsatL; + fgche2 = 1.0 + T9; + dfgche2_dVg = (dVdseff_dVg - T9 * dEsatL_dVg) / EsatL; + dfgche2_dVd = (dVdseff_dVd - T9 * dEsatL_dVd) / EsatL; + dfgche2_dVb = (dVdseff_dVb - T9 * dEsatL_dVb) / EsatL; + + gche = beta * fgche1 / fgche2; + dgche_dVg = (beta * dfgche1_dVg + fgche1 * dbeta_dVg + - gche * dfgche2_dVg) / fgche2; + dgche_dVd = (beta * dfgche1_dVd + fgche1 * dbeta_dVd + - gche * dfgche2_dVd) / fgche2; + dgche_dVb = (beta * dfgche1_dVb + fgche1 * dbeta_dVb + - gche * dfgche2_dVb) / fgche2; + + T0 = 1.0 + gche * Rds; + T9 = Vdseff / T0; + Idl = gche * T9; + + dIdl_dVg = (gche * dVdseff_dVg + T9 * dgche_dVg) / T0 + - Idl * gche / T0 * dRds_dVg ; + + dIdl_dVd = (gche * dVdseff_dVd + T9 * dgche_dVd) / T0; + dIdl_dVb = (gche * dVdseff_dVb + T9 * dgche_dVb + - Idl * dRds_dVb * 
gche) / T0; + + T9 = diffVds / Va; + T0 = 1.0 + T9; + Idsa = Idl * T0; + dIdsa_dVg = T0 * dIdl_dVg - Idl * (dVdseff_dVg + T9 * dVa_dVg) / Va; + dIdsa_dVd = T0 * dIdl_dVd + Idl * (1.0 - dVdseff_dVd + - T9 * dVa_dVd) / Va; + dIdsa_dVb = T0 * dIdl_dVb - Idl * (dVdseff_dVb + T9 * dVa_dVb) / Va; + + T9 = diffVds / VASCBE; + T0 = 1.0 + T9; + Ids = Idsa * T0; + + Gm = T0 * dIdsa_dVg - Idsa * (dVdseff_dVg + T9 * dVASCBE_dVg) / VASCBE; + Gds = T0 * dIdsa_dVd + Idsa * (1.0 - dVdseff_dVd + - T9 * dVASCBE_dVd) / VASCBE; + Gmb = T0 * dIdsa_dVb - Idsa * (dVdseff_dVb + + T9 * dVASCBE_dVb) / VASCBE; + + Gds += Gm * dVgsteff_dVd; + Gmb += Gm * dVgsteff_dVb; + Gm *= dVgsteff_dVg; + Gmb *= dVbseff_dVb; + + /* Substrate current begins */ + /* F.B: use local tmpuni instead of tmp */ + tmpuni = pParam->BSIM3v32alpha0 + pParam->BSIM3v32alpha1 * Leff; + if ((tmpuni <= 0.0) || (pParam->BSIM3v32beta0 <= 0.0)) + { Isub = Gbd = Gbb = Gbg = 0.0; + } + else + { T2 = tmpuni / Leff; + if (diffVds > pParam->BSIM3v32beta0 / EXP_THRESHOLD) + { T0 = -pParam->BSIM3v32beta0 / diffVds; + T1 = T2 * diffVds * exp(T0); + T3 = T1 / diffVds * (T0 - 1.0); + dT1_dVg = T3 * dVdseff_dVg; + dT1_dVd = T3 * (dVdseff_dVd - 1.0); + dT1_dVb = T3 * dVdseff_dVb; + } + else + { T3 = T2 * MIN_EXP; + T1 = T3 * diffVds; + dT1_dVg = -T3 * dVdseff_dVg; + dT1_dVd = T3 * (1.0 - dVdseff_dVd); + dT1_dVb = -T3 * dVdseff_dVb; + } + Isub = T1 * Idsa; + Gbg = T1 * dIdsa_dVg + Idsa * dT1_dVg; + Gbd = T1 * dIdsa_dVd + Idsa * dT1_dVd; + Gbb = T1 * dIdsa_dVb + Idsa * dT1_dVb; + + Gbd += Gbg * dVgsteff_dVd; + Gbb += Gbg * dVgsteff_dVb; + Gbg *= dVgsteff_dVg; + Gbb *= dVbseff_dVb; /* bug fixing */ + } + + cdrain = Ids; + here->BSIM3v32gds = Gds; + here->BSIM3v32gm = Gm; + here->BSIM3v32gmbs = Gmb; + + here->BSIM3v32gbbs = Gbb; + here->BSIM3v32gbgs = Gbg; + here->BSIM3v32gbds = Gbd; + + here->BSIM3v32csub = Isub; + + /* BSIM3v32 thermal noise Qinv calculated from all capMod + * 0, 1, 2 & 3 stored in here->BSIM3v32qinv 1/1998 */ + + /* F.B. 
moved CoxWL def to top-level and removed 4x duplicates inside ifs */ +#ifdef SIMDIFYCPP + CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + #define _nu_One_Third_CoxWL (CoxWL / 3.0) + #define _nu_Two_Third_CoxWL (CoxWL * 2 / 3.0) +#endif + if ((model->BSIM3v32xpart < 0) | (!ChargeComputationNeeded)) + { qgate = 0.0; + qdrn = 0.0; + qsrc = 0.0; + qbulk = 0.0; + here->BSIM3v32cggb = 0.0; + here->BSIM3v32cgsb = 0.0; + here->BSIM3v32cgdb = 0.0; + here->BSIM3v32cdgb = 0.0; + here->BSIM3v32cdsb = 0.0; + here->BSIM3v32cddb = 0.0; + here->BSIM3v32cbgb = 0.0; + here->BSIM3v32cbsb = 0.0; + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32cqdb = 0.0; + here->BSIM3v32cqsb = 0.0; + here->BSIM3v32cqgb = 0.0; + here->BSIM3v32cqbb = 0.0; + here->BSIM3v32gtau = 0.0; + goto finished; + } + else if (model->BSIM3v32capMod == 0) + { + if (Vbseff < 0.0) + { Vbseff = Vbs; + dVbseff_dVb = 1.0; + } + else + { Vbseff = pParam->BSIM3v32phi - Phis; + dVbseff_dVb = -dPhis_dVb; + } + + Vfb = pParam->BSIM3v32vfbcv; + Vth = Vfb + pParam->BSIM3v32phi + pParam->BSIM3v32k1ox * sqrtPhis; + Vgst = Vgs_eff - Vth; + dVth_dVb = pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVgst_dVb = -dVth_dVb; /* F.B. set but not used ? */ + dVgst_dVg = dVgs_eff_dVg; /* F.B. set but not used ? */ + + /* CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; F.B. 
*/ + Arg1 = Vgs_eff - Vbseff - Vfb; + + if (Arg1 <= 0.0) + { qgate = CoxWL * Arg1; + qbulk = -qgate; + qdrn = 0.0; + + here->BSIM3v32cggb = CoxWL * dVgs_eff_dVg; + here->BSIM3v32cgdb = 0.0; + here->BSIM3v32cgsb = CoxWL * (dVbseff_dVb - dVgs_eff_dVg); + + here->BSIM3v32cdgb = 0.0; + here->BSIM3v32cddb = 0.0; + here->BSIM3v32cdsb = 0.0; + + here->BSIM3v32cbgb = -CoxWL * dVgs_eff_dVg; + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32cbsb = -here->BSIM3v32cgsb; + here->BSIM3v32qinv = 0.0; + } + else if (Vgst <= 0.0) + { T1 = 0.5 * pParam->BSIM3v32k1ox; + T2 = sqrt(T1 * T1 + Arg1); + qgate = CoxWL * pParam->BSIM3v32k1ox * (T2 - T1); + qbulk = -qgate; + qdrn = 0.0; + + T0 = CoxWL * T1 / T2; + here->BSIM3v32cggb = T0 * dVgs_eff_dVg; + here->BSIM3v32cgdb = 0.0; + here->BSIM3v32cgsb = T0 * (dVbseff_dVb - dVgs_eff_dVg); + + here->BSIM3v32cdgb = 0.0; + here->BSIM3v32cddb = 0.0; + here->BSIM3v32cdsb = 0.0; + + here->BSIM3v32cbgb = -here->BSIM3v32cggb; + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32cbsb = -here->BSIM3v32cgsb; + here->BSIM3v32qinv = 0.0; + } + else + { /*One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL;*/ + + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + Vdsat = Vgst / AbulkCV; + dVdsat_dVg = dVgs_eff_dVg / AbulkCV; + dVdsat_dVb = - (Vdsat * dAbulkCV_dVb + dVth_dVb)/ AbulkCV; + + if (model->BSIM3v32xpart > 0.5) + { /* 0/100 Charge partition model */ + if (Vdsat <= Vds) + { /* saturation region */ + T1 = Vdsat / 3.0; + qgate = CoxWL * (Vgs_eff - Vfb + - pParam->BSIM3v32phi - T1); + T2 = -Two_Third_CoxWL * Vgst; + qbulk = -(qgate + T2); + qdrn = 0.0; + + here->BSIM3v32cggb = One_Third_CoxWL * (3.0 + - dVdsat_dVg) * dVgs_eff_dVg; + T2 = -One_Third_CoxWL * dVdsat_dVb; + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + T2); + here->BSIM3v32cgdb = 0.0; + + here->BSIM3v32cdgb = 0.0; + here->BSIM3v32cddb = 0.0; + here->BSIM3v32cdsb = 0.0; + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + - 
Two_Third_CoxWL * dVgs_eff_dVg); + T3 = -(T2 + Two_Third_CoxWL * dVth_dVb); + here->BSIM3v32cbsb = -(here->BSIM3v32cbgb + T3); + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32qinv = -(qgate + qbulk); + } + else + { /* linear region */ + Alphaz = Vgst / Vdsat; + T1 = 2.0 * Vdsat - Vds; + T2 = Vds / (3.0 * T1); + T3 = T2 * Vds; + T9 = 0.25 * CoxWL; + T4 = T9 * Alphaz; + T7 = 2.0 * Vds - T1 - 3.0 * T3; + T8 = T3 - T1 - 2.0 * Vds; + qgate = CoxWL * (Vgs_eff - Vfb + - pParam->BSIM3v32phi - 0.5 * (Vds - T3)); + T10 = T4 * T8; + qdrn = T4 * T7; + qbulk = -(qgate + qdrn + T10); + + T5 = T3 / T1; + here->BSIM3v32cggb = CoxWL * (1.0 - T5 * dVdsat_dVg) + * dVgs_eff_dVg; + T11 = -CoxWL * T5 * dVdsat_dVb; + here->BSIM3v32cgdb = CoxWL * (T2 - 0.5 + 0.5 * T5); + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + T11 + + here->BSIM3v32cgdb); + T6 = 1.0 / Vdsat; + dAlphaz_dVg = T6 * (1.0 - Alphaz * dVdsat_dVg); + dAlphaz_dVb = -T6 * (dVth_dVb + Alphaz * dVdsat_dVb); + T7 = T9 * T7; + T8 = T9 * T8; + T9 = 2.0 * T4 * (1.0 - 3.0 * T5); + here->BSIM3v32cdgb = (T7 * dAlphaz_dVg - T9 + * dVdsat_dVg) * dVgs_eff_dVg; + T12 = T7 * dAlphaz_dVb - T9 * dVdsat_dVb; + here->BSIM3v32cddb = T4 * (3.0 - 6.0 * T2 - 3.0 * T5); + here->BSIM3v32cdsb = -(here->BSIM3v32cdgb + T12 + + here->BSIM3v32cddb); + + T9 = 2.0 * T4 * (1.0 + T5); + T10 = (T8 * dAlphaz_dVg - T9 * dVdsat_dVg) + * dVgs_eff_dVg; + T11 = T8 * dAlphaz_dVb - T9 * dVdsat_dVb; + T12 = T4 * (2.0 * T2 + T5 - 1.0); + T0 = -(T10 + T11 + T12); + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + + here->BSIM3v32cdgb + T10); + here->BSIM3v32cbdb = -(here->BSIM3v32cgdb + + here->BSIM3v32cddb + T12); + here->BSIM3v32cbsb = -(here->BSIM3v32cgsb + + here->BSIM3v32cdsb + T0); + here->BSIM3v32qinv = -(qgate + qbulk); + } + } + else if (model->BSIM3v32xpart < 0.5) + { /* 40/60 Charge partition model */ + if (Vds >= Vdsat) + { /* saturation region */ + T1 = Vdsat / 3.0; + qgate = CoxWL * (Vgs_eff - Vfb + - pParam->BSIM3v32phi - T1); + T2 = -Two_Third_CoxWL * Vgst; + 
qbulk = -(qgate + T2); + qdrn = 0.4 * T2; + + here->BSIM3v32cggb = One_Third_CoxWL * (3.0 + - dVdsat_dVg) * dVgs_eff_dVg; + T2 = -One_Third_CoxWL * dVdsat_dVb; + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + T2); + here->BSIM3v32cgdb = 0.0; + + T3 = 0.4 * Two_Third_CoxWL; + here->BSIM3v32cdgb = -T3 * dVgs_eff_dVg; + here->BSIM3v32cddb = 0.0; + T4 = T3 * dVth_dVb; + here->BSIM3v32cdsb = -(T4 + here->BSIM3v32cdgb); + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + - Two_Third_CoxWL * dVgs_eff_dVg); + T3 = -(T2 + Two_Third_CoxWL * dVth_dVb); + here->BSIM3v32cbsb = -(here->BSIM3v32cbgb + T3); + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32qinv = -(qgate + qbulk); + } + else + { /* linear region */ + Alphaz = Vgst / Vdsat; + T1 = 2.0 * Vdsat - Vds; + T2 = Vds / (3.0 * T1); + T3 = T2 * Vds; + T9 = 0.25 * CoxWL; + T4 = T9 * Alphaz; + qgate = CoxWL * (Vgs_eff - Vfb - pParam->BSIM3v32phi + - 0.5 * (Vds - T3)); + + T5 = T3 / T1; + here->BSIM3v32cggb = CoxWL * (1.0 - T5 * dVdsat_dVg) + * dVgs_eff_dVg; + tmp = -CoxWL * T5 * dVdsat_dVb; + here->BSIM3v32cgdb = CoxWL * (T2 - 0.5 + 0.5 * T5); + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + + here->BSIM3v32cgdb + tmp); + + T6 = 1.0 / Vdsat; + dAlphaz_dVg = T6 * (1.0 - Alphaz * dVdsat_dVg); + dAlphaz_dVb = -T6 * (dVth_dVb + Alphaz * dVdsat_dVb); + + T6 = 8.0 * Vdsat * Vdsat - 6.0 * Vdsat * Vds + + 1.2 * Vds * Vds; + T8 = T2 / T1; + T7 = Vds - T1 - T8 * T6; + qdrn = T4 * T7; + T7 *= T9; + tmp = T8 / T1; + tmp1 = T4 * (2.0 - 4.0 * tmp * T6 + + T8 * (16.0 * Vdsat - 6.0 * Vds)); + + here->BSIM3v32cdgb = (T7 * dAlphaz_dVg - tmp1 + * dVdsat_dVg) * dVgs_eff_dVg; + T10 = T7 * dAlphaz_dVb - tmp1 * dVdsat_dVb; + here->BSIM3v32cddb = T4 * (2.0 - (1.0 / (3.0 * T1 + * T1) + 2.0 * tmp) * T6 + T8 + * (6.0 * Vdsat - 2.4 * Vds)); + here->BSIM3v32cdsb = -(here->BSIM3v32cdgb + + T10 + here->BSIM3v32cddb); + + T7 = 2.0 * (T1 + T3); + qbulk = -(qgate - T4 * T7); + T7 *= T9; + T0 = 4.0 * T4 * (1.0 - T5); + T12 = (-T7 * dAlphaz_dVg - here->BSIM3v32cdgb + - T0 * 
dVdsat_dVg) * dVgs_eff_dVg; + T11 = -T7 * dAlphaz_dVb - T10 - T0 * dVdsat_dVb; + T10 = -4.0 * T4 * (T2 - 0.5 + 0.5 * T5) + - here->BSIM3v32cddb; + tmp = -(T10 + T11 + T12); + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + + here->BSIM3v32cdgb + T12); + here->BSIM3v32cbdb = -(here->BSIM3v32cgdb + + here->BSIM3v32cddb + T10); /* bug fix */ + here->BSIM3v32cbsb = -(here->BSIM3v32cgsb + + here->BSIM3v32cdsb + tmp); + here->BSIM3v32qinv = -(qgate + qbulk); + } + } + else + { /* 50/50 partitioning */ + if (Vds >= Vdsat) + { /* saturation region */ + T1 = Vdsat / 3.0; + qgate = CoxWL * (Vgs_eff - Vfb + - pParam->BSIM3v32phi - T1); + T2 = -Two_Third_CoxWL * Vgst; + qbulk = -(qgate + T2); + qdrn = 0.5 * T2; + + here->BSIM3v32cggb = One_Third_CoxWL * (3.0 + - dVdsat_dVg) * dVgs_eff_dVg; + T2 = -One_Third_CoxWL * dVdsat_dVb; + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + T2); + here->BSIM3v32cgdb = 0.0; + + here->BSIM3v32cdgb = -One_Third_CoxWL * dVgs_eff_dVg; + here->BSIM3v32cddb = 0.0; + T4 = One_Third_CoxWL * dVth_dVb; + here->BSIM3v32cdsb = -(T4 + here->BSIM3v32cdgb); + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + - Two_Third_CoxWL * dVgs_eff_dVg); + T3 = -(T2 + Two_Third_CoxWL * dVth_dVb); + here->BSIM3v32cbsb = -(here->BSIM3v32cbgb + T3); + here->BSIM3v32cbdb = 0.0; + here->BSIM3v32qinv = -(qgate + qbulk); + } + else + { /* linear region */ + Alphaz = Vgst / Vdsat; + T1 = 2.0 * Vdsat - Vds; + T2 = Vds / (3.0 * T1); + T3 = T2 * Vds; + T9 = 0.25 * CoxWL; + T4 = T9 * Alphaz; + qgate = CoxWL * (Vgs_eff - Vfb - pParam->BSIM3v32phi + - 0.5 * (Vds - T3)); + + T5 = T3 / T1; + here->BSIM3v32cggb = CoxWL * (1.0 - T5 * dVdsat_dVg) + * dVgs_eff_dVg; + tmp = -CoxWL * T5 * dVdsat_dVb; + here->BSIM3v32cgdb = CoxWL * (T2 - 0.5 + 0.5 * T5); + here->BSIM3v32cgsb = -(here->BSIM3v32cggb + + here->BSIM3v32cgdb + tmp); + + T6 = 1.0 / Vdsat; + dAlphaz_dVg = T6 * (1.0 - Alphaz * dVdsat_dVg); + dAlphaz_dVb = -T6 * (dVth_dVb + Alphaz * dVdsat_dVb); + + T7 = T1 + T3; + qdrn = -T4 * T7; + qbulk = - 
(qgate + qdrn + qdrn); + T7 *= T9; + T0 = T4 * (2.0 * T5 - 2.0); + + here->BSIM3v32cdgb = (T0 * dVdsat_dVg - T7 + * dAlphaz_dVg) * dVgs_eff_dVg; + T12 = T0 * dVdsat_dVb - T7 * dAlphaz_dVb; + here->BSIM3v32cddb = T4 * (1.0 - 2.0 * T2 - T5); + here->BSIM3v32cdsb = -(here->BSIM3v32cdgb + T12 + + here->BSIM3v32cddb); + + here->BSIM3v32cbgb = -(here->BSIM3v32cggb + + 2.0 * here->BSIM3v32cdgb); + here->BSIM3v32cbdb = -(here->BSIM3v32cgdb + + 2.0 * here->BSIM3v32cddb); + here->BSIM3v32cbsb = -(here->BSIM3v32cgsb + + 2.0 * here->BSIM3v32cdsb); + here->BSIM3v32qinv = -(qgate + qbulk); + } + } + } + } + else + { if (Vbseff < 0.0) + { VbseffCV = Vbseff; + dVbseffCV_dVb = 1.0; + } + else + { VbseffCV = pParam->BSIM3v32phi - Phis; + dVbseffCV_dVb = -dPhis_dVb; + } + + /* CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; F.B. */ + + /* Seperate VgsteffCV with noff and voffcv */ + noff = n * pParam->BSIM3v32noff; + dnoff_dVd = pParam->BSIM3v32noff * dn_dVd; + dnoff_dVb = pParam->BSIM3v32noff * dn_dVb; + T0 = Vtm * noff; + voffcv = pParam->BSIM3v32voffcv; + VgstNVt = (Vgst - voffcv) / T0; + + if (VgstNVt > EXP_THRESHOLD) + { Vgsteff = Vgst - voffcv; + dVgsteff_dVg = dVgs_eff_dVg; + dVgsteff_dVd = -dVth_dVd; + dVgsteff_dVb = -dVth_dVb; + } + else if (VgstNVt < -EXP_THRESHOLD) + { Vgsteff = T0 * log(1.0 + MIN_EXP); + dVgsteff_dVg = 0.0; + dVgsteff_dVd = Vgsteff / noff; + dVgsteff_dVb = dVgsteff_dVd * dnoff_dVb; + dVgsteff_dVd *= dnoff_dVd; + } + else + { ExpVgst = exp(VgstNVt); + Vgsteff = T0 * log(1.0 + ExpVgst); + dVgsteff_dVg = ExpVgst / (1.0 + ExpVgst); + dVgsteff_dVd = -dVgsteff_dVg * (dVth_dVd + (Vgst - voffcv) + / noff * dnoff_dVd) + Vgsteff / noff * dnoff_dVd; + dVgsteff_dVb = -dVgsteff_dVg * (dVth_dVb + (Vgst - voffcv) + / noff * dnoff_dVb) + Vgsteff / noff * dnoff_dVb; + dVgsteff_dVg *= dVgs_eff_dVg; + } /* End of VgsteffCV */ + + if (model->BSIM3v32capMod == 1) + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + 
case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Vfb = here->BSIM3v32vfbzb; + break; + case BSIM3v32V32: + Vfb = here->BSIM3v32vfbzb; + dVfb_dVb = dVfb_dVd = 0.0; + break; + default: + Vfb = Vth - pParam->BSIM3v32phi - pParam->BSIM3v32k1ox * sqrtPhis; + dVfb_dVb = dVth_dVb - pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVfb_dVd = dVth_dVd; + } + + Arg1 = Vgs_eff - VbseffCV - Vfb - Vgsteff; + + if (Arg1 <= 0.0) + { qgate = CoxWL * Arg1; + Cgg = CoxWL * (dVgs_eff_dVg - dVgsteff_dVg); + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Cgd = -CoxWL * dVgsteff_dVd; + Cgb = -CoxWL * (dVbseffCV_dVb + dVgsteff_dVb); + break; + case BSIM3v32V32: + default: + Cgd = -CoxWL * (dVfb_dVd + dVgsteff_dVd); + Cgb = -CoxWL * (dVfb_dVb + dVbseffCV_dVb + dVgsteff_dVb); + } + } + else + { T0 = 0.5 * pParam->BSIM3v32k1ox; + T1 = sqrt(T0 * T0 + Arg1); + T2 = CoxWL * T0 / T1; + + qgate = CoxWL * pParam->BSIM3v32k1ox * (T1 - T0); + + Cgg = T2 * (dVgs_eff_dVg - dVgsteff_dVg); + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Cgd = -T2 * dVgsteff_dVd; + Cgb = -T2 * (dVbseffCV_dVb + dVgsteff_dVb); + break; + case BSIM3v32V32: + default: + Cgd = -T2 * (dVfb_dVd + dVgsteff_dVd); + Cgb = -T2 * (dVfb_dVb + dVbseffCV_dVb + dVgsteff_dVb); + } + } + qbulk = -qgate; + Cbg = -Cgg; + Cbd = -Cgd; + Cbb = -Cgb; + + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + if (VdsatCV < Vds) + { dVdsatCV_dVg = 1.0 / AbulkCV; + dVdsatCV_dVb = -VdsatCV * dAbulkCV_dVb / AbulkCV; + T0 = Vgsteff - VdsatCV / 3.0; + dT0_dVg = 1.0 - dVdsatCV_dVg / 3.0; + dT0_dVb = -dVdsatCV_dVb / 3.0; + qgate += CoxWL * T0; + Cgg1 = CoxWL * dT0_dVg; + Cgb1 = CoxWL * dT0_dVb + Cgg1 
* dVgsteff_dVb; + Cgd1 = Cgg1 * dVgsteff_dVd; + Cgg1 *= dVgsteff_dVg; + Cgg += Cgg1; + Cgb += Cgb1; + Cgd += Cgd1; + + T0 = VdsatCV - Vgsteff; + dT0_dVg = dVdsatCV_dVg - 1.0; + dT0_dVb = dVdsatCV_dVb; + qbulk += One_Third_CoxWL * T0; + Cbg1 = One_Third_CoxWL * dT0_dVg; + Cbb1 = One_Third_CoxWL * dT0_dVb + Cbg1 * dVgsteff_dVb; + Cbd1 = Cbg1 * dVgsteff_dVd; + Cbg1 *= dVgsteff_dVg; + Cbg += Cbg1; + Cbb += Cbb1; + Cbd += Cbd1; + + if (model->BSIM3v32xpart > 0.5) + T0 = -Two_Third_CoxWL; + else if (model->BSIM3v32xpart < 0.5) + T0 = -0.4 * CoxWL; + else + T0 = -One_Third_CoxWL; + + qsrc = T0 * Vgsteff; + Csg = T0 * dVgsteff_dVg; + Csb = T0 * dVgsteff_dVb; + Csd = T0 * dVgsteff_dVd; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + } + else + { T0 = AbulkCV * Vds; + T1 = 12.0 * (Vgsteff - 0.5 * T0 + 1.e-20); + T2 = Vds / T1; + T3 = T0 * T2; + dT3_dVg = -12.0 * T2 * T2 * AbulkCV; + dT3_dVd = 6.0 * T0 * (4.0 * Vgsteff - T0) / T1 / T1 - 0.5; + dT3_dVb = 12.0 * T2 * T2 * dAbulkCV_dVb * Vgsteff; + + qgate += CoxWL * (Vgsteff - 0.5 * Vds + T3); + Cgg1 = CoxWL * (1.0 + dT3_dVg); + Cgb1 = CoxWL * dT3_dVb + Cgg1 * dVgsteff_dVb; + Cgd1 = CoxWL * dT3_dVd + Cgg1 * dVgsteff_dVd; + Cgg1 *= dVgsteff_dVg; + Cgg += Cgg1; + Cgb += Cgb1; + Cgd += Cgd1; + + qbulk += CoxWL * (1.0 - AbulkCV) * (0.5 * Vds - T3); + Cbg1 = -CoxWL * ((1.0 - AbulkCV) * dT3_dVg); + Cbb1 = -CoxWL * ((1.0 - AbulkCV) * dT3_dVb + + (0.5 * Vds - T3) * dAbulkCV_dVb) + + Cbg1 * dVgsteff_dVb; + Cbd1 = -CoxWL * (1.0 - AbulkCV) * dT3_dVd + + Cbg1 * dVgsteff_dVd; + Cbg1 *= dVgsteff_dVg; + Cbg += Cbg1; + Cbb += Cbb1; + Cbd += Cbd1; + + if (model->BSIM3v32xpart > 0.5) + { /* 0/100 Charge petition model */ + T1 = T1 + T1; + qsrc = -CoxWL * (0.5 * Vgsteff + 0.25 * T0 + - T0 * T0 / T1); + Csg = -CoxWL * (0.5 + 24.0 * T0 * Vds / T1 / T1 + * AbulkCV); + Csb = -CoxWL * (0.25 * Vds * dAbulkCV_dVb + - 12.0 * T0 * Vds / T1 / T1 * (4.0 * Vgsteff - T0) + * dAbulkCV_dVb) + Csg * dVgsteff_dVb; + Csd = -CoxWL * (0.25 * 
AbulkCV - 12.0 * AbulkCV * T0 + / T1 / T1 * (4.0 * Vgsteff - T0)) + + Csg * dVgsteff_dVd; + Csg *= dVgsteff_dVg; + } + else if (model->BSIM3v32xpart < 0.5) + { /* 40/60 Charge petition model */ + T1 = T1 / 12.0; + T2 = 0.5 * CoxWL / (T1 * T1); + T3 = Vgsteff * (2.0 * T0 * T0 / 3.0 + Vgsteff + * (Vgsteff - 4.0 * T0 / 3.0)) + - 2.0 * T0 * T0 * T0 / 15.0; + qsrc = -T2 * T3; + T4 = 4.0 / 3.0 * Vgsteff * (Vgsteff - T0) + + 0.4 * T0 * T0; + Csg = -2.0 * qsrc / T1 - T2 * (Vgsteff * (3.0 + * Vgsteff - 8.0 * T0 / 3.0) + + 2.0 * T0 * T0 / 3.0); + Csb = (qsrc / T1 * Vds + T2 * T4 * Vds) * dAbulkCV_dVb + + Csg * dVgsteff_dVb; + Csd = (qsrc / T1 + T2 * T4) * AbulkCV + + Csg * dVgsteff_dVd; + Csg *= dVgsteff_dVg; + } + else + { /* 50/50 Charge petition model */ + qsrc = -0.5 * (qgate + qbulk); + Csg = -0.5 * (Cgg1 + Cbg1); + Csb = -0.5 * (Cgb1 + Cbb1); + Csd = -0.5 * (Cgd1 + Cbd1); + } + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + } + qdrn = -(qgate + qbulk + qsrc); + here->BSIM3v32cggb = Cgg; + here->BSIM3v32cgsb = -(Cgg + Cgd + Cgb); + here->BSIM3v32cgdb = Cgd; + here->BSIM3v32cdgb = -(Cgg + Cbg + Csg); + here->BSIM3v32cdsb = (Cgg + Cgd + Cgb + Cbg + Cbd + Cbb + + Csg + Csd + Csb); + here->BSIM3v32cddb = -(Cgd + Cbd + Csd); + here->BSIM3v32cbgb = Cbg; + here->BSIM3v32cbsb = -(Cbg + Cbd + Cbb); + here->BSIM3v32cbdb = Cbd; + here->BSIM3v32qinv = -(qgate + qbulk); + } + + else if (model->BSIM3v32capMod == 2) + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Vfb = here->BSIM3v32vfbzb; + break; + case BSIM3v32V32: + Vfb = here->BSIM3v32vfbzb; + dVfb_dVb = dVfb_dVd = 0.0; + break; + default: /* old code prior to v3.2 */ + Vfb = Vth - pParam->BSIM3v32phi - pParam->BSIM3v32k1ox * sqrtPhis; + dVfb_dVb = dVth_dVb - pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVfb_dVd = dVth_dVd; + } + + V3 = Vfb - Vgs_eff + VbseffCV - DELTA_3; + T0 = V3 * V3; /* F.B.*/ + T2 = 4.0 * DELTA_3 * 
Vfb; /* F.B.*/ + if (Vfb <= 0.0) + { T0 = T0 - T2; + T2 = -DELTA_3; + } + else + { T0 = T0 + T2; + T2 = DELTA_3; + } + T0 = sqrt(T0); /* F.B. */ + T2 = T2 / T0; + /* F.B. original code was + if (Vfb <= 0.0) + { T0 = sqrt(V3 * V3 - 4.0 * DELTA_3 * Vfb); + T2 = -DELTA_3 / T0; + } + else + { T0 = sqrt(V3 * V3 + 4.0 * DELTA_3 * Vfb); + T2 = DELTA_3 / T0; + } + */ + T1 = 0.5 * (1.0 + V3 / T0); + Vfbeff = Vfb - 0.5 * (V3 + T0); + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + /* Do nothing */ + break; + case BSIM3v32V32: + default: + dVfbeff_dVd = (1.0 - T1 - T2) * dVfb_dVd; + } + dVfbeff_dVg = T1 * dVgs_eff_dVg; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + dVfbeff_dVb = -T1 * dVbseffCV_dVb; + break; + case BSIM3v32V32: + default: + dVfbeff_dVb = (1.0 - T1 - T2) * dVfb_dVb - T1 * dVbseffCV_dVb; + } + Qac0 = CoxWL * (Vfbeff - Vfb); + dQac0_dVg = CoxWL * dVfbeff_dVg; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + /* Do nothing */ + break; + case BSIM3v32V32: + default: + dQac0_dVd = CoxWL * (dVfbeff_dVd - dVfb_dVd); + } + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + dQac0_dVb = CoxWL * dVfbeff_dVb; + break; + case BSIM3v32V32: + default: + dQac0_dVb = CoxWL * (dVfbeff_dVb - dVfb_dVb); + } + + T0 = 0.5 * pParam->BSIM3v32k1ox; + T3 = Vgs_eff - Vfbeff - VbseffCV - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { T1 = 0.0; + T2 = 0.0; + } + else if (T3 < 0.0) + { T1 = T0 + T3 / pParam->BSIM3v32k1ox; + T2 = CoxWL; + } + else + { T1 = sqrt(T0 * T0 + T3); + T2 = CoxWL * T0 / T1; + } + + Qsub0 = CoxWL * pParam->BSIM3v32k1ox * (T1 - T0); + + dQsub0_dVg = T2 * (dVgs_eff_dVg - dVfbeff_dVg - dVgsteff_dVg); + /* 
Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + dQsub0_dVd = -T2 * dVgsteff_dVd; + break; + case BSIM3v32V32: + default: + dQsub0_dVd = -T2 * (dVfbeff_dVd + dVgsteff_dVd); + } + dQsub0_dVb = -T2 * (dVfbeff_dVb + dVbseffCV_dVb + + dVgsteff_dVb); + + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + + V4 = VdsatCV - Vds - DELTA_4; + T0 = sqrt(V4 * V4 + 4.0 * DELTA_4 * VdsatCV); + VdseffCV = VdsatCV - 0.5 * (V4 + T0); + T1 = 0.5 * (1.0 + V4 / T0); + T2 = DELTA_4 / T0; + T3 = (1.0 - T1 - T2) / AbulkCV; + dVdseffCV_dVg = T3; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = -T3 * VdsatCV * dAbulkCV_dVb; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + /* Added to eliminate non-zero VdseffCV at Vds=0.0 */ + if (Vds == 0.0) + { + VdseffCV = 0.0; + dVdseffCV_dVg = 0.0; + dVdseffCV_dVb = 0.0; + } + break; + case BSIM3v32V32: + default: + /* Do nothing */ + break; + } + + T0 = AbulkCV * VdseffCV; + T1 = 12.0 * (Vgsteff - 0.5 * T0 + 1e-20); + T2 = VdseffCV / T1; + T3 = T0 * T2; + + T4 = (1.0 - 12.0 * T2 * T2 * AbulkCV); + T5 = (6.0 * T0 * (4.0 * Vgsteff - T0) / (T1 * T1) - 0.5); + T6 = 12.0 * T2 * T2 * Vgsteff; + + qinoi = -CoxWL * (Vgsteff - 0.5 * T0 + AbulkCV * T3); + qgate = CoxWL * (Vgsteff - 0.5 * VdseffCV + T3); + Cgg1 = CoxWL * (T4 + T5 * dVdseffCV_dVg); + Cgd1 = CoxWL * T5 * dVdseffCV_dVd + Cgg1 * dVgsteff_dVd; + Cgb1 = CoxWL * (T5 * dVdseffCV_dVb + T6 * dAbulkCV_dVb) + + Cgg1 * dVgsteff_dVb; + Cgg1 *= dVgsteff_dVg; + + T7 = 1.0 - AbulkCV; + qbulk = CoxWL * T7 * (0.5 * VdseffCV - T3); + T4 = -T7 * (T4 - 1.0); + T5 = -T7 * T5; + T6 = -(T7 * T6 + (0.5 * VdseffCV - T3)); + Cbg1 = CoxWL * (T4 + T5 * dVdseffCV_dVg); + Cbd1 = CoxWL * T5 * dVdseffCV_dVd + Cbg1 * dVgsteff_dVd; + Cbb1 = CoxWL * (T5 * dVdseffCV_dVb 
+ T6 * dAbulkCV_dVb) + + Cbg1 * dVgsteff_dVb; + Cbg1 *= dVgsteff_dVg; + + if (model->BSIM3v32xpart > 0.5) + { /* 0/100 Charge petition model */ + T1 = T1 + T1; + qsrc = -CoxWL * (0.5 * Vgsteff + 0.25 * T0 + - T0 * T0 / T1); + T7 = (4.0 * Vgsteff - T0) / (T1 * T1); + T4 = -(0.5 + 24.0 * T0 * T0 / (T1 * T1)); + T5 = -(0.25 * AbulkCV - 12.0 * AbulkCV * T0 * T7); + T6 = -(0.25 * VdseffCV - 12.0 * T0 * VdseffCV * T7); + Csg = CoxWL * (T4 + T5 * dVdseffCV_dVg); + Csd = CoxWL * T5 * dVdseffCV_dVd + Csg * dVgsteff_dVd; + Csb = CoxWL * (T5 * dVdseffCV_dVb + T6 * dAbulkCV_dVb) + + Csg * dVgsteff_dVb; + Csg *= dVgsteff_dVg; + } + else if (model->BSIM3v32xpart < 0.5) + { /* 40/60 Charge petition model */ + T1 = T1 / 12.0; + T2 = 0.5 * CoxWL / (T1 * T1); + T3 = Vgsteff * (2.0 * T0 * T0 / 3.0 + Vgsteff + * (Vgsteff - 4.0 * T0 / 3.0)) + - 2.0 * T0 * T0 * T0 / 15.0; + qsrc = -T2 * T3; + T7 = 4.0 / 3.0 * Vgsteff * (Vgsteff - T0) + + 0.4 * T0 * T0; + T4 = -2.0 * qsrc / T1 - T2 * (Vgsteff * (3.0 + * Vgsteff - 8.0 * T0 / 3.0) + + 2.0 * T0 * T0 / 3.0); + T5 = (qsrc / T1 + T2 * T7) * AbulkCV; + T6 = (qsrc / T1 * VdseffCV + T2 * T7 * VdseffCV); + Csg = (T4 + T5 * dVdseffCV_dVg); + Csd = T5 * dVdseffCV_dVd + Csg * dVgsteff_dVd; + Csb = (T5 * dVdseffCV_dVb + T6 * dAbulkCV_dVb) + + Csg * dVgsteff_dVb; + Csg *= dVgsteff_dVg; + } + else + { /* 50/50 Charge petition model */ + qsrc = -0.5 * (qgate + qbulk); + Csg = -0.5 * (Cgg1 + Cbg1); + Csb = -0.5 * (Cgb1 + Cbb1); + Csd = -0.5 * (Cgd1 + Cbd1); + } + + qgate += Qac0 + Qsub0; + qbulk -= (Qac0 + Qsub0); + qdrn = -(qgate + qbulk + qsrc); + + Cgg = dQac0_dVg + dQsub0_dVg + Cgg1; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Cgd = dQsub0_dVd + Cgd1; + break; + case BSIM3v32V32: + default: + Cgd = dQac0_dVd + dQsub0_dVd + Cgd1; + } + Cgb = dQac0_dVb + dQsub0_dVb + Cgb1; + + Cbg = Cbg1 - dQac0_dVg - dQsub0_dVg; + /* Added revision dependent code */ + 
switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + Cbd = Cbd1 - dQsub0_dVd; + break; + case BSIM3v32V32: + default: + Cbd = Cbd1 - dQac0_dVd - dQsub0_dVd; + } + Cbb = Cbb1 - dQac0_dVb - dQsub0_dVb; + + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + + here->BSIM3v32cggb = Cgg; + here->BSIM3v32cgsb = -(Cgg + Cgd + Cgb); + here->BSIM3v32cgdb = Cgd; + here->BSIM3v32cdgb = -(Cgg + Cbg + Csg); + here->BSIM3v32cdsb = (Cgg + Cgd + Cgb + Cbg + Cbd + Cbb + + Csg + Csd + Csb); + here->BSIM3v32cddb = -(Cgd + Cbd + Csd); + here->BSIM3v32cbgb = Cbg; + here->BSIM3v32cbsb = -(Cbg + Cbd + Cbb); + here->BSIM3v32cbdb = Cbd; + here->BSIM3v32qinv = qinoi; + } + + /* New Charge-Thickness capMod (CTM) begins */ + else if (model->BSIM3v32capMod == 3) + { V3 = here->BSIM3v32vfbzb - Vgs_eff + VbseffCV - DELTA_3; + T0 = V3 * V3; + T2 = 4.0 * DELTA_3 * here->BSIM3v32vfbzb; + if (here->BSIM3v32vfbzb <= 0.0) + { T0 = T0 - T2; + T2 = -DELTA_3; + } + else + { T0 = T0 + T2; + T2 = DELTA_3; + } + T0 = sqrt(T0); + T2 = T2 / T0; + /* if (here->BSIM3v32vfbzb <= 0.0) + { T0 = sqrt(V3 * V3 - 4.0 * DELTA_3 * here->BSIM3v32vfbzb); + T2 = -DELTA_3 / T0; + } + else + { T0 = sqrt(V3 * V3 + 4.0 * DELTA_3 * here->BSIM3v32vfbzb); + T2 = DELTA_3 / T0; + } */ + + T1 = 0.5 * (1.0 + V3 / T0); + Vfbeff = here->BSIM3v32vfbzb - 0.5 * (V3 + T0); + dVfbeff_dVg = T1 * dVgs_eff_dVg; + dVfbeff_dVb = -T1 * dVbseffCV_dVb; + + Cox = model->BSIM3v32cox; + Tox = 1.0e8 * model->BSIM3v32tox; + T0 = (Vgs_eff - VbseffCV - here->BSIM3v32vfbzb) / Tox; + dT0_dVg = dVgs_eff_dVg / Tox; + dT0_dVb = -dVbseffCV_dVb / Tox; + + tmp = T0 * pParam->BSIM3v32acde; + dTcen_dVg = dTcen_dVb = 0.0; /* F.B: moved from else clauses to default val */ + if ((-EXP_THRESHOLD < tmp) & (tmp < EXP_THRESHOLD)) + { Tcen = pParam->BSIM3v32ldeb * exp(tmp); + dTcen_dVg = pParam->BSIM3v32acde * Tcen; + dTcen_dVb = dTcen_dVg * dT0_dVb; + dTcen_dVg *= dT0_dVg; + } + else if (tmp <= -EXP_THRESHOLD) 
+ { Tcen = pParam->BSIM3v32ldeb * MIN_EXP; + } + else + { Tcen = pParam->BSIM3v32ldeb * MAX_EXP; + } + + LINK = 1.0e-3 * model->BSIM3v32tox; + V3 = pParam->BSIM3v32ldeb - Tcen - LINK; + V4 = sqrt(V3 * V3 + 4.0 * LINK * pParam->BSIM3v32ldeb); + Tcen = pParam->BSIM3v32ldeb - 0.5 * (V3 + V4); + T1 = 0.5 * (1.0 + V3 / V4); + dTcen_dVg *= T1; + dTcen_dVb *= T1; + + Ccen = EPSSI / Tcen; + T2 = Cox / (Cox + Ccen); + Coxeff = T2 * Ccen; + T3 = -Ccen / Tcen; + dCoxeff_dVg = T2 * T2 * T3; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = CoxWL * Coxeff / Cox; + + Qac0 = CoxWLcen * (Vfbeff - here->BSIM3v32vfbzb); + QovCox = Qac0 / Coxeff; + dQac0_dVg = CoxWLcen * dVfbeff_dVg + + QovCox * dCoxeff_dVg; + dQac0_dVb = CoxWLcen * dVfbeff_dVb + + QovCox * dCoxeff_dVb; + + T0 = 0.5 * pParam->BSIM3v32k1ox; + T3 = Vgs_eff - Vfbeff - VbseffCV - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { T1 = 0.0; + T2 = 0.0; + } + else if (T3 < 0.0) + { T1 = T0 + T3 / pParam->BSIM3v32k1ox; + T2 = CoxWLcen; + } + else + { T1 = sqrt(T0 * T0 + T3); + T2 = CoxWLcen * T0 / T1; + } + + Qsub0 = CoxWLcen * pParam->BSIM3v32k1ox * (T1 - T0); + QovCox = Qsub0 / Coxeff; + dQsub0_dVg = T2 * (dVgs_eff_dVg - dVfbeff_dVg - dVgsteff_dVg) + + QovCox * dCoxeff_dVg; + dQsub0_dVd = -T2 * dVgsteff_dVd; + dQsub0_dVb = -T2 * (dVfbeff_dVb + dVbseffCV_dVb + dVgsteff_dVb) + + QovCox * dCoxeff_dVb; + + /* Gate-bias dependent delta Phis begins */ + if (pParam->BSIM3v32k1ox <= 0.0) + { Denomi = 0.25 * pParam->BSIM3v32moin * Vtm; + T0 = 0.5 * pParam->BSIM3v32sqrtPhi; + } + else + { Denomi = pParam->BSIM3v32moin * Vtm + * pParam->BSIM3v32k1ox * pParam->BSIM3v32k1ox; + T0 = pParam->BSIM3v32k1ox * pParam->BSIM3v32sqrtPhi; + } + T1 = 2.0 * T0 + Vgsteff; + + DeltaPhi = Vtm * log(1.0 + T1 * Vgsteff / Denomi); + dDeltaPhi_dVg = 2.0 * Vtm * (T1 -T0) / (Denomi + T1 * Vgsteff); + dDeltaPhi_dVd = dDeltaPhi_dVg * dVgsteff_dVd; /* F.B. Set but not used ? 
*/ + dDeltaPhi_dVb = dDeltaPhi_dVg * dVgsteff_dVb; /* F.B. Set but not used ? */ + /* End of delta Phis */ + + T3 = 4.0 * (Vth - here->BSIM3v32vfbzb - pParam->BSIM3v32phi); + Tox += Tox; + if (T3 >= 0.0) + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + T0 = (Vgsteff + T3) / Tox; + dT0_dVd = (dVgsteff_dVd + 4.0 * dVth_dVd) / Tox; + dT0_dVb = (dVgsteff_dVb + 4.0 * dVth_dVb) / Tox; + break; + case BSIM3v32V32: + default: + T0 = (Vgsteff + T3) / Tox; + } + } + else + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + T0 = (Vgsteff + 1.0e-20) / Tox; + dT0_dVd = dVgsteff_dVd / Tox; + dT0_dVb = dVgsteff_dVb / Tox; + break; + case BSIM3v32V32: + default: + T0 = (Vgsteff + 1.0e-20) / Tox; + } + } + /* tmp = exp(0.7 * log(T0)); */ + tmp = pow0p7(T0,0.7); /* F.B. use pow with special implementation */ + T1 = 1.0 + tmp; + T2 = 0.7 * tmp / (T0 * Tox); + Tcen = 1.9e-9 / T1; + dTcen_dVg = -1.9e-9 * T2 / T1 /T1; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + dTcen_dVd = Tox * dTcen_dVg; + dTcen_dVb = dTcen_dVd * dT0_dVb; + dTcen_dVd *= dT0_dVd; + break; + case BSIM3v32V32: + default: + dTcen_dVd = dTcen_dVg * (4.0 * dVth_dVd + dVgsteff_dVd); + dTcen_dVb = dTcen_dVg * (4.0 * dVth_dVb + dVgsteff_dVb); + } + dTcen_dVg *= dVgsteff_dVg; + + Ccen = EPSSI / Tcen; + T0 = Cox / (Cox + Ccen); + Coxeff = T0 * Ccen; + T1 = -Ccen / Tcen; + dCoxeff_dVg = T0 * T0 * T1; + dCoxeff_dVd = dCoxeff_dVg * dTcen_dVd; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = CoxWL * Coxeff / Cox; + + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = (Vgsteff - DeltaPhi) / AbulkCV; + V4 = VdsatCV - Vds - DELTA_4; + T0 = sqrt(V4 * 
V4 + 4.0 * DELTA_4 * VdsatCV); + VdseffCV = VdsatCV - 0.5 * (V4 + T0); + T1 = 0.5 * (1.0 + V4 / T0); + T2 = DELTA_4 / T0; + T3 = (1.0 - T1 - T2) / AbulkCV; + T4 = T3 * ( 1.0 - dDeltaPhi_dVg); + dVdseffCV_dVg = T4; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = -T3 * VdsatCV * dAbulkCV_dVb; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + case BSIM3v32V322: + /* Added to eliminate non-zero VdseffCV at Vds=0.0 */ + if (Vds == 0.0) + { + VdseffCV = 0.0; + dVdseffCV_dVg = 0.0; + dVdseffCV_dVb = 0.0; + } + break; + case BSIM3v32V32: + default: + /* Do nothing */ + break; + } + + T0 = AbulkCV * VdseffCV; + T1 = Vgsteff - DeltaPhi; + T2 = 12.0 * (T1 - 0.5 * T0 + 1.0e-20); + T3 = T0 / T2; + T4 = 1.0 - 12.0 * T3 * T3; + T5 = AbulkCV * (6.0 * T0 * (4.0 * T1 - T0) / (T2 * T2) - 0.5); + T6 = T5 * VdseffCV / AbulkCV; + + qgate = qinoi = CoxWLcen * (T1 - T0 * (0.5 - T3)); + QovCox = qgate / Coxeff; + Cgg1 = CoxWLcen * (T4 * (1.0 - dDeltaPhi_dVg) + + T5 * dVdseffCV_dVg); + Cgd1 = CoxWLcen * T5 * dVdseffCV_dVd + Cgg1 + * dVgsteff_dVd + QovCox * dCoxeff_dVd; + Cgb1 = CoxWLcen * (T5 * dVdseffCV_dVb + T6 * dAbulkCV_dVb) + + Cgg1 * dVgsteff_dVb + QovCox * dCoxeff_dVb; + Cgg1 = Cgg1 * dVgsteff_dVg + QovCox * dCoxeff_dVg; + + + T7 = 1.0 - AbulkCV; + T8 = T2 * T2; + T9 = 12.0 * T7 * T0 * T0 / (T8 * AbulkCV); + T10 = T9 * (1.0 - dDeltaPhi_dVg); + T11 = -T7 * T5 / AbulkCV; + T12 = -(T9 * T1 / AbulkCV + VdseffCV * (0.5 - T0 / T2)); + + qbulk = CoxWLcen * T7 * (0.5 * VdseffCV - T0 * VdseffCV / T2); + QovCox = qbulk / Coxeff; + Cbg1 = CoxWLcen * (T10 + T11 * dVdseffCV_dVg); + Cbd1 = CoxWLcen * T11 * dVdseffCV_dVd + Cbg1 + * dVgsteff_dVd + QovCox * dCoxeff_dVd; + Cbb1 = CoxWLcen * (T11 * dVdseffCV_dVb + T12 * dAbulkCV_dVb) + + Cbg1 * dVgsteff_dVb + QovCox * dCoxeff_dVb; + Cbg1 = Cbg1 * dVgsteff_dVg + QovCox * dCoxeff_dVg; + + if (model->BSIM3v32xpart > 0.5) + { /* 0/100 partition */ + qsrc = -CoxWLcen * (T1 / 2.0 + T0 / 4.0 + - 
0.5 * T0 * T0 / T2); + QovCox = qsrc / Coxeff; + T2 += T2; + T3 = T2 * T2; + T7 = -(0.25 - 12.0 * T0 * (4.0 * T1 - T0) / T3); + T4 = -(0.5 + 24.0 * T0 * T0 / T3) * (1.0 - dDeltaPhi_dVg); + T5 = T7 * AbulkCV; + T6 = T7 * VdseffCV; + + Csg = CoxWLcen * (T4 + T5 * dVdseffCV_dVg); + Csd = CoxWLcen * T5 * dVdseffCV_dVd + Csg * dVgsteff_dVd + + QovCox * dCoxeff_dVd; + Csb = CoxWLcen * (T5 * dVdseffCV_dVb + T6 * dAbulkCV_dVb) + + Csg * dVgsteff_dVb + QovCox * dCoxeff_dVb; + Csg = Csg * dVgsteff_dVg + QovCox * dCoxeff_dVg; + } + else if (model->BSIM3v32xpart < 0.5) + { /* 40/60 partition */ + T2 = T2 / 12.0; + T3 = 0.5 * CoxWLcen / (T2 * T2); + T4 = T1 * (2.0 * T0 * T0 / 3.0 + T1 * (T1 - 4.0 + * T0 / 3.0)) - 2.0 * T0 * T0 * T0 / 15.0; + qsrc = -T3 * T4; + QovCox = qsrc / Coxeff; + T8 = 4.0 / 3.0 * T1 * (T1 - T0) + 0.4 * T0 * T0; + T5 = -2.0 * qsrc / T2 - T3 * (T1 * (3.0 * T1 - 8.0 + * T0 / 3.0) + 2.0 * T0 * T0 / 3.0); + T6 = AbulkCV * (qsrc / T2 + T3 * T8); + T7 = T6 * VdseffCV / AbulkCV; + + Csg = T5 * (1.0 - dDeltaPhi_dVg) + T6 * dVdseffCV_dVg; + Csd = Csg * dVgsteff_dVd + T6 * dVdseffCV_dVd + + QovCox * dCoxeff_dVd; + Csb = Csg * dVgsteff_dVb + T6 * dVdseffCV_dVb + + T7 * dAbulkCV_dVb + QovCox * dCoxeff_dVb; + Csg = Csg * dVgsteff_dVg + QovCox * dCoxeff_dVg; + } + else + { /* 50/50 partition */ + qsrc = -0.5 * qgate; + Csg = -0.5 * Cgg1; + Csd = -0.5 * Cgd1; + Csb = -0.5 * Cgb1; + } + + qgate += Qac0 + Qsub0 - qbulk; + qbulk -= (Qac0 + Qsub0); + qdrn = -(qgate + qbulk + qsrc); + + Cbg = Cbg1 - dQac0_dVg - dQsub0_dVg; + Cbd = Cbd1 - dQsub0_dVd; + Cbb = Cbb1 - dQac0_dVb - dQsub0_dVb; + + Cgg = Cgg1 - Cbg; + Cgd = Cgd1 - Cbd; + Cgb = Cgb1 - Cbb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + + here->BSIM3v32cggb = Cgg; + here->BSIM3v32cgsb = -(Cgg + Cgd + Cgb); + here->BSIM3v32cgdb = Cgd; + here->BSIM3v32cdgb = -(Cgg + Cbg + Csg); + here->BSIM3v32cdsb = (Cgg + Cgd + Cgb + Cbg + Cbd + Cbb + + Csg + Csd + Csb); + here->BSIM3v32cddb = -(Cgd + Cbd + Csd); 
+ here->BSIM3v32cbgb = Cbg; + here->BSIM3v32cbsb = -(Cbg + Cbd + Cbb); + here->BSIM3v32cbdb = Cbd; + here->BSIM3v32qinv = -qinoi; + } /* End of CTM */ + } + +finished: + /* Returning Values to Calling Routine */ + /* + * COMPUTE EQUIVALENT DRAIN CURRENT SOURCE + */ + + here->BSIM3v32qgate = qgate; + here->BSIM3v32qbulk = qbulk; + here->BSIM3v32qdrn = qdrn; + here->BSIM3v32cd = cdrain; + + if (ChargeComputationNeeded) + { /* charge storage elements + * bulk-drain and bulk-source depletion capacitances + * czbd : zero bias drain junction capacitance + * czbs : zero bias source junction capacitance + * czbdsw: zero bias drain junction sidewall capacitance + along field oxide + * czbssw: zero bias source junction sidewall capacitance + along field oxide + * czbdswg: zero bias drain junction sidewall capacitance + along gate side + * czbsswg: zero bias source junction sidewall capacitance + along gate side + */ + + double nstate_qbs = *(ckt->CKTstate0 + here->BSIM3v32qbs); /* F.B. */ + double nstate_qbd = *(ckt->CKTstate0 + here->BSIM3v32qbd); /* F.B. 
*/ + + if (model->BSIM3v32acmMod == 0) + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + czbd = model->BSIM3v32unitAreaTempJctCap * here->BSIM3v32drainArea; /*bug fix */ + czbs = model->BSIM3v32unitAreaTempJctCap * here->BSIM3v32sourceArea; + break; + case BSIM3v32V322: + case BSIM3v32V32: + default: + czbd = model->BSIM3v32unitAreaJctCap * here->BSIM3v32drainArea; + czbs = model->BSIM3v32unitAreaJctCap * here->BSIM3v32sourceArea; + } + + if (here->BSIM3v32drainPerimeter < pParam->BSIM3v32weff) + { + czbdsw = 0.0; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + czbdswg = model->BSIM3v32unitLengthGateSidewallTempJctCap + * here->BSIM3v32drainPerimeter; + break; + case BSIM3v32V322: + case BSIM3v32V32: + default: + czbdswg = model->BSIM3v32unitLengthGateSidewallJctCap + * here->BSIM3v32drainPerimeter; + } + } + else + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + czbdsw = model->BSIM3v32unitLengthSidewallTempJctCap + * (here->BSIM3v32drainPerimeter - pParam->BSIM3v32weff); + czbdswg = model->BSIM3v32unitLengthGateSidewallTempJctCap + * pParam->BSIM3v32weff; + break; + case BSIM3v32V322: + case BSIM3v32V32: + default: + czbdsw = model->BSIM3v32unitLengthSidewallJctCap + * (here->BSIM3v32drainPerimeter - pParam->BSIM3v32weff); + czbdswg = model->BSIM3v32unitLengthGateSidewallJctCap + * pParam->BSIM3v32weff; + } + } + if (here->BSIM3v32sourcePerimeter < pParam->BSIM3v32weff) + { + czbssw = 0.0; + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + czbsswg = model->BSIM3v32unitLengthGateSidewallTempJctCap + * here->BSIM3v32sourcePerimeter; + break; + case BSIM3v32V322: + case BSIM3v32V32: + default: + czbsswg = model->BSIM3v32unitLengthGateSidewallJctCap + * 
here->BSIM3v32sourcePerimeter; + } + } + else + { + /* Added revision dependent code */ + switch (model->BSIM3v32intVersion) { + case BSIM3v32V324: + case BSIM3v32V323: + czbssw = model->BSIM3v32unitLengthSidewallTempJctCap + * (here->BSIM3v32sourcePerimeter - pParam->BSIM3v32weff); + czbsswg = model->BSIM3v32unitLengthGateSidewallTempJctCap + * pParam->BSIM3v32weff; + break; + case BSIM3v32V322: + case BSIM3v32V32: + default: + czbssw = model->BSIM3v32unitLengthSidewallJctCap + * (here->BSIM3v32sourcePerimeter - pParam->BSIM3v32weff); + czbsswg = model->BSIM3v32unitLengthGateSidewallJctCap + * pParam->BSIM3v32weff; + } + } + + } else { + /* Added revision dependent code */ + /* F.B. made inline helper function easier to vectorize */ + error = BSIM3v32_ACM_junctionCapacitances( + model, + here, + &czbd, + &czbdsw, + &czbdswg, + &czbs, + &czbssw, + &czbsswg + ); + if (SIMDANY(error)) + return(error); + } + + MJ = model->BSIM3v32bulkJctBotGradingCoeff; + MJSW = model->BSIM3v32bulkJctSideGradingCoeff; + MJSWG = model->BSIM3v32bulkJctGateSideGradingCoeff; + + /* Source Bulk Junction */ + if (vbs == 0.0) + { nstate_qbs = 0.0; + here->BSIM3v32capbs = czbs + czbssw + czbsswg; + } + else if (vbs < 0.0) + { if (czbs > 0.0) + { arg = 1.0 - vbs / model->BSIM3v32PhiB; + if (MJ == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJ(arg,-MJ); /* F.B. */ + /* sarg = exp(-MJ * log(arg)); */ + nstate_qbs = model->BSIM3v32PhiB * czbs + * (1.0 - arg * sarg) / (1.0 - MJ); + here->BSIM3v32capbs = czbs * sarg; + } + else + { nstate_qbs = 0.0; + here->BSIM3v32capbs = 0.0; + } + if (czbssw > 0.0) + { arg = 1.0 - vbs / model->BSIM3v32PhiBSW; + if (MJSW == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJSW(arg,-MJSW); /* F.B. 
*/ + /* sarg = exp(-MJSW * log(arg)); */ + nstate_qbs += model->BSIM3v32PhiBSW * czbssw + * (1.0 - arg * sarg) / (1.0 - MJSW); + here->BSIM3v32capbs += czbssw * sarg; + } + if (czbsswg > 0.0) + { arg = 1.0 - vbs / model->BSIM3v32PhiBSWG; + if (MJSWG == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJSWG(arg, -MJSWG); /* F.B: */ + /* sarg = exp(-MJSWG * log(arg)); */ + nstate_qbs += model->BSIM3v32PhiBSWG * czbsswg + * (1.0 - arg * sarg) / (1.0 - MJSWG); + here->BSIM3v32capbs += czbsswg * sarg; + } + + } + else + { T0 = czbs + czbssw + czbsswg; + T1 = vbs * (czbs * MJ / model->BSIM3v32PhiB + czbssw * MJSW + / model->BSIM3v32PhiBSW + czbsswg * MJSWG / model->BSIM3v32PhiBSWG); + nstate_qbs = vbs * (T0 + 0.5 * T1); + here->BSIM3v32capbs = T0 + T1; + } + *(ckt->CKTstate0 + here->BSIM3v32qbs) = nstate_qbs; /* F.B. moved memory write outside ifs */ + + /* Drain Bulk Junction */ + if (vbd == 0.0) + { nstate_qbd = 0.0; + here->BSIM3v32capbd = czbd + czbdsw + czbdswg; + } + else if (vbd < 0.0) + { if (czbd > 0.0) + { arg = 1.0 - vbd / model->BSIM3v32PhiB; + if (MJ == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJ(arg, -MJ); /* F.B: */ + /* sarg = exp(-MJ * log(arg)); */ + nstate_qbd = model->BSIM3v32PhiB * czbd + * (1.0 - arg * sarg) / (1.0 - MJ); + here->BSIM3v32capbd = czbd * sarg; + } + else + { nstate_qbd = 0.0; + here->BSIM3v32capbd = 0.0; + } + if (czbdsw > 0.0) + { arg = 1.0 - vbd / model->BSIM3v32PhiBSW; + if (MJSW == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJSW(arg,-MJSW); + /* sarg = exp(-MJSW * log(arg)); */ + nstate_qbd += model->BSIM3v32PhiBSW * czbdsw + * (1.0 - arg * sarg) / (1.0 - MJSW); + here->BSIM3v32capbd += czbdsw * sarg; + } + if (czbdswg > 0.0) + { arg = 1.0 - vbd / model->BSIM3v32PhiBSWG; + if (MJSWG == 0.5) + sarg = 1.0 / sqrt(arg); + else + sarg = powMJSWG(arg,-MJSWG); + /* sarg = exp(-MJSWG * log(arg)); */ + nstate_qbd += model->BSIM3v32PhiBSWG * czbdswg + * (1.0 - arg * sarg) / (1.0 - MJSWG); + here->BSIM3v32capbd += czbdswg * 
sarg; + } + } + else + { T0 = czbd + czbdsw + czbdswg; + T1 = vbd * (czbd * MJ / model->BSIM3v32PhiB + czbdsw * MJSW + / model->BSIM3v32PhiBSW + czbdswg * MJSWG / model->BSIM3v32PhiBSWG); + nstate_qbd = vbd * (T0 + 0.5 * T1); + here->BSIM3v32capbd = T0 + T1; + } + *(ckt->CKTstate0 + here->BSIM3v32qbd) = nstate_qbd; /* F.B. moved memory write outside ifs */ + } + + + /* + * check convergence + */ + if ((here->BSIM3v32off == 0) || (!(ckt->CKTmode & MODEINITFIX))) + { +#ifndef NEWCONV + double tol; + double Idtot, Ibtot; +#endif /* NEWCONV */ + int nonconcount; + nonconcount = Check; +#ifndef NEWCONV + Idtot = here->BSIM3v32cd - here->BSIM3v32cbd; + if (BSIM3v32mode) /* F.B. >= 0 removed */ + Idtot += here->BSIM3v32cbd; /*F.B: rewrite */ + + tol = ckt->CKTreltol * MAX(fabs(cdhat), fabs(Idtot)) + + ckt->CKTabstol; + + /* F.B. rewrite for simd */ + nonconcount = nonconcount | (fabs(cdhat - Idtot) >= tol); + Ibtot = here->BSIM3v32cbs + here->BSIM3v32cbd - here->BSIM3v32csub; + tol = ckt->CKTreltol * MAX(fabs(cbhat), fabs(Ibtot)) + + ckt->CKTabstol; + nonconcount = nonconcount | (fabs(cbhat - Ibtot) > tol); +#endif /* NEWCONV */ + #ifdef USE_OMP + nonconcount = nonconcount & 1; + here->BSIM3v32noncon = nonconcount; + #else + ckt->CKTnoncon += SIMDCOUNT(nonconcount); + #endif + } + #ifdef USE_OMP + else + here->BSIM3v32noncon=0; + #endif + + *(ckt->CKTstate0 + here->BSIM3v32vbs) = vbs; + *(ckt->CKTstate0 + here->BSIM3v32vbd) = vbd; + *(ckt->CKTstate0 + here->BSIM3v32vgs) = vgs; + *(ckt->CKTstate0 + here->BSIM3v32vds) = vds; + *(ckt->CKTstate0 + here->BSIM3v32qdef) = qdef; + + /* bulk and channel charge plus overlaps */ + + if (!ChargeComputationNeeded) + goto line850; + +#ifndef NOBYPASS +line755: +#endif + /* NQS begins */ + if (here->BSIM3v32nqsMod) + { qcheq = -(qbulk + qgate); + + here->BSIM3v32cqgb = -(here->BSIM3v32cggb + here->BSIM3v32cbgb); + here->BSIM3v32cqdb = -(here->BSIM3v32cgdb + here->BSIM3v32cbdb); + here->BSIM3v32cqsb = -(here->BSIM3v32cgsb + 
here->BSIM3v32cbsb); + here->BSIM3v32cqbb = -(here->BSIM3v32cqgb + here->BSIM3v32cqdb + + here->BSIM3v32cqsb); + + gtau_drift = fabs(here->BSIM3v32tconst * qcheq) * ScalingFactor; + T0 = pParam->BSIM3v32leffCV * pParam->BSIM3v32leffCV; + gtau_diff = 16.0 * here->BSIM3v32u0temp * model->BSIM3v32vtm / T0 + * ScalingFactor; + here->BSIM3v32gtau = gtau_drift + gtau_diff; + } + + if (model->BSIM3v32capMod == 0) + { + /* code merge -JX */ + cgdo = pParam->BSIM3v32cgdo; + qgdo = pParam->BSIM3v32cgdo * vgd; + cgso = pParam->BSIM3v32cgso; + qgso = pParam->BSIM3v32cgso * vgs; + } + else if (model->BSIM3v32capMod == 1) + { if (vgd < 0.0) + { T1 = sqrt(1.0 - 4.0 * vgd / pParam->BSIM3v32ckappa); + cgdo = pParam->BSIM3v32cgdo + pParam->BSIM3v32weffCV + * pParam->BSIM3v32cgdl / T1; + qgdo = pParam->BSIM3v32cgdo * vgd - pParam->BSIM3v32weffCV * 0.5 + * pParam->BSIM3v32cgdl * pParam->BSIM3v32ckappa * (T1 - 1.0); + } + else + { cgdo = pParam->BSIM3v32cgdo + pParam->BSIM3v32weffCV + * pParam->BSIM3v32cgdl; + qgdo = (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl + + pParam->BSIM3v32cgdo) * vgd; + } + + if (vgs < 0.0) + { T1 = sqrt(1.0 - 4.0 * vgs / pParam->BSIM3v32ckappa); + cgso = pParam->BSIM3v32cgso + pParam->BSIM3v32weffCV + * pParam->BSIM3v32cgsl / T1; + qgso = pParam->BSIM3v32cgso * vgs - pParam->BSIM3v32weffCV * 0.5 + * pParam->BSIM3v32cgsl * pParam->BSIM3v32ckappa * (T1 - 1.0); + } + else + { cgso = pParam->BSIM3v32cgso + pParam->BSIM3v32weffCV + * pParam->BSIM3v32cgsl; + qgso = (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl + + pParam->BSIM3v32cgso) * vgs; + } + } + else + { T0 = vgd + DELTA_1; + T1 = sqrt(T0 * T0 + 4.0 * DELTA_1); + T2 = 0.5 * (T0 - T1); + + T3 = pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl; + T4 = sqrt(1.0 - 4.0 * T2 / pParam->BSIM3v32ckappa); + cgdo = pParam->BSIM3v32cgdo + T3 - T3 * (1.0 - 1.0 / T4) + * (0.5 - 0.5 * T0 / T1); + qgdo = (pParam->BSIM3v32cgdo + T3) * vgd - T3 * (T2 + + 0.5 * pParam->BSIM3v32ckappa * (T4 - 1.0)); + + T0 = vgs + DELTA_1; + T1 
= sqrt(T0 * T0 + 4.0 * DELTA_1); + T2 = 0.5 * (T0 - T1); + T3 = pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl; + T4 = sqrt(1.0 - 4.0 * T2 / pParam->BSIM3v32ckappa); + cgso = pParam->BSIM3v32cgso + T3 - T3 * (1.0 - 1.0 / T4) + * (0.5 - 0.5 * T0 / T1); + qgso = (pParam->BSIM3v32cgso + T3) * vgs - T3 * (T2 + + 0.5 * pParam->BSIM3v32ckappa * (T4 - 1.0)); + } + + here->BSIM3v32cgdo = cgdo; + here->BSIM3v32cgso = cgso; + + ag0 = ckt->CKTag[0]; + /* F.B. moved from if/else arms to default val */ + ddxpart_dVd = ddxpart_dVg = ddxpart_dVb = ddxpart_dVs = 0.0; + dsxpart_dVd = dsxpart_dVg = dsxpart_dVb = dsxpart_dVs = 0.0; + ggtg = ggtd = ggtb = ggts = 0.0; + if (BSIM3v32mode) /* F.B. > 0 removed */ + { if (here->BSIM3v32nqsMod == 0) + { gcggb = (here->BSIM3v32cggb + cgdo + cgso + + pParam->BSIM3v32cgbo ) * ag0; + gcgdb = (here->BSIM3v32cgdb - cgdo) * ag0; + gcgsb = (here->BSIM3v32cgsb - cgso) * ag0; + + gcdgb = (here->BSIM3v32cdgb - cgdo) * ag0; + gcddb = (here->BSIM3v32cddb + here->BSIM3v32capbd + cgdo) * ag0; + gcdsb = here->BSIM3v32cdsb * ag0; + + gcsgb = -(here->BSIM3v32cggb + here->BSIM3v32cbgb + + here->BSIM3v32cdgb + cgso) * ag0; + gcsdb = -(here->BSIM3v32cgdb + here->BSIM3v32cbdb + + here->BSIM3v32cddb) * ag0; + gcssb = (here->BSIM3v32capbs + cgso - (here->BSIM3v32cgsb + + here->BSIM3v32cbsb + here->BSIM3v32cdsb)) * ag0; + + gcbgb = (here->BSIM3v32cbgb - pParam->BSIM3v32cgbo) * ag0; + gcbdb = (here->BSIM3v32cbdb - here->BSIM3v32capbd) * ag0; + gcbsb = (here->BSIM3v32cbsb - here->BSIM3v32capbs) * ag0; + + qgd = qgdo; + qgs = qgso; + qgb = pParam->BSIM3v32cgbo * vgb; + qgate += qgd + qgs + qgb; + qbulk -= qgb; + qdrn -= qgd; + qsrc = -(qgate + qbulk + qdrn); + + /* ggtg = ggtd = ggtb = ggts = 0.0; */ + sxpart = 0.6; + dxpart = 0.4; + /* ddxpart_dVd = ddxpart_dVg = ddxpart_dVb = ddxpart_dVs = 0.0; + dsxpart_dVd = dsxpart_dVg = dsxpart_dVb = dsxpart_dVs = 0.0; */ + } + else + { if (qcheq > 0.0) + T0 = here->BSIM3v32tconst * qdef * ScalingFactor; + else + T0 = 
-here->BSIM3v32tconst * qdef * ScalingFactor; + ggtg = T0 * here->BSIM3v32cqgb; + here->BSIM3v32gtg = ggtg; + ggtd = T0 * here->BSIM3v32cqdb; + here->BSIM3v32gtd = ggtd; + ggts = T0 * here->BSIM3v32cqsb; + here->BSIM3v32gts = ggts; + ggtb = T0 * here->BSIM3v32cqbb; + here->BSIM3v32gtb = ggtb; + gqdef = ScalingFactor * ag0; + + gcqgb = here->BSIM3v32cqgb * ag0; + gcqdb = here->BSIM3v32cqdb * ag0; + gcqsb = here->BSIM3v32cqsb * ag0; + gcqbb = here->BSIM3v32cqbb * ag0; + + gcggb = (cgdo + cgso + pParam->BSIM3v32cgbo ) * ag0; + gcgdb = -cgdo * ag0; + gcgsb = -cgso * ag0; + + gcdgb = -cgdo * ag0; + gcddb = (here->BSIM3v32capbd + cgdo) * ag0; + gcdsb = 0.0; + + gcsgb = -cgso * ag0; + gcsdb = 0.0; + gcssb = (here->BSIM3v32capbs + cgso) * ag0; + + gcbgb = -pParam->BSIM3v32cgbo * ag0; + gcbdb = -here->BSIM3v32capbd * ag0; + gcbsb = -here->BSIM3v32capbs * ag0; + + /* CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; F.B. */ + /* ddxpart_dVd = ddxpart_dVg = ddxpart_dVb + = ddxpart_dVs = 0.0; */ + if (fabs(qcheq) <= 1.0e-5 * CoxWL) + { if (model->BSIM3v32xpart < 0.5) + { dxpart = 0.4; + } + else if (model->BSIM3v32xpart > 0.5) + { dxpart = 0.0; + } + else + { dxpart = 0.5; + } + } + else + { dxpart = qdrn / qcheq; + Cdd = here->BSIM3v32cddb; + Csd = -(here->BSIM3v32cgdb + here->BSIM3v32cddb + + here->BSIM3v32cbdb); + ddxpart_dVd = (Cdd - dxpart * (Cdd + Csd)) / qcheq; + Cdg = here->BSIM3v32cdgb; + Csg = -(here->BSIM3v32cggb + here->BSIM3v32cdgb + + here->BSIM3v32cbgb); + ddxpart_dVg = (Cdg - dxpart * (Cdg + Csg)) / qcheq; + + Cds = here->BSIM3v32cdsb; + Css = -(here->BSIM3v32cgsb + here->BSIM3v32cdsb + + here->BSIM3v32cbsb); + ddxpart_dVs = (Cds - dxpart * (Cds + Css)) / qcheq; + + ddxpart_dVb = -(ddxpart_dVd + ddxpart_dVg + ddxpart_dVs); + } + sxpart = 1.0 - dxpart; + dsxpart_dVd = -ddxpart_dVd; + dsxpart_dVg = -ddxpart_dVg; + dsxpart_dVs = -ddxpart_dVs; + dsxpart_dVb = -(dsxpart_dVd + dsxpart_dVg + dsxpart_dVs); + + qgd = qgdo; + qgs = qgso; + 
qgb = pParam->BSIM3v32cgbo * vgb; + qgate = qgd + qgs + qgb; + qbulk = -qgb; + qdrn = -qgd; + qsrc = -(qgate + qbulk + qdrn); + } + } + else + { if (here->BSIM3v32nqsMod == 0) + { gcggb = (here->BSIM3v32cggb + cgdo + cgso + + pParam->BSIM3v32cgbo ) * ag0; + gcgdb = (here->BSIM3v32cgsb - cgdo) * ag0; + gcgsb = (here->BSIM3v32cgdb - cgso) * ag0; + + gcdgb = -(here->BSIM3v32cggb + here->BSIM3v32cbgb + + here->BSIM3v32cdgb + cgdo) * ag0; + gcddb = (here->BSIM3v32capbd + cgdo - (here->BSIM3v32cgsb + + here->BSIM3v32cbsb + here->BSIM3v32cdsb)) * ag0; + gcdsb = -(here->BSIM3v32cgdb + here->BSIM3v32cbdb + + here->BSIM3v32cddb) * ag0; + + gcsgb = (here->BSIM3v32cdgb - cgso) * ag0; + gcsdb = here->BSIM3v32cdsb * ag0; + gcssb = (here->BSIM3v32cddb + here->BSIM3v32capbs + cgso) * ag0; + + gcbgb = (here->BSIM3v32cbgb - pParam->BSIM3v32cgbo) * ag0; + gcbdb = (here->BSIM3v32cbsb - here->BSIM3v32capbd) * ag0; + gcbsb = (here->BSIM3v32cbdb - here->BSIM3v32capbs) * ag0; + + qgd = qgdo; + qgs = qgso; + qgb = pParam->BSIM3v32cgbo * vgb; + qgate += qgd + qgs + qgb; + qbulk -= qgb; + qsrc = qdrn - qgs; + qdrn = -(qgate + qbulk + qsrc); + + /* ggtg = ggtd = ggtb = ggts = 0.0; */ + sxpart = 0.4; + dxpart = 0.6; + /* ddxpart_dVd = ddxpart_dVg = ddxpart_dVb = ddxpart_dVs = 0.0; + dsxpart_dVd = dsxpart_dVg = dsxpart_dVb = dsxpart_dVs = 0.0; */ + } + else + { if (qcheq > 0.0) + T0 = here->BSIM3v32tconst * qdef * ScalingFactor; + else + T0 = -here->BSIM3v32tconst * qdef * ScalingFactor; + ggtg = T0 * here->BSIM3v32cqgb; + here->BSIM3v32gtg = ggtg; + ggts = T0 * here->BSIM3v32cqdb; + here->BSIM3v32gtd = ggts; + ggtd = T0 * here->BSIM3v32cqsb; + here->BSIM3v32gts = ggtd; + ggtb = T0 * here->BSIM3v32cqbb; + here->BSIM3v32gtb = ggtb; + gqdef = ScalingFactor * ag0; + + gcqgb = here->BSIM3v32cqgb * ag0; + gcqdb = here->BSIM3v32cqsb * ag0; + gcqsb = here->BSIM3v32cqdb * ag0; + gcqbb = here->BSIM3v32cqbb * ag0; + + gcggb = (cgdo + cgso + pParam->BSIM3v32cgbo) * ag0; + gcgdb = -cgdo * ag0; + gcgsb = 
-cgso * ag0; + + gcdgb = -cgdo * ag0; + gcddb = (here->BSIM3v32capbd + cgdo) * ag0; + gcdsb = 0.0; + + gcsgb = -cgso * ag0; + gcsdb = 0.0; + gcssb = (here->BSIM3v32capbs + cgso) * ag0; + + gcbgb = -pParam->BSIM3v32cgbo * ag0; + gcbdb = -here->BSIM3v32capbd * ag0; + gcbsb = -here->BSIM3v32capbs * ag0; + + /* CoxWL = model->BSIM3v32cox * pParam->BSIM3v32weffCV + * pParam->BSIM3v32leffCV; F.B. */ + if (fabs(qcheq) <= 1.0e-5 * CoxWL) + { if (model->BSIM3v32xpart < 0.5) + { sxpart = 0.4; + } + else if (model->BSIM3v32xpart > 0.5) + { sxpart = 0.0; + } + else + { sxpart = 0.5; + } + /* dsxpart_dVd = dsxpart_dVg = dsxpart_dVb + = dsxpart_dVs = 0.0; */ + } + else + { sxpart = qdrn / qcheq; + Css = here->BSIM3v32cddb; + Cds = -(here->BSIM3v32cgdb + here->BSIM3v32cddb + + here->BSIM3v32cbdb); + dsxpart_dVs = (Css - sxpart * (Css + Cds)) / qcheq; + Csg = here->BSIM3v32cdgb; + Cdg = -(here->BSIM3v32cggb + here->BSIM3v32cdgb + + here->BSIM3v32cbgb); + dsxpart_dVg = (Csg - sxpart * (Csg + Cdg)) / qcheq; + + Csd = here->BSIM3v32cdsb; + Cdd = -(here->BSIM3v32cgsb + here->BSIM3v32cdsb + + here->BSIM3v32cbsb); + dsxpart_dVd = (Csd - sxpart * (Csd + Cdd)) / qcheq; + + dsxpart_dVb = -(dsxpart_dVd + dsxpart_dVg + dsxpart_dVs); + } + dxpart = 1.0 - sxpart; + ddxpart_dVd = -dsxpart_dVd; + ddxpart_dVg = -dsxpart_dVg; + ddxpart_dVs = -dsxpart_dVs; + ddxpart_dVb = -(ddxpart_dVd + ddxpart_dVg + ddxpart_dVs); + + qgd = qgdo; + qgs = qgso; + qgb = pParam->BSIM3v32cgbo * vgb; + qgate = qgd + qgs + qgb; + qbulk = -qgb; + qsrc = -qgs; + qdrn = -(qgate + qbulk + qsrc); + } + } +/* F.B. 
do not reach here durint testing */ + cqdef = cqcheq = 0.0; +#ifndef SIMDIFYCPP /* FB: no bypass in SIMD */ + if (ByPass) goto line860; +#endif + *(ckt->CKTstate0 + here->BSIM3v32qg) = qgate; + *(ckt->CKTstate0 + here->BSIM3v32qd) = qdrn + - *(ckt->CKTstate0 + here->BSIM3v32qbd); + *(ckt->CKTstate0 + here->BSIM3v32qb) = qbulk + + *(ckt->CKTstate0 + here->BSIM3v32qbd) + + *(ckt->CKTstate0 + here->BSIM3v32qbs); + + if (here->BSIM3v32nqsMod) + { *(ckt->CKTstate0 + here->BSIM3v32qcdump) = qdef * ScalingFactor; + *(ckt->CKTstate0 + here->BSIM3v32qcheq) = qcheq; + } + + /* store small signal parameters */ + if (ckt->CKTmode & MODEINITSMSIG) + { goto line1000; + } + if (!ChargeComputationNeeded) + goto line850; + + if (ckt->CKTmode & MODEINITTRAN) + { *(ckt->CKTstate1 + here->BSIM3v32qb) = + *(ckt->CKTstate0 + here->BSIM3v32qb); + *(ckt->CKTstate1 + here->BSIM3v32qg) = + *(ckt->CKTstate0 + here->BSIM3v32qg); + *(ckt->CKTstate1 + here->BSIM3v32qd) = + *(ckt->CKTstate0 + here->BSIM3v32qd); + if (here->BSIM3v32nqsMod) + { *(ckt->CKTstate1 + here->BSIM3v32qcheq) = + *(ckt->CKTstate0 + here->BSIM3v32qcheq); + *(ckt->CKTstate1 + here->BSIM3v32qcdump) = + *(ckt->CKTstate0 + here->BSIM3v32qcdump); + } + } + + error = NIintegrate(ckt, &geq, &ceq, 0.0, here->BSIM3v32qb); + if (SIMDANY(error)) + return(error); + error = NIintegrate(ckt, &geq, &ceq, 0.0, here->BSIM3v32qg); + if (SIMDANY(error)) + return(error); + error = NIintegrate(ckt, &geq, &ceq, 0.0, here->BSIM3v32qd); + if (SIMDANY(error)) + return(error); + if (here->BSIM3v32nqsMod) + { error = NIintegrate(ckt, &geq, &ceq, 0.0, here->BSIM3v32qcdump); + if (SIMDANY(error)) + return(error); + error = NIintegrate(ckt, &geq, &ceq, 0.0, here->BSIM3v32qcheq); + if (SIMDANY(error)) + return(error); + } + goto line860; + +line850: + /* initialize to zero charge conductance and current */ + ceqqg = ceqqb = ceqqd = 0.0; + cqcheq = cqdef = 0.0; + + gcdgb = gcddb = gcdsb = 0.0; + gcsgb = gcsdb = gcssb = 0.0; + gcggb = gcgdb = gcgsb = 0.0; 
+ gcbgb = gcbdb = gcbsb = 0.0; + + gqdef = gcqgb = gcqdb = gcqsb = gcqbb = 0.0; + ggtg = ggtd = ggtb = ggts = 0.0; + /* F.B: moved dxpart assignment outside the following expression */ + /* dxpart = (BSIM3v32mode > 0) ? 0.4 : 0.6; */ + dxpart = 0.6; + + if(BSIM3v32mode) + dxpart = 0.4; + sxpart = (1.0 - dxpart); + ddxpart_dVd = ddxpart_dVg = ddxpart_dVb = ddxpart_dVs = 0.0; + dsxpart_dVd = dsxpart_dVg = dsxpart_dVb = dsxpart_dVs = 0.0; + + if (here->BSIM3v32nqsMod) + here->BSIM3v32gtau = 16.0 * here->BSIM3v32u0temp * model->BSIM3v32vtm + / pParam->BSIM3v32leffCV / pParam->BSIM3v32leffCV + * ScalingFactor; + else + here->BSIM3v32gtau = 0.0; + + goto line900; + +line860: + /* evaluate equivalent charge current */ + + cqgate = *(ckt->CKTstate0 + here->BSIM3v32cqg); + cqbulk = *(ckt->CKTstate0 + here->BSIM3v32cqb); + cqdrn = *(ckt->CKTstate0 + here->BSIM3v32cqd); + + ceqqg = cqgate - gcggb * vgb + gcgdb * vbd + gcgsb * vbs; + ceqqb = cqbulk - gcbgb * vgb + gcbdb * vbd + gcbsb * vbs; + ceqqd = cqdrn - gcdgb * vgb + gcddb * vbd + gcdsb * vbs; + + if (here->BSIM3v32nqsMod) + { T0 = ggtg * vgb - ggtd * vbd - ggts * vbs; + ceqqg += T0; + T1 = qdef * here->BSIM3v32gtau; + ceqqd -= dxpart * T0 + T1 * (ddxpart_dVg * vgb - ddxpart_dVd + * vbd - ddxpart_dVs * vbs); + cqdef = *(ckt->CKTstate0 + here->BSIM3v32cqcdump) - gqdef * qdef; + cqcheq = *(ckt->CKTstate0 + here->BSIM3v32cqcheq) + - (gcqgb * vgb - gcqdb * vbd - gcqsb * vbs) + T0; + } + + if (ckt->CKTmode & MODEINITTRAN) + { *(ckt->CKTstate1 + here->BSIM3v32cqb) = + *(ckt->CKTstate0 + here->BSIM3v32cqb); + *(ckt->CKTstate1 + here->BSIM3v32cqg) = + *(ckt->CKTstate0 + here->BSIM3v32cqg); + *(ckt->CKTstate1 + here->BSIM3v32cqd) = + *(ckt->CKTstate0 + here->BSIM3v32cqd); + + if (here->BSIM3v32nqsMod) + { *(ckt->CKTstate1 + here->BSIM3v32cqcheq) = + *(ckt->CKTstate0 + here->BSIM3v32cqcheq); + *(ckt->CKTstate1 + here->BSIM3v32cqcdump) = + *(ckt->CKTstate0 + here->BSIM3v32cqcdump); + } + } + + /* + * load current vector + */ 
+line900: ; + + if (BSIM3v32mode) /* F.B. >= 0 removed */ + { Gm = here->BSIM3v32gm; + Gmbs = here->BSIM3v32gmbs; + FwdSum = Gm + Gmbs; + RevSum = 0.0; + cdreq = model->BSIM3v32type * (cdrain - here->BSIM3v32gds * vds + - Gm * vgs - Gmbs * vbs); + + ceqbd = -model->BSIM3v32type * (here->BSIM3v32csub + - here->BSIM3v32gbds * vds - here->BSIM3v32gbgs * vgs + - here->BSIM3v32gbbs * vbs); + ceqbs = 0.0; + + gbbdp = -here->BSIM3v32gbds; + gbbsp = (here->BSIM3v32gbds + here->BSIM3v32gbgs + here->BSIM3v32gbbs); + + gbdpg = here->BSIM3v32gbgs; + gbdpdp = here->BSIM3v32gbds; + gbdpb = here->BSIM3v32gbbs; + gbdpsp = -(gbdpg + gbdpdp + gbdpb); + + gbspg = 0.0; + gbspdp = 0.0; + gbspb = 0.0; + gbspsp = 0.0; + } + else + { Gm = -here->BSIM3v32gm; + Gmbs = -here->BSIM3v32gmbs; + FwdSum = 0.0; + RevSum = -(Gm + Gmbs); + cdreq = -model->BSIM3v32type * (cdrain + here->BSIM3v32gds * vds + + Gm * vgd + Gmbs * vbd); + + ceqbs = -model->BSIM3v32type * (here->BSIM3v32csub + + here->BSIM3v32gbds * vds - here->BSIM3v32gbgs * vgd + - here->BSIM3v32gbbs * vbd); + ceqbd = 0.0; + + gbbsp = -here->BSIM3v32gbds; + gbbdp = (here->BSIM3v32gbds + here->BSIM3v32gbgs + here->BSIM3v32gbbs); + + gbdpg = 0.0; + gbdpsp = 0.0; + gbdpb = 0.0; + gbdpdp = 0.0; + + gbspg = here->BSIM3v32gbgs; + gbspsp = here->BSIM3v32gbds; + gbspb = here->BSIM3v32gbbs; + gbspdp = -(gbspg + gbspsp + gbspb); + } + + if (model->BSIM3v32type > 0) + { ceqbs += (here->BSIM3v32cbs - here->BSIM3v32gbs * vbs); + ceqbd += (here->BSIM3v32cbd - here->BSIM3v32gbd * vbd); + /* + ceqqg = ceqqg; + ceqqb = ceqqb; + ceqqd = ceqqd; + cqdef = cqdef; + cqcheq = cqcheq; + */ + } + else + { ceqbs -= (here->BSIM3v32cbs - here->BSIM3v32gbs * vbs); + ceqbd -= (here->BSIM3v32cbd - here->BSIM3v32gbd * vbd); + ceqqg = -ceqqg; + ceqqb = -ceqqb; + ceqqd = -ceqqd; + cqdef = -cqdef; + cqcheq = -cqcheq; + } + + m = here->BSIM3v32m; + + +#ifdef USE_OMP + here->BSIM3v32rhsG = m * ceqqg; + here->BSIM3v32rhsB = m * (ceqbs + ceqbd + ceqqb); + here->BSIM3v32rhsD = 
m * (ceqbd - cdreq - ceqqd); + here->BSIM3v32rhsS = m * (cdreq + ceqbs + ceqqg + + ceqqb + ceqqd); + #ifndef OMP_EFFMEM + if (here->BSIM3v32nqsMod) + here->BSIM3v32rhsQ = m * (cqcheq - cqdef); + #else + if (here->BSIM3v32nqsMod) + *(ckt->CKTrhs + here->BSIM3v32qNode) += m * (cqcheq - cqdef); + #endif +#else + (*(ckt->CKTrhs + here->BSIM3v32gNode) -= m * ceqqg); + (*(ckt->CKTrhs + here->BSIM3v32bNode) -= m * (ceqbs + ceqbd + ceqqb)); + (*(ckt->CKTrhs + here->BSIM3v32dNodePrime) += m * (ceqbd - cdreq - ceqqd)); + (*(ckt->CKTrhs + here->BSIM3v32sNodePrime) += m * (cdreq + ceqbs + ceqqg + + ceqqb + ceqqd)); + if (here->BSIM3v32nqsMod) + *(ckt->CKTrhs + here->BSIM3v32qNode) += m * (cqcheq - cqdef); +#endif + + /* + * load y matrix + */ + + T1 = qdef * here->BSIM3v32gtau; + +#if defined(SIMDIFYCPP) && defined(SIMDDEBUG) + SEQCHECK4(0.0,0.0,0.0,0.0) /* safeguard when debug seq vs simd */ +#endif + +#ifdef USE_OMP + here->BSIM3v32DdPt = m * here->BSIM3v32drainConductance; + here->BSIM3v32SsPt = m * here->BSIM3v32sourceConductance; + here->BSIM3v32GgPt = m * (gcggb - ggtg); + here->BSIM3v32BbPt = m * (here->BSIM3v32gbd + here->BSIM3v32gbs + - gcbgb - gcbdb - gcbsb - here->BSIM3v32gbbs); + + here->BSIM3v32GbPt = m * (gcggb + gcgdb + gcgsb + ggtb); + here->BSIM3v32GdpPt = m * (gcgdb - ggtd); + here->BSIM3v32GspPt = m * (gcgsb - ggts); + here->BSIM3v32BgPt = m * (gcbgb - here->BSIM3v32gbgs); + here->BSIM3v32BdpPt = m * (gcbdb - here->BSIM3v32gbd + gbbdp); + here->BSIM3v32BspPt = m * (gcbsb - here->BSIM3v32gbs + gbbsp); + here->BSIM3v32DPgPt = m * (Gm + gcdgb + dxpart * ggtg + + T1 * ddxpart_dVg + gbdpg); + here->BSIM3v32DPbPt = m * (here->BSIM3v32gbd - Gmbs + gcdgb + gcddb + + gcdsb - dxpart * ggtb + - T1 * ddxpart_dVb - gbdpb); + here->BSIM3v32DPspPt = m * (here->BSIM3v32gds + FwdSum - gcdsb + - dxpart * ggts - T1 * ddxpart_dVs - gbdpsp); + here->BSIM3v32SPgPt = m * (gcsgb - Gm + sxpart * ggtg + + T1 * dsxpart_dVg + gbspg); + here->BSIM3v32SPbPt = m * (here->BSIM3v32gbs + 
Gmbs + gcsgb + gcsdb + + gcssb - sxpart * ggtb + - T1 * dsxpart_dVb - gbspb); + here->BSIM3v32SPdpPt = m * (here->BSIM3v32gds + RevSum - gcsdb + - sxpart * ggtd - T1 * dsxpart_dVd - gbspdp); + here->BSIM3v32DPdpPt = m * (here->BSIM3v32drainConductance + + here->BSIM3v32gds + here->BSIM3v32gbd + + RevSum + gcddb + dxpart * ggtd + + T1 * ddxpart_dVd + gbdpdp); + here->BSIM3v32SPspPt = m * (here->BSIM3v32sourceConductance + + here->BSIM3v32gds + here->BSIM3v32gbs + + FwdSum + gcssb + sxpart * ggts + + T1 * dsxpart_dVs + gbspsp); + here->BSIM3v32DdpPt = m * here->BSIM3v32drainConductance; + here->BSIM3v32SspPt = m * here->BSIM3v32sourceConductance; + here->BSIM3v32DPdPt = m * here->BSIM3v32drainConductance; + here->BSIM3v32SPsPt = m * here->BSIM3v32sourceConductance; + #ifndef OMP_EFFMEM + if (here->BSIM3v32nqsMod) + { + here->BSIM3v32QqPt = m * (gqdef + here->BSIM3v32gtau); + + here->BSIM3v32DPqPt = m * (dxpart * here->BSIM3v32gtau); + here->BSIM3v32SPqPt = m * (sxpart * here->BSIM3v32gtau); + here->BSIM3v32GqPt = m * here->BSIM3v32gtau; + + here->BSIM3v32QgPt = m * (ggtg - gcqgb); + here->BSIM3v32QdpPt = m * (ggtd - gcqdb); + here->BSIM3v32QspPt = m * (ggts - gcqsb); + here->BSIM3v32QbPt = m * (ggtb - gcqbb); + } + #else + if (here->BSIM3v32nqsMod) + { + *(here->BSIM3v32QqPtr) += m * (gqdef + here->BSIM3v32gtau); + + *(here->BSIM3v32DPqPtr) += m * (dxpart * here->BSIM3v32gtau); + *(here->BSIM3v32SPqPtr) += m * (sxpart * here->BSIM3v32gtau); + *(here->BSIM3v32GqPtr) -= m * here->BSIM3v32gtau; + + *(here->BSIM3v32QgPtr) += m * (ggtg - gcqgb); + *(here->BSIM3v32QdpPtr) += m * (ggtd - gcqdb); + *(here->BSIM3v32QspPtr) += m * (ggts - gcqsb); + *(here->BSIM3v32QbPtr) += m * (ggtb - gcqbb); + } + #endif +#endif /* ifdef USE_OMP */ + +#ifndef USE_OMP + (*(here->BSIM3v32DdPtr) += m * here->BSIM3v32drainConductance); + (*(here->BSIM3v32GgPtr) += m * (gcggb - ggtg)); + (*(here->BSIM3v32SsPtr) += m * here->BSIM3v32sourceConductance); + (*(here->BSIM3v32BbPtr) += m * 
(here->BSIM3v32gbd + here->BSIM3v32gbs + - gcbgb - gcbdb - gcbsb - here->BSIM3v32gbbs)); + (*(here->BSIM3v32DPdpPtr) += m * (here->BSIM3v32drainConductance + + here->BSIM3v32gds + here->BSIM3v32gbd + + RevSum + gcddb + dxpart * ggtd + + T1 * ddxpart_dVd + gbdpdp)); + (*(here->BSIM3v32SPspPtr) += m * (here->BSIM3v32sourceConductance + + here->BSIM3v32gds + here->BSIM3v32gbs + + FwdSum + gcssb + sxpart * ggts + + T1 * dsxpart_dVs + gbspsp)); + (*(here->BSIM3v32DdpPtr) -= m * here->BSIM3v32drainConductance); + (*(here->BSIM3v32GbPtr) -= m * (gcggb + gcgdb + gcgsb + ggtb)); + (*(here->BSIM3v32GdpPtr) += m * (gcgdb - ggtd)); + (*(here->BSIM3v32GspPtr) += m * (gcgsb - ggts)); + (*(here->BSIM3v32SspPtr) -= m * here->BSIM3v32sourceConductance); + (*(here->BSIM3v32BgPtr) += m * (gcbgb - here->BSIM3v32gbgs)); + (*(here->BSIM3v32BdpPtr) += m * (gcbdb - here->BSIM3v32gbd + gbbdp)); + (*(here->BSIM3v32BspPtr) += m * (gcbsb - here->BSIM3v32gbs + gbbsp)); + (*(here->BSIM3v32DPdPtr) -= m * here->BSIM3v32drainConductance); + (*(here->BSIM3v32DPgPtr) += m * (Gm + gcdgb + dxpart * ggtg + + T1 * ddxpart_dVg + gbdpg)); + (*(here->BSIM3v32DPbPtr) -= m * (here->BSIM3v32gbd - Gmbs + gcdgb + gcddb + + gcdsb - dxpart * ggtb + - T1 * ddxpart_dVb - gbdpb)); + (*(here->BSIM3v32DPspPtr) -= m * (here->BSIM3v32gds + FwdSum - gcdsb + - dxpart * ggts - T1 * ddxpart_dVs - gbdpsp)); + (*(here->BSIM3v32SPgPtr) += m * (gcsgb - Gm + sxpart * ggtg + + T1 * dsxpart_dVg + gbspg)); + (*(here->BSIM3v32SPsPtr) -= m * here->BSIM3v32sourceConductance); + (*(here->BSIM3v32SPbPtr) -= m * (here->BSIM3v32gbs + Gmbs + gcsgb + gcsdb + + gcssb - sxpart * ggtb + - T1 * dsxpart_dVb - gbspb)); + (*(here->BSIM3v32SPdpPtr) -= m * (here->BSIM3v32gds + RevSum - gcsdb + - sxpart * ggtd - T1 * dsxpart_dVd - gbspdp)); + + if (here->BSIM3v32nqsMod) + { + *(here->BSIM3v32QqPtr) += m * (gqdef + here->BSIM3v32gtau); + + *(here->BSIM3v32DPqPtr) += m * (dxpart * here->BSIM3v32gtau); + *(here->BSIM3v32SPqPtr) += m * (sxpart * 
here->BSIM3v32gtau); + *(here->BSIM3v32GqPtr) -= m * here->BSIM3v32gtau; + + *(here->BSIM3v32QgPtr) += m * (ggtg - gcqgb); + *(here->BSIM3v32QdpPtr) += m * (ggtd - gcqdb); + *(here->BSIM3v32QspPtr) += m * (ggts - gcqsb); + *(here->BSIM3v32QbPtr) += m * (ggtb - gcqbb); + } +#endif + +line1000: ; + +return(OK); +} + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4.c b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4.c new file mode 100644 index 000000000..3ca136463 --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4.c @@ -0,0 +1,5689 @@ +/**** BSIM3v3.2.4, Released by Xuemei Xi 12/21/2001 ****/ + +/********** + * Copyright 2001 Regents of the University of California. All rights reserved. + * Original File: b3ld.c of BSIM3v3.2.4 + * Author: 1991 JianHui Huang and Min-Chie Jeng. + * Modified by Mansun Chan (1995). + * Author: 1997-1999 Weidong Liu. + * Author: 2001 Xuemei Xi + * Modified by Xuemei Xi, 10/05, 12/21, 2001. + * Modified by Paolo Nenzi 2002 and Dietmar Warning 2003 + * Modified by Florian Ballenegger 2020 for SIMD version generation + **********/ + + /********** + * Modified 2020 by Florian Ballenegger, Anamosic Ballenegger Design + * Distributed under the same license terms as the original code, + * see file "B3TERMS_OF_USE" + **********/ + +{ + Vec4d SourceSatCurrent; + Vec4d DrainSatCurrent; + double ag0; + Vec4d qgd; + Vec4d qgs; + Vec4d qgb; + Vec4d cbhat; + Vec4d VgstNVt; + Vec4d ExpVgst; + Vec4d cdrain; + Vec4d cdhat; + Vec4d cdreq; + Vec4d ceqbd; + Vec4d ceqbs; + Vec4d ceqqb; + Vec4d ceqqd; + Vec4d ceqqg; + double ceq; + double geq; + Vec4d czbd; + Vec4d czbdsw; + Vec4d czbdswg; + Vec4d czbs; + Vec4d czbssw; + Vec4d czbsswg; + Vec4d evbd; + Vec4d evbs; + Vec4d arg; + Vec4d sarg; + Vec4d Vfbeff; + Vec4d dVfbeff_dVg; + Vec4d dVfbeff_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dVfbeff_dVb; + Vec4d V3; + Vec4d V4; + Vec4d gcbdb; + Vec4d gcbgb; + Vec4d gcbsb; + Vec4d gcddb; + Vec4d gcdgb; + Vec4d gcdsb; + Vec4d gcgdb; + Vec4d 
gcggb; + Vec4d gcgsb; + Vec4d gcsdb; + Vec4d gcsgb; + Vec4d gcssb; + double MJ; + double MJSW; + double MJSWG; + Vec4d vbd; + Vec4d vbs; + Vec4d vds; + Vec4d vgb; + Vec4d vgd; + Vec4d vgs; + Vec4d qgate = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qbulk = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qdrn = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qsrc; + Vec4d qinoi; + Vec4d cqgate; + Vec4d cqbulk; + Vec4d cqdrn; + Vec4d Vds; + Vec4d Vgs; + Vec4d Vbs; + Vec4d Gmbs; + Vec4d FwdSum; + Vec4d RevSum; + Vec4d Vgs_eff; + Vec4d Vfb; + Vec4d dVfb_dVb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dVfb_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d Phis; + Vec4d dPhis_dVb; + Vec4d sqrtPhis; + Vec4d dsqrtPhis_dVb; + Vec4d Vth; + Vec4d dVth_dVb; + Vec4d dVth_dVd; + Vec4d Vgst; + Vec4d dVgst_dVg; + Vec4d dVgst_dVb; + Vec4d dVgs_eff_dVg; + double Nvtm; + double Vtm; + Vec4d n; + Vec4d dn_dVb; + Vec4d dn_dVd; + double voffcv; + Vec4d noff; + Vec4d dnoff_dVd; + Vec4d dnoff_dVb; + Vec4d ExpArg; + double V0; + Vec4d CoxWLcen; + Vec4d QovCox; + double LINK; + Vec4d DeltaPhi; + Vec4d dDeltaPhi_dVg; + Vec4d dDeltaPhi_dVd; + Vec4d dDeltaPhi_dVb; + double Cox; + double Tox; + Vec4d Tcen; + Vec4d dTcen_dVg; + Vec4d dTcen_dVd; + Vec4d dTcen_dVb; + Vec4d Ccen; + Vec4d Coxeff; + Vec4d dCoxeff_dVg; + Vec4d dCoxeff_dVd; + Vec4d dCoxeff_dVb; + Vec4d Denomi; + Vec4d dDenomi_dVg; + Vec4d dDenomi_dVd; + Vec4d dDenomi_dVb; + Vec4d ueff; + Vec4d dueff_dVg; + Vec4d dueff_dVd; + Vec4d dueff_dVb; + Vec4d Esat; + Vec4d Vdsat; + Vec4d EsatL; + Vec4d dEsatL_dVg; + Vec4d dEsatL_dVd; + Vec4d dEsatL_dVb; + Vec4d dVdsat_dVg; + Vec4d dVdsat_dVb; + Vec4d dVdsat_dVd; + Vec4d Vasat; + Vec4d dAlphaz_dVg; + Vec4d dAlphaz_dVb; + Vec4d dVasat_dVg; + Vec4d dVasat_dVb; + Vec4d dVasat_dVd; + Vec4d Va; + Vec4d dVa_dVd; + Vec4d dVa_dVg; + Vec4d dVa_dVb; + Vec4d Vbseff; + Vec4d dVbseff_dVb; + Vec4d VbseffCV; + Vec4d dVbseffCV_dVb; + Vec4d Arg1; + double One_Third_CoxWL; + double Two_Third_CoxWL; + Vec4d Alphaz; + double CoxWL; + Vec4d T0; + Vec4d 
dT0_dVg; + Vec4d dT0_dVd; + Vec4d dT0_dVb; + Vec4d T1; + Vec4d dT1_dVg; + Vec4d dT1_dVd; + Vec4d dT1_dVb; + Vec4d T2; + Vec4d dT2_dVg; + Vec4d dT2_dVd; + Vec4d dT2_dVb; + Vec4d T3; + Vec4d dT3_dVg; + Vec4d dT3_dVd; + Vec4d dT3_dVb; + Vec4d T4; + Vec4d T5; + Vec4d T6; + Vec4d T7; + Vec4d T8; + Vec4d T9; + Vec4d T10; + Vec4d T11; + Vec4d T12; + Vec4d tmp; + Vec4d Abulk; + Vec4d dAbulk_dVb; + Vec4d Abulk0; + Vec4d dAbulk0_dVb; + double tmpuni; + Vec4d VACLM; + Vec4d dVACLM_dVg; + Vec4d dVACLM_dVd; + Vec4d dVACLM_dVb; + Vec4d VADIBL; + Vec4d dVADIBL_dVg; + Vec4d dVADIBL_dVd; + Vec4d dVADIBL_dVb; + Vec4d Xdep; + Vec4d dXdep_dVb; + Vec4d lt1; + Vec4d dlt1_dVb; + Vec4d ltw; + Vec4d dltw_dVb; + Vec4d Delt_vth; + Vec4d dDelt_vth_dVb; + Vec4d Theta0; + Vec4d dTheta0_dVb; + double TempRatio; + Vec4d tmp1; + Vec4d tmp2; + Vec4d tmp3; + Vec4d tmp4; + Vec4d DIBL_Sft; + Vec4d dDIBL_Sft_dVd; + Vec4d Lambda; + Vec4d dLambda_dVg; + double a1; + double ScalingFactor; + Vec4d Vgsteff; + Vec4d dVgsteff_dVg; + Vec4d dVgsteff_dVd; + Vec4d dVgsteff_dVb; + Vec4d Vdseff; + Vec4d dVdseff_dVg; + Vec4d dVdseff_dVd; + Vec4d dVdseff_dVb; + Vec4d VdseffCV; + Vec4d dVdseffCV_dVg; + Vec4d dVdseffCV_dVd; + Vec4d dVdseffCV_dVb; + Vec4d diffVds; + Vec4d dAbulk_dVg; + Vec4d beta; + Vec4d dbeta_dVg; + Vec4d dbeta_dVd; + Vec4d dbeta_dVb; + Vec4d gche; + Vec4d dgche_dVg; + Vec4d dgche_dVd; + Vec4d dgche_dVb; + Vec4d fgche1; + Vec4d dfgche1_dVg; + Vec4d dfgche1_dVd; + Vec4d dfgche1_dVb; + Vec4d fgche2; + Vec4d dfgche2_dVg; + Vec4d dfgche2_dVd; + Vec4d dfgche2_dVb; + Vec4d Idl; + Vec4d dIdl_dVg; + Vec4d dIdl_dVd; + Vec4d dIdl_dVb; + Vec4d Idsa; + Vec4d dIdsa_dVg; + Vec4d dIdsa_dVd; + Vec4d dIdsa_dVb; + Vec4d Ids; + Vec4d Gm; + Vec4d Gds; + Vec4d Gmb; + Vec4d Isub; + Vec4d Gbd; + Vec4d Gbg; + Vec4d Gbb; + Vec4d VASCBE; + Vec4d dVASCBE_dVg; + Vec4d dVASCBE_dVd; + Vec4d dVASCBE_dVb; + Vec4d CoxWovL; + Vec4d Rds; + Vec4d dRds_dVg; + Vec4d dRds_dVb; + Vec4d WVCox; + Vec4d WVCoxRds; + Vec4d Vgst2Vtm; + Vec4d 
VdsatCV; + Vec4d dVdsatCV_dVg; + Vec4d dVdsatCV_dVb; + double Leff; + Vec4d Weff; + Vec4d dWeff_dVg; + Vec4d dWeff_dVb; + Vec4d AbulkCV; + Vec4d dAbulkCV_dVb; + Vec4d qgdo; + Vec4d qgso; + Vec4d cgdo; + Vec4d cgso; + Vec4d qcheq = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qdef; + Vec4d gqdef = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d cqdef; + Vec4d cqcheq; + Vec4d gtau_diff; + Vec4d gtau_drift; + Vec4d gcqdb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqsb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqgb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqbb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dxpart; + Vec4d sxpart; + Vec4d ggtg; + Vec4d ggtd; + Vec4d ggts; + Vec4d ggtb; + Vec4d ddxpart_dVd; + Vec4d ddxpart_dVg; + Vec4d ddxpart_dVb; + Vec4d ddxpart_dVs; + Vec4d dsxpart_dVd; + Vec4d dsxpart_dVg; + Vec4d dsxpart_dVb; + Vec4d dsxpart_dVs; + Vec4d gbspsp; + Vec4d gbbdp; + Vec4d gbbsp; + Vec4d gbspg; + Vec4d gbspb; + Vec4d gbspdp; + Vec4d gbdpdp; + Vec4d gbdpg; + Vec4d gbdpb; + Vec4d gbdpsp; + Vec4d Cgg; + Vec4d Cgd; + Vec4d Cgb; + Vec4d Cdg; + Vec4d Cdd; + Vec4d Cds; + Vec4d Csg; + Vec4d Csd; + Vec4d Css; + Vec4d Csb; + Vec4d Cbg; + Vec4d Cbd; + Vec4d Cbb; + Vec4d Cgg1; + Vec4d Cgb1; + Vec4d Cgd1; + Vec4d Cbg1; + Vec4d Cbb1; + Vec4d Cbd1; + Vec4d Qac0; + Vec4d Qsub0; + Vec4d dQac0_dVg; + Vec4d dQac0_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dQac0_dVb; + Vec4d dQsub0_dVg; + Vec4d dQsub0_dVd; + Vec4d dQsub0_dVb; + Vec4d m; + Vec4m BSIM3v32mode; + Vec4m Check; + int ChargeComputationNeeded; + int error; + ScalingFactor = 1.0e-9; + ChargeComputationNeeded = ((ckt->CKTmode & (((MODEDCTRANCURVE | MODEAC) | MODETRAN) | MODEINITSMSIG)) || ((ckt->CKTmode & MODETRANOP) && (ckt->CKTmode & MODEUIC))) ? 
(1) : (0); + vbs = SIMDLOADDATA(0, data); + vgs = SIMDLOADDATA(1, data); + vds = SIMDLOADDATA(2, data); + qdef = SIMDLOADDATA(3, data); + cdhat = SIMDLOADDATA(4, data); + cbhat = SIMDLOADDATA(5, data); + Check = SIMDLOADDATA(6, data) > 0.5; + SIMDIFYCMD(start); + vbd = vbs - vds; + vgd = vgs - vds; + vgb = vgs - vbs; + Nvtm = model->BSIM3v32vtm * model->BSIM3v32jctEmissionCoeff; + if (model->BSIM3v32acmMod == 0) + { + SourceSatCurrent = vec4_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec4m condmask0 = (((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}) <= 0.0) & (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) <= 0.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + ; + } + { + SourceSatCurrent = vec4_blend(SourceSatCurrent, (((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}) * model->BSIM3v32jctTempSatCurDensity) + (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + DrainSatCurrent = vec4_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec4m condmask0 = (((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}) <= 0.0) & (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) <= 0.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + ; + } + { + DrainSatCurrent = vec4_blend(DrainSatCurrent, (((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}) * 
model->BSIM3v32jctTempSatCurDensity) + (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + } + else + { + error = vec4_BSIM3v32_ACM_saturationCurrents(model, heres, &DrainSatCurrent, &SourceSatCurrent); + if (SIMDANY(error)) + return error; + + } + + if (1) + { + Vec4m condmask0 = SourceSatCurrent <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbs = ckt->CKTgmin; + + } + { + Vec4d val = ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbs = vec4_blend(evbs, vec4_exp(vbs / Nvtm), condmask_false0); + { + Vec4d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + else + { + if (1) + { 
+ Vec4m condmask1 = vbs < ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm}); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + evbs = vec4_blend(evbs, vec4_exp(vbs / Nvtm), condmask_true1); + { + Vec4d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, ((Vec4d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm}) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = ((((Vec4d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm}) - SourceSatCurrent) + (T0 * (vbs - ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm})))) + (ckt->CKTgmin * vbs); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + 
heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (SourceSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (model->BSIM3v32ijth + (ckt->CKTgmin * vbs)) + (T0 * (vbs - ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + + } + + } + } + + } + + } + } + + if (1) + { + Vec4m condmask0 = DrainSatCurrent <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbd = ckt->CKTgmin; + + } + { + Vec4d val = ((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbd = vec4_blend(evbd, vec4_exp(vbd / Nvtm), condmask_false0); + { + Vec4d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + 
heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + else + { + if (1) + { + Vec4m condmask1 = vbd < ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm}); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + evbd = vec4_blend(evbd, vec4_exp(vbd / Nvtm), condmask_true1); + { + Vec4d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, ((Vec4d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm}) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if 
(condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = ((((Vec4d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm}) - DrainSatCurrent) + (T0 * (vbd - ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm})))) + (ckt->CKTgmin * vbd); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (DrainSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (model->BSIM3v32ijth + (ckt->CKTgmin * vbd)) + (T0 * (vbd - ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + + } + + } + } + + } + + } + } + + BSIM3v32mode = vds >= 0.0; + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vds = vec4_blend(Vds, vds, condmask_true0); + Vgs = vec4_blend(Vgs, vgs, condmask_true0); + Vbs = vec4_blend(Vbs, vbs, condmask_true0); + } + { + Vds = vec4_blend(Vds, -vds, condmask_false0); + Vgs = vec4_blend(Vgs, vgd, condmask_false0); + Vbs = vec4_blend(Vbs, 
vbd, condmask_false0); + } + } + + { + Vec4m modesym; + modesym = (2 * (BSIM3v32mode & 0x1)) - 1; + { + heres[0]->BSIM3v32mode = modesym[0]; + heres[1]->BSIM3v32mode = modesym[1]; + heres[2]->BSIM3v32mode = modesym[2]; + heres[3]->BSIM3v32mode = modesym[3]; + } + } + T0 = (Vbs - pParam->BSIM3v32vbsc) - 0.001; + T1 = vec4_sqrt((T0 * T0) - (0.004 * pParam->BSIM3v32vbsc)); + Vbseff = pParam->BSIM3v32vbsc + (0.5 * (T0 + T1)); + dVbseff_dVb = 0.5 * (1.0 + (T0 / T1)); + if (1) + { + Vec4m condmask0 = Vbseff < Vbs; + Vec4m condmask_true0 = condmask0; + { + Vbseff = vec4_blend(Vbseff, Vbs, condmask_true0); + } + } + + if (1) + { + Vec4m condmask0 = Vbseff > 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, pParam->BSIM3v32phi / (pParam->BSIM3v32phi + Vbseff), condmask_true0); + Phis = vec4_blend(Phis, pParam->BSIM3v32phi * T0, condmask_true0); + dPhis_dVb = vec4_blend(dPhis_dVb, (-T0) * T0, condmask_true0); + sqrtPhis = vec4_blend(sqrtPhis, pParam->BSIM3v32phis3 / (pParam->BSIM3v32phi + (0.5 * Vbseff)), condmask_true0); + dsqrtPhis_dVb = vec4_blend(dsqrtPhis_dVb, (((-0.5) * sqrtPhis) * sqrtPhis) / pParam->BSIM3v32phis3, condmask_true0); + } + { + Phis = vec4_blend(Phis, pParam->BSIM3v32phi - Vbseff, condmask_false0); + dPhis_dVb = vec4_blend(dPhis_dVb, vec4_SIMDTOVECTOR(-1.0), condmask_false0); + sqrtPhis = vec4_blend(sqrtPhis, vec4_sqrt(Phis), condmask_false0); + dsqrtPhis_dVb = vec4_blend(dsqrtPhis_dVb, (-0.5) / sqrtPhis, condmask_false0); + } + } + + Xdep = (pParam->BSIM3v32Xdep0 * sqrtPhis) / pParam->BSIM3v32sqrtPhi; + dXdep_dVb = (pParam->BSIM3v32Xdep0 / pParam->BSIM3v32sqrtPhi) * dsqrtPhis_dVb; + Leff = pParam->BSIM3v32leff; + Vtm = model->BSIM3v32vtm; + T3 = vec4_sqrt(Xdep); + V0 = pParam->BSIM3v32vbi - pParam->BSIM3v32phi; + T0 = pParam->BSIM3v32dvt2 * Vbseff; + T2 = vec4_SIMDTOVECTOR(pParam->BSIM3v32dvt2); + if (1) + { + Vec4m condmask0 = T0 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m 
condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, 1.0 + T0, condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec4_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec4_blend(T2, (T2 * T4) * T4, condmask_false0); + } + } + + lt1 = (model->BSIM3v32factor1 * T3) * T1; + dlt1_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = pParam->BSIM3v32dvt2w * Vbseff; + if (1) + { + Vec4m condmask0 = T0 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, 1.0 + T0, condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(pParam->BSIM3v32dvt2w), condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec4_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec4_blend(T2, (pParam->BSIM3v32dvt2w * T4) * T4, condmask_false0); + } + } + + ltw = (model->BSIM3v32factor1 * T3) * T1; + dltw_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = (((-0.5) * pParam->BSIM3v32dvt1) * Leff) / lt1; + if (1) + { + Vec4m condmask0 = T0 > (-EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_exp(T0), condmask_true0); + Theta0 = vec4_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, (((-T0) / lt1) * T1) * dlt1_dVb, condmask_true0); + dTheta0_dVb = vec4_blend(dTheta0_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR(MIN_EXP), condmask_false0); + Theta0 = vec4_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dTheta0_dVb = vec4_blend(dTheta0_dVb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + Delt_vth = (pParam->BSIM3v32dvt0 * Theta0) * V0; + dDelt_vth_dVb = (pParam->BSIM3v32dvt0 * dTheta0_dVb) * V0; + T0 = ((((-0.5) * pParam->BSIM3v32dvt1w) * pParam->BSIM3v32weff) * Leff) / ltw; + if (1) + { + 
Vec4m condmask0 = T0 > (-EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_exp(T0), condmask_true0); + T2 = vec4_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, (((-T0) / ltw) * T1) * dltw_dVb, condmask_true0); + dT2_dVb = vec4_blend(dT2_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR(MIN_EXP), condmask_false0); + T2 = vec4_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dT2_dVb = vec4_blend(dT2_dVb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + T0 = pParam->BSIM3v32dvt0w * T2; + T2 = T0 * V0; + dT2_dVb = (pParam->BSIM3v32dvt0w * dT2_dVb) * V0; + TempRatio = (ckt->CKTtemp / model->BSIM3v32tnom) - 1.0; + T0 = vec4_SIMDTOVECTOR(sqrt(1.0 + (pParam->BSIM3v32nlx / Leff))); + T1 = ((pParam->BSIM3v32k1ox * (T0 - 1.0)) * pParam->BSIM3v32sqrtPhi) + (((pParam->BSIM3v32kt1 + (pParam->BSIM3v32kt1l / Leff)) + (pParam->BSIM3v32kt2 * Vbseff)) * TempRatio); + tmp2 = vec4_SIMDTOVECTOR((model->BSIM3v32tox * pParam->BSIM3v32phi) / (pParam->BSIM3v32weff + pParam->BSIM3v32w0)); + T3 = pParam->BSIM3v32eta0 + (pParam->BSIM3v32etab * Vbseff); + if (1) + { + Vec4m condmask0 = T3 < 1.0e-4; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (2.0e4 * T3)), condmask_true0); + T3 = vec4_blend(T3, (2.0e-4 - T3) * T9, condmask_true0); + T4 = vec4_blend(T4, T9 * T9, condmask_true0); + } + { + T4 = vec4_blend(T4, vec4_SIMDTOVECTOR(1.0), condmask_false0); + } + } + + dDIBL_Sft_dVd = T3 * pParam->BSIM3v32theta0vb0; + DIBL_Sft = dDIBL_Sft_dVd * Vds; + Vth = ((((((((model->BSIM3v32type * ((Vec4d ){heres[0]->BSIM3v32vth0, heres[1]->BSIM3v32vth0, heres[2]->BSIM3v32vth0, heres[3]->BSIM3v32vth0})) - (pParam->BSIM3v32k1 * pParam->BSIM3v32sqrtPhi)) + (pParam->BSIM3v32k1ox * sqrtPhis)) - (pParam->BSIM3v32k2ox * Vbseff)) - Delt_vth) - T2) + ((pParam->BSIM3v32k3 + 
(pParam->BSIM3v32k3b * Vbseff)) * tmp2)) + T1) - DIBL_Sft; + { + heres[0]->BSIM3v32von = Vth[0]; + heres[1]->BSIM3v32von = Vth[1]; + heres[2]->BSIM3v32von = Vth[2]; + heres[3]->BSIM3v32von = Vth[3]; + } + dVth_dVb = ((((((pParam->BSIM3v32k1ox * dsqrtPhis_dVb) - pParam->BSIM3v32k2ox) - dDelt_vth_dVb) - dT2_dVb) + (pParam->BSIM3v32k3b * tmp2)) - (((pParam->BSIM3v32etab * Vds) * pParam->BSIM3v32theta0vb0) * T4)) + (pParam->BSIM3v32kt2 * TempRatio); + dVth_dVd = -dDIBL_Sft_dVd; + tmp2 = (pParam->BSIM3v32nfactor * EPSSI) / Xdep; + tmp3 = (pParam->BSIM3v32cdsc + (pParam->BSIM3v32cdscb * Vbseff)) + (pParam->BSIM3v32cdscd * Vds); + tmp4 = ((tmp2 + (tmp3 * Theta0)) + pParam->BSIM3v32cit) / model->BSIM3v32cox; + if (1) + { + Vec4m condmask0 = tmp4 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + n = vec4_blend(n, 1.0 + tmp4, condmask_true0); + dn_dVb = vec4_blend(dn_dVb, (((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox, condmask_true0); + dn_dVd = vec4_blend(dn_dVd, (pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox, condmask_true0); + } + { + T0 = vec4_blend(T0, 1.0 / (3.0 + (8.0 * tmp4)), condmask_false0); + n = vec4_blend(n, (1.0 + (3.0 * tmp4)) * T0, condmask_false0); + T0 = vec4_blend(T0, T0 * T0, condmask_false0); + dn_dVb = vec4_blend(dn_dVb, ((((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox) * T0, condmask_false0); + dn_dVd = vec4_blend(dn_dVd, ((pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox) * T0, condmask_false0); + } + } + + T0 = ((Vec4d ){heres[0]->BSIM3v32vfb, heres[1]->BSIM3v32vfb, heres[2]->BSIM3v32vfb, heres[3]->BSIM3v32vfb}) + pParam->BSIM3v32phi; + Vgs_eff = Vgs; + dVgs_eff_dVg = vec4_SIMDTOVECTOR(1.0); + if ((pParam->BSIM3v32ngate > 1.e18) && (pParam->BSIM3v32ngate < 1.e25)) + if (1) + { + Vec4m condmask0 = Vgs > T0; + Vec4m condmask_true0 = condmask0; + { + T1 = vec4_blend(T1, 
vec4_SIMDTOVECTOR((((1.0e6 * Charge_q) * EPSSI) * pParam->BSIM3v32ngate) / (model->BSIM3v32cox * model->BSIM3v32cox)), condmask_true0); + T4 = vec4_blend(T4, vec4_sqrt(1.0 + ((2.0 * (Vgs - T0)) / T1)), condmask_true0); + T2 = vec4_blend(T2, T1 * (T4 - 1.0), condmask_true0); + T3 = vec4_blend(T3, ((0.5 * T2) * T2) / T1, condmask_true0); + T7 = vec4_blend(T7, (1.12 - T3) - 0.05, condmask_true0); + T6 = vec4_blend(T6, vec4_sqrt((T7 * T7) + 0.224), condmask_true0); + T5 = vec4_blend(T5, 1.12 - (0.5 * (T7 + T6)), condmask_true0); + Vgs_eff = vec4_blend(Vgs_eff, Vgs - T5, condmask_true0); + dVgs_eff_dVg = vec4_blend(dVgs_eff_dVg, 1.0 - ((0.5 - (0.5 / T4)) * (1.0 + (T7 / T6))), condmask_true0); + } + } + + + Vgst = Vgs_eff - Vth; + T10 = (2.0 * n) * Vtm; + VgstNVt = Vgst / T10; + ExpArg = ((2.0 * pParam->BSIM3v32voff) - Vgst) / T10; + T0 = VgstNVt; + if (1) + { + Vec4m condmask0 = ExpArg > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + T0 = vec4_blend(T0, (Vgst - pParam->BSIM3v32voff) / (n * Vtm), condmask_true0); + } + + ExpVgst = vec4_exp(T0); + if (1) + { + Vec4m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vgsteff = vec4_blend(Vgsteff, Vgst, condmask_true0); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec4m condmask1 = ExpArg > EXP_THRESHOLD; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec4_blend(Vgsteff, ((Vtm * pParam->BSIM3v32cdep0) / model->BSIM3v32cox) * ExpVgst, condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, Vgsteff / (n * Vtm), condmask_true1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, (-dVgsteff_dVg) * (dVth_dVd + ((T0 * Vtm) * dn_dVd)), condmask_true1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, 
(-dVgsteff_dVg) * (dVth_dVb + ((T0 * Vtm) * dn_dVb)), condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_true1); + } + { + T1 = vec4_blend(T1, T10 * vec4_log(1.0 + ExpVgst), condmask_false1); + dT1_dVg = vec4_blend(dT1_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dT1_dVb = vec4_blend(dT1_dVb, ((-dT1_dVg) * (dVth_dVb + ((Vgst / n) * dn_dVb))) + ((T1 / n) * dn_dVb), condmask_false1); + dT1_dVd = vec4_blend(dT1_dVd, ((-dT1_dVg) * (dVth_dVd + ((Vgst / n) * dn_dVd))) + ((T1 / n) * dn_dVd), condmask_false1); + dT2_dVg = vec4_blend(dT2_dVg, ((-model->BSIM3v32cox) / (Vtm * pParam->BSIM3v32cdep0)) * vec4_exp(ExpArg), condmask_false1); + T2 = vec4_blend(T2, 1.0 - (T10 * dT2_dVg), condmask_false1); + dT2_dVd = vec4_blend(dT2_dVd, ((-dT2_dVg) * (dVth_dVd - (((2.0 * Vtm) * ExpArg) * dn_dVd))) + (((T2 - 1.0) / n) * dn_dVd), condmask_false1); + dT2_dVb = vec4_blend(dT2_dVb, ((-dT2_dVg) * (dVth_dVb - (((2.0 * Vtm) * ExpArg) * dn_dVb))) + (((T2 - 1.0) / n) * dn_dVb), condmask_false1); + Vgsteff = vec4_blend(Vgsteff, T1 / T2, condmask_false1); + T3 = vec4_blend(T3, T2 * T2, condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, (((T2 * dT1_dVg) - (T1 * dT2_dVg)) / T3) * dVgs_eff_dVg, condmask_false1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, ((T2 * dT1_dVd) - (T1 * dT2_dVd)) / T3, condmask_false1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, ((T2 * dT1_dVb) - (T1 * dT2_dVb)) / T3, condmask_false1); + } + } + + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vgsteff = Vgsteff[0]; + heres[1]->BSIM3v32Vgsteff = Vgsteff[1]; + heres[2]->BSIM3v32Vgsteff = Vgsteff[2]; + heres[3]->BSIM3v32Vgsteff = Vgsteff[3]; + } + } + + T9 = sqrtPhis - pParam->BSIM3v32sqrtPhi; + Weff = pParam->BSIM3v32weff - (2.0 * ((pParam->BSIM3v32dwg * Vgsteff) + (pParam->BSIM3v32dwb * T9))); + dWeff_dVg = vec4_SIMDTOVECTOR((-2.0) * pParam->BSIM3v32dwg); + dWeff_dVb = ((-2.0) * pParam->BSIM3v32dwb) * dsqrtPhis_dVb; + if (1) + { + 
Vec4m condmask0 = Weff < 2.0e-8; + Vec4m condmask_true0 = condmask0; + { + T0 = vec4_blend(T0, 1.0 / (6.0e-8 - (2.0 * Weff)), condmask_true0); + Weff = vec4_blend(Weff, (2.0e-8 * (4.0e-8 - Weff)) * T0, condmask_true0); + T0 = vec4_blend(T0, T0 * (T0 * 4.0e-16), condmask_true0); + dWeff_dVg = vec4_blend(dWeff_dVg, dWeff_dVg * T0, condmask_true0); + dWeff_dVb = vec4_blend(dWeff_dVb, dWeff_dVb * T0, condmask_true0); + } + } + + T0 = (pParam->BSIM3v32prwg * Vgsteff) + (pParam->BSIM3v32prwb * T9); + if (1) + { + Vec4m condmask0 = T0 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Rds = vec4_blend(Rds, pParam->BSIM3v32rds0 * (1.0 + T0), condmask_true0); + dRds_dVg = vec4_blend(dRds_dVg, vec4_SIMDTOVECTOR(pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg), condmask_true0); + dRds_dVb = vec4_blend(dRds_dVb, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (17.0 + (20.0 * T0)), condmask_false0); + Rds = vec4_blend(Rds, (pParam->BSIM3v32rds0 * (0.8 + T0)) * T1, condmask_false0); + T1 = vec4_blend(T1, T1 * T1, condmask_false0); + dRds_dVg = vec4_blend(dRds_dVg, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg) * T1, condmask_false0); + dRds_dVb = vec4_blend(dRds_dVb, ((pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb) * T1, condmask_false0); + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32rds = Rds[0]; + heres[1]->BSIM3v32rds = Rds[1]; + heres[2]->BSIM3v32rds = Rds[2]; + heres[3]->BSIM3v32rds = Rds[3]; + } + } + + T1 = (0.5 * pParam->BSIM3v32k1ox) / sqrtPhis; + dT1_dVb = ((-T1) / sqrtPhis) * dsqrtPhis_dVb; + T9 = vec4_sqrt(pParam->BSIM3v32xj * Xdep); + tmp1 = Leff + (2.0 * T9); + T5 = Leff / tmp1; + tmp2 = pParam->BSIM3v32a0 * T5; + tmp3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weff + pParam->BSIM3v32b1); + tmp4 = pParam->BSIM3v32b0 / tmp3; + T2 = tmp2 + tmp4; + dT2_dVb = (((-T9) / tmp1) / Xdep) * dXdep_dVb; + T6 = T5 * T5; + T7 = T5 
* T6; + Abulk0 = 1.0 + (T1 * T2); + dAbulk0_dVb = ((T1 * tmp2) * dT2_dVb) + (T2 * dT1_dVb); + T8 = (pParam->BSIM3v32ags * pParam->BSIM3v32a0) * T7; + dAbulk_dVg = (-T1) * T8; + Abulk = Abulk0 + (dAbulk_dVg * Vgsteff); + dAbulk_dVb = dAbulk0_dVb - ((T8 * Vgsteff) * (dT1_dVb + ((3.0 * T1) * dT2_dVb))); + if (1) + { + Vec4m condmask0 = Abulk0 < 0.1; + Vec4m condmask_true0 = condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (20.0 * Abulk0)), condmask_true0); + Abulk0 = vec4_blend(Abulk0, (0.2 - Abulk0) * T9, condmask_true0); + dAbulk0_dVb = vec4_blend(dAbulk0_dVb, dAbulk0_dVb * (T9 * T9), condmask_true0); + } + } + + if (1) + { + Vec4m condmask0 = Abulk < 0.1; + Vec4m condmask_true0 = condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (20.0 * Abulk)), condmask_true0); + Abulk = vec4_blend(Abulk, (0.2 - Abulk) * T9, condmask_true0); + if (model->BSIM3v32intVersion > BSIM3v32V32) + { + T10 = vec4_blend(T10, T9 * T9, condmask_true0); + dAbulk_dVb = vec4_blend(dAbulk_dVb, dAbulk_dVb * T10, condmask_true0); + dAbulk_dVg = vec4_blend(dAbulk_dVg, dAbulk_dVg * T10, condmask_true0); + } + else + { + dAbulk_dVb = vec4_blend(dAbulk_dVb, dAbulk_dVb * (T9 * T9), condmask_true0); + } + + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Abulk = Abulk[0]; + heres[1]->BSIM3v32Abulk = Abulk[1]; + heres[2]->BSIM3v32Abulk = Abulk[2]; + heres[3]->BSIM3v32Abulk = Abulk[3]; + } + } + + T2 = pParam->BSIM3v32keta * Vbseff; + if (1) + { + Vec4m condmask0 = T2 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 / (1.0 + T2), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T0) * T0, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (0.8 + T2), condmask_false0); + T0 = vec4_blend(T0, (17.0 + (20.0 * T2)) * T1, condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T1) * T1, condmask_false0); + } + } + + dAbulk_dVg *= T0; + dAbulk_dVb = (dAbulk_dVb * 
T0) + (Abulk * dT0_dVb); + dAbulk0_dVb = (dAbulk0_dVb * T0) + (Abulk0 * dT0_dVb); + Abulk *= T0; + Abulk0 *= T0; + if (model->BSIM3v32mobMod == 1) + { + T0 = (Vgsteff + Vth) + Vth; + T2 = pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T5 = T3 * (T2 + (pParam->BSIM3v32ub * T3)); + dDenomi_dVg = (T2 + ((2.0 * pParam->BSIM3v32ub) * T3)) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) * dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T3); + } + else + if (model->BSIM3v32mobMod == 2) + { + T5 = (Vgsteff / model->BSIM3v32tox) * ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + ((pParam->BSIM3v32ub * Vgsteff) / model->BSIM3v32tox)); + dDenomi_dVg = ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + (((2.0 * pParam->BSIM3v32ub) * Vgsteff) / model->BSIM3v32tox)) / model->BSIM3v32tox; + dDenomi_dVd = vec4_SIMDTOVECTOR(0.0); + dDenomi_dVb = (Vgsteff * pParam->BSIM3v32uc) / model->BSIM3v32tox; + } + else + { + T0 = (Vgsteff + Vth) + Vth; + T2 = 1.0 + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T4 = T3 * (pParam->BSIM3v32ua + (pParam->BSIM3v32ub * T3)); + T5 = T4 * T2; + dDenomi_dVg = ((pParam->BSIM3v32ua + ((2.0 * pParam->BSIM3v32ub) * T3)) * T2) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) * dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T4); + } + + + if (1) + { + Vec4m condmask0 = T5 >= (-0.8); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Denomi = vec4_blend(Denomi, 1.0 + T5, condmask_true0); + } + { + T9 = vec4_blend(T9, 1.0 / (7.0 + (10.0 * T5)), condmask_false0); + Denomi = vec4_blend(Denomi, (0.6 + T5) * T9, condmask_false0); + T9 = vec4_blend(T9, T9 * T9, condmask_false0); + dDenomi_dVg = vec4_blend(dDenomi_dVg, dDenomi_dVg * T9, condmask_false0); + dDenomi_dVd = vec4_blend(dDenomi_dVd, dDenomi_dVd * T9, condmask_false0); + dDenomi_dVb = vec4_blend(dDenomi_dVb, dDenomi_dVb 
* T9, condmask_false0); + } + } + + { + Vec4d val = ueff = ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp}) / Denomi; + heres[0]->BSIM3v32ueff = val[0]; + heres[1]->BSIM3v32ueff = val[1]; + heres[2]->BSIM3v32ueff = val[2]; + heres[3]->BSIM3v32ueff = val[3]; + } + T9 = (-ueff) / Denomi; + dueff_dVg = T9 * dDenomi_dVg; + dueff_dVd = T9 * dDenomi_dVd; + dueff_dVb = T9 * dDenomi_dVb; + WVCox = (Weff * pParam->BSIM3v32vsattemp) * model->BSIM3v32cox; + WVCoxRds = WVCox * Rds; + Esat = (2.0 * pParam->BSIM3v32vsattemp) / ueff; + EsatL = Esat * Leff; + T0 = (-EsatL) / ueff; + dEsatL_dVg = T0 * dueff_dVg; + dEsatL_dVd = T0 * dueff_dVd; + dEsatL_dVb = T0 * dueff_dVb; + a1 = pParam->BSIM3v32a1; + if (a1 == 0.0) + { + Lambda = vec4_SIMDTOVECTOR(pParam->BSIM3v32a2); + dLambda_dVg = vec4_SIMDTOVECTOR(0.0); + } + else + if (a1 > 0.0) + { + T0 = vec4_SIMDTOVECTOR(1.0 - pParam->BSIM3v32a2); + T1 = (T0 - (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec4_sqrt((T1 * T1) + (0.0004 * T0)); + Lambda = (pParam->BSIM3v32a2 + T0) - (0.5 * (T1 + T2)); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + else + { + T1 = (pParam->BSIM3v32a2 + (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec4_sqrt((T1 * T1) + (0.0004 * pParam->BSIM3v32a2)); + Lambda = 0.5 * (T1 + T2); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + + + Vgst2Vtm = Vgsteff + (2.0 * Vtm); + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + Vec4d val = Abulk / Vgst2Vtm; + heres[0]->BSIM3v32AbovVgst2Vtm = val[0]; + heres[1]->BSIM3v32AbovVgst2Vtm = val[1]; + heres[2]->BSIM3v32AbovVgst2Vtm = val[2]; + heres[3]->BSIM3v32AbovVgst2Vtm = val[3]; + } + } + + if (1) + { + Vec4m condmask0 = Rds > 0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + tmp2 = vec4_blend(tmp2, (dRds_dVg / Rds) + (dWeff_dVg / Weff), condmask_true0); + tmp3 = vec4_blend(tmp3, (dRds_dVb / Rds) + (dWeff_dVb / Weff), 
condmask_true0); + } + { + tmp2 = vec4_blend(tmp2, dWeff_dVg / Weff, condmask_false0); + tmp3 = vec4_blend(tmp3, dWeff_dVb / Weff, condmask_false0); + } + } + + if (1) + { + Vec4m condmask0 = (Rds == 0.0) & (Lambda == 1.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 / ((Abulk * EsatL) + Vgst2Vtm), condmask_true0); + tmp1 = vec4_blend(tmp1, vec4_SIMDTOVECTOR(0.0), condmask_true0); + T1 = vec4_blend(T1, T0 * T0, condmask_true0); + T2 = vec4_blend(T2, Vgst2Vtm * T0, condmask_true0); + T3 = vec4_blend(T3, EsatL * Vgst2Vtm, condmask_true0); + Vdsat = vec4_blend(Vdsat, T3 * T0, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, (-(((Abulk * dEsatL_dVg) + (EsatL * dAbulk_dVg)) + 1.0)) * T1, condmask_true0); + dT0_dVd = vec4_blend(dT0_dVd, (-(Abulk * dEsatL_dVd)) * T1, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (-((Abulk * dEsatL_dVb) + (dAbulk_dVb * EsatL))) * T1, condmask_true0); + dVdsat_dVg = vec4_blend(dVdsat_dVg, ((T3 * dT0_dVg) + (T2 * dEsatL_dVg)) + (EsatL * T0), condmask_true0); + dVdsat_dVd = vec4_blend(dVdsat_dVd, (T3 * dT0_dVd) + (T2 * dEsatL_dVd), condmask_true0); + dVdsat_dVb = vec4_blend(dVdsat_dVb, (T3 * dT0_dVb) + (T2 * dEsatL_dVb), condmask_true0); + } + { + tmp1 = vec4_blend(tmp1, dLambda_dVg / (Lambda * Lambda), condmask_false0); + T9 = vec4_blend(T9, Abulk * WVCoxRds, condmask_false0); + T8 = vec4_blend(T8, Abulk * T9, condmask_false0); + T7 = vec4_blend(T7, Vgst2Vtm * T9, condmask_false0); + T6 = vec4_blend(T6, Vgst2Vtm * WVCoxRds, condmask_false0); + T0 = vec4_blend(T0, (2.0 * Abulk) * ((T9 - 1.0) + (1.0 / Lambda)), condmask_false0); + dT0_dVg = vec4_blend(dT0_dVg, 2.0 * (((T8 * tmp2) - (Abulk * tmp1)) + ((((2.0 * T9) + (1.0 / Lambda)) - 1.0) * dAbulk_dVg)), condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, 2.0 * ((T8 * (((2.0 / Abulk) * dAbulk_dVb) + tmp3)) + (((1.0 / Lambda) - 1.0) * dAbulk_dVb)), condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, vec4_SIMDTOVECTOR(0.0), 
condmask_false0); + T1 = vec4_blend(T1, ((Vgst2Vtm * ((2.0 / Lambda) - 1.0)) + (Abulk * EsatL)) + (3.0 * T7), condmask_false0); + dT1_dVg = vec4_blend(dT1_dVg, (((((2.0 / Lambda) - 1.0) - ((2.0 * Vgst2Vtm) * tmp1)) + (Abulk * dEsatL_dVg)) + (EsatL * dAbulk_dVg)) + (3.0 * ((T9 + (T7 * tmp2)) + (T6 * dAbulk_dVg))), condmask_false0); + dT1_dVb = vec4_blend(dT1_dVb, ((Abulk * dEsatL_dVb) + (EsatL * dAbulk_dVb)) + (3.0 * ((T6 * dAbulk_dVb) + (T7 * tmp3))), condmask_false0); + dT1_dVd = vec4_blend(dT1_dVd, Abulk * dEsatL_dVd, condmask_false0); + T2 = vec4_blend(T2, Vgst2Vtm * (EsatL + (2.0 * T6)), condmask_false0); + dT2_dVg = vec4_blend(dT2_dVg, (EsatL + (Vgst2Vtm * dEsatL_dVg)) + (T6 * (4.0 + ((2.0 * Vgst2Vtm) * tmp2))), condmask_false0); + dT2_dVb = vec4_blend(dT2_dVb, Vgst2Vtm * (dEsatL_dVb + ((2.0 * T6) * tmp3)), condmask_false0); + dT2_dVd = vec4_blend(dT2_dVd, Vgst2Vtm * dEsatL_dVd, condmask_false0); + T3 = vec4_blend(T3, vec4_sqrt((T1 * T1) - ((2.0 * T0) * T2)), condmask_false0); + Vdsat = vec4_blend(Vdsat, (T1 - T3) / T0, condmask_false0); + dT3_dVg = vec4_blend(dT3_dVg, ((T1 * dT1_dVg) - (2.0 * ((T0 * dT2_dVg) + (T2 * dT0_dVg)))) / T3, condmask_false0); + dT3_dVd = vec4_blend(dT3_dVd, ((T1 * dT1_dVd) - (2.0 * ((T0 * dT2_dVd) + (T2 * dT0_dVd)))) / T3, condmask_false0); + dT3_dVb = vec4_blend(dT3_dVb, ((T1 * dT1_dVb) - (2.0 * ((T0 * dT2_dVb) + (T2 * dT0_dVb)))) / T3, condmask_false0); + dVdsat_dVg = vec4_blend(dVdsat_dVg, ((dT1_dVg - ((((T1 * dT1_dVg) - (dT0_dVg * T2)) - (T0 * dT2_dVg)) / T3)) - (Vdsat * dT0_dVg)) / T0, condmask_false0); + dVdsat_dVb = vec4_blend(dVdsat_dVb, ((dT1_dVb - ((((T1 * dT1_dVb) - (dT0_dVb * T2)) - (T0 * dT2_dVb)) / T3)) - (Vdsat * dT0_dVb)) / T0, condmask_false0); + dVdsat_dVd = vec4_blend(dVdsat_dVd, (dT1_dVd - (((T1 * dT1_dVd) - (T0 * dT2_dVd)) / T3)) / T0, condmask_false0); + } + } + + { + heres[0]->BSIM3v32vdsat = Vdsat[0]; + heres[1]->BSIM3v32vdsat = Vdsat[1]; + heres[2]->BSIM3v32vdsat = Vdsat[2]; + heres[3]->BSIM3v32vdsat = 
Vdsat[3]; + } + T1 = (Vdsat - Vds) - pParam->BSIM3v32delta; + dT1_dVg = dVdsat_dVg; + dT1_dVd = dVdsat_dVd - 1.0; + dT1_dVb = dVdsat_dVb; + T2 = vec4_sqrt((T1 * T1) + ((4.0 * pParam->BSIM3v32delta) * Vdsat)); + T0 = T1 / T2; + T3 = (2.0 * pParam->BSIM3v32delta) / T2; + dT2_dVg = (T0 * dT1_dVg) + (T3 * dVdsat_dVg); + dT2_dVd = (T0 * dT1_dVd) + (T3 * dVdsat_dVd); + dT2_dVb = (T0 * dT1_dVb) + (T3 * dVdsat_dVb); + Vdseff = Vdsat - (0.5 * (T1 + T2)); + dVdseff_dVg = dVdsat_dVg - (0.5 * (dT1_dVg + dT2_dVg)); + dVdseff_dVd = dVdsat_dVd - (0.5 * (dT1_dVd + dT2_dVd)); + dVdseff_dVb = dVdsat_dVb - (0.5 * (dT1_dVb + dT2_dVb)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + Vdseff = vec4_blend(Vdseff, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVg = vec4_blend(dVdseff_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVb = vec4_blend(dVdseff_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + tmp4 = 1.0 - (((0.5 * Abulk) * Vdsat) / Vgst2Vtm); + T9 = WVCoxRds * Vgsteff; + T8 = T9 / Vgst2Vtm; + T0 = (EsatL + Vdsat) + ((2.0 * T9) * tmp4); + T7 = (2.0 * WVCoxRds) * tmp4; + dT0_dVg = ((dEsatL_dVg + dVdsat_dVg) + (T7 * (1.0 + (tmp2 * Vgsteff)))) - (T8 * (((Abulk * dVdsat_dVg) - ((Abulk * Vdsat) / Vgst2Vtm)) + (Vdsat * dAbulk_dVg))); + dT0_dVb = ((dEsatL_dVb + dVdsat_dVb) + ((T7 * tmp3) * Vgsteff)) - (T8 * ((dAbulk_dVb * Vdsat) + (Abulk * dVdsat_dVb))); + dT0_dVd = (dEsatL_dVd + dVdsat_dVd) - ((T8 * Abulk) * dVdsat_dVd); + T9 = WVCoxRds * Abulk; + T1 = ((2.0 / Lambda) - 1.0) + T9; + dT1_dVg = ((-2.0) * tmp1) + (WVCoxRds * ((Abulk * tmp2) + dAbulk_dVg)); + dT1_dVb = (dAbulk_dVb * WVCoxRds) + (T9 * tmp3); + Vasat = T0 / T1; + dVasat_dVg = (dT0_dVg - (Vasat * dT1_dVg)) / T1; + dVasat_dVb = (dT0_dVb - (Vasat * dT1_dVb)) / T1; + dVasat_dVd = dT0_dVd / T1; + if (1) 
+ { + Vec4m condmask0 = Vdseff > Vds; + Vec4m condmask_true0 = condmask0; + Vdseff = vec4_blend(Vdseff, Vds, condmask_true0); + } + + diffVds = Vds - Vdseff; + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vdseff = Vdseff[0]; + heres[1]->BSIM3v32Vdseff = Vdseff[1]; + heres[2]->BSIM3v32Vdseff = Vdseff[2]; + heres[3]->BSIM3v32Vdseff = Vdseff[3]; + } + } + + VACLM = vec4_SIMDTOVECTOR(MAX_EXP); + dVACLM_dVd = (dVACLM_dVg = (dVACLM_dVb = vec4_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pclm > 0.0) + if (1) + { + Vec4m condmask0 = diffVds > 1.0e-10; + Vec4m condmask_true0 = condmask0; + { + T0 = vec4_blend(T0, 1.0 / ((pParam->BSIM3v32pclm * Abulk) * pParam->BSIM3v32litl), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-T0) / Abulk) * dAbulk_dVb, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, ((-T0) / Abulk) * dAbulk_dVg, condmask_true0); + T2 = vec4_blend(T2, Vgsteff / EsatL, condmask_true0); + T1 = vec4_blend(T1, Leff * (Abulk + T2), condmask_true0); + dT1_dVg = vec4_blend(dT1_dVg, Leff * (((1.0 - (T2 * dEsatL_dVg)) / EsatL) + dAbulk_dVg), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, Leff * (dAbulk_dVb - ((T2 * dEsatL_dVb) / EsatL)), condmask_true0); + dT1_dVd = vec4_blend(dT1_dVd, ((-T2) * dEsatL_dVd) / Esat, condmask_true0); + T9 = vec4_blend(T9, T0 * T1, condmask_true0); + VACLM = vec4_blend(VACLM, T9 * diffVds, condmask_true0); + dVACLM_dVg = vec4_blend(dVACLM_dVg, (((T0 * dT1_dVg) * diffVds) - (T9 * dVdseff_dVg)) + ((T1 * diffVds) * dT0_dVg), condmask_true0); + dVACLM_dVb = vec4_blend(dVACLM_dVb, (((dT0_dVb * T1) + (T0 * dT1_dVb)) * diffVds) - (T9 * dVdseff_dVb), condmask_true0); + dVACLM_dVd = vec4_blend(dVACLM_dVd, ((T0 * dT1_dVd) * diffVds) + (T9 * (1.0 - dVdseff_dVd)), condmask_true0); + } + } + + + if (pParam->BSIM3v32thetaRout > 0.0) + { + T8 = Abulk * Vdsat; + T0 = Vgst2Vtm * T8; + dT0_dVg = (((Vgst2Vtm * Abulk) * dVdsat_dVg) + T8) + ((Vgst2Vtm * Vdsat) * dAbulk_dVg); + dT0_dVb = Vgst2Vtm * ((dAbulk_dVb * Vdsat) + (Abulk 
* dVdsat_dVb)); + dT0_dVd = (Vgst2Vtm * Abulk) * dVdsat_dVd; + T1 = Vgst2Vtm + T8; + dT1_dVg = (1.0 + (Abulk * dVdsat_dVg)) + (Vdsat * dAbulk_dVg); + dT1_dVb = (Abulk * dVdsat_dVb) + (dAbulk_dVb * Vdsat); + dT1_dVd = Abulk * dVdsat_dVd; + T9 = T1 * T1; + T2 = vec4_SIMDTOVECTOR(pParam->BSIM3v32thetaRout); + VADIBL = (Vgst2Vtm - (T0 / T1)) / T2; + dVADIBL_dVg = ((1.0 - (dT0_dVg / T1)) + ((T0 * dT1_dVg) / T9)) / T2; + dVADIBL_dVb = (((-dT0_dVb) / T1) + ((T0 * dT1_dVb) / T9)) / T2; + dVADIBL_dVd = (((-dT0_dVd) / T1) + ((T0 * dT1_dVd) / T9)) / T2; + T7 = pParam->BSIM3v32pdiblb * Vbseff; + if (1) + { + Vec4m condmask0 = T7 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T3 = vec4_blend(T3, 1.0 / (1.0 + T7), condmask_true0); + VADIBL = vec4_blend(VADIBL, VADIBL * T3, condmask_true0); + dVADIBL_dVg = vec4_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_true0); + dVADIBL_dVb = vec4_blend(dVADIBL_dVb, (dVADIBL_dVb - (VADIBL * pParam->BSIM3v32pdiblb)) * T3, condmask_true0); + dVADIBL_dVd = vec4_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (0.8 + T7), condmask_false0); + T3 = vec4_blend(T3, (17.0 + (20.0 * T7)) * T4, condmask_false0); + dVADIBL_dVg = vec4_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_false0); + dVADIBL_dVb = vec4_blend(dVADIBL_dVb, (dVADIBL_dVb * T3) - (((VADIBL * pParam->BSIM3v32pdiblb) * T4) * T4), condmask_false0); + dVADIBL_dVd = vec4_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_false0); + VADIBL = vec4_blend(VADIBL, VADIBL * T3, condmask_false0); + } + } + + } + else + { + VADIBL = vec4_SIMDTOVECTOR(MAX_EXP); + dVADIBL_dVd = (dVADIBL_dVg = (dVADIBL_dVb = vec4_SIMDTOVECTOR(0.0))); + } + + T8 = pParam->BSIM3v32pvag / EsatL; + T9 = T8 * Vgsteff; + if (1) + { + Vec4m condmask0 = T9 > (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 + T9, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, T8 * (1.0 - ((Vgsteff * 
dEsatL_dVg) / EsatL)), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-T9) * dEsatL_dVb) / EsatL, condmask_true0); + dT0_dVd = vec4_blend(dT0_dVd, ((-T9) * dEsatL_dVd) / EsatL, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (17.0 + (20.0 * T9)), condmask_false0); + T0 = vec4_blend(T0, (0.8 + T9) * T1, condmask_false0); + T1 = vec4_blend(T1, T1 * T1, condmask_false0); + dT0_dVg = vec4_blend(dT0_dVg, (T8 * (1.0 - ((Vgsteff * dEsatL_dVg) / EsatL))) * T1, condmask_false0); + T9 = vec4_blend(T9, T9 * (T1 / EsatL), condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, (-T9) * dEsatL_dVb, condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, (-T9) * dEsatL_dVd, condmask_false0); + } + } + + tmp1 = VACLM * VACLM; + tmp2 = VADIBL * VADIBL; + tmp3 = VACLM + VADIBL; + T1 = (VACLM * VADIBL) / tmp3; + tmp3 *= tmp3; + dT1_dVg = ((tmp1 * dVADIBL_dVg) + (tmp2 * dVACLM_dVg)) / tmp3; + dT1_dVd = ((tmp1 * dVADIBL_dVd) + (tmp2 * dVACLM_dVd)) / tmp3; + dT1_dVb = ((tmp1 * dVADIBL_dVb) + (tmp2 * dVACLM_dVb)) / tmp3; + Va = Vasat + (T0 * T1); + dVa_dVg = (dVasat_dVg + (T1 * dT0_dVg)) + (T0 * dT1_dVg); + dVa_dVd = (dVasat_dVd + (T1 * dT0_dVd)) + (T0 * dT1_dVd); + dVa_dVb = (dVasat_dVb + (T1 * dT0_dVb)) + (T0 * dT1_dVb); + dVASCBE_dVg = (dVASCBE_dVd = (dVASCBE_dVb = vec4_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pscbe2 > 0.0) + { + if (1) + { + Vec4m condmask0 = diffVds > ((pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, (pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / diffVds, condmask_true0); + VASCBE = vec4_blend(VASCBE, (Leff * vec4_exp(T0)) / pParam->BSIM3v32pscbe2, condmask_true0); + T1 = vec4_blend(T1, (T0 * VASCBE) / diffVds, condmask_true0); + dVASCBE_dVg = vec4_blend(dVASCBE_dVg, T1 * dVdseff_dVg, condmask_true0); + dVASCBE_dVd = vec4_blend(dVASCBE_dVd, (-T1) * (1.0 - dVdseff_dVd), condmask_true0); + dVASCBE_dVb = vec4_blend(dVASCBE_dVb, T1 * dVdseff_dVb, 
condmask_true0); + } + { + VASCBE = vec4_blend(VASCBE, vec4_SIMDTOVECTOR((MAX_EXP * Leff) / pParam->BSIM3v32pscbe2), condmask_false0); + } + } + + } + else + { + VASCBE = vec4_SIMDTOVECTOR(MAX_EXP); + } + + CoxWovL = (model->BSIM3v32cox * Weff) / Leff; + beta = ueff * CoxWovL; + dbeta_dVg = (CoxWovL * dueff_dVg) + ((beta * dWeff_dVg) / Weff); + dbeta_dVd = CoxWovL * dueff_dVd; + dbeta_dVb = (CoxWovL * dueff_dVb) + ((beta * dWeff_dVb) / Weff); + T0 = 1.0 - (((0.5 * Abulk) * Vdseff) / Vgst2Vtm); + dT0_dVg = ((-0.5) * (((Abulk * dVdseff_dVg) - ((Abulk * Vdseff) / Vgst2Vtm)) + (Vdseff * dAbulk_dVg))) / Vgst2Vtm; + dT0_dVd = (((-0.5) * Abulk) * dVdseff_dVd) / Vgst2Vtm; + dT0_dVb = ((-0.5) * ((Abulk * dVdseff_dVb) + (dAbulk_dVb * Vdseff))) / Vgst2Vtm; + fgche1 = Vgsteff * T0; + dfgche1_dVg = (Vgsteff * dT0_dVg) + T0; + dfgche1_dVd = Vgsteff * dT0_dVd; + dfgche1_dVb = Vgsteff * dT0_dVb; + T9 = Vdseff / EsatL; + fgche2 = 1.0 + T9; + dfgche2_dVg = (dVdseff_dVg - (T9 * dEsatL_dVg)) / EsatL; + dfgche2_dVd = (dVdseff_dVd - (T9 * dEsatL_dVd)) / EsatL; + dfgche2_dVb = (dVdseff_dVb - (T9 * dEsatL_dVb)) / EsatL; + gche = (beta * fgche1) / fgche2; + dgche_dVg = (((beta * dfgche1_dVg) + (fgche1 * dbeta_dVg)) - (gche * dfgche2_dVg)) / fgche2; + dgche_dVd = (((beta * dfgche1_dVd) + (fgche1 * dbeta_dVd)) - (gche * dfgche2_dVd)) / fgche2; + dgche_dVb = (((beta * dfgche1_dVb) + (fgche1 * dbeta_dVb)) - (gche * dfgche2_dVb)) / fgche2; + T0 = 1.0 + (gche * Rds); + T9 = Vdseff / T0; + Idl = gche * T9; + dIdl_dVg = (((gche * dVdseff_dVg) + (T9 * dgche_dVg)) / T0) - (((Idl * gche) / T0) * dRds_dVg); + dIdl_dVd = ((gche * dVdseff_dVd) + (T9 * dgche_dVd)) / T0; + dIdl_dVb = (((gche * dVdseff_dVb) + (T9 * dgche_dVb)) - ((Idl * dRds_dVb) * gche)) / T0; + T9 = diffVds / Va; + T0 = 1.0 + T9; + Idsa = Idl * T0; + dIdsa_dVg = (T0 * dIdl_dVg) - ((Idl * (dVdseff_dVg + (T9 * dVa_dVg))) / Va); + dIdsa_dVd = (T0 * dIdl_dVd) + ((Idl * ((1.0 - dVdseff_dVd) - (T9 * dVa_dVd))) / Va); + dIdsa_dVb = (T0 * 
dIdl_dVb) - ((Idl * (dVdseff_dVb + (T9 * dVa_dVb))) / Va); + T9 = diffVds / VASCBE; + T0 = 1.0 + T9; + Ids = Idsa * T0; + Gm = (T0 * dIdsa_dVg) - ((Idsa * (dVdseff_dVg + (T9 * dVASCBE_dVg))) / VASCBE); + Gds = (T0 * dIdsa_dVd) + ((Idsa * ((1.0 - dVdseff_dVd) - (T9 * dVASCBE_dVd))) / VASCBE); + Gmb = (T0 * dIdsa_dVb) - ((Idsa * (dVdseff_dVb + (T9 * dVASCBE_dVb))) / VASCBE); + Gds += Gm * dVgsteff_dVd; + Gmb += Gm * dVgsteff_dVb; + Gm *= dVgsteff_dVg; + Gmb *= dVbseff_dVb; + tmpuni = pParam->BSIM3v32alpha0 + (pParam->BSIM3v32alpha1 * Leff); + if ((tmpuni <= 0.0) || (pParam->BSIM3v32beta0 <= 0.0)) + { + Isub = (Gbd = (Gbb = (Gbg = vec4_SIMDTOVECTOR(0.0)))); + } + else + { + T2 = vec4_SIMDTOVECTOR(tmpuni / Leff); + if (1) + { + Vec4m condmask0 = diffVds > (pParam->BSIM3v32beta0 / EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, (-pParam->BSIM3v32beta0) / diffVds, condmask_true0); + T1 = vec4_blend(T1, (T2 * diffVds) * vec4_exp(T0), condmask_true0); + T3 = vec4_blend(T3, (T1 / diffVds) * (T0 - 1.0), condmask_true0); + dT1_dVg = vec4_blend(dT1_dVg, T3 * dVdseff_dVg, condmask_true0); + dT1_dVd = vec4_blend(dT1_dVd, T3 * (dVdseff_dVd - 1.0), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, T3 * dVdseff_dVb, condmask_true0); + } + { + T3 = vec4_blend(T3, T2 * MIN_EXP, condmask_false0); + T1 = vec4_blend(T1, T3 * diffVds, condmask_false0); + dT1_dVg = vec4_blend(dT1_dVg, (-T3) * dVdseff_dVg, condmask_false0); + dT1_dVd = vec4_blend(dT1_dVd, T3 * (1.0 - dVdseff_dVd), condmask_false0); + dT1_dVb = vec4_blend(dT1_dVb, (-T3) * dVdseff_dVb, condmask_false0); + } + } + + Isub = T1 * Idsa; + Gbg = (T1 * dIdsa_dVg) + (Idsa * dT1_dVg); + Gbd = (T1 * dIdsa_dVd) + (Idsa * dT1_dVd); + Gbb = (T1 * dIdsa_dVb) + (Idsa * dT1_dVb); + Gbd += Gbg * dVgsteff_dVd; + Gbb += Gbg * dVgsteff_dVb; + Gbg *= dVgsteff_dVg; + Gbb *= dVbseff_dVb; + } + + cdrain = Ids; + { + heres[0]->BSIM3v32gds = Gds[0]; + heres[1]->BSIM3v32gds = Gds[1]; 
+ heres[2]->BSIM3v32gds = Gds[2]; + heres[3]->BSIM3v32gds = Gds[3]; + } + { + heres[0]->BSIM3v32gm = Gm[0]; + heres[1]->BSIM3v32gm = Gm[1]; + heres[2]->BSIM3v32gm = Gm[2]; + heres[3]->BSIM3v32gm = Gm[3]; + } + { + heres[0]->BSIM3v32gmbs = Gmb[0]; + heres[1]->BSIM3v32gmbs = Gmb[1]; + heres[2]->BSIM3v32gmbs = Gmb[2]; + heres[3]->BSIM3v32gmbs = Gmb[3]; + } + { + heres[0]->BSIM3v32gbbs = Gbb[0]; + heres[1]->BSIM3v32gbbs = Gbb[1]; + heres[2]->BSIM3v32gbbs = Gbb[2]; + heres[3]->BSIM3v32gbbs = Gbb[3]; + } + { + heres[0]->BSIM3v32gbgs = Gbg[0]; + heres[1]->BSIM3v32gbgs = Gbg[1]; + heres[2]->BSIM3v32gbgs = Gbg[2]; + heres[3]->BSIM3v32gbgs = Gbg[3]; + } + { + heres[0]->BSIM3v32gbds = Gbd[0]; + heres[1]->BSIM3v32gbds = Gbd[1]; + heres[2]->BSIM3v32gbds = Gbd[2]; + heres[3]->BSIM3v32gbds = Gbd[3]; + } + { + heres[0]->BSIM3v32csub = Isub[0]; + heres[1]->BSIM3v32csub = Isub[1]; + heres[2]->BSIM3v32csub = Isub[2]; + heres[3]->BSIM3v32csub = Isub[3]; + } + CoxWL = (model->BSIM3v32cox * pParam->BSIM3v32weffCV) * pParam->BSIM3v32leffCV; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + if ((model->BSIM3v32xpart < 0) | (!ChargeComputationNeeded)) + { + qgate = vec4_SIMDTOVECTOR(0.0); + qdrn = vec4_SIMDTOVECTOR(0.0); + qsrc = vec4_SIMDTOVECTOR(0.0); + qbulk = vec4_SIMDTOVECTOR(0.0); + { + heres[0]->BSIM3v32cggb = 0.0; + heres[1]->BSIM3v32cggb = 0.0; + heres[2]->BSIM3v32cggb = 0.0; + heres[3]->BSIM3v32cggb = 0.0; + } + { + heres[0]->BSIM3v32cgsb = 0.0; + heres[1]->BSIM3v32cgsb = 0.0; + heres[2]->BSIM3v32cgsb = 0.0; + heres[3]->BSIM3v32cgsb = 0.0; + } + { + heres[0]->BSIM3v32cgdb = 0.0; + heres[1]->BSIM3v32cgdb = 0.0; + heres[2]->BSIM3v32cgdb = 0.0; + heres[3]->BSIM3v32cgdb = 0.0; + } + { + heres[0]->BSIM3v32cdgb = 0.0; + heres[1]->BSIM3v32cdgb = 0.0; + heres[2]->BSIM3v32cdgb = 0.0; + heres[3]->BSIM3v32cdgb = 0.0; + } + { + heres[0]->BSIM3v32cdsb = 0.0; + heres[1]->BSIM3v32cdsb = 0.0; + heres[2]->BSIM3v32cdsb = 0.0; + heres[3]->BSIM3v32cdsb = 0.0; + } + { + 
heres[0]->BSIM3v32cddb = 0.0; + heres[1]->BSIM3v32cddb = 0.0; + heres[2]->BSIM3v32cddb = 0.0; + heres[3]->BSIM3v32cddb = 0.0; + } + { + heres[0]->BSIM3v32cbgb = 0.0; + heres[1]->BSIM3v32cbgb = 0.0; + heres[2]->BSIM3v32cbgb = 0.0; + heres[3]->BSIM3v32cbgb = 0.0; + } + { + heres[0]->BSIM3v32cbsb = 0.0; + heres[1]->BSIM3v32cbsb = 0.0; + heres[2]->BSIM3v32cbsb = 0.0; + heres[3]->BSIM3v32cbsb = 0.0; + } + { + heres[0]->BSIM3v32cbdb = 0.0; + heres[1]->BSIM3v32cbdb = 0.0; + heres[2]->BSIM3v32cbdb = 0.0; + heres[3]->BSIM3v32cbdb = 0.0; + } + { + heres[0]->BSIM3v32cqdb = 0.0; + heres[1]->BSIM3v32cqdb = 0.0; + heres[2]->BSIM3v32cqdb = 0.0; + heres[3]->BSIM3v32cqdb = 0.0; + } + { + heres[0]->BSIM3v32cqsb = 0.0; + heres[1]->BSIM3v32cqsb = 0.0; + heres[2]->BSIM3v32cqsb = 0.0; + heres[3]->BSIM3v32cqsb = 0.0; + } + { + heres[0]->BSIM3v32cqgb = 0.0; + heres[1]->BSIM3v32cqgb = 0.0; + heres[2]->BSIM3v32cqgb = 0.0; + heres[3]->BSIM3v32cqgb = 0.0; + } + { + heres[0]->BSIM3v32cqbb = 0.0; + heres[1]->BSIM3v32cqbb = 0.0; + heres[2]->BSIM3v32cqbb = 0.0; + heres[3]->BSIM3v32cqbb = 0.0; + } + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + } + goto finished; + } + else + if (model->BSIM3v32capMod == 0) + { + if (1) + { + Vec4m condmask0 = Vbseff < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vbseff = vec4_blend(Vbseff, Vbs, condmask_true0); + dVbseff_dVb = vec4_blend(dVbseff_dVb, vec4_SIMDTOVECTOR(1.0), condmask_true0); + } + { + Vbseff = vec4_blend(Vbseff, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseff_dVb = vec4_blend(dVbseff_dVb, -dPhis_dVb, condmask_false0); + } + } + + Vfb = vec4_SIMDTOVECTOR(pParam->BSIM3v32vfbcv); + Vth = (Vfb + pParam->BSIM3v32phi) + (pParam->BSIM3v32k1ox * sqrtPhis); + Vgst = Vgs_eff - Vth; + dVth_dVb = pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVgst_dVb = -dVth_dVb; + dVgst_dVg = dVgs_eff_dVg; + Arg1 = (Vgs_eff - Vbseff) - Vfb; + if 
(1) + { + Vec4m condmask0 = Arg1 <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + qgate = vec4_blend(qgate, CoxWL * Arg1, condmask_true0); + qbulk = vec4_blend(qbulk, -qgate, condmask_true0); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = CoxWL * dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = CoxWL * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true0[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = (-CoxWL) * dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if 
(condmask_true0[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}); + if (condmask_true0[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32qinv = 0.0; + + } + } + if (1) + { + Vec4m condmask1 = Vgst <= 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_true1); + T2 = vec4_blend(T2, vec4_sqrt((T1 * T1) + Arg1), condmask_true1); + qgate = vec4_blend(qgate, (CoxWL * pParam->BSIM3v32k1ox) * (T2 - T1), condmask_true1); + qbulk = vec4_blend(qbulk, -qgate, condmask_true1); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true1); + T0 = vec4_blend(T0, (CoxWL * T1) / T2, condmask_true1); + { + Vec4d val = T0 * dVgs_eff_dVg; + if (condmask_true1[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if 
(condmask_true1[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = T0 * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true1[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = -((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}); + if (condmask_true1[0]) + 
heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32qinv = 0.0; + + } + } + { + AbulkCV = vec4_blend(AbulkCV, Abulk0 * pParam->BSIM3v32abulkCVfactor, condmask_false1); + dAbulkCV_dVb = vec4_blend(dAbulkCV_dVb, pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb, condmask_false1); + Vdsat = vec4_blend(Vdsat, Vgst / AbulkCV, condmask_false1); + dVdsat_dVg = vec4_blend(dVdsat_dVg, dVgs_eff_dVg / AbulkCV, condmask_false1); + dVdsat_dVb = vec4_blend(dVdsat_dVb, (-((Vdsat * dAbulkCV_dVb) + dVth_dVb)) / AbulkCV, condmask_false1); + if (model->BSIM3v32xpart > 0.5) + { + if (1) + { + Vec4m condmask2 = Vdsat <= Vds; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + 
if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if 
(condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + T7 = vec4_blend(T7, ((2.0 * Vds) - T1) - (3.0 * T3), condmask_false2); + T8 = vec4_blend(T8, (T3 - T1) - (2.0 * Vds), condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T10 = vec4_blend(T10, T4 * T8, condmask_false2); + qdrn = vec4_blend(qdrn, T4 * T7, condmask_false2); + qbulk = vec4_blend(qbulk, -((qgate + qdrn) + T10), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T11 = vec4_blend(T11, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + 
heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T11) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec4_blend(T7, T9 * T7, condmask_false2); + T8 = vec4_blend(T8, T9 * T8, condmask_false2); + T9 = vec4_blend(T9, (2.0 * T4) * (1.0 - (3.0 * T5)), condmask_false2); + { + Vec4d val = ((T7 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T12 = vec4_blend(T12, (T7 * dAlphaz_dVb) - (T9 * dVdsat_dVb), condmask_false2); + { + Vec4d val = T4 * ((3.0 - (6.0 * T2)) - (3.0 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, 
heres[3]->BSIM3v32cdgb}) + T12) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + T9 = vec4_blend(T9, (2.0 * T4) * (1.0 + T5), condmask_false2); + T10 = vec4_blend(T10, ((T8 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec4_blend(T11, (T8 * dAlphaz_dVb) - (T9 * dVdsat_dVb), condmask_false2); + T12 = vec4_blend(T12, T4 * (((2.0 * T2) + T5) - 1.0), condmask_false2); + T0 = vec4_blend(T0, -((T10 + T11) + T12), condmask_false2); + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + T10); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, 
heres[3]->BSIM3v32cdsb})) + T0); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + else + if (model->BSIM3v32xpart < 0.5) + { + if (1) + { + Vec4m condmask2 = Vds >= Vdsat; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, 0.4 * T2, condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if 
(condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + T3 = vec4_blend(T3, vec4_SIMDTOVECTOR(0.4 * Two_Third_CoxWL), condmask_true2); + { + Vec4d val = (-T3) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + T4 = vec4_blend(T4, T3 * dVth_dVb, condmask_true2); + { + Vec4d val = -(T4 + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if 
(condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + tmp = vec4_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, 
heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T6 = vec4_blend(T6, (((8.0 * Vdsat) * Vdsat) - ((6.0 * Vdsat) * Vds)) + ((1.2 * Vds) * Vds), condmask_false2); + T8 = vec4_blend(T8, T2 / T1, condmask_false2); + T7 = vec4_blend(T7, (Vds - T1) - (T8 * T6), condmask_false2); + qdrn = vec4_blend(qdrn, T4 * T7, condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + tmp = vec4_blend(tmp, T8 / T1, condmask_false2); + tmp1 = vec4_blend(tmp1, T4 * ((2.0 - ((4.0 * tmp) * T6)) + (T8 * ((16.0 * Vdsat) - (6.0 * Vds)))), condmask_false2); + { + Vec4d val = ((T7 * dAlphaz_dVg) - (tmp1 * dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T10 = vec4_blend(T10, (T7 * dAlphaz_dVb) - (tmp1 * dVdsat_dVb), condmask_false2); + { + Vec4d val = T4 * ((2.0 - (((1.0 / ((3.0 * T1) * T1)) + (2.0 * tmp)) * T6)) + (T8 * ((6.0 * Vdsat) - (2.4 * Vds)))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, 
heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) + T10) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + T7 = vec4_blend(T7, 2.0 * (T1 + T3), condmask_false2); + qbulk = vec4_blend(qbulk, -(qgate - (T4 * T7)), condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + T0 = vec4_blend(T0, (4.0 * T4) * (1.0 - T5), condmask_false2); + T12 = vec4_blend(T12, ((((-T7) * dAlphaz_dVg) - ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) - (T0 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec4_blend(T11, (((-T7) * dAlphaz_dVb) - T10) - (T0 * dVdsat_dVb), condmask_false2); + T10 = vec4_blend(T10, (((-4.0) * T4) * ((T2 - 0.5) + (0.5 * T5))) - ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}), condmask_false2); + tmp = vec4_blend(tmp, -((T10 + T11) + T12), condmask_false2); + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + T10); 
+ if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + else + { + if (1) + { + Vec4m condmask2 = Vds >= Vdsat; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, 0.5 * T2, condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d 
){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = (-One_Third_CoxWL) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + T4 = vec4_blend(T4, One_Third_CoxWL * dVth_dVb, condmask_true2); + { + Vec4d val = -(T4 + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + 
heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + tmp = vec4_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 
0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec4_blend(T7, T1 + T3, condmask_false2); + qdrn = vec4_blend(qdrn, (-T4) * T7, condmask_false2); + qbulk = vec4_blend(qbulk, -((qgate + qdrn) + qdrn), condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + T0 = vec4_blend(T0, T4 * ((2.0 * T5) - 2.0), condmask_false2); + { + Vec4d val = ((T0 * dVdsat_dVg) - (T7 * dAlphaz_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T12 = vec4_blend(T12, (T0 * dVdsat_dVb) - (T7 * dAlphaz_dVb), condmask_false2); + { + Vec4d val = T4 * ((1.0 - (2.0 * T2)) - T5); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + 
heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) + T12) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if 
(condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + + + } + } + + } + + } + else + { + if (1) + { + Vec4m condmask0 = Vbseff < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + VbseffCV = vec4_blend(VbseffCV, Vbseff, condmask_true0); + dVbseffCV_dVb = vec4_blend(dVbseffCV_dVb, vec4_SIMDTOVECTOR(1.0), condmask_true0); + } + { + VbseffCV = vec4_blend(VbseffCV, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseffCV_dVb = vec4_blend(dVbseffCV_dVb, -dPhis_dVb, condmask_false0); + } + } + + noff = n * pParam->BSIM3v32noff; + dnoff_dVd = pParam->BSIM3v32noff * dn_dVd; + dnoff_dVb = pParam->BSIM3v32noff * dn_dVb; + T0 = Vtm * noff; + voffcv = pParam->BSIM3v32voffcv; + VgstNVt = (Vgst - voffcv) / T0; + if (1) + { + Vec4m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vgsteff = vec4_blend(Vgsteff, Vgst - voffcv, condmask_true0); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec4m condmask1 = VgstNVt < (-EXP_THRESHOLD); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec4_blend(Vgsteff, T0 * log(1.0 + MIN_EXP), condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, Vgsteff / noff, condmask_true1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, dVgsteff_dVd * dnoff_dVb, condmask_true1); + dVgsteff_dVd = 
vec4_blend(dVgsteff_dVd, dVgsteff_dVd * dnoff_dVd, condmask_true1); + } + { + ExpVgst = vec4_blend(ExpVgst, vec4_exp(VgstNVt), condmask_false1); + Vgsteff = vec4_blend(Vgsteff, T0 * vec4_log(1.0 + ExpVgst), condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, ((-dVgsteff_dVg) * (dVth_dVd + (((Vgst - voffcv) / noff) * dnoff_dVd))) + ((Vgsteff / noff) * dnoff_dVd), condmask_false1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, ((-dVgsteff_dVg) * (dVth_dVb + (((Vgst - voffcv) / noff) * dnoff_dVb))) + ((Vgsteff / noff) * dnoff_dVb), condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_false1); + } + } + + } + + if (model->BSIM3v32capMod == 1) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = vec4_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + Arg1 = ((Vgs_eff - VbseffCV) - Vfb) - Vgsteff; + if (1) + { + Vec4m condmask0 = Arg1 <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + qgate = vec4_blend(qgate, CoxWL * Arg1, condmask_true0); + Cgg = vec4_blend(Cgg, CoxWL * (dVgs_eff_dVg - dVgsteff_dVg), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec4_blend(Cgd, (-CoxWL) * dVgsteff_dVd, condmask_true0); + Cgb = vec4_blend(Cgb, (-CoxWL) * (dVbseffCV_dVb + dVgsteff_dVb), condmask_true0); + break; + + case 
BSIM3v32V32: + + default: + Cgd = vec4_blend(Cgd, (-CoxWL) * (dVfb_dVd + dVgsteff_dVd), condmask_true0); + Cgb = vec4_blend(Cgb, (-CoxWL) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_true0); + + } + + } + { + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_false0); + T1 = vec4_blend(T1, vec4_sqrt((T0 * T0) + Arg1), condmask_false0); + T2 = vec4_blend(T2, (CoxWL * T0) / T1, condmask_false0); + qgate = vec4_blend(qgate, (CoxWL * pParam->BSIM3v32k1ox) * (T1 - T0), condmask_false0); + Cgg = vec4_blend(Cgg, T2 * (dVgs_eff_dVg - dVgsteff_dVg), condmask_false0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec4_blend(Cgd, (-T2) * dVgsteff_dVd, condmask_false0); + Cgb = vec4_blend(Cgb, (-T2) * (dVbseffCV_dVb + dVgsteff_dVb), condmask_false0); + break; + + case BSIM3v32V32: + + default: + Cgd = vec4_blend(Cgd, (-T2) * (dVfb_dVd + dVgsteff_dVd), condmask_false0); + Cgb = vec4_blend(Cgb, (-T2) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_false0); + + } + + } + } + + qbulk = -qgate; + Cbg = -Cgg; + Cbd = -Cgd; + Cbb = -Cgb; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + if (1) + { + Vec4m condmask0 = VdsatCV < Vds; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + dVdsatCV_dVg = vec4_blend(dVdsatCV_dVg, 1.0 / AbulkCV, condmask_true0); + dVdsatCV_dVb = vec4_blend(dVdsatCV_dVb, ((-VdsatCV) * dAbulkCV_dVb) / AbulkCV, condmask_true0); + T0 = vec4_blend(T0, Vgsteff - (VdsatCV / 3.0), condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, 1.0 - (dVdsatCV_dVg / 3.0), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (-dVdsatCV_dVb) / 3.0, condmask_true0); + qgate = vec4_blend(qgate, qgate + (CoxWL * T0), condmask_true0); + Cgg1 = vec4_blend(Cgg1, CoxWL * dT0_dVg, 
condmask_true0); + Cgb1 = vec4_blend(Cgb1, (CoxWL * dT0_dVb) + (Cgg1 * dVgsteff_dVb), condmask_true0); + Cgd1 = vec4_blend(Cgd1, Cgg1 * dVgsteff_dVd, condmask_true0); + Cgg1 = vec4_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_true0); + Cgg = vec4_blend(Cgg, Cgg + Cgg1, condmask_true0); + Cgb = vec4_blend(Cgb, Cgb + Cgb1, condmask_true0); + Cgd = vec4_blend(Cgd, Cgd + Cgd1, condmask_true0); + T0 = vec4_blend(T0, VdsatCV - Vgsteff, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, dVdsatCV_dVg - 1.0, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, dVdsatCV_dVb, condmask_true0); + qbulk = vec4_blend(qbulk, qbulk + (One_Third_CoxWL * T0), condmask_true0); + Cbg1 = vec4_blend(Cbg1, One_Third_CoxWL * dT0_dVg, condmask_true0); + Cbb1 = vec4_blend(Cbb1, (One_Third_CoxWL * dT0_dVb) + (Cbg1 * dVgsteff_dVb), condmask_true0); + Cbd1 = vec4_blend(Cbd1, Cbg1 * dVgsteff_dVd, condmask_true0); + Cbg1 = vec4_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_true0); + Cbg = vec4_blend(Cbg, Cbg + Cbg1, condmask_true0); + Cbb = vec4_blend(Cbb, Cbb + Cbb1, condmask_true0); + Cbd = vec4_blend(Cbd, Cbd + Cbd1, condmask_true0); + if (model->BSIM3v32xpart > 0.5) + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(-Two_Third_CoxWL), condmask_true0); + else + if (model->BSIM3v32xpart < 0.5) + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR((-0.4) * CoxWL), condmask_true0); + else + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(-One_Third_CoxWL), condmask_true0); + + + qsrc = vec4_blend(qsrc, T0 * Vgsteff, condmask_true0); + Csg = vec4_blend(Csg, T0 * dVgsteff_dVg, condmask_true0); + Csb = vec4_blend(Csb, T0 * dVgsteff_dVb, condmask_true0); + Csd = vec4_blend(Csd, T0 * dVgsteff_dVd, condmask_true0); + Cgb = vec4_blend(Cgb, Cgb * dVbseff_dVb, condmask_true0); + Cbb = vec4_blend(Cbb, Cbb * dVbseff_dVb, condmask_true0); + Csb = vec4_blend(Csb, Csb * dVbseff_dVb, condmask_true0); + } + { + T0 = vec4_blend(T0, AbulkCV * Vds, condmask_false0); + T1 = vec4_blend(T1, 12.0 * ((Vgsteff - (0.5 * T0)) + 1.e-20), condmask_false0); + T2 = 
vec4_blend(T2, Vds / T1, condmask_false0); + T3 = vec4_blend(T3, T0 * T2, condmask_false0); + dT3_dVg = vec4_blend(dT3_dVg, (((-12.0) * T2) * T2) * AbulkCV, condmask_false0); + dT3_dVd = vec4_blend(dT3_dVd, ((((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / T1) / T1) - 0.5, condmask_false0); + dT3_dVb = vec4_blend(dT3_dVb, (((12.0 * T2) * T2) * dAbulkCV_dVb) * Vgsteff, condmask_false0); + qgate = vec4_blend(qgate, qgate + (CoxWL * ((Vgsteff - (0.5 * Vds)) + T3)), condmask_false0); + Cgg1 = vec4_blend(Cgg1, CoxWL * (1.0 + dT3_dVg), condmask_false0); + Cgb1 = vec4_blend(Cgb1, (CoxWL * dT3_dVb) + (Cgg1 * dVgsteff_dVb), condmask_false0); + Cgd1 = vec4_blend(Cgd1, (CoxWL * dT3_dVd) + (Cgg1 * dVgsteff_dVd), condmask_false0); + Cgg1 = vec4_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_false0); + Cgg = vec4_blend(Cgg, Cgg + Cgg1, condmask_false0); + Cgb = vec4_blend(Cgb, Cgb + Cgb1, condmask_false0); + Cgd = vec4_blend(Cgd, Cgd + Cgd1, condmask_false0); + qbulk = vec4_blend(qbulk, qbulk + ((CoxWL * (1.0 - AbulkCV)) * ((0.5 * Vds) - T3)), condmask_false0); + Cbg1 = vec4_blend(Cbg1, (-CoxWL) * ((1.0 - AbulkCV) * dT3_dVg), condmask_false0); + Cbb1 = vec4_blend(Cbb1, ((-CoxWL) * (((1.0 - AbulkCV) * dT3_dVb) + (((0.5 * Vds) - T3) * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb), condmask_false0); + Cbd1 = vec4_blend(Cbd1, (((-CoxWL) * (1.0 - AbulkCV)) * dT3_dVd) + (Cbg1 * dVgsteff_dVd), condmask_false0); + Cbg1 = vec4_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_false0); + Cbg = vec4_blend(Cbg, Cbg + Cbg1, condmask_false0); + Cbb = vec4_blend(Cbb, Cbb + Cbb1, condmask_false0); + Cbd = vec4_blend(Cbd, Cbd + Cbd1, condmask_false0); + if (model->BSIM3v32xpart > 0.5) + { + T1 = vec4_blend(T1, T1 + T1, condmask_false0); + qsrc = vec4_blend(qsrc, (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)), condmask_false0); + Csg = vec4_blend(Csg, (-CoxWL) * (0.5 + (((((24.0 * T0) * Vds) / T1) / T1) * AbulkCV)), condmask_false0); + Csb = vec4_blend(Csb, ((-CoxWL) * (((0.25 * Vds) * dAbulkCV_dVb) - 
((((((12.0 * T0) * Vds) / T1) / T1) * ((4.0 * Vgsteff) - T0)) * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec4_blend(Csd, ((-CoxWL) * ((0.25 * AbulkCV) - (((((12.0 * AbulkCV) * T0) / T1) / T1) * ((4.0 * Vgsteff) - T0)))) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec4_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = vec4_blend(T1, T1 / 12.0, condmask_false0); + T2 = vec4_blend(T2, (0.5 * CoxWL) / (T1 * T1), condmask_false0); + T3 = vec4_blend(T3, (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0), condmask_false0); + qsrc = vec4_blend(qsrc, (-T2) * T3, condmask_false0); + T4 = vec4_blend(T4, (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0), condmask_false0); + Csg = vec4_blend(Csg, (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))), condmask_false0); + Csb = vec4_blend(Csb, ((((qsrc / T1) * Vds) + ((T2 * T4) * Vds)) * dAbulkCV_dVb) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec4_blend(Csd, (((qsrc / T1) + (T2 * T4)) * AbulkCV) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec4_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + { + qsrc = vec4_blend(qsrc, (-0.5) * (qgate + qbulk), condmask_false0); + Csg = vec4_blend(Csg, (-0.5) * (Cgg1 + Cbg1), condmask_false0); + Csb = vec4_blend(Csb, (-0.5) * (Cgb1 + Cbb1), condmask_false0); + Csd = vec4_blend(Csd, (-0.5) * (Cgd1 + Cbd1), condmask_false0); + } + + + Cgb = vec4_blend(Cgb, Cgb * dVbseff_dVb, condmask_false0); + Cbb = vec4_blend(Cbb, Cbb * dVbseff_dVb, condmask_false0); + Csb = vec4_blend(Csb, Csb * dVbseff_dVb, condmask_false0); + } + } + + qdrn = -((qgate + qbulk) + qsrc); + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val = -((Cgg + Cgd) + Cgb); + 
heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + Vec4d val = -(qgate + qbulk); + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + } + } + else + if (model->BSIM3v32capMod == 2) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = 
vec4_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + V3 = ((Vfb - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * Vfb; + if (1) + { + Vec4m condmask0 = Vfb <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, T0 - T2, condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec4_blend(T0, T0 + T2, condmask_false0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec4_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = Vfb - (0.5 * (V3 + T0)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVd = ((1.0 - T1) - T2) * dVfb_dVd; + + } + + dVfbeff_dVg = T1 * dVgs_eff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVb = (((1.0 - T1) - T2) * dVfb_dVb) - (T1 * dVbseffCV_dVb); + + } + + Qac0 = CoxWL * (Vfbeff - Vfb); + dQac0_dVg = CoxWL * dVfbeff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dQac0_dVd = CoxWL * (dVfbeff_dVd - dVfb_dVd); + + } + + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQac0_dVb = CoxWL * dVfbeff_dVb; + break; + + case BSIM3v32V32: + + default: + dQac0_dVb = CoxWL * (dVfbeff_dVb - dVfb_dVb); + + } + + T0 = vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec4_SIMDTOVECTOR(0.0); + T2 = 
vec4_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec4m condmask0 = T3 < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(CoxWL), condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_sqrt((T0 * T0) + T3), condmask_false0); + T2 = vec4_blend(T2, (CoxWL * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWL * pParam->BSIM3v32k1ox) * (T1 - T0); + dQsub0_dVg = T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQsub0_dVd = (-T2) * dVgsteff_dVd; + break; + + case BSIM3v32V32: + + default: + dQsub0_dVd = (-T2) * (dVfbeff_dVd + dVgsteff_dVd); + + } + + dQsub0_dVb = (-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb); + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec4_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + dVdseffCV_dVg = T3; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + VdseffCV = vec4_blend(VdseffCV, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec4_blend(dVdseffCV_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = vec4_blend(dVdseffCV_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = 12.0 * ((Vgsteff - (0.5 * T0)) + 1e-20); + T2 = VdseffCV / T1; + T3 = T0 * T2; + T4 = 1.0 - (((12.0 * T2) * 
T2) * AbulkCV); + T5 = (((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / (T1 * T1)) - 0.5; + T6 = ((12.0 * T2) * T2) * Vgsteff; + qinoi = (-CoxWL) * ((Vgsteff - (0.5 * T0)) + (AbulkCV * T3)); + qgate = CoxWL * ((Vgsteff - (0.5 * VdseffCV)) + T3); + Cgg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cgd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd); + Cgb1 = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb); + Cgg1 *= dVgsteff_dVg; + T7 = 1.0 - AbulkCV; + qbulk = (CoxWL * T7) * ((0.5 * VdseffCV) - T3); + T4 = (-T7) * (T4 - 1.0); + T5 = (-T7) * T5; + T6 = -((T7 * T6) + ((0.5 * VdseffCV) - T3)); + Cbg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cbd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd); + Cbb1 = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb); + Cbg1 *= dVgsteff_dVg; + if (model->BSIM3v32xpart > 0.5) + { + T1 = T1 + T1; + qsrc = (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)); + T7 = ((4.0 * Vgsteff) - T0) / (T1 * T1); + T4 = -(0.5 + (((24.0 * T0) * T0) / (T1 * T1))); + T5 = -((0.25 * AbulkCV) - (((12.0 * AbulkCV) * T0) * T7)); + T6 = -((0.25 * VdseffCV) - (((12.0 * T0) * VdseffCV) * T7)); + Csg = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Csd = ((CoxWL * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = T1 / 12.0; + T2 = (0.5 * CoxWL) / (T1 * T1); + T3 = (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T2) * T3; + T7 = (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0); + T4 = (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T5 = ((qsrc / T1) + (T2 * T7)) * AbulkCV; + T6 = ((qsrc / T1) * VdseffCV) + ((T2 * T7) * VdseffCV); + Csg = T4 + (T5 * dVdseffCV_dVg); + Csd 
= (T5 * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb)) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + { + qsrc = (-0.5) * (qgate + qbulk); + Csg = (-0.5) * (Cgg1 + Cbg1); + Csb = (-0.5) * (Cgb1 + Cbb1); + Csd = (-0.5) * (Cgd1 + Cbd1); + } + + + qgate += Qac0 + Qsub0; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + qbulk) + qsrc); + Cgg = (dQac0_dVg + dQsub0_dVg) + Cgg1; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = dQsub0_dVd + Cgd1; + break; + + case BSIM3v32V32: + + default: + Cgd = (dQac0_dVd + dQsub0_dVd) + Cgd1; + + } + + Cgb = (dQac0_dVb + dQsub0_dVb) + Cgb1; + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cbd = Cbd1 - dQsub0_dVd; + break; + + case BSIM3v32V32: + + default: + Cbd = (Cbd1 - dQac0_dVd) - dQsub0_dVd; + + } + + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + 
Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + heres[0]->BSIM3v32qinv = qinoi[0]; + heres[1]->BSIM3v32qinv = qinoi[1]; + heres[2]->BSIM3v32qinv = qinoi[2]; + heres[3]->BSIM3v32qinv = qinoi[3]; + } + } + else + if (model->BSIM3v32capMod == 3) + { + V3 = ((((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}); + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, T0 - T2, condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec4_blend(T0, T0 + T2, condmask_false0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec4_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) - (0.5 * (V3 + T0)); + dVfbeff_dVg = T1 * dVgs_eff_dVg; + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + Cox = model->BSIM3v32cox; + Tox = 1.0e8 * model->BSIM3v32tox; + T0 = ((Vgs_eff - 
VbseffCV) - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})) / Tox; + dT0_dVg = dVgs_eff_dVg / Tox; + dT0_dVb = (-dVbseffCV_dVb) / Tox; + tmp = T0 * pParam->BSIM3v32acde; + dTcen_dVg = (dTcen_dVb = vec4_SIMDTOVECTOR(0.0)); + if (1) + { + Vec4m condmask0 = ((-EXP_THRESHOLD) < tmp) & (tmp < EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Tcen = vec4_blend(Tcen, pParam->BSIM3v32ldeb * vec4_exp(tmp), condmask_true0); + dTcen_dVg = vec4_blend(dTcen_dVg, pParam->BSIM3v32acde * Tcen, condmask_true0); + dTcen_dVb = vec4_blend(dTcen_dVb, dTcen_dVg * dT0_dVb, condmask_true0); + dTcen_dVg = vec4_blend(dTcen_dVg, dTcen_dVg * dT0_dVg, condmask_true0); + } + if (1) + { + Vec4m condmask1 = tmp <= (-EXP_THRESHOLD); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Tcen = vec4_blend(Tcen, vec4_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MIN_EXP), condmask_true1); + } + { + Tcen = vec4_blend(Tcen, vec4_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MAX_EXP), condmask_false1); + } + } + + } + + LINK = 1.0e-3 * model->BSIM3v32tox; + V3 = (pParam->BSIM3v32ldeb - Tcen) - LINK; + V4 = vec4_sqrt((V3 * V3) + ((4.0 * LINK) * pParam->BSIM3v32ldeb)); + Tcen = pParam->BSIM3v32ldeb - (0.5 * (V3 + V4)); + T1 = 0.5 * (1.0 + (V3 / V4)); + dTcen_dVg *= T1; + dTcen_dVb *= T1; + Ccen = EPSSI / Tcen; + T2 = Cox / (Cox + Ccen); + Coxeff = T2 * Ccen; + T3 = (-Ccen) / Tcen; + dCoxeff_dVg = (T2 * T2) * T3; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + Qac0 = CoxWLcen * (Vfbeff - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})); + QovCox = Qac0 / Coxeff; + dQac0_dVg = (CoxWLcen * dVfbeff_dVg) + (QovCox * dCoxeff_dVg); + dQac0_dVb = (CoxWLcen * dVfbeff_dVb) + (QovCox * dCoxeff_dVb); + T0 = vec4_SIMDTOVECTOR(0.5 * 
pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec4_SIMDTOVECTOR(0.0); + T2 = vec4_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec4m condmask0 = T3 < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), condmask_true0); + T2 = vec4_blend(T2, CoxWLcen, condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_sqrt((T0 * T0) + T3), condmask_false0); + T2 = vec4_blend(T2, (CoxWLcen * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWLcen * pParam->BSIM3v32k1ox) * (T1 - T0); + QovCox = Qsub0 / Coxeff; + dQsub0_dVg = (T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg)) + (QovCox * dCoxeff_dVg); + dQsub0_dVd = (-T2) * dVgsteff_dVd; + dQsub0_dVb = ((-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + if (pParam->BSIM3v32k1ox <= 0.0) + { + Denomi = vec4_SIMDTOVECTOR((0.25 * pParam->BSIM3v32moin) * Vtm); + T0 = vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32sqrtPhi); + } + else + { + Denomi = vec4_SIMDTOVECTOR(((pParam->BSIM3v32moin * Vtm) * pParam->BSIM3v32k1ox) * pParam->BSIM3v32k1ox); + T0 = vec4_SIMDTOVECTOR(pParam->BSIM3v32k1ox * pParam->BSIM3v32sqrtPhi); + } + + T1 = (2.0 * T0) + Vgsteff; + DeltaPhi = Vtm * vec4_log(1.0 + ((T1 * Vgsteff) / Denomi)); + dDeltaPhi_dVg = ((2.0 * Vtm) * (T1 - T0)) / (Denomi + (T1 * Vgsteff)); + dDeltaPhi_dVd = dDeltaPhi_dVg * dVgsteff_dVd; + dDeltaPhi_dVb = dDeltaPhi_dVg * dVgsteff_dVb; + T3 = 4.0 * ((Vth - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})) - pParam->BSIM3v32phi); + Tox += Tox; + if (1) + { + Vec4m condmask0 = T3 >= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, (Vgsteff + T3) / Tox, condmask_true0); + dT0_dVd = 
vec4_blend(dT0_dVd, (dVgsteff_dVd + (4.0 * dVth_dVd)) / Tox, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (dVgsteff_dVb + (4.0 * dVth_dVb)) / Tox, condmask_true0); + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (Vgsteff + T3) / Tox, condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, (Vgsteff + 1.0e-20) / Tox, condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, dVgsteff_dVd / Tox, condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, dVgsteff_dVb / Tox, condmask_false0); + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (Vgsteff + 1.0e-20) / Tox, condmask_false0); + + } + + } + } + + tmp = vec4_pow0p7(T0, 0.7); + T1 = 1.0 + tmp; + T2 = (0.7 * tmp) / (T0 * Tox); + Tcen = 1.9e-9 / T1; + dTcen_dVg = (((-1.9e-9) * T2) / T1) / T1; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dTcen_dVd = Tox * dTcen_dVg; + dTcen_dVb = dTcen_dVd * dT0_dVb; + dTcen_dVd *= dT0_dVd; + break; + + case BSIM3v32V32: + + default: + dTcen_dVd = dTcen_dVg * ((4.0 * dVth_dVd) + dVgsteff_dVd); + dTcen_dVb = dTcen_dVg * ((4.0 * dVth_dVb) + dVgsteff_dVb); + + } + + dTcen_dVg *= dVgsteff_dVg; + Ccen = EPSSI / Tcen; + T0 = Cox / (Cox + Ccen); + Coxeff = T0 * Ccen; + T1 = (-Ccen) / Tcen; + dCoxeff_dVg = (T0 * T0) * T1; + dCoxeff_dVd = dCoxeff_dVg * dTcen_dVd; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = (Vgsteff - DeltaPhi) / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec4_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + T4 = T3 * (1.0 - dDeltaPhi_dVg); + dVdseffCV_dVg = T4; + 
dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + VdseffCV = vec4_blend(VdseffCV, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec4_blend(dVdseffCV_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = vec4_blend(dVdseffCV_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = Vgsteff - DeltaPhi; + T2 = 12.0 * ((T1 - (0.5 * T0)) + 1.0e-20); + T3 = T0 / T2; + T4 = 1.0 - ((12.0 * T3) * T3); + T5 = AbulkCV * ((((6.0 * T0) * ((4.0 * T1) - T0)) / (T2 * T2)) - 0.5); + T6 = (T5 * VdseffCV) / AbulkCV; + qgate = (qinoi = CoxWLcen * (T1 - (T0 * (0.5 - T3)))); + QovCox = qgate / Coxeff; + Cgg1 = CoxWLcen * ((T4 * (1.0 - dDeltaPhi_dVg)) + (T5 * dVdseffCV_dVg)); + Cgd1 = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cgb1 = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cgg1 = (Cgg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + T7 = 1.0 - AbulkCV; + T8 = T2 * T2; + T9 = (((12.0 * T7) * T0) * T0) / (T8 * AbulkCV); + T10 = T9 * (1.0 - dDeltaPhi_dVg); + T11 = ((-T7) * T5) / AbulkCV; + T12 = -(((T9 * T1) / AbulkCV) + (VdseffCV * (0.5 - (T0 / T2)))); + qbulk = (CoxWLcen * T7) * ((0.5 * VdseffCV) - ((T0 * VdseffCV) / T2)); + QovCox = qbulk / Coxeff; + Cbg1 = CoxWLcen * (T10 + (T11 * dVdseffCV_dVg)); + Cbd1 = (((CoxWLcen * T11) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cbb1 = ((CoxWLcen * ((T11 * dVdseffCV_dVb) + (T12 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cbg1 = (Cbg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + if (model->BSIM3v32xpart > 0.5) + { + qsrc = (-CoxWLcen) * (((T1 / 2.0) + (T0 / 
4.0)) - (((0.5 * T0) * T0) / T2)); + QovCox = qsrc / Coxeff; + T2 += T2; + T3 = T2 * T2; + T7 = -(0.25 - (((12.0 * T0) * ((4.0 * T1) - T0)) / T3)); + T4 = (-(0.5 + (((24.0 * T0) * T0) / T3))) * (1.0 - dDeltaPhi_dVg); + T5 = T7 * AbulkCV; + T6 = T7 * VdseffCV; + Csg = CoxWLcen * (T4 + (T5 * dVdseffCV_dVg)); + Csd = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Csb = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T2 = T2 / 12.0; + T3 = (0.5 * CoxWLcen) / (T2 * T2); + T4 = (T1 * ((((2.0 * T0) * T0) / 3.0) + (T1 * (T1 - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T3) * T4; + QovCox = qsrc / Coxeff; + T8 = (((4.0 / 3.0) * T1) * (T1 - T0)) + ((0.4 * T0) * T0); + T5 = (((-2.0) * qsrc) / T2) - (T3 * ((T1 * ((3.0 * T1) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T6 = AbulkCV * ((qsrc / T2) + (T3 * T8)); + T7 = (T6 * VdseffCV) / AbulkCV; + Csg = (T5 * (1.0 - dDeltaPhi_dVg)) + (T6 * dVdseffCV_dVg); + Csd = ((Csg * dVgsteff_dVd) + (T6 * dVdseffCV_dVd)) + (QovCox * dCoxeff_dVd); + Csb = (((Csg * dVgsteff_dVb) + (T6 * dVdseffCV_dVb)) + (T7 * dAbulkCV_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + { + qsrc = (-0.5) * qgate; + Csg = (-0.5) * Cgg1; + Csd = (-0.5) * Cgd1; + Csb = (-0.5) * Cgb1; + } + + + qgate += (Qac0 + Qsub0) - qbulk; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + qbulk) + qsrc); + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + Cbd = Cbd1 - dQsub0_dVd; + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgg = Cgg1 - Cbg; + Cgd = Cgd1 - Cbd; + Cgb = Cgb1 - Cbb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val 
= -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + Vec4d val = -qinoi; + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + } + } + + + + } + + + finished: + { + heres[0]->BSIM3v32qgate = qgate[0]; + heres[1]->BSIM3v32qgate = qgate[1]; + heres[2]->BSIM3v32qgate = qgate[2]; + heres[3]->BSIM3v32qgate = qgate[3]; + } + + { + heres[0]->BSIM3v32qbulk = qbulk[0]; + heres[1]->BSIM3v32qbulk = qbulk[1]; + heres[2]->BSIM3v32qbulk = qbulk[2]; + heres[3]->BSIM3v32qbulk = qbulk[3]; + } + { + heres[0]->BSIM3v32qdrn = qdrn[0]; + heres[1]->BSIM3v32qdrn = qdrn[1]; + 
heres[2]->BSIM3v32qdrn = qdrn[2]; + heres[3]->BSIM3v32qdrn = qdrn[3]; + } + { + heres[0]->BSIM3v32cd = cdrain[0]; + heres[1]->BSIM3v32cd = cdrain[1]; + heres[2]->BSIM3v32cd = cdrain[2]; + heres[3]->BSIM3v32cd = cdrain[3]; + } + if (ChargeComputationNeeded) + { + Vec4d nstate_qbs = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs}); + Vec4d nstate_qbd = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd}); + if (model->BSIM3v32acmMod == 0) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbd = model->BSIM3v32unitAreaTempJctCap * ((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaTempJctCap * ((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbd = model->BSIM3v32unitAreaJctCap * ((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaJctCap * ((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}); + + } + + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) < pParam->BSIM3v32weff; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + czbdsw = vec4_blend(czbdsw, vec4_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdswg = vec4_blend(czbdswg, 
model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdswg = vec4_blend(czbdswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdsw = vec4_blend(czbdsw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec4_blend(czbdswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdsw = vec4_blend(czbdsw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec4_blend(czbdswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) < pParam->BSIM3v32weff; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + czbssw = vec4_blend(czbssw, vec4_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbsswg = vec4_blend(czbsswg, 
model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbsswg = vec4_blend(czbsswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbssw = vec4_blend(czbssw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec4_blend(czbsswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbssw = vec4_blend(czbssw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec4_blend(czbsswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + } + else + { + error = vec4_BSIM3v32_ACM_junctionCapacitances(model, heres, &czbd, &czbdsw, &czbdswg, &czbs, &czbssw, &czbsswg); + if (SIMDANY(error)) + return error; + + } + + MJ = model->BSIM3v32bulkJctBotGradingCoeff; + MJSW = model->BSIM3v32bulkJctSideGradingCoeff; + MJSWG = model->BSIM3v32bulkJctGateSideGradingCoeff; + if (1) + { + Vec4m condmask0 = vbs == 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + nstate_qbs = 
vec4_blend(nstate_qbs, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = (czbs + czbssw) + czbsswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + if (1) + { + Vec4m condmask1 = vbs < 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec4m condmask2 = czbs > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + Vec4m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJ(arg, -MJ), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, ((model->BSIM3v32PhiB * czbs) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec4d val = czbs * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + { + nstate_qbs = vec4_blend(nstate_qbs, vec4_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbs = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbs = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbs = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbs = 0.0; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbssw > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSW(arg, 
-MJSW), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSW * czbssw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec4d val = czbssw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbsswg > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSWG(arg, -MJSWG), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSWG * czbsswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec4d val = czbsswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + } + } + } + + } + { + T0 = vec4_blend(T0, (czbs + czbssw) + czbsswg, condmask_false1); + T1 = vec4_blend(T1, vbs * ((((czbs * MJ) / model->BSIM3v32PhiB) + ((czbssw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbsswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbs = vec4_blend(nstate_qbs, vbs * (T0 + (0.5 * T1)), condmask_false1); + { + Vec4d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, 
heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs}, nstate_qbs); + if (1) + { + Vec4m condmask0 = vbd == 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + nstate_qbd = vec4_blend(nstate_qbd, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = (czbd + czbdsw) + czbdswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + if (1) + { + Vec4m condmask1 = vbd < 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec4m condmask2 = czbd > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + Vec4m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJ(arg, -MJ), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, ((model->BSIM3v32PhiB * czbd) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec4d val = czbd * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + { + nstate_qbd = vec4_blend(nstate_qbd, vec4_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbd = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbd = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbd = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbd = 0.0; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbdsw > 0.0; + Vec4m condmask_true2 = condmask_true1 & 
condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSW(arg, -MJSW), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSW * czbdsw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec4d val = czbdsw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbdswg > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSWG(arg, -MJSWG), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSWG * czbdswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec4d val = czbdswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + } + } + } + + } + { + T0 = vec4_blend(T0, (czbd + czbdsw) + czbdswg, condmask_false1); + T1 = vec4_blend(T1, vbd * ((((czbd * MJ) / model->BSIM3v32PhiB) + ((czbdsw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbdswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbd = vec4_blend(nstate_qbd, vbd * (T0 + (0.5 * T1)), condmask_false1); + { + Vec4d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if 
(condmask_false1[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd}, nstate_qbd); + } + + if ((heres[0]->BSIM3v32off == 0) || (!(ckt->CKTmode & MODEINITFIX))) + { + Vec4m nonconcount; + nonconcount = Check; + ckt->CKTnoncon += vec4_SIMDCOUNT(nonconcount); + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vbs, heres[1]->BSIM3v32vbs, heres[2]->BSIM3v32vbs, heres[3]->BSIM3v32vbs}, vbs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vbd, heres[1]->BSIM3v32vbd, heres[2]->BSIM3v32vbd, heres[3]->BSIM3v32vbd}, vbd); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vgs, heres[1]->BSIM3v32vgs, heres[2]->BSIM3v32vgs, heres[3]->BSIM3v32vgs}, vgs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vds, heres[1]->BSIM3v32vds, heres[2]->BSIM3v32vds, heres[3]->BSIM3v32vds}, vds); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qdef, heres[1]->BSIM3v32qdef, heres[2]->BSIM3v32qdef, heres[3]->BSIM3v32qdef}, qdef); + if (!ChargeComputationNeeded) + goto line850; + + line755: + if (heres[0]->BSIM3v32nqsMod) + { + qcheq = -(qbulk + qgate); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})); + heres[0]->BSIM3v32cqgb = val[0]; + heres[1]->BSIM3v32cqgb = val[1]; + heres[2]->BSIM3v32cqgb = val[2]; + heres[3]->BSIM3v32cqgb = val[3]; + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})); + 
heres[0]->BSIM3v32cqdb = val[0]; + heres[1]->BSIM3v32cqdb = val[1]; + heres[2]->BSIM3v32cqdb = val[2]; + heres[3]->BSIM3v32cqdb = val[3]; + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})); + heres[0]->BSIM3v32cqsb = val[0]; + heres[1]->BSIM3v32cqsb = val[1]; + heres[2]->BSIM3v32cqsb = val[2]; + heres[3]->BSIM3v32cqsb = val[3]; + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) + ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb})) + ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb})); + heres[0]->BSIM3v32cqbb = val[0]; + heres[1]->BSIM3v32cqbb = val[1]; + heres[2]->BSIM3v32cqbb = val[2]; + heres[3]->BSIM3v32cqbb = val[3]; + } + gtau_drift = vec4_fabs(((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qcheq) * ScalingFactor; + T0 = vec4_SIMDTOVECTOR(pParam->BSIM3v32leffCV * pParam->BSIM3v32leffCV); + gtau_diff = (((16.0 * ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / T0) * ScalingFactor; + { + Vec4d val = gtau_drift + gtau_diff; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + } + } + + + if (model->BSIM3v32capMod == 0) + { + cgdo = vec4_SIMDTOVECTOR(pParam->BSIM3v32cgdo); + qgdo = pParam->BSIM3v32cgdo * vgd; + cgso = vec4_SIMDTOVECTOR(pParam->BSIM3v32cgso); + qgso = pParam->BSIM3v32cgso * vgs; + } + else + if (model->BSIM3v32capMod == 1) + { + if (1) + { + Vec4m condmask0 = vgd < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m 
condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_sqrt(1.0 - ((4.0 * vgd) / pParam->BSIM3v32ckappa)), condmask_true0); + cgdo = vec4_blend(cgdo, pParam->BSIM3v32cgdo + ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) / T1), condmask_true0); + qgdo = vec4_blend(qgdo, (pParam->BSIM3v32cgdo * vgd) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgdl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgdo = vec4_blend(cgdo, vec4_SIMDTOVECTOR(pParam->BSIM3v32cgdo + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl)), condmask_false0); + qgdo = vec4_blend(qgdo, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) + pParam->BSIM3v32cgdo) * vgd, condmask_false0); + } + } + + if (1) + { + Vec4m condmask0 = vgs < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_sqrt(1.0 - ((4.0 * vgs) / pParam->BSIM3v32ckappa)), condmask_true0); + cgso = vec4_blend(cgso, pParam->BSIM3v32cgso + ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) / T1), condmask_true0); + qgso = vec4_blend(qgso, (pParam->BSIM3v32cgso * vgs) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgsl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgso = vec4_blend(cgso, vec4_SIMDTOVECTOR(pParam->BSIM3v32cgso + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl)), condmask_false0); + qgso = vec4_blend(qgso, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) + pParam->BSIM3v32cgso) * vgs, condmask_false0); + } + } + + } + else + { + T0 = vgd + DELTA_1; + T1 = vec4_sqrt((T0 * T0) + (4.0 * DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl); + T4 = vec4_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgdo = (pParam->BSIM3v32cgdo + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgdo = ((pParam->BSIM3v32cgdo + T3) * vgd) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + T0 = vgs + DELTA_1; + T1 = vec4_sqrt((T0 * T0) + (4.0 * 
DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl); + T4 = vec4_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgso = (pParam->BSIM3v32cgso + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgso = ((pParam->BSIM3v32cgso + T3) * vgs) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + } + + + { + heres[0]->BSIM3v32cgdo = cgdo[0]; + heres[1]->BSIM3v32cgdo = cgdo[1]; + heres[2]->BSIM3v32cgdo = cgdo[2]; + heres[3]->BSIM3v32cgdo = cgdo[3]; + } + { + heres[0]->BSIM3v32cgso = cgso[0]; + heres[1]->BSIM3v32cgso = cgso[1]; + heres[2]->BSIM3v32cgso = cgso[2]; + heres[3]->BSIM3v32cgso = cgso[3]; + } + ag0 = ckt->CKTag[0]; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + ggtg = (ggtd = (ggtb = (ggts = vec4_SIMDTOVECTOR(0.0)))); + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec4_blend(gcggb, (((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + cgdo) + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec4_blend(gcgdb, (((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) - cgdo) * ag0, condmask_true0); + gcgsb = vec4_blend(gcgsb, (((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) - cgso) * ag0, condmask_true0); + gcdgb = vec4_blend(gcdgb, (((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) - cgdo) * ag0, condmask_true0); + gcddb = vec4_blend(gcddb, ((((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}) + ((Vec4d ){heres[0]->BSIM3v32capbd, 
heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) + cgdo) * ag0, condmask_true0); + gcdsb = vec4_blend(gcdsb, ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}) * ag0, condmask_true0); + gcsgb = vec4_blend(gcsgb, (-(((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + cgso)) * ag0, condmask_true0); + gcsdb = vec4_blend(gcsdb, (-((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))) * ag0, condmask_true0); + gcssb = vec4_blend(gcssb, ((((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) - ((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))) * ag0, condmask_true0); + gcbgb = vec4_blend(gcbgb, (((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcbdb = vec4_blend(gcbdb, (((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb}) - ((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, 
condmask_true0); + gcbsb = vec4_blend(gcbsb, (((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb}) - ((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_true0); + qgd = vec4_blend(qgd, qgdo, condmask_true0); + qgs = vec4_blend(qgs, qgso, condmask_true0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec4_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_true0); + qbulk = vec4_blend(qbulk, qbulk - qgb, condmask_true0); + qdrn = vec4_blend(qdrn, qdrn - qgd, condmask_true0); + qsrc = vec4_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.6), condmask_true0); + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true0); + } + else + { + if (1) + { + Vec4m condmask1 = qcheq > 0.0; + Vec4m condmask_true1 = condmask_true0 & condmask1; + Vec4m condmask_false1 = condmask_true0 & (~condmask1); + T0 = vec4_blend(T0, (((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec4_blend(T0, ((-((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec4_blend(ggtg, T0 * ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + } + ggtd = vec4_blend(ggtd, T0 * ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}), condmask_true0); + { + if (condmask_true0[0]) + 
heres[0]->BSIM3v32gtd = ggtd[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtd = ggtd[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtd = ggtd[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtd = ggtd[3]; + + } + ggts = vec4_blend(ggts, T0 * ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gts = ggts[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gts = ggts[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gts = ggts[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gts = ggts[3]; + + } + ggtb = vec4_blend(ggtb, T0 * ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtb = ggtb[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + } + gqdef = vec4_blend(gqdef, vec4_SIMDTOVECTOR(ScalingFactor * ag0), condmask_true0); + gcqgb = vec4_blend(gcqgb, ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) * ag0, condmask_true0); + gcqdb = vec4_blend(gcqdb, ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}) * ag0, condmask_true0); + gcqsb = vec4_blend(gcqsb, ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}) * ag0, condmask_true0); + gcqbb = vec4_blend(gcqbb, ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}) * ag0, condmask_true0); + gcggb = vec4_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec4_blend(gcgdb, (-cgdo) * ag0, condmask_true0); + gcgsb = vec4_blend(gcgsb, (-cgso) * ag0, condmask_true0); + gcdgb = 
vec4_blend(gcdgb, (-cgdo) * ag0, condmask_true0); + gcddb = vec4_blend(gcddb, (((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd}) + cgdo) * ag0, condmask_true0); + gcdsb = vec4_blend(gcdsb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gcsgb = vec4_blend(gcsgb, (-cgso) * ag0, condmask_true0); + gcsdb = vec4_blend(gcsdb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gcssb = vec4_blend(gcssb, (((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) * ag0, condmask_true0); + gcbgb = vec4_blend(gcbgb, vec4_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * ag0), condmask_true0); + gcbdb = vec4_blend(gcbdb, (-((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_true0); + gcbsb = vec4_blend(gcbsb, (-((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_true0); + if (1) + { + Vec4m condmask1 = vec4_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec4m condmask_true1 = condmask_true0 & condmask1; + Vec4m condmask_false1 = condmask_true0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + dxpart = vec4_blend(dxpart, qdrn / qcheq, condmask_false1); + Cdd = vec4_blend(Cdd, (Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}, condmask_false1); + Csd = vec4_blend(Csd, -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + ((Vec4d 
){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})), condmask_false1); + ddxpart_dVd = vec4_blend(ddxpart_dVd, (Cdd - (dxpart * (Cdd + Csd))) / qcheq, condmask_false1); + Cdg = vec4_blend(Cdg, (Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}, condmask_false1); + Csg = vec4_blend(Csg, -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})), condmask_false1); + ddxpart_dVg = vec4_blend(ddxpart_dVg, (Cdg - (dxpart * (Cdg + Csg))) / qcheq, condmask_false1); + Cds = vec4_blend(Cds, (Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}, condmask_false1); + Css = vec4_blend(Css, -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})), condmask_false1); + ddxpart_dVs = vec4_blend(ddxpart_dVs, (Cds - (dxpart * (Cds + Css))) / qcheq, condmask_false1); + ddxpart_dVb = vec4_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false1); + } + } + + sxpart = vec4_blend(sxpart, 1.0 - dxpart, condmask_true0); + dsxpart_dVd = vec4_blend(dsxpart_dVd, -ddxpart_dVd, condmask_true0); + dsxpart_dVg = vec4_blend(dsxpart_dVg, -ddxpart_dVg, condmask_true0); + dsxpart_dVs = vec4_blend(dsxpart_dVs, -ddxpart_dVs, condmask_true0); + dsxpart_dVb = vec4_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_true0); + qgd = vec4_blend(qgd, qgdo, condmask_true0); + qgs 
= vec4_blend(qgs, qgso, condmask_true0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec4_blend(qgate, (qgd + qgs) + qgb, condmask_true0); + qbulk = vec4_blend(qbulk, -qgb, condmask_true0); + qdrn = vec4_blend(qdrn, -qgd, condmask_true0); + qsrc = vec4_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + } + + } + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec4_blend(gcggb, (((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + cgdo) + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec4_blend(gcgdb, (((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) - cgdo) * ag0, condmask_false0); + gcgsb = vec4_blend(gcgsb, (((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) - cgso) * ag0, condmask_false0); + gcdgb = vec4_blend(gcdgb, (-(((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + cgdo)) * ag0, condmask_false0); + gcddb = vec4_blend(gcddb, ((((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd}) + cgdo) - ((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))) * ag0, condmask_false0); + gcdsb = vec4_blend(gcdsb, (-((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d 
){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))) * ag0, condmask_false0); + gcsgb = vec4_blend(gcsgb, (((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) - cgso) * ag0, condmask_false0); + gcsdb = vec4_blend(gcsdb, ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}) * ag0, condmask_false0); + gcssb = vec4_blend(gcssb, ((((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}) + ((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) + cgso) * ag0, condmask_false0); + gcbgb = vec4_blend(gcbgb, (((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcbdb = vec4_blend(gcbdb, (((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb}) - ((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_false0); + gcbsb = vec4_blend(gcbsb, (((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb}) - ((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_false0); + qgd = vec4_blend(qgd, qgdo, condmask_false0); + qgs = vec4_blend(qgs, qgso, condmask_false0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec4_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_false0); + qbulk = vec4_blend(qbulk, qbulk - qgb, condmask_false0); + qsrc = vec4_blend(qsrc, qdrn - qgs, condmask_false0); + qdrn = vec4_blend(qdrn, -((qgate + qbulk) + qsrc), 
condmask_false0); + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.4), condmask_false0); + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.6), condmask_false0); + } + else + { + if (1) + { + Vec4m condmask1 = qcheq > 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + T0 = vec4_blend(T0, (((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec4_blend(T0, ((-((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec4_blend(ggtg, T0 * ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + } + ggts = vec4_blend(ggts, T0 * ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtd = ggts[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtd = ggts[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtd = ggts[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtd = ggts[3]; + + } + ggtd = vec4_blend(ggtd, T0 * ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gts = ggtd[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gts = ggtd[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gts = ggtd[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gts = ggtd[3]; + + } + ggtb = vec4_blend(ggtb, T0 * ((Vec4d 
){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtb = ggtb[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + } + gqdef = vec4_blend(gqdef, vec4_SIMDTOVECTOR(ScalingFactor * ag0), condmask_false0); + gcqgb = vec4_blend(gcqgb, ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) * ag0, condmask_false0); + gcqdb = vec4_blend(gcqdb, ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}) * ag0, condmask_false0); + gcqsb = vec4_blend(gcqsb, ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}) * ag0, condmask_false0); + gcqbb = vec4_blend(gcqbb, ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}) * ag0, condmask_false0); + gcggb = vec4_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec4_blend(gcgdb, (-cgdo) * ag0, condmask_false0); + gcgsb = vec4_blend(gcgsb, (-cgso) * ag0, condmask_false0); + gcdgb = vec4_blend(gcdgb, (-cgdo) * ag0, condmask_false0); + gcddb = vec4_blend(gcddb, (((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd}) + cgdo) * ag0, condmask_false0); + gcdsb = vec4_blend(gcdsb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gcsgb = vec4_blend(gcsgb, (-cgso) * ag0, condmask_false0); + gcsdb = vec4_blend(gcsdb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gcssb = vec4_blend(gcssb, (((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) * ag0, condmask_false0); + gcbgb = vec4_blend(gcbgb, vec4_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * 
ag0), condmask_false0); + gcbdb = vec4_blend(gcbdb, (-((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_false0); + gcbsb = vec4_blend(gcbsb, (-((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_false0); + if (1) + { + Vec4m condmask1 = vec4_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + sxpart = vec4_blend(sxpart, qdrn / qcheq, condmask_false1); + Css = vec4_blend(Css, (Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}, condmask_false1); + Cds = vec4_blend(Cds, -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})), condmask_false1); + dsxpart_dVs = vec4_blend(dsxpart_dVs, (Css - (sxpart * (Css + Cds))) / qcheq, condmask_false1); + Csg = vec4_blend(Csg, (Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}, condmask_false1); + Cdg = vec4_blend(Cdg, -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + ((Vec4d ){heres[0]->BSIM3v32cbgb, 
heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})), condmask_false1); + dsxpart_dVg = vec4_blend(dsxpart_dVg, (Csg - (sxpart * (Csg + Cdg))) / qcheq, condmask_false1); + Csd = vec4_blend(Csd, (Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}, condmask_false1); + Cdd = vec4_blend(Cdd, -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})), condmask_false1); + dsxpart_dVd = vec4_blend(dsxpart_dVd, (Csd - (sxpart * (Csd + Cdd))) / qcheq, condmask_false1); + dsxpart_dVb = vec4_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_false1); + } + } + + dxpart = vec4_blend(dxpart, 1.0 - sxpart, condmask_false0); + ddxpart_dVd = vec4_blend(ddxpart_dVd, -dsxpart_dVd, condmask_false0); + ddxpart_dVg = vec4_blend(ddxpart_dVg, -dsxpart_dVg, condmask_false0); + ddxpart_dVs = vec4_blend(ddxpart_dVs, -dsxpart_dVs, condmask_false0); + ddxpart_dVb = vec4_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false0); + qgd = vec4_blend(qgd, qgdo, condmask_false0); + qgs = vec4_blend(qgs, qgso, condmask_false0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec4_blend(qgate, (qgd + qgs) + qgb, condmask_false0); + qbulk = vec4_blend(qbulk, -qgb, condmask_false0); + qsrc = vec4_blend(qsrc, -qgs, condmask_false0); + qdrn = vec4_blend(qdrn, -((qgate + qbulk) + qsrc), condmask_false0); + } + + } + } + + cqdef = (cqcheq = vec4_SIMDTOVECTOR(0.0)); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}, qgate); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qd, 
heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}, qdrn - vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd})); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}, (qbulk + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd})) + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}, qdef * ScalingFactor); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}, qcheq); + } + + if (ckt->CKTmode & MODEINITSMSIG) + { + goto line1000; + } + + if (!ChargeComputationNeeded) + goto line850; + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, 
heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump})); + } + + } + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}); + if (SIMDANY(error)) + return error; + + if (heres[0]->BSIM3v32nqsMod) + { + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}); + if (SIMDANY(error)) + return error; + + } + + goto line860; + line850: + ceqqg = (ceqqb = (ceqqd = vec4_SIMDTOVECTOR(0.0))); + + cqcheq = (cqdef = vec4_SIMDTOVECTOR(0.0)); + gcdgb = (gcddb = (gcdsb = vec4_SIMDTOVECTOR(0.0))); + gcsgb = (gcsdb = (gcssb = vec4_SIMDTOVECTOR(0.0))); + gcggb = (gcgdb = (gcgsb = 
vec4_SIMDTOVECTOR(0.0))); + gcbgb = (gcbdb = (gcbsb = vec4_SIMDTOVECTOR(0.0))); + gqdef = (gcqgb = (gcqdb = (gcqsb = (gcqbb = vec4_SIMDTOVECTOR(0.0))))); + ggtg = (ggtd = (ggtb = (ggts = vec4_SIMDTOVECTOR(0.0)))); + dxpart = vec4_SIMDTOVECTOR(0.6); + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true0); + } + + sxpart = 1.0 - dxpart; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + if (heres[0]->BSIM3v32nqsMod) + { + Vec4d val = ((((16.0 * ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / pParam->BSIM3v32leffCV) / pParam->BSIM3v32leffCV) * ScalingFactor; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + } + else + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + } + + goto line900; + line860: + cqgate = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg}); + + cqbulk = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb}); + cqdrn = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd}); + ceqqg = ((cqgate - (gcggb * vgb)) + (gcgdb * vbd)) + (gcgsb * vbs); + ceqqb = ((cqbulk - (gcbgb * vgb)) + (gcbdb * vbd)) + (gcbsb * vbs); + ceqqd = ((cqdrn - (gcdgb * vgb)) + (gcddb * vbd)) + (gcdsb * vbs); + if (heres[0]->BSIM3v32nqsMod) + { + T0 = ((ggtg * vgb) - (ggtd * vbd)) - (ggts * vbs); + ceqqg += T0; + T1 = qdef * ((Vec4d 
){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + ceqqd -= (dxpart * T0) + (T1 * (((ddxpart_dVg * vgb) - (ddxpart_dVd * vbd)) - (ddxpart_dVs * vbs))); + cqdef = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump}) - (gqdef * qdef); + cqcheq = (vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq}) - (((gcqgb * vgb) - (gcqdb * vbd)) - (gcqsb * vbs))) + T0; + } + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, 
heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump})); + } + + } + + line900: + ; + + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Gm = vec4_blend(Gm, (Vec4d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm}, condmask_true0); + Gmbs = vec4_blend(Gmbs, (Vec4d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs}, condmask_true0); + FwdSum = vec4_blend(FwdSum, Gm + Gmbs, condmask_true0); + RevSum = vec4_blend(RevSum, vec4_SIMDTOVECTOR(0.0), condmask_true0); + cdreq = vec4_blend(cdreq, model->BSIM3v32type * (((cdrain - (((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) * vds)) - (Gm * vgs)) - (Gmbs * vbs)), condmask_true0); + ceqbd = vec4_blend(ceqbd, (-model->BSIM3v32type) * (((((Vec4d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub}) - (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) * vds)) - (((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}) * vgs)) - (((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}) * vbs)), condmask_true0); + ceqbs = vec4_blend(ceqbs, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbbdp = vec4_blend(gbbdp, -((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}), condmask_true0); + gbbsp = vec4_blend(gbbsp, (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) + ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, 
heres[3]->BSIM3v32gbgs})) + ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}), condmask_true0); + gbdpg = vec4_blend(gbdpg, (Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}, condmask_true0); + gbdpdp = vec4_blend(gbdpdp, (Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}, condmask_true0); + gbdpb = vec4_blend(gbdpb, (Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}, condmask_true0); + gbdpsp = vec4_blend(gbdpsp, -((gbdpg + gbdpdp) + gbdpb), condmask_true0); + gbspg = vec4_blend(gbspg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspdp = vec4_blend(gbspdp, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspb = vec4_blend(gbspb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspsp = vec4_blend(gbspsp, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + { + Gm = vec4_blend(Gm, -((Vec4d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm}), condmask_false0); + Gmbs = vec4_blend(Gmbs, -((Vec4d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs}), condmask_false0); + FwdSum = vec4_blend(FwdSum, vec4_SIMDTOVECTOR(0.0), condmask_false0); + RevSum = vec4_blend(RevSum, -(Gm + Gmbs), condmask_false0); + cdreq = vec4_blend(cdreq, (-model->BSIM3v32type) * (((cdrain + (((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) * vds)) + (Gm * vgd)) + (Gmbs * vbd)), condmask_false0); + ceqbs = vec4_blend(ceqbs, (-model->BSIM3v32type) * (((((Vec4d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub}) + (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) * vds)) - (((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, 
heres[3]->BSIM3v32gbgs}) * vgd)) - (((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}) * vbd)), condmask_false0); + ceqbd = vec4_blend(ceqbd, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbbsp = vec4_blend(gbbsp, -((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}), condmask_false0); + gbbdp = vec4_blend(gbbdp, (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) + ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs})) + ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}), condmask_false0); + gbdpg = vec4_blend(gbdpg, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpsp = vec4_blend(gbdpsp, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpb = vec4_blend(gbdpb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpdp = vec4_blend(gbdpdp, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbspg = vec4_blend(gbspg, (Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}, condmask_false0); + gbspsp = vec4_blend(gbspsp, (Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}, condmask_false0); + gbspb = vec4_blend(gbspb, (Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}, condmask_false0); + gbspdp = vec4_blend(gbspdp, -((gbspg + gbspsp) + gbspb), condmask_false0); + } + } + + if (model->BSIM3v32type > 0) + { + ceqbs += ((Vec4d ){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs}) - (((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs); + ceqbd += ((Vec4d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, heres[3]->BSIM3v32cbd}) - (((Vec4d 
){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd); + } + else + { + ceqbs -= ((Vec4d ){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs}) - (((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs); + ceqbd -= ((Vec4d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, heres[3]->BSIM3v32cbd}) - (((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd); + ceqqg = -ceqqg; + ceqqb = -ceqqb; + ceqqd = -ceqqd; + cqdef = -cqdef; + cqcheq = -cqcheq; + } + + m = (Vec4d ){heres[0]->BSIM3v32m, heres[1]->BSIM3v32m, heres[2]->BSIM3v32m, heres[3]->BSIM3v32m}; + vec4_BSIM3v32_StateSub(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32gNode, heres[1]->BSIM3v32gNode, heres[2]->BSIM3v32gNode, heres[3]->BSIM3v32gNode}, m * ceqqg); + vec4_BSIM3v32_StateSub(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32bNode, heres[1]->BSIM3v32bNode, heres[2]->BSIM3v32bNode, heres[3]->BSIM3v32bNode}, m * ((ceqbs + ceqbd) + ceqqb)); + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32dNodePrime, heres[1]->BSIM3v32dNodePrime, heres[2]->BSIM3v32dNodePrime, heres[3]->BSIM3v32dNodePrime}, m * ((ceqbd - cdreq) - ceqqd)); + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32sNodePrime, heres[1]->BSIM3v32sNodePrime, heres[2]->BSIM3v32sNodePrime, heres[3]->BSIM3v32sNodePrime}, m * ((((cdreq + ceqbs) + ceqqg) + ceqqb) + ceqqd)); + if (heres[0]->BSIM3v32nqsMod) + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32qNode, heres[1]->BSIM3v32qNode, heres[2]->BSIM3v32qNode, heres[3]->BSIM3v32qNode}, m * (cqcheq - cqdef)); + + T1 = qdef * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, 
heres[3]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DdPtr += val[0]; + *heres[1]->BSIM3v32DdPtr += val[1]; + *heres[2]->BSIM3v32DdPtr += val[2]; + *heres[3]->BSIM3v32DdPtr += val[3]; + } + { + Vec4d val = m * (gcggb - ggtg); + *heres[0]->BSIM3v32GgPtr += val[0]; + *heres[1]->BSIM3v32GgPtr += val[1]; + *heres[2]->BSIM3v32GgPtr += val[2]; + *heres[3]->BSIM3v32GgPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SsPtr += val[0]; + *heres[1]->BSIM3v32SsPtr += val[1]; + *heres[2]->BSIM3v32SsPtr += val[2]; + *heres[3]->BSIM3v32SsPtr += val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) + ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) - gcbgb) - gcbdb) - gcbsb) - ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs})); + *heres[0]->BSIM3v32BbPtr += val[0]; + *heres[1]->BSIM3v32BbPtr += val[1]; + *heres[2]->BSIM3v32BbPtr += val[2]; + *heres[3]->BSIM3v32BbPtr += val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}) + ((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds})) + ((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd})) + RevSum) + gcddb) + (dxpart * ggtd)) + (T1 * ddxpart_dVd)) + gbdpdp); + *heres[0]->BSIM3v32DPdpPtr += val[0]; + *heres[1]->BSIM3v32DPdpPtr += val[1]; + *heres[2]->BSIM3v32DPdpPtr += val[2]; + *heres[3]->BSIM3v32DPdpPtr += val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, 
heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}) + ((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds})) + ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) + FwdSum) + gcssb) + (sxpart * ggts)) + (T1 * dsxpart_dVs)) + gbspsp); + *heres[0]->BSIM3v32SPspPtr += val[0]; + *heres[1]->BSIM3v32SPspPtr += val[1]; + *heres[2]->BSIM3v32SPspPtr += val[2]; + *heres[3]->BSIM3v32SPspPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DdpPtr -= val[0]; + *heres[1]->BSIM3v32DdpPtr -= val[1]; + *heres[2]->BSIM3v32DdpPtr -= val[2]; + *heres[3]->BSIM3v32DdpPtr -= val[3]; + } + { + Vec4d val = m * (((gcggb + gcgdb) + gcgsb) + ggtb); + *heres[0]->BSIM3v32GbPtr -= val[0]; + *heres[1]->BSIM3v32GbPtr -= val[1]; + *heres[2]->BSIM3v32GbPtr -= val[2]; + *heres[3]->BSIM3v32GbPtr -= val[3]; + } + { + Vec4d val = m * (gcgdb - ggtd); + *heres[0]->BSIM3v32GdpPtr += val[0]; + *heres[1]->BSIM3v32GdpPtr += val[1]; + *heres[2]->BSIM3v32GdpPtr += val[2]; + *heres[3]->BSIM3v32GdpPtr += val[3]; + } + { + Vec4d val = m * (gcgsb - ggts); + *heres[0]->BSIM3v32GspPtr += val[0]; + *heres[1]->BSIM3v32GspPtr += val[1]; + *heres[2]->BSIM3v32GspPtr += val[2]; + *heres[3]->BSIM3v32GspPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SspPtr -= val[0]; + *heres[1]->BSIM3v32SspPtr -= val[1]; + *heres[2]->BSIM3v32SspPtr -= val[2]; + *heres[3]->BSIM3v32SspPtr -= val[3]; + } + { + Vec4d val = m * (gcbgb - ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs})); + *heres[0]->BSIM3v32BgPtr += val[0]; + 
*heres[1]->BSIM3v32BgPtr += val[1]; + *heres[2]->BSIM3v32BgPtr += val[2]; + *heres[3]->BSIM3v32BgPtr += val[3]; + } + { + Vec4d val = m * ((gcbdb - ((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd})) + gbbdp); + *heres[0]->BSIM3v32BdpPtr += val[0]; + *heres[1]->BSIM3v32BdpPtr += val[1]; + *heres[2]->BSIM3v32BdpPtr += val[2]; + *heres[3]->BSIM3v32BdpPtr += val[3]; + } + { + Vec4d val = m * ((gcbsb - ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) + gbbsp); + *heres[0]->BSIM3v32BspPtr += val[0]; + *heres[1]->BSIM3v32BspPtr += val[1]; + *heres[2]->BSIM3v32BspPtr += val[2]; + *heres[3]->BSIM3v32BspPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DPdPtr -= val[0]; + *heres[1]->BSIM3v32DPdPtr -= val[1]; + *heres[2]->BSIM3v32DPdPtr -= val[2]; + *heres[3]->BSIM3v32DPdPtr -= val[3]; + } + { + Vec4d val = m * ((((Gm + gcdgb) + (dxpart * ggtg)) + (T1 * ddxpart_dVg)) + gbdpg); + *heres[0]->BSIM3v32DPgPtr += val[0]; + *heres[1]->BSIM3v32DPgPtr += val[1]; + *heres[2]->BSIM3v32DPgPtr += val[2]; + *heres[3]->BSIM3v32DPgPtr += val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) - Gmbs) + gcdgb) + gcddb) + gcdsb) - (dxpart * ggtb)) - (T1 * ddxpart_dVb)) - gbdpb); + *heres[0]->BSIM3v32DPbPtr -= val[0]; + *heres[1]->BSIM3v32DPbPtr -= val[1]; + *heres[2]->BSIM3v32DPbPtr -= val[2]; + *heres[3]->BSIM3v32DPbPtr -= val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) + FwdSum) - gcdsb) - (dxpart * ggts)) - (T1 * ddxpart_dVs)) - gbdpsp); + *heres[0]->BSIM3v32DPspPtr -= val[0]; + *heres[1]->BSIM3v32DPspPtr -= val[1]; + 
*heres[2]->BSIM3v32DPspPtr -= val[2]; + *heres[3]->BSIM3v32DPspPtr -= val[3]; + } + { + Vec4d val = m * ((((gcsgb - Gm) + (sxpart * ggtg)) + (T1 * dsxpart_dVg)) + gbspg); + *heres[0]->BSIM3v32SPgPtr += val[0]; + *heres[1]->BSIM3v32SPgPtr += val[1]; + *heres[2]->BSIM3v32SPgPtr += val[2]; + *heres[3]->BSIM3v32SPgPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SPsPtr -= val[0]; + *heres[1]->BSIM3v32SPsPtr -= val[1]; + *heres[2]->BSIM3v32SPsPtr -= val[2]; + *heres[3]->BSIM3v32SPsPtr -= val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) + Gmbs) + gcsgb) + gcsdb) + gcssb) - (sxpart * ggtb)) - (T1 * dsxpart_dVb)) - gbspb); + *heres[0]->BSIM3v32SPbPtr -= val[0]; + *heres[1]->BSIM3v32SPbPtr -= val[1]; + *heres[2]->BSIM3v32SPbPtr -= val[2]; + *heres[3]->BSIM3v32SPbPtr -= val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) + RevSum) - gcsdb) - (sxpart * ggtd)) - (T1 * dsxpart_dVd)) - gbspdp); + *heres[0]->BSIM3v32SPdpPtr -= val[0]; + *heres[1]->BSIM3v32SPdpPtr -= val[1]; + *heres[2]->BSIM3v32SPdpPtr -= val[2]; + *heres[3]->BSIM3v32SPdpPtr -= val[3]; + } + if (heres[0]->BSIM3v32nqsMod) + { + { + Vec4d val = m * (gqdef + ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32QqPtr += val[0]; + *heres[1]->BSIM3v32QqPtr += val[1]; + *heres[2]->BSIM3v32QqPtr += val[2]; + *heres[3]->BSIM3v32QqPtr += val[3]; + } + { + Vec4d val = m * (dxpart * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32DPqPtr += val[0]; + *heres[1]->BSIM3v32DPqPtr += val[1]; + *heres[2]->BSIM3v32DPqPtr += 
val[2]; + *heres[3]->BSIM3v32DPqPtr += val[3]; + } + { + Vec4d val = m * (sxpart * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32SPqPtr += val[0]; + *heres[1]->BSIM3v32SPqPtr += val[1]; + *heres[2]->BSIM3v32SPqPtr += val[2]; + *heres[3]->BSIM3v32SPqPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + *heres[0]->BSIM3v32GqPtr -= val[0]; + *heres[1]->BSIM3v32GqPtr -= val[1]; + *heres[2]->BSIM3v32GqPtr -= val[2]; + *heres[3]->BSIM3v32GqPtr -= val[3]; + } + { + Vec4d val = m * (ggtg - gcqgb); + *heres[0]->BSIM3v32QgPtr += val[0]; + *heres[1]->BSIM3v32QgPtr += val[1]; + *heres[2]->BSIM3v32QgPtr += val[2]; + *heres[3]->BSIM3v32QgPtr += val[3]; + } + { + Vec4d val = m * (ggtd - gcqdb); + *heres[0]->BSIM3v32QdpPtr += val[0]; + *heres[1]->BSIM3v32QdpPtr += val[1]; + *heres[2]->BSIM3v32QdpPtr += val[2]; + *heres[3]->BSIM3v32QdpPtr += val[3]; + } + { + Vec4d val = m * (ggts - gcqsb); + *heres[0]->BSIM3v32QspPtr += val[0]; + *heres[1]->BSIM3v32QspPtr += val[1]; + *heres[2]->BSIM3v32QspPtr += val[2]; + *heres[3]->BSIM3v32QspPtr += val[3]; + } + { + Vec4d val = m * (ggtb - gcqbb); + *heres[0]->BSIM3v32QbPtr += val[0]; + *heres[1]->BSIM3v32QbPtr += val[1]; + *heres[2]->BSIM3v32QbPtr += val[2]; + *heres[3]->BSIM3v32QbPtr += val[3]; + } + } + + line1000: + ; + + return OK; +} + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4_omp.c b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4_omp.c new file mode 100644 index 000000000..2250f8272 --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd4_omp.c @@ -0,0 +1,5726 @@ +/**** BSIM3v3.2.4, Released by Xuemei Xi 12/21/2001 ****/ + +/********** + * Copyright 2001 Regents of the University of California. All rights reserved. + * Original File: b3ld.c of BSIM3v3.2.4 + * Author: 1991 JianHui Huang and Min-Chie Jeng. 
+ * Modified by Mansun Chan (1995). + * Author: 1997-1999 Weidong Liu. + * Author: 2001 Xuemei Xi + * Modified by Xuemei Xi, 10/05, 12/21, 2001. + * Modified by Paolo Nenzi 2002 and Dietmar Warning 2003 + * Modified by Florian Ballenegger 2020 for SIMD version generation + **********/ + + /********** + * Modified 2020 by Florian Ballenegger, Anamosic Ballenegger Design + * Distributed under the same license terms as the original code, + * see file "B3TERMS_OF_USE" + **********/ + +{ + Vec4d SourceSatCurrent; + Vec4d DrainSatCurrent; + double ag0; + Vec4d qgd; + Vec4d qgs; + Vec4d qgb; + Vec4d cbhat; + Vec4d VgstNVt; + Vec4d ExpVgst; + Vec4d cdrain; + Vec4d cdhat; + Vec4d cdreq; + Vec4d ceqbd; + Vec4d ceqbs; + Vec4d ceqqb; + Vec4d ceqqd; + Vec4d ceqqg; + double ceq; + double geq; + Vec4d czbd; + Vec4d czbdsw; + Vec4d czbdswg; + Vec4d czbs; + Vec4d czbssw; + Vec4d czbsswg; + Vec4d evbd; + Vec4d evbs; + Vec4d arg; + Vec4d sarg; + Vec4d Vfbeff; + Vec4d dVfbeff_dVg; + Vec4d dVfbeff_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dVfbeff_dVb; + Vec4d V3; + Vec4d V4; + Vec4d gcbdb; + Vec4d gcbgb; + Vec4d gcbsb; + Vec4d gcddb; + Vec4d gcdgb; + Vec4d gcdsb; + Vec4d gcgdb; + Vec4d gcggb; + Vec4d gcgsb; + Vec4d gcsdb; + Vec4d gcsgb; + Vec4d gcssb; + double MJ; + double MJSW; + double MJSWG; + Vec4d vbd; + Vec4d vbs; + Vec4d vds; + Vec4d vgb; + Vec4d vgd; + Vec4d vgs; + Vec4d qgate = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qbulk = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qdrn = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qsrc; + Vec4d qinoi; + Vec4d cqgate; + Vec4d cqbulk; + Vec4d cqdrn; + Vec4d Vds; + Vec4d Vgs; + Vec4d Vbs; + Vec4d Gmbs; + Vec4d FwdSum; + Vec4d RevSum; + Vec4d Vgs_eff; + Vec4d Vfb; + Vec4d dVfb_dVb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dVfb_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d Phis; + Vec4d dPhis_dVb; + Vec4d sqrtPhis; + Vec4d dsqrtPhis_dVb; + Vec4d Vth; + Vec4d dVth_dVb; + Vec4d dVth_dVd; + Vec4d Vgst; + Vec4d dVgst_dVg; + Vec4d dVgst_dVb; + Vec4d dVgs_eff_dVg; + 
double Nvtm; + double Vtm; + Vec4d n; + Vec4d dn_dVb; + Vec4d dn_dVd; + double voffcv; + Vec4d noff; + Vec4d dnoff_dVd; + Vec4d dnoff_dVb; + Vec4d ExpArg; + double V0; + Vec4d CoxWLcen; + Vec4d QovCox; + double LINK; + Vec4d DeltaPhi; + Vec4d dDeltaPhi_dVg; + Vec4d dDeltaPhi_dVd; + Vec4d dDeltaPhi_dVb; + double Cox; + double Tox; + Vec4d Tcen; + Vec4d dTcen_dVg; + Vec4d dTcen_dVd; + Vec4d dTcen_dVb; + Vec4d Ccen; + Vec4d Coxeff; + Vec4d dCoxeff_dVg; + Vec4d dCoxeff_dVd; + Vec4d dCoxeff_dVb; + Vec4d Denomi; + Vec4d dDenomi_dVg; + Vec4d dDenomi_dVd; + Vec4d dDenomi_dVb; + Vec4d ueff; + Vec4d dueff_dVg; + Vec4d dueff_dVd; + Vec4d dueff_dVb; + Vec4d Esat; + Vec4d Vdsat; + Vec4d EsatL; + Vec4d dEsatL_dVg; + Vec4d dEsatL_dVd; + Vec4d dEsatL_dVb; + Vec4d dVdsat_dVg; + Vec4d dVdsat_dVb; + Vec4d dVdsat_dVd; + Vec4d Vasat; + Vec4d dAlphaz_dVg; + Vec4d dAlphaz_dVb; + Vec4d dVasat_dVg; + Vec4d dVasat_dVb; + Vec4d dVasat_dVd; + Vec4d Va; + Vec4d dVa_dVd; + Vec4d dVa_dVg; + Vec4d dVa_dVb; + Vec4d Vbseff; + Vec4d dVbseff_dVb; + Vec4d VbseffCV; + Vec4d dVbseffCV_dVb; + Vec4d Arg1; + double One_Third_CoxWL; + double Two_Third_CoxWL; + Vec4d Alphaz; + double CoxWL; + Vec4d T0; + Vec4d dT0_dVg; + Vec4d dT0_dVd; + Vec4d dT0_dVb; + Vec4d T1; + Vec4d dT1_dVg; + Vec4d dT1_dVd; + Vec4d dT1_dVb; + Vec4d T2; + Vec4d dT2_dVg; + Vec4d dT2_dVd; + Vec4d dT2_dVb; + Vec4d T3; + Vec4d dT3_dVg; + Vec4d dT3_dVd; + Vec4d dT3_dVb; + Vec4d T4; + Vec4d T5; + Vec4d T6; + Vec4d T7; + Vec4d T8; + Vec4d T9; + Vec4d T10; + Vec4d T11; + Vec4d T12; + Vec4d tmp; + Vec4d Abulk; + Vec4d dAbulk_dVb; + Vec4d Abulk0; + Vec4d dAbulk0_dVb; + double tmpuni; + Vec4d VACLM; + Vec4d dVACLM_dVg; + Vec4d dVACLM_dVd; + Vec4d dVACLM_dVb; + Vec4d VADIBL; + Vec4d dVADIBL_dVg; + Vec4d dVADIBL_dVd; + Vec4d dVADIBL_dVb; + Vec4d Xdep; + Vec4d dXdep_dVb; + Vec4d lt1; + Vec4d dlt1_dVb; + Vec4d ltw; + Vec4d dltw_dVb; + Vec4d Delt_vth; + Vec4d dDelt_vth_dVb; + Vec4d Theta0; + Vec4d dTheta0_dVb; + double TempRatio; + Vec4d tmp1; + Vec4d 
tmp2; + Vec4d tmp3; + Vec4d tmp4; + Vec4d DIBL_Sft; + Vec4d dDIBL_Sft_dVd; + Vec4d Lambda; + Vec4d dLambda_dVg; + double a1; + double ScalingFactor; + Vec4d Vgsteff; + Vec4d dVgsteff_dVg; + Vec4d dVgsteff_dVd; + Vec4d dVgsteff_dVb; + Vec4d Vdseff; + Vec4d dVdseff_dVg; + Vec4d dVdseff_dVd; + Vec4d dVdseff_dVb; + Vec4d VdseffCV; + Vec4d dVdseffCV_dVg; + Vec4d dVdseffCV_dVd; + Vec4d dVdseffCV_dVb; + Vec4d diffVds; + Vec4d dAbulk_dVg; + Vec4d beta; + Vec4d dbeta_dVg; + Vec4d dbeta_dVd; + Vec4d dbeta_dVb; + Vec4d gche; + Vec4d dgche_dVg; + Vec4d dgche_dVd; + Vec4d dgche_dVb; + Vec4d fgche1; + Vec4d dfgche1_dVg; + Vec4d dfgche1_dVd; + Vec4d dfgche1_dVb; + Vec4d fgche2; + Vec4d dfgche2_dVg; + Vec4d dfgche2_dVd; + Vec4d dfgche2_dVb; + Vec4d Idl; + Vec4d dIdl_dVg; + Vec4d dIdl_dVd; + Vec4d dIdl_dVb; + Vec4d Idsa; + Vec4d dIdsa_dVg; + Vec4d dIdsa_dVd; + Vec4d dIdsa_dVb; + Vec4d Ids; + Vec4d Gm; + Vec4d Gds; + Vec4d Gmb; + Vec4d Isub; + Vec4d Gbd; + Vec4d Gbg; + Vec4d Gbb; + Vec4d VASCBE; + Vec4d dVASCBE_dVg; + Vec4d dVASCBE_dVd; + Vec4d dVASCBE_dVb; + Vec4d CoxWovL; + Vec4d Rds; + Vec4d dRds_dVg; + Vec4d dRds_dVb; + Vec4d WVCox; + Vec4d WVCoxRds; + Vec4d Vgst2Vtm; + Vec4d VdsatCV; + Vec4d dVdsatCV_dVg; + Vec4d dVdsatCV_dVb; + double Leff; + Vec4d Weff; + Vec4d dWeff_dVg; + Vec4d dWeff_dVb; + Vec4d AbulkCV; + Vec4d dAbulkCV_dVb; + Vec4d qgdo; + Vec4d qgso; + Vec4d cgdo; + Vec4d cgso; + Vec4d qcheq = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d qdef; + Vec4d gqdef = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d cqdef; + Vec4d cqcheq; + Vec4d gtau_diff; + Vec4d gtau_drift; + Vec4d gcqdb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqsb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqgb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d gcqbb = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dxpart; + Vec4d sxpart; + Vec4d ggtg; + Vec4d ggtd; + Vec4d ggts; + Vec4d ggtb; + Vec4d ddxpart_dVd; + Vec4d ddxpart_dVg; + Vec4d ddxpart_dVb; + Vec4d ddxpart_dVs; + Vec4d dsxpart_dVd; + Vec4d dsxpart_dVg; + Vec4d dsxpart_dVb; + Vec4d 
dsxpart_dVs; + Vec4d gbspsp; + Vec4d gbbdp; + Vec4d gbbsp; + Vec4d gbspg; + Vec4d gbspb; + Vec4d gbspdp; + Vec4d gbdpdp; + Vec4d gbdpg; + Vec4d gbdpb; + Vec4d gbdpsp; + Vec4d Cgg; + Vec4d Cgd; + Vec4d Cgb; + Vec4d Cdg; + Vec4d Cdd; + Vec4d Cds; + Vec4d Csg; + Vec4d Csd; + Vec4d Css; + Vec4d Csb; + Vec4d Cbg; + Vec4d Cbd; + Vec4d Cbb; + Vec4d Cgg1; + Vec4d Cgb1; + Vec4d Cgd1; + Vec4d Cbg1; + Vec4d Cbb1; + Vec4d Cbd1; + Vec4d Qac0; + Vec4d Qsub0; + Vec4d dQac0_dVg; + Vec4d dQac0_dVd = (Vec4d ){0.0, 0.0, 0.0, 0.0}; + Vec4d dQac0_dVb; + Vec4d dQsub0_dVg; + Vec4d dQsub0_dVd; + Vec4d dQsub0_dVb; + Vec4d m; + Vec4m BSIM3v32mode; + Vec4m Check; + int ChargeComputationNeeded; + int error; + ScalingFactor = 1.0e-9; + ChargeComputationNeeded = ((ckt->CKTmode & (((MODEDCTRANCURVE | MODEAC) | MODETRAN) | MODEINITSMSIG)) || ((ckt->CKTmode & MODETRANOP) && (ckt->CKTmode & MODEUIC))) ? (1) : (0); + vbs = (Vec4d ){heres[0]->BSIM3v32SIMDvbs, heres[1]->BSIM3v32SIMDvbs, heres[2]->BSIM3v32SIMDvbs, heres[3]->BSIM3v32SIMDvbs}; + vgs = (Vec4d ){heres[0]->BSIM3v32SIMDvgs, heres[1]->BSIM3v32SIMDvgs, heres[2]->BSIM3v32SIMDvgs, heres[3]->BSIM3v32SIMDvgs}; + vds = (Vec4d ){heres[0]->BSIM3v32SIMDvds, heres[1]->BSIM3v32SIMDvds, heres[2]->BSIM3v32SIMDvds, heres[3]->BSIM3v32SIMDvds}; + qdef = (Vec4d ){heres[0]->BSIM3v32SIMDqdef, heres[1]->BSIM3v32SIMDqdef, heres[2]->BSIM3v32SIMDqdef, heres[3]->BSIM3v32SIMDqdef}; + cdhat = (Vec4d ){heres[0]->BSIM3v32SIMDcdhat, heres[1]->BSIM3v32SIMDcdhat, heres[2]->BSIM3v32SIMDcdhat, heres[3]->BSIM3v32SIMDcdhat}; + cbhat = (Vec4d ){heres[0]->BSIM3v32SIMDcbhat, heres[1]->BSIM3v32SIMDcbhat, heres[2]->BSIM3v32SIMDcbhat, heres[3]->BSIM3v32SIMDcbhat}; + Check = (Vec4m ){heres[0]->BSIM3v32SIMDCheck, heres[1]->BSIM3v32SIMDCheck, heres[2]->BSIM3v32SIMDCheck, heres[3]->BSIM3v32SIMDCheck}; + SIMDIFYCMD(start); + vbd = vbs - vds; + vgd = vgs - vds; + vgb = vgs - vbs; + Nvtm = model->BSIM3v32vtm * model->BSIM3v32jctEmissionCoeff; + if (model->BSIM3v32acmMod == 0) + { + 
SourceSatCurrent = vec4_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec4m condmask0 = (((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}) <= 0.0) & (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) <= 0.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + ; + } + { + SourceSatCurrent = vec4_blend(SourceSatCurrent, (((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}) * model->BSIM3v32jctTempSatCurDensity) + (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + DrainSatCurrent = vec4_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec4m condmask0 = (((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}) <= 0.0) & (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) <= 0.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + ; + } + { + DrainSatCurrent = vec4_blend(DrainSatCurrent, (((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}) * model->BSIM3v32jctTempSatCurDensity) + (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + } + else + { + error = vec4_BSIM3v32_ACM_saturationCurrents(model, heres, &DrainSatCurrent, &SourceSatCurrent); + if (SIMDANY(error)) + return error; + + } + + if (1) 
+ { + Vec4m condmask0 = SourceSatCurrent <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbs = ckt->CKTgmin; + + } + { + Vec4d val = ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbs = vec4_blend(evbs, vec4_exp(vbs / Nvtm), condmask_false0); + { + Vec4d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + else + { + if (1) + { + Vec4m condmask1 = vbs < ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm}); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + evbs = vec4_blend(evbs, vec4_exp(vbs / Nvtm), condmask_true1); + { + Vec4d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + 
heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, ((Vec4d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm}) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = ((((Vec4d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm}) - SourceSatCurrent) + (T0 * (vbs - ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm})))) + (ckt->CKTgmin * vbs); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (SourceSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + 
heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + } + { + Vec4d val = (model->BSIM3v32ijth + (ckt->CKTgmin * vbs)) + (T0 * (vbs - ((Vec4d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + } + + } + + } + } + + } + + } + } + + if (1) + { + Vec4m condmask0 = DrainSatCurrent <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbd = ckt->CKTgmin; + + } + { + Vec4d val = ((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbd = vec4_blend(evbd, vec4_exp(vbd / Nvtm), condmask_false0); + { + Vec4d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if 
(condmask_false0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + else + { + if (1) + { + Vec4m condmask1 = vbd < ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm}); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + evbd = vec4_blend(evbd, vec4_exp(vbd / Nvtm), condmask_true1); + { + Vec4d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, ((Vec4d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm}) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = ((((Vec4d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm}) - DrainSatCurrent) + (T0 * (vbd - ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm})))) + (ckt->CKTgmin * vbd); + if 
(condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (DrainSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec4d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + } + { + Vec4d val = (model->BSIM3v32ijth + (ckt->CKTgmin * vbd)) + (T0 * (vbd - ((Vec4d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + } + + } + + } + } + + } + + } + } + + BSIM3v32mode = vds >= 0.0; + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vds = vec4_blend(Vds, vds, condmask_true0); + Vgs = vec4_blend(Vgs, vgs, condmask_true0); + Vbs = vec4_blend(Vbs, vbs, condmask_true0); + } + { + Vds = vec4_blend(Vds, -vds, condmask_false0); + Vgs = vec4_blend(Vgs, vgd, condmask_false0); + Vbs = vec4_blend(Vbs, vbd, condmask_false0); + } + } + + { + Vec4m modesym; + modesym = (2 * (BSIM3v32mode & 0x1)) - 1; + { + heres[0]->BSIM3v32mode = modesym[0]; + heres[1]->BSIM3v32mode = modesym[1]; + heres[2]->BSIM3v32mode = modesym[2]; + heres[3]->BSIM3v32mode = modesym[3]; + } + } + T0 = (Vbs - pParam->BSIM3v32vbsc) - 0.001; + T1 = vec4_sqrt((T0 * T0) - (0.004 * pParam->BSIM3v32vbsc)); + Vbseff = pParam->BSIM3v32vbsc + (0.5 * (T0 + T1)); + 
dVbseff_dVb = 0.5 * (1.0 + (T0 / T1)); + if (1) + { + Vec4m condmask0 = Vbseff < Vbs; + Vec4m condmask_true0 = condmask0; + { + Vbseff = vec4_blend(Vbseff, Vbs, condmask_true0); + } + } + + if (1) + { + Vec4m condmask0 = Vbseff > 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, pParam->BSIM3v32phi / (pParam->BSIM3v32phi + Vbseff), condmask_true0); + Phis = vec4_blend(Phis, pParam->BSIM3v32phi * T0, condmask_true0); + dPhis_dVb = vec4_blend(dPhis_dVb, (-T0) * T0, condmask_true0); + sqrtPhis = vec4_blend(sqrtPhis, pParam->BSIM3v32phis3 / (pParam->BSIM3v32phi + (0.5 * Vbseff)), condmask_true0); + dsqrtPhis_dVb = vec4_blend(dsqrtPhis_dVb, (((-0.5) * sqrtPhis) * sqrtPhis) / pParam->BSIM3v32phis3, condmask_true0); + } + { + Phis = vec4_blend(Phis, pParam->BSIM3v32phi - Vbseff, condmask_false0); + dPhis_dVb = vec4_blend(dPhis_dVb, vec4_SIMDTOVECTOR(-1.0), condmask_false0); + sqrtPhis = vec4_blend(sqrtPhis, vec4_sqrt(Phis), condmask_false0); + dsqrtPhis_dVb = vec4_blend(dsqrtPhis_dVb, (-0.5) / sqrtPhis, condmask_false0); + } + } + + Xdep = (pParam->BSIM3v32Xdep0 * sqrtPhis) / pParam->BSIM3v32sqrtPhi; + dXdep_dVb = (pParam->BSIM3v32Xdep0 / pParam->BSIM3v32sqrtPhi) * dsqrtPhis_dVb; + Leff = pParam->BSIM3v32leff; + Vtm = model->BSIM3v32vtm; + T3 = vec4_sqrt(Xdep); + V0 = pParam->BSIM3v32vbi - pParam->BSIM3v32phi; + T0 = pParam->BSIM3v32dvt2 * Vbseff; + T2 = vec4_SIMDTOVECTOR(pParam->BSIM3v32dvt2); + if (1) + { + Vec4m condmask0 = T0 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, 1.0 + T0, condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec4_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec4_blend(T2, (T2 * T4) * T4, condmask_false0); + } + } + + lt1 = (model->BSIM3v32factor1 * T3) * T1; + dlt1_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = 
pParam->BSIM3v32dvt2w * Vbseff; + if (1) + { + Vec4m condmask0 = T0 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, 1.0 + T0, condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(pParam->BSIM3v32dvt2w), condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec4_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec4_blend(T2, (pParam->BSIM3v32dvt2w * T4) * T4, condmask_false0); + } + } + + ltw = (model->BSIM3v32factor1 * T3) * T1; + dltw_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = (((-0.5) * pParam->BSIM3v32dvt1) * Leff) / lt1; + if (1) + { + Vec4m condmask0 = T0 > (-EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_exp(T0), condmask_true0); + Theta0 = vec4_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, (((-T0) / lt1) * T1) * dlt1_dVb, condmask_true0); + dTheta0_dVb = vec4_blend(dTheta0_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR(MIN_EXP), condmask_false0); + Theta0 = vec4_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dTheta0_dVb = vec4_blend(dTheta0_dVb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + Delt_vth = (pParam->BSIM3v32dvt0 * Theta0) * V0; + dDelt_vth_dVb = (pParam->BSIM3v32dvt0 * dTheta0_dVb) * V0; + T0 = ((((-0.5) * pParam->BSIM3v32dvt1w) * pParam->BSIM3v32weff) * Leff) / ltw; + if (1) + { + Vec4m condmask0 = T0 > (-EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_exp(T0), condmask_true0); + T2 = vec4_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, (((-T0) / ltw) * T1) * dltw_dVb, condmask_true0); + dT2_dVb = vec4_blend(dT2_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, 
vec4_SIMDTOVECTOR(MIN_EXP), condmask_false0); + T2 = vec4_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dT2_dVb = vec4_blend(dT2_dVb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + T0 = pParam->BSIM3v32dvt0w * T2; + T2 = T0 * V0; + dT2_dVb = (pParam->BSIM3v32dvt0w * dT2_dVb) * V0; + TempRatio = (ckt->CKTtemp / model->BSIM3v32tnom) - 1.0; + T0 = vec4_SIMDTOVECTOR(sqrt(1.0 + (pParam->BSIM3v32nlx / Leff))); + T1 = ((pParam->BSIM3v32k1ox * (T0 - 1.0)) * pParam->BSIM3v32sqrtPhi) + (((pParam->BSIM3v32kt1 + (pParam->BSIM3v32kt1l / Leff)) + (pParam->BSIM3v32kt2 * Vbseff)) * TempRatio); + tmp2 = vec4_SIMDTOVECTOR((model->BSIM3v32tox * pParam->BSIM3v32phi) / (pParam->BSIM3v32weff + pParam->BSIM3v32w0)); + T3 = pParam->BSIM3v32eta0 + (pParam->BSIM3v32etab * Vbseff); + if (1) + { + Vec4m condmask0 = T3 < 1.0e-4; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (2.0e4 * T3)), condmask_true0); + T3 = vec4_blend(T3, (2.0e-4 - T3) * T9, condmask_true0); + T4 = vec4_blend(T4, T9 * T9, condmask_true0); + } + { + T4 = vec4_blend(T4, vec4_SIMDTOVECTOR(1.0), condmask_false0); + } + } + + dDIBL_Sft_dVd = T3 * pParam->BSIM3v32theta0vb0; + DIBL_Sft = dDIBL_Sft_dVd * Vds; + Vth = ((((((((model->BSIM3v32type * ((Vec4d ){heres[0]->BSIM3v32vth0, heres[1]->BSIM3v32vth0, heres[2]->BSIM3v32vth0, heres[3]->BSIM3v32vth0})) - (pParam->BSIM3v32k1 * pParam->BSIM3v32sqrtPhi)) + (pParam->BSIM3v32k1ox * sqrtPhis)) - (pParam->BSIM3v32k2ox * Vbseff)) - Delt_vth) - T2) + ((pParam->BSIM3v32k3 + (pParam->BSIM3v32k3b * Vbseff)) * tmp2)) + T1) - DIBL_Sft; + { + heres[0]->BSIM3v32von = Vth[0]; + heres[1]->BSIM3v32von = Vth[1]; + heres[2]->BSIM3v32von = Vth[2]; + heres[3]->BSIM3v32von = Vth[3]; + } + dVth_dVb = ((((((pParam->BSIM3v32k1ox * dsqrtPhis_dVb) - pParam->BSIM3v32k2ox) - dDelt_vth_dVb) - dT2_dVb) + (pParam->BSIM3v32k3b * tmp2)) - (((pParam->BSIM3v32etab * Vds) * pParam->BSIM3v32theta0vb0) * T4)) + (pParam->BSIM3v32kt2 * 
TempRatio); + dVth_dVd = -dDIBL_Sft_dVd; + tmp2 = (pParam->BSIM3v32nfactor * EPSSI) / Xdep; + tmp3 = (pParam->BSIM3v32cdsc + (pParam->BSIM3v32cdscb * Vbseff)) + (pParam->BSIM3v32cdscd * Vds); + tmp4 = ((tmp2 + (tmp3 * Theta0)) + pParam->BSIM3v32cit) / model->BSIM3v32cox; + if (1) + { + Vec4m condmask0 = tmp4 >= (-0.5); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + n = vec4_blend(n, 1.0 + tmp4, condmask_true0); + dn_dVb = vec4_blend(dn_dVb, (((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox, condmask_true0); + dn_dVd = vec4_blend(dn_dVd, (pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox, condmask_true0); + } + { + T0 = vec4_blend(T0, 1.0 / (3.0 + (8.0 * tmp4)), condmask_false0); + n = vec4_blend(n, (1.0 + (3.0 * tmp4)) * T0, condmask_false0); + T0 = vec4_blend(T0, T0 * T0, condmask_false0); + dn_dVb = vec4_blend(dn_dVb, ((((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox) * T0, condmask_false0); + dn_dVd = vec4_blend(dn_dVd, ((pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox) * T0, condmask_false0); + } + } + + T0 = ((Vec4d ){heres[0]->BSIM3v32vfb, heres[1]->BSIM3v32vfb, heres[2]->BSIM3v32vfb, heres[3]->BSIM3v32vfb}) + pParam->BSIM3v32phi; + Vgs_eff = Vgs; + dVgs_eff_dVg = vec4_SIMDTOVECTOR(1.0); + if ((pParam->BSIM3v32ngate > 1.e18) && (pParam->BSIM3v32ngate < 1.e25)) + if (1) + { + Vec4m condmask0 = Vgs > T0; + Vec4m condmask_true0 = condmask0; + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR((((1.0e6 * Charge_q) * EPSSI) * pParam->BSIM3v32ngate) / (model->BSIM3v32cox * model->BSIM3v32cox)), condmask_true0); + T4 = vec4_blend(T4, vec4_sqrt(1.0 + ((2.0 * (Vgs - T0)) / T1)), condmask_true0); + T2 = vec4_blend(T2, T1 * (T4 - 1.0), condmask_true0); + T3 = vec4_blend(T3, ((0.5 * T2) * T2) / T1, condmask_true0); + T7 = vec4_blend(T7, (1.12 - T3) - 0.05, condmask_true0); + T6 = vec4_blend(T6, vec4_sqrt((T7 * 
T7) + 0.224), condmask_true0); + T5 = vec4_blend(T5, 1.12 - (0.5 * (T7 + T6)), condmask_true0); + Vgs_eff = vec4_blend(Vgs_eff, Vgs - T5, condmask_true0); + dVgs_eff_dVg = vec4_blend(dVgs_eff_dVg, 1.0 - ((0.5 - (0.5 / T4)) * (1.0 + (T7 / T6))), condmask_true0); + } + } + + + Vgst = Vgs_eff - Vth; + T10 = (2.0 * n) * Vtm; + VgstNVt = Vgst / T10; + ExpArg = ((2.0 * pParam->BSIM3v32voff) - Vgst) / T10; + T0 = VgstNVt; + if (1) + { + Vec4m condmask0 = ExpArg > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + T0 = vec4_blend(T0, (Vgst - pParam->BSIM3v32voff) / (n * Vtm), condmask_true0); + } + + ExpVgst = vec4_exp(T0); + if (1) + { + Vec4m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vgsteff = vec4_blend(Vgsteff, Vgst, condmask_true0); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec4m condmask1 = ExpArg > EXP_THRESHOLD; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec4_blend(Vgsteff, ((Vtm * pParam->BSIM3v32cdep0) / model->BSIM3v32cox) * ExpVgst, condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, Vgsteff / (n * Vtm), condmask_true1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, (-dVgsteff_dVg) * (dVth_dVd + ((T0 * Vtm) * dn_dVd)), condmask_true1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, (-dVgsteff_dVg) * (dVth_dVb + ((T0 * Vtm) * dn_dVb)), condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_true1); + } + { + T1 = vec4_blend(T1, T10 * vec4_log(1.0 + ExpVgst), condmask_false1); + dT1_dVg = vec4_blend(dT1_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dT1_dVb = vec4_blend(dT1_dVb, ((-dT1_dVg) * (dVth_dVb + ((Vgst / n) * dn_dVb))) + ((T1 / n) * dn_dVb), condmask_false1); + 
dT1_dVd = vec4_blend(dT1_dVd, ((-dT1_dVg) * (dVth_dVd + ((Vgst / n) * dn_dVd))) + ((T1 / n) * dn_dVd), condmask_false1); + dT2_dVg = vec4_blend(dT2_dVg, ((-model->BSIM3v32cox) / (Vtm * pParam->BSIM3v32cdep0)) * vec4_exp(ExpArg), condmask_false1); + T2 = vec4_blend(T2, 1.0 - (T10 * dT2_dVg), condmask_false1); + dT2_dVd = vec4_blend(dT2_dVd, ((-dT2_dVg) * (dVth_dVd - (((2.0 * Vtm) * ExpArg) * dn_dVd))) + (((T2 - 1.0) / n) * dn_dVd), condmask_false1); + dT2_dVb = vec4_blend(dT2_dVb, ((-dT2_dVg) * (dVth_dVb - (((2.0 * Vtm) * ExpArg) * dn_dVb))) + (((T2 - 1.0) / n) * dn_dVb), condmask_false1); + Vgsteff = vec4_blend(Vgsteff, T1 / T2, condmask_false1); + T3 = vec4_blend(T3, T2 * T2, condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, (((T2 * dT1_dVg) - (T1 * dT2_dVg)) / T3) * dVgs_eff_dVg, condmask_false1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, ((T2 * dT1_dVd) - (T1 * dT2_dVd)) / T3, condmask_false1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, ((T2 * dT1_dVb) - (T1 * dT2_dVb)) / T3, condmask_false1); + } + } + + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vgsteff = Vgsteff[0]; + heres[1]->BSIM3v32Vgsteff = Vgsteff[1]; + heres[2]->BSIM3v32Vgsteff = Vgsteff[2]; + heres[3]->BSIM3v32Vgsteff = Vgsteff[3]; + } + } + + T9 = sqrtPhis - pParam->BSIM3v32sqrtPhi; + Weff = pParam->BSIM3v32weff - (2.0 * ((pParam->BSIM3v32dwg * Vgsteff) + (pParam->BSIM3v32dwb * T9))); + dWeff_dVg = vec4_SIMDTOVECTOR((-2.0) * pParam->BSIM3v32dwg); + dWeff_dVb = ((-2.0) * pParam->BSIM3v32dwb) * dsqrtPhis_dVb; + if (1) + { + Vec4m condmask0 = Weff < 2.0e-8; + Vec4m condmask_true0 = condmask0; + { + T0 = vec4_blend(T0, 1.0 / (6.0e-8 - (2.0 * Weff)), condmask_true0); + Weff = vec4_blend(Weff, (2.0e-8 * (4.0e-8 - Weff)) * T0, condmask_true0); + T0 = vec4_blend(T0, T0 * (T0 * 4.0e-16), condmask_true0); + dWeff_dVg = vec4_blend(dWeff_dVg, dWeff_dVg * T0, condmask_true0); + dWeff_dVb = vec4_blend(dWeff_dVb, dWeff_dVb * T0, condmask_true0); + } + } + + T0 = 
(pParam->BSIM3v32prwg * Vgsteff) + (pParam->BSIM3v32prwb * T9); + if (1) + { + Vec4m condmask0 = T0 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Rds = vec4_blend(Rds, pParam->BSIM3v32rds0 * (1.0 + T0), condmask_true0); + dRds_dVg = vec4_blend(dRds_dVg, vec4_SIMDTOVECTOR(pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg), condmask_true0); + dRds_dVb = vec4_blend(dRds_dVb, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (17.0 + (20.0 * T0)), condmask_false0); + Rds = vec4_blend(Rds, (pParam->BSIM3v32rds0 * (0.8 + T0)) * T1, condmask_false0); + T1 = vec4_blend(T1, T1 * T1, condmask_false0); + dRds_dVg = vec4_blend(dRds_dVg, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg) * T1, condmask_false0); + dRds_dVb = vec4_blend(dRds_dVb, ((pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb) * T1, condmask_false0); + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32rds = Rds[0]; + heres[1]->BSIM3v32rds = Rds[1]; + heres[2]->BSIM3v32rds = Rds[2]; + heres[3]->BSIM3v32rds = Rds[3]; + } + } + + T1 = (0.5 * pParam->BSIM3v32k1ox) / sqrtPhis; + dT1_dVb = ((-T1) / sqrtPhis) * dsqrtPhis_dVb; + T9 = vec4_sqrt(pParam->BSIM3v32xj * Xdep); + tmp1 = Leff + (2.0 * T9); + T5 = Leff / tmp1; + tmp2 = pParam->BSIM3v32a0 * T5; + tmp3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weff + pParam->BSIM3v32b1); + tmp4 = pParam->BSIM3v32b0 / tmp3; + T2 = tmp2 + tmp4; + dT2_dVb = (((-T9) / tmp1) / Xdep) * dXdep_dVb; + T6 = T5 * T5; + T7 = T5 * T6; + Abulk0 = 1.0 + (T1 * T2); + dAbulk0_dVb = ((T1 * tmp2) * dT2_dVb) + (T2 * dT1_dVb); + T8 = (pParam->BSIM3v32ags * pParam->BSIM3v32a0) * T7; + dAbulk_dVg = (-T1) * T8; + Abulk = Abulk0 + (dAbulk_dVg * Vgsteff); + dAbulk_dVb = dAbulk0_dVb - ((T8 * Vgsteff) * (dT1_dVb + ((3.0 * T1) * dT2_dVb))); + if (1) + { + Vec4m condmask0 = Abulk0 < 0.1; + Vec4m condmask_true0 = condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (20.0 * 
Abulk0)), condmask_true0); + Abulk0 = vec4_blend(Abulk0, (0.2 - Abulk0) * T9, condmask_true0); + dAbulk0_dVb = vec4_blend(dAbulk0_dVb, dAbulk0_dVb * (T9 * T9), condmask_true0); + } + } + + if (1) + { + Vec4m condmask0 = Abulk < 0.1; + Vec4m condmask_true0 = condmask0; + { + T9 = vec4_blend(T9, 1.0 / (3.0 - (20.0 * Abulk)), condmask_true0); + Abulk = vec4_blend(Abulk, (0.2 - Abulk) * T9, condmask_true0); + if (model->BSIM3v32intVersion > BSIM3v32V32) + { + T10 = vec4_blend(T10, T9 * T9, condmask_true0); + dAbulk_dVb = vec4_blend(dAbulk_dVb, dAbulk_dVb * T10, condmask_true0); + dAbulk_dVg = vec4_blend(dAbulk_dVg, dAbulk_dVg * T10, condmask_true0); + } + else + { + dAbulk_dVb = vec4_blend(dAbulk_dVb, dAbulk_dVb * (T9 * T9), condmask_true0); + } + + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Abulk = Abulk[0]; + heres[1]->BSIM3v32Abulk = Abulk[1]; + heres[2]->BSIM3v32Abulk = Abulk[2]; + heres[3]->BSIM3v32Abulk = Abulk[3]; + } + } + + T2 = pParam->BSIM3v32keta * Vbseff; + if (1) + { + Vec4m condmask0 = T2 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 / (1.0 + T2), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T0) * T0, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (0.8 + T2), condmask_false0); + T0 = vec4_blend(T0, (17.0 + (20.0 * T2)) * T1, condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T1) * T1, condmask_false0); + } + } + + dAbulk_dVg *= T0; + dAbulk_dVb = (dAbulk_dVb * T0) + (Abulk * dT0_dVb); + dAbulk0_dVb = (dAbulk0_dVb * T0) + (Abulk0 * dT0_dVb); + Abulk *= T0; + Abulk0 *= T0; + if (model->BSIM3v32mobMod == 1) + { + T0 = (Vgsteff + Vth) + Vth; + T2 = pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T5 = T3 * (T2 + (pParam->BSIM3v32ub * T3)); + dDenomi_dVg = (T2 + ((2.0 * pParam->BSIM3v32ub) * T3)) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) 
* dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T3); + } + else + if (model->BSIM3v32mobMod == 2) + { + T5 = (Vgsteff / model->BSIM3v32tox) * ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + ((pParam->BSIM3v32ub * Vgsteff) / model->BSIM3v32tox)); + dDenomi_dVg = ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + (((2.0 * pParam->BSIM3v32ub) * Vgsteff) / model->BSIM3v32tox)) / model->BSIM3v32tox; + dDenomi_dVd = vec4_SIMDTOVECTOR(0.0); + dDenomi_dVb = (Vgsteff * pParam->BSIM3v32uc) / model->BSIM3v32tox; + } + else + { + T0 = (Vgsteff + Vth) + Vth; + T2 = 1.0 + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T4 = T3 * (pParam->BSIM3v32ua + (pParam->BSIM3v32ub * T3)); + T5 = T4 * T2; + dDenomi_dVg = ((pParam->BSIM3v32ua + ((2.0 * pParam->BSIM3v32ub) * T3)) * T2) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) * dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T4); + } + + + if (1) + { + Vec4m condmask0 = T5 >= (-0.8); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Denomi = vec4_blend(Denomi, 1.0 + T5, condmask_true0); + } + { + T9 = vec4_blend(T9, 1.0 / (7.0 + (10.0 * T5)), condmask_false0); + Denomi = vec4_blend(Denomi, (0.6 + T5) * T9, condmask_false0); + T9 = vec4_blend(T9, T9 * T9, condmask_false0); + dDenomi_dVg = vec4_blend(dDenomi_dVg, dDenomi_dVg * T9, condmask_false0); + dDenomi_dVd = vec4_blend(dDenomi_dVd, dDenomi_dVd * T9, condmask_false0); + dDenomi_dVb = vec4_blend(dDenomi_dVb, dDenomi_dVb * T9, condmask_false0); + } + } + + { + Vec4d val = ueff = ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp}) / Denomi; + heres[0]->BSIM3v32ueff = val[0]; + heres[1]->BSIM3v32ueff = val[1]; + heres[2]->BSIM3v32ueff = val[2]; + heres[3]->BSIM3v32ueff = val[3]; + } + T9 = (-ueff) / Denomi; + dueff_dVg = T9 * dDenomi_dVg; + dueff_dVd = T9 * dDenomi_dVd; + dueff_dVb = 
T9 * dDenomi_dVb; + WVCox = (Weff * pParam->BSIM3v32vsattemp) * model->BSIM3v32cox; + WVCoxRds = WVCox * Rds; + Esat = (2.0 * pParam->BSIM3v32vsattemp) / ueff; + EsatL = Esat * Leff; + T0 = (-EsatL) / ueff; + dEsatL_dVg = T0 * dueff_dVg; + dEsatL_dVd = T0 * dueff_dVd; + dEsatL_dVb = T0 * dueff_dVb; + a1 = pParam->BSIM3v32a1; + if (a1 == 0.0) + { + Lambda = vec4_SIMDTOVECTOR(pParam->BSIM3v32a2); + dLambda_dVg = vec4_SIMDTOVECTOR(0.0); + } + else + if (a1 > 0.0) + { + T0 = vec4_SIMDTOVECTOR(1.0 - pParam->BSIM3v32a2); + T1 = (T0 - (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec4_sqrt((T1 * T1) + (0.0004 * T0)); + Lambda = (pParam->BSIM3v32a2 + T0) - (0.5 * (T1 + T2)); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + else + { + T1 = (pParam->BSIM3v32a2 + (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec4_sqrt((T1 * T1) + (0.0004 * pParam->BSIM3v32a2)); + Lambda = 0.5 * (T1 + T2); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + + + Vgst2Vtm = Vgsteff + (2.0 * Vtm); + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + Vec4d val = Abulk / Vgst2Vtm; + heres[0]->BSIM3v32AbovVgst2Vtm = val[0]; + heres[1]->BSIM3v32AbovVgst2Vtm = val[1]; + heres[2]->BSIM3v32AbovVgst2Vtm = val[2]; + heres[3]->BSIM3v32AbovVgst2Vtm = val[3]; + } + } + + if (1) + { + Vec4m condmask0 = Rds > 0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + tmp2 = vec4_blend(tmp2, (dRds_dVg / Rds) + (dWeff_dVg / Weff), condmask_true0); + tmp3 = vec4_blend(tmp3, (dRds_dVb / Rds) + (dWeff_dVb / Weff), condmask_true0); + } + { + tmp2 = vec4_blend(tmp2, dWeff_dVg / Weff, condmask_false0); + tmp3 = vec4_blend(tmp3, dWeff_dVb / Weff, condmask_false0); + } + } + + if (1) + { + Vec4m condmask0 = (Rds == 0.0) & (Lambda == 1.0); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 / ((Abulk * EsatL) + Vgst2Vtm), condmask_true0); + tmp1 = vec4_blend(tmp1, vec4_SIMDTOVECTOR(0.0), 
condmask_true0); + T1 = vec4_blend(T1, T0 * T0, condmask_true0); + T2 = vec4_blend(T2, Vgst2Vtm * T0, condmask_true0); + T3 = vec4_blend(T3, EsatL * Vgst2Vtm, condmask_true0); + Vdsat = vec4_blend(Vdsat, T3 * T0, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, (-(((Abulk * dEsatL_dVg) + (EsatL * dAbulk_dVg)) + 1.0)) * T1, condmask_true0); + dT0_dVd = vec4_blend(dT0_dVd, (-(Abulk * dEsatL_dVd)) * T1, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (-((Abulk * dEsatL_dVb) + (dAbulk_dVb * EsatL))) * T1, condmask_true0); + dVdsat_dVg = vec4_blend(dVdsat_dVg, ((T3 * dT0_dVg) + (T2 * dEsatL_dVg)) + (EsatL * T0), condmask_true0); + dVdsat_dVd = vec4_blend(dVdsat_dVd, (T3 * dT0_dVd) + (T2 * dEsatL_dVd), condmask_true0); + dVdsat_dVb = vec4_blend(dVdsat_dVb, (T3 * dT0_dVb) + (T2 * dEsatL_dVb), condmask_true0); + } + { + tmp1 = vec4_blend(tmp1, dLambda_dVg / (Lambda * Lambda), condmask_false0); + T9 = vec4_blend(T9, Abulk * WVCoxRds, condmask_false0); + T8 = vec4_blend(T8, Abulk * T9, condmask_false0); + T7 = vec4_blend(T7, Vgst2Vtm * T9, condmask_false0); + T6 = vec4_blend(T6, Vgst2Vtm * WVCoxRds, condmask_false0); + T0 = vec4_blend(T0, (2.0 * Abulk) * ((T9 - 1.0) + (1.0 / Lambda)), condmask_false0); + dT0_dVg = vec4_blend(dT0_dVg, 2.0 * (((T8 * tmp2) - (Abulk * tmp1)) + ((((2.0 * T9) + (1.0 / Lambda)) - 1.0) * dAbulk_dVg)), condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, 2.0 * ((T8 * (((2.0 / Abulk) * dAbulk_dVb) + tmp3)) + (((1.0 / Lambda) - 1.0) * dAbulk_dVb)), condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, vec4_SIMDTOVECTOR(0.0), condmask_false0); + T1 = vec4_blend(T1, ((Vgst2Vtm * ((2.0 / Lambda) - 1.0)) + (Abulk * EsatL)) + (3.0 * T7), condmask_false0); + dT1_dVg = vec4_blend(dT1_dVg, (((((2.0 / Lambda) - 1.0) - ((2.0 * Vgst2Vtm) * tmp1)) + (Abulk * dEsatL_dVg)) + (EsatL * dAbulk_dVg)) + (3.0 * ((T9 + (T7 * tmp2)) + (T6 * dAbulk_dVg))), condmask_false0); + dT1_dVb = vec4_blend(dT1_dVb, ((Abulk * dEsatL_dVb) + (EsatL * dAbulk_dVb)) + (3.0 * ((T6 * dAbulk_dVb) + 
(T7 * tmp3))), condmask_false0); + dT1_dVd = vec4_blend(dT1_dVd, Abulk * dEsatL_dVd, condmask_false0); + T2 = vec4_blend(T2, Vgst2Vtm * (EsatL + (2.0 * T6)), condmask_false0); + dT2_dVg = vec4_blend(dT2_dVg, (EsatL + (Vgst2Vtm * dEsatL_dVg)) + (T6 * (4.0 + ((2.0 * Vgst2Vtm) * tmp2))), condmask_false0); + dT2_dVb = vec4_blend(dT2_dVb, Vgst2Vtm * (dEsatL_dVb + ((2.0 * T6) * tmp3)), condmask_false0); + dT2_dVd = vec4_blend(dT2_dVd, Vgst2Vtm * dEsatL_dVd, condmask_false0); + T3 = vec4_blend(T3, vec4_sqrt((T1 * T1) - ((2.0 * T0) * T2)), condmask_false0); + Vdsat = vec4_blend(Vdsat, (T1 - T3) / T0, condmask_false0); + dT3_dVg = vec4_blend(dT3_dVg, ((T1 * dT1_dVg) - (2.0 * ((T0 * dT2_dVg) + (T2 * dT0_dVg)))) / T3, condmask_false0); + dT3_dVd = vec4_blend(dT3_dVd, ((T1 * dT1_dVd) - (2.0 * ((T0 * dT2_dVd) + (T2 * dT0_dVd)))) / T3, condmask_false0); + dT3_dVb = vec4_blend(dT3_dVb, ((T1 * dT1_dVb) - (2.0 * ((T0 * dT2_dVb) + (T2 * dT0_dVb)))) / T3, condmask_false0); + dVdsat_dVg = vec4_blend(dVdsat_dVg, ((dT1_dVg - ((((T1 * dT1_dVg) - (dT0_dVg * T2)) - (T0 * dT2_dVg)) / T3)) - (Vdsat * dT0_dVg)) / T0, condmask_false0); + dVdsat_dVb = vec4_blend(dVdsat_dVb, ((dT1_dVb - ((((T1 * dT1_dVb) - (dT0_dVb * T2)) - (T0 * dT2_dVb)) / T3)) - (Vdsat * dT0_dVb)) / T0, condmask_false0); + dVdsat_dVd = vec4_blend(dVdsat_dVd, (dT1_dVd - (((T1 * dT1_dVd) - (T0 * dT2_dVd)) / T3)) / T0, condmask_false0); + } + } + + { + heres[0]->BSIM3v32vdsat = Vdsat[0]; + heres[1]->BSIM3v32vdsat = Vdsat[1]; + heres[2]->BSIM3v32vdsat = Vdsat[2]; + heres[3]->BSIM3v32vdsat = Vdsat[3]; + } + T1 = (Vdsat - Vds) - pParam->BSIM3v32delta; + dT1_dVg = dVdsat_dVg; + dT1_dVd = dVdsat_dVd - 1.0; + dT1_dVb = dVdsat_dVb; + T2 = vec4_sqrt((T1 * T1) + ((4.0 * pParam->BSIM3v32delta) * Vdsat)); + T0 = T1 / T2; + T3 = (2.0 * pParam->BSIM3v32delta) / T2; + dT2_dVg = (T0 * dT1_dVg) + (T3 * dVdsat_dVg); + dT2_dVd = (T0 * dT1_dVd) + (T3 * dVdsat_dVd); + dT2_dVb = (T0 * dT1_dVb) + (T3 * dVdsat_dVb); + Vdseff = Vdsat - (0.5 * (T1 + 
T2)); + dVdseff_dVg = dVdsat_dVg - (0.5 * (dT1_dVg + dT2_dVg)); + dVdseff_dVd = dVdsat_dVd - (0.5 * (dT1_dVd + dT2_dVd)); + dVdseff_dVb = dVdsat_dVb - (0.5 * (dT1_dVb + dT2_dVb)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + Vdseff = vec4_blend(Vdseff, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVg = vec4_blend(dVdseff_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVb = vec4_blend(dVdseff_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + tmp4 = 1.0 - (((0.5 * Abulk) * Vdsat) / Vgst2Vtm); + T9 = WVCoxRds * Vgsteff; + T8 = T9 / Vgst2Vtm; + T0 = (EsatL + Vdsat) + ((2.0 * T9) * tmp4); + T7 = (2.0 * WVCoxRds) * tmp4; + dT0_dVg = ((dEsatL_dVg + dVdsat_dVg) + (T7 * (1.0 + (tmp2 * Vgsteff)))) - (T8 * (((Abulk * dVdsat_dVg) - ((Abulk * Vdsat) / Vgst2Vtm)) + (Vdsat * dAbulk_dVg))); + dT0_dVb = ((dEsatL_dVb + dVdsat_dVb) + ((T7 * tmp3) * Vgsteff)) - (T8 * ((dAbulk_dVb * Vdsat) + (Abulk * dVdsat_dVb))); + dT0_dVd = (dEsatL_dVd + dVdsat_dVd) - ((T8 * Abulk) * dVdsat_dVd); + T9 = WVCoxRds * Abulk; + T1 = ((2.0 / Lambda) - 1.0) + T9; + dT1_dVg = ((-2.0) * tmp1) + (WVCoxRds * ((Abulk * tmp2) + dAbulk_dVg)); + dT1_dVb = (dAbulk_dVb * WVCoxRds) + (T9 * tmp3); + Vasat = T0 / T1; + dVasat_dVg = (dT0_dVg - (Vasat * dT1_dVg)) / T1; + dVasat_dVb = (dT0_dVb - (Vasat * dT1_dVb)) / T1; + dVasat_dVd = dT0_dVd / T1; + if (1) + { + Vec4m condmask0 = Vdseff > Vds; + Vec4m condmask_true0 = condmask0; + Vdseff = vec4_blend(Vdseff, Vds, condmask_true0); + } + + diffVds = Vds - Vdseff; + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vdseff = Vdseff[0]; + heres[1]->BSIM3v32Vdseff = Vdseff[1]; + heres[2]->BSIM3v32Vdseff = Vdseff[2]; + heres[3]->BSIM3v32Vdseff = Vdseff[3]; + } + } + + VACLM = vec4_SIMDTOVECTOR(MAX_EXP); + dVACLM_dVd = 
(dVACLM_dVg = (dVACLM_dVb = vec4_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pclm > 0.0) + if (1) + { + Vec4m condmask0 = diffVds > 1.0e-10; + Vec4m condmask_true0 = condmask0; + { + T0 = vec4_blend(T0, 1.0 / ((pParam->BSIM3v32pclm * Abulk) * pParam->BSIM3v32litl), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-T0) / Abulk) * dAbulk_dVb, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, ((-T0) / Abulk) * dAbulk_dVg, condmask_true0); + T2 = vec4_blend(T2, Vgsteff / EsatL, condmask_true0); + T1 = vec4_blend(T1, Leff * (Abulk + T2), condmask_true0); + dT1_dVg = vec4_blend(dT1_dVg, Leff * (((1.0 - (T2 * dEsatL_dVg)) / EsatL) + dAbulk_dVg), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, Leff * (dAbulk_dVb - ((T2 * dEsatL_dVb) / EsatL)), condmask_true0); + dT1_dVd = vec4_blend(dT1_dVd, ((-T2) * dEsatL_dVd) / Esat, condmask_true0); + T9 = vec4_blend(T9, T0 * T1, condmask_true0); + VACLM = vec4_blend(VACLM, T9 * diffVds, condmask_true0); + dVACLM_dVg = vec4_blend(dVACLM_dVg, (((T0 * dT1_dVg) * diffVds) - (T9 * dVdseff_dVg)) + ((T1 * diffVds) * dT0_dVg), condmask_true0); + dVACLM_dVb = vec4_blend(dVACLM_dVb, (((dT0_dVb * T1) + (T0 * dT1_dVb)) * diffVds) - (T9 * dVdseff_dVb), condmask_true0); + dVACLM_dVd = vec4_blend(dVACLM_dVd, ((T0 * dT1_dVd) * diffVds) + (T9 * (1.0 - dVdseff_dVd)), condmask_true0); + } + } + + + if (pParam->BSIM3v32thetaRout > 0.0) + { + T8 = Abulk * Vdsat; + T0 = Vgst2Vtm * T8; + dT0_dVg = (((Vgst2Vtm * Abulk) * dVdsat_dVg) + T8) + ((Vgst2Vtm * Vdsat) * dAbulk_dVg); + dT0_dVb = Vgst2Vtm * ((dAbulk_dVb * Vdsat) + (Abulk * dVdsat_dVb)); + dT0_dVd = (Vgst2Vtm * Abulk) * dVdsat_dVd; + T1 = Vgst2Vtm + T8; + dT1_dVg = (1.0 + (Abulk * dVdsat_dVg)) + (Vdsat * dAbulk_dVg); + dT1_dVb = (Abulk * dVdsat_dVb) + (dAbulk_dVb * Vdsat); + dT1_dVd = Abulk * dVdsat_dVd; + T9 = T1 * T1; + T2 = vec4_SIMDTOVECTOR(pParam->BSIM3v32thetaRout); + VADIBL = (Vgst2Vtm - (T0 / T1)) / T2; + dVADIBL_dVg = ((1.0 - (dT0_dVg / T1)) + ((T0 * dT1_dVg) / T9)) / T2; + dVADIBL_dVb = 
(((-dT0_dVb) / T1) + ((T0 * dT1_dVb) / T9)) / T2; + dVADIBL_dVd = (((-dT0_dVd) / T1) + ((T0 * dT1_dVd) / T9)) / T2; + T7 = pParam->BSIM3v32pdiblb * Vbseff; + if (1) + { + Vec4m condmask0 = T7 >= (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T3 = vec4_blend(T3, 1.0 / (1.0 + T7), condmask_true0); + VADIBL = vec4_blend(VADIBL, VADIBL * T3, condmask_true0); + dVADIBL_dVg = vec4_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_true0); + dVADIBL_dVb = vec4_blend(dVADIBL_dVb, (dVADIBL_dVb - (VADIBL * pParam->BSIM3v32pdiblb)) * T3, condmask_true0); + dVADIBL_dVd = vec4_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_true0); + } + { + T4 = vec4_blend(T4, 1.0 / (0.8 + T7), condmask_false0); + T3 = vec4_blend(T3, (17.0 + (20.0 * T7)) * T4, condmask_false0); + dVADIBL_dVg = vec4_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_false0); + dVADIBL_dVb = vec4_blend(dVADIBL_dVb, (dVADIBL_dVb * T3) - (((VADIBL * pParam->BSIM3v32pdiblb) * T4) * T4), condmask_false0); + dVADIBL_dVd = vec4_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_false0); + VADIBL = vec4_blend(VADIBL, VADIBL * T3, condmask_false0); + } + } + + } + else + { + VADIBL = vec4_SIMDTOVECTOR(MAX_EXP); + dVADIBL_dVd = (dVADIBL_dVg = (dVADIBL_dVb = vec4_SIMDTOVECTOR(0.0))); + } + + T8 = pParam->BSIM3v32pvag / EsatL; + T9 = T8 * Vgsteff; + if (1) + { + Vec4m condmask0 = T9 > (-0.9); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, 1.0 + T9, condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, T8 * (1.0 - ((Vgsteff * dEsatL_dVg) / EsatL)), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, ((-T9) * dEsatL_dVb) / EsatL, condmask_true0); + dT0_dVd = vec4_blend(dT0_dVd, ((-T9) * dEsatL_dVd) / EsatL, condmask_true0); + } + { + T1 = vec4_blend(T1, 1.0 / (17.0 + (20.0 * T9)), condmask_false0); + T0 = vec4_blend(T0, (0.8 + T9) * T1, condmask_false0); + T1 = vec4_blend(T1, T1 * T1, condmask_false0); + dT0_dVg = vec4_blend(dT0_dVg, (T8 * (1.0 - ((Vgsteff 
* dEsatL_dVg) / EsatL))) * T1, condmask_false0); + T9 = vec4_blend(T9, T9 * (T1 / EsatL), condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, (-T9) * dEsatL_dVb, condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, (-T9) * dEsatL_dVd, condmask_false0); + } + } + + tmp1 = VACLM * VACLM; + tmp2 = VADIBL * VADIBL; + tmp3 = VACLM + VADIBL; + T1 = (VACLM * VADIBL) / tmp3; + tmp3 *= tmp3; + dT1_dVg = ((tmp1 * dVADIBL_dVg) + (tmp2 * dVACLM_dVg)) / tmp3; + dT1_dVd = ((tmp1 * dVADIBL_dVd) + (tmp2 * dVACLM_dVd)) / tmp3; + dT1_dVb = ((tmp1 * dVADIBL_dVb) + (tmp2 * dVACLM_dVb)) / tmp3; + Va = Vasat + (T0 * T1); + dVa_dVg = (dVasat_dVg + (T1 * dT0_dVg)) + (T0 * dT1_dVg); + dVa_dVd = (dVasat_dVd + (T1 * dT0_dVd)) + (T0 * dT1_dVd); + dVa_dVb = (dVasat_dVb + (T1 * dT0_dVb)) + (T0 * dT1_dVb); + dVASCBE_dVg = (dVASCBE_dVd = (dVASCBE_dVb = vec4_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pscbe2 > 0.0) + { + if (1) + { + Vec4m condmask0 = diffVds > ((pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, (pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / diffVds, condmask_true0); + VASCBE = vec4_blend(VASCBE, (Leff * vec4_exp(T0)) / pParam->BSIM3v32pscbe2, condmask_true0); + T1 = vec4_blend(T1, (T0 * VASCBE) / diffVds, condmask_true0); + dVASCBE_dVg = vec4_blend(dVASCBE_dVg, T1 * dVdseff_dVg, condmask_true0); + dVASCBE_dVd = vec4_blend(dVASCBE_dVd, (-T1) * (1.0 - dVdseff_dVd), condmask_true0); + dVASCBE_dVb = vec4_blend(dVASCBE_dVb, T1 * dVdseff_dVb, condmask_true0); + } + { + VASCBE = vec4_blend(VASCBE, vec4_SIMDTOVECTOR((MAX_EXP * Leff) / pParam->BSIM3v32pscbe2), condmask_false0); + } + } + + } + else + { + VASCBE = vec4_SIMDTOVECTOR(MAX_EXP); + } + + CoxWovL = (model->BSIM3v32cox * Weff) / Leff; + beta = ueff * CoxWovL; + dbeta_dVg = (CoxWovL * dueff_dVg) + ((beta * dWeff_dVg) / Weff); + dbeta_dVd = CoxWovL * dueff_dVd; + dbeta_dVb = (CoxWovL * dueff_dVb) + ((beta * dWeff_dVb) / 
Weff); + T0 = 1.0 - (((0.5 * Abulk) * Vdseff) / Vgst2Vtm); + dT0_dVg = ((-0.5) * (((Abulk * dVdseff_dVg) - ((Abulk * Vdseff) / Vgst2Vtm)) + (Vdseff * dAbulk_dVg))) / Vgst2Vtm; + dT0_dVd = (((-0.5) * Abulk) * dVdseff_dVd) / Vgst2Vtm; + dT0_dVb = ((-0.5) * ((Abulk * dVdseff_dVb) + (dAbulk_dVb * Vdseff))) / Vgst2Vtm; + fgche1 = Vgsteff * T0; + dfgche1_dVg = (Vgsteff * dT0_dVg) + T0; + dfgche1_dVd = Vgsteff * dT0_dVd; + dfgche1_dVb = Vgsteff * dT0_dVb; + T9 = Vdseff / EsatL; + fgche2 = 1.0 + T9; + dfgche2_dVg = (dVdseff_dVg - (T9 * dEsatL_dVg)) / EsatL; + dfgche2_dVd = (dVdseff_dVd - (T9 * dEsatL_dVd)) / EsatL; + dfgche2_dVb = (dVdseff_dVb - (T9 * dEsatL_dVb)) / EsatL; + gche = (beta * fgche1) / fgche2; + dgche_dVg = (((beta * dfgche1_dVg) + (fgche1 * dbeta_dVg)) - (gche * dfgche2_dVg)) / fgche2; + dgche_dVd = (((beta * dfgche1_dVd) + (fgche1 * dbeta_dVd)) - (gche * dfgche2_dVd)) / fgche2; + dgche_dVb = (((beta * dfgche1_dVb) + (fgche1 * dbeta_dVb)) - (gche * dfgche2_dVb)) / fgche2; + T0 = 1.0 + (gche * Rds); + T9 = Vdseff / T0; + Idl = gche * T9; + dIdl_dVg = (((gche * dVdseff_dVg) + (T9 * dgche_dVg)) / T0) - (((Idl * gche) / T0) * dRds_dVg); + dIdl_dVd = ((gche * dVdseff_dVd) + (T9 * dgche_dVd)) / T0; + dIdl_dVb = (((gche * dVdseff_dVb) + (T9 * dgche_dVb)) - ((Idl * dRds_dVb) * gche)) / T0; + T9 = diffVds / Va; + T0 = 1.0 + T9; + Idsa = Idl * T0; + dIdsa_dVg = (T0 * dIdl_dVg) - ((Idl * (dVdseff_dVg + (T9 * dVa_dVg))) / Va); + dIdsa_dVd = (T0 * dIdl_dVd) + ((Idl * ((1.0 - dVdseff_dVd) - (T9 * dVa_dVd))) / Va); + dIdsa_dVb = (T0 * dIdl_dVb) - ((Idl * (dVdseff_dVb + (T9 * dVa_dVb))) / Va); + T9 = diffVds / VASCBE; + T0 = 1.0 + T9; + Ids = Idsa * T0; + Gm = (T0 * dIdsa_dVg) - ((Idsa * (dVdseff_dVg + (T9 * dVASCBE_dVg))) / VASCBE); + Gds = (T0 * dIdsa_dVd) + ((Idsa * ((1.0 - dVdseff_dVd) - (T9 * dVASCBE_dVd))) / VASCBE); + Gmb = (T0 * dIdsa_dVb) - ((Idsa * (dVdseff_dVb + (T9 * dVASCBE_dVb))) / VASCBE); + Gds += Gm * dVgsteff_dVd; + Gmb += Gm * dVgsteff_dVb; + Gm *= 
dVgsteff_dVg; + Gmb *= dVbseff_dVb; + tmpuni = pParam->BSIM3v32alpha0 + (pParam->BSIM3v32alpha1 * Leff); + if ((tmpuni <= 0.0) || (pParam->BSIM3v32beta0 <= 0.0)) + { + Isub = (Gbd = (Gbb = (Gbg = vec4_SIMDTOVECTOR(0.0)))); + } + else + { + T2 = vec4_SIMDTOVECTOR(tmpuni / Leff); + if (1) + { + Vec4m condmask0 = diffVds > (pParam->BSIM3v32beta0 / EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, (-pParam->BSIM3v32beta0) / diffVds, condmask_true0); + T1 = vec4_blend(T1, (T2 * diffVds) * vec4_exp(T0), condmask_true0); + T3 = vec4_blend(T3, (T1 / diffVds) * (T0 - 1.0), condmask_true0); + dT1_dVg = vec4_blend(dT1_dVg, T3 * dVdseff_dVg, condmask_true0); + dT1_dVd = vec4_blend(dT1_dVd, T3 * (dVdseff_dVd - 1.0), condmask_true0); + dT1_dVb = vec4_blend(dT1_dVb, T3 * dVdseff_dVb, condmask_true0); + } + { + T3 = vec4_blend(T3, T2 * MIN_EXP, condmask_false0); + T1 = vec4_blend(T1, T3 * diffVds, condmask_false0); + dT1_dVg = vec4_blend(dT1_dVg, (-T3) * dVdseff_dVg, condmask_false0); + dT1_dVd = vec4_blend(dT1_dVd, T3 * (1.0 - dVdseff_dVd), condmask_false0); + dT1_dVb = vec4_blend(dT1_dVb, (-T3) * dVdseff_dVb, condmask_false0); + } + } + + Isub = T1 * Idsa; + Gbg = (T1 * dIdsa_dVg) + (Idsa * dT1_dVg); + Gbd = (T1 * dIdsa_dVd) + (Idsa * dT1_dVd); + Gbb = (T1 * dIdsa_dVb) + (Idsa * dT1_dVb); + Gbd += Gbg * dVgsteff_dVd; + Gbb += Gbg * dVgsteff_dVb; + Gbg *= dVgsteff_dVg; + Gbb *= dVbseff_dVb; + } + + cdrain = Ids; + { + heres[0]->BSIM3v32gds = Gds[0]; + heres[1]->BSIM3v32gds = Gds[1]; + heres[2]->BSIM3v32gds = Gds[2]; + heres[3]->BSIM3v32gds = Gds[3]; + } + { + heres[0]->BSIM3v32gm = Gm[0]; + heres[1]->BSIM3v32gm = Gm[1]; + heres[2]->BSIM3v32gm = Gm[2]; + heres[3]->BSIM3v32gm = Gm[3]; + } + { + heres[0]->BSIM3v32gmbs = Gmb[0]; + heres[1]->BSIM3v32gmbs = Gmb[1]; + heres[2]->BSIM3v32gmbs = Gmb[2]; + heres[3]->BSIM3v32gmbs = Gmb[3]; + } + { + heres[0]->BSIM3v32gbbs = Gbb[0]; + heres[1]->BSIM3v32gbbs = Gbb[1]; + 
heres[2]->BSIM3v32gbbs = Gbb[2]; + heres[3]->BSIM3v32gbbs = Gbb[3]; + } + { + heres[0]->BSIM3v32gbgs = Gbg[0]; + heres[1]->BSIM3v32gbgs = Gbg[1]; + heres[2]->BSIM3v32gbgs = Gbg[2]; + heres[3]->BSIM3v32gbgs = Gbg[3]; + } + { + heres[0]->BSIM3v32gbds = Gbd[0]; + heres[1]->BSIM3v32gbds = Gbd[1]; + heres[2]->BSIM3v32gbds = Gbd[2]; + heres[3]->BSIM3v32gbds = Gbd[3]; + } + { + heres[0]->BSIM3v32csub = Isub[0]; + heres[1]->BSIM3v32csub = Isub[1]; + heres[2]->BSIM3v32csub = Isub[2]; + heres[3]->BSIM3v32csub = Isub[3]; + } + CoxWL = (model->BSIM3v32cox * pParam->BSIM3v32weffCV) * pParam->BSIM3v32leffCV; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + if ((model->BSIM3v32xpart < 0) | (!ChargeComputationNeeded)) + { + qgate = vec4_SIMDTOVECTOR(0.0); + qdrn = vec4_SIMDTOVECTOR(0.0); + qsrc = vec4_SIMDTOVECTOR(0.0); + qbulk = vec4_SIMDTOVECTOR(0.0); + { + heres[0]->BSIM3v32cggb = 0.0; + heres[1]->BSIM3v32cggb = 0.0; + heres[2]->BSIM3v32cggb = 0.0; + heres[3]->BSIM3v32cggb = 0.0; + } + { + heres[0]->BSIM3v32cgsb = 0.0; + heres[1]->BSIM3v32cgsb = 0.0; + heres[2]->BSIM3v32cgsb = 0.0; + heres[3]->BSIM3v32cgsb = 0.0; + } + { + heres[0]->BSIM3v32cgdb = 0.0; + heres[1]->BSIM3v32cgdb = 0.0; + heres[2]->BSIM3v32cgdb = 0.0; + heres[3]->BSIM3v32cgdb = 0.0; + } + { + heres[0]->BSIM3v32cdgb = 0.0; + heres[1]->BSIM3v32cdgb = 0.0; + heres[2]->BSIM3v32cdgb = 0.0; + heres[3]->BSIM3v32cdgb = 0.0; + } + { + heres[0]->BSIM3v32cdsb = 0.0; + heres[1]->BSIM3v32cdsb = 0.0; + heres[2]->BSIM3v32cdsb = 0.0; + heres[3]->BSIM3v32cdsb = 0.0; + } + { + heres[0]->BSIM3v32cddb = 0.0; + heres[1]->BSIM3v32cddb = 0.0; + heres[2]->BSIM3v32cddb = 0.0; + heres[3]->BSIM3v32cddb = 0.0; + } + { + heres[0]->BSIM3v32cbgb = 0.0; + heres[1]->BSIM3v32cbgb = 0.0; + heres[2]->BSIM3v32cbgb = 0.0; + heres[3]->BSIM3v32cbgb = 0.0; + } + { + heres[0]->BSIM3v32cbsb = 0.0; + heres[1]->BSIM3v32cbsb = 0.0; + heres[2]->BSIM3v32cbsb = 0.0; + heres[3]->BSIM3v32cbsb = 0.0; + } + { + heres[0]->BSIM3v32cbdb = 
0.0; + heres[1]->BSIM3v32cbdb = 0.0; + heres[2]->BSIM3v32cbdb = 0.0; + heres[3]->BSIM3v32cbdb = 0.0; + } + { + heres[0]->BSIM3v32cqdb = 0.0; + heres[1]->BSIM3v32cqdb = 0.0; + heres[2]->BSIM3v32cqdb = 0.0; + heres[3]->BSIM3v32cqdb = 0.0; + } + { + heres[0]->BSIM3v32cqsb = 0.0; + heres[1]->BSIM3v32cqsb = 0.0; + heres[2]->BSIM3v32cqsb = 0.0; + heres[3]->BSIM3v32cqsb = 0.0; + } + { + heres[0]->BSIM3v32cqgb = 0.0; + heres[1]->BSIM3v32cqgb = 0.0; + heres[2]->BSIM3v32cqgb = 0.0; + heres[3]->BSIM3v32cqgb = 0.0; + } + { + heres[0]->BSIM3v32cqbb = 0.0; + heres[1]->BSIM3v32cqbb = 0.0; + heres[2]->BSIM3v32cqbb = 0.0; + heres[3]->BSIM3v32cqbb = 0.0; + } + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + } + goto finished; + } + else + if (model->BSIM3v32capMod == 0) + { + if (1) + { + Vec4m condmask0 = Vbseff < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vbseff = vec4_blend(Vbseff, Vbs, condmask_true0); + dVbseff_dVb = vec4_blend(dVbseff_dVb, vec4_SIMDTOVECTOR(1.0), condmask_true0); + } + { + Vbseff = vec4_blend(Vbseff, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseff_dVb = vec4_blend(dVbseff_dVb, -dPhis_dVb, condmask_false0); + } + } + + Vfb = vec4_SIMDTOVECTOR(pParam->BSIM3v32vfbcv); + Vth = (Vfb + pParam->BSIM3v32phi) + (pParam->BSIM3v32k1ox * sqrtPhis); + Vgst = Vgs_eff - Vth; + dVth_dVb = pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVgst_dVb = -dVth_dVb; + dVgst_dVg = dVgs_eff_dVg; + Arg1 = (Vgs_eff - Vbseff) - Vfb; + if (1) + { + Vec4m condmask0 = Arg1 <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + qgate = vec4_blend(qgate, CoxWL * Arg1, condmask_true0); + qbulk = vec4_blend(qbulk, -qgate, condmask_true0); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = CoxWL * dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true0[1]) + 
heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = CoxWL * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true0[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = (-CoxWL) * dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d 
val = -((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}); + if (condmask_true0[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32qinv = 0.0; + + } + } + if (1) + { + Vec4m condmask1 = Vgst <= 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + T1 = vec4_blend(T1, vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_true1); + T2 = vec4_blend(T2, vec4_sqrt((T1 * T1) + Arg1), condmask_true1); + qgate = vec4_blend(qgate, (CoxWL * pParam->BSIM3v32k1ox) * (T2 - T1), condmask_true1); + qbulk = vec4_blend(qbulk, -qgate, condmask_true1); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true1); + T0 = vec4_blend(T0, (CoxWL * T1) / T2, condmask_true1); + { + Vec4d val = T0 * dVgs_eff_dVg; + if (condmask_true1[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = T0 * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true1[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true1[3]) + 
heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = -((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true1[3]) + 
heres[3]->BSIM3v32qinv = 0.0; + + } + } + { + AbulkCV = vec4_blend(AbulkCV, Abulk0 * pParam->BSIM3v32abulkCVfactor, condmask_false1); + dAbulkCV_dVb = vec4_blend(dAbulkCV_dVb, pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb, condmask_false1); + Vdsat = vec4_blend(Vdsat, Vgst / AbulkCV, condmask_false1); + dVdsat_dVg = vec4_blend(dVdsat_dVg, dVgs_eff_dVg / AbulkCV, condmask_false1); + dVdsat_dVb = vec4_blend(dVdsat_dVb, (-((Vdsat * dAbulkCV_dVb) + dVth_dVb)) / AbulkCV, condmask_false1); + if (model->BSIM3v32xpart > 0.5) + { + if (1) + { + Vec4m condmask2 = Vdsat <= Vds; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, vec4_SIMDTOVECTOR(0.0), condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + 
if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + 
heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + T7 = vec4_blend(T7, ((2.0 * Vds) - T1) - (3.0 * T3), condmask_false2); + T8 = vec4_blend(T8, (T3 - T1) - (2.0 * Vds), condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T10 = vec4_blend(T10, T4 * T8, condmask_false2); + qdrn = vec4_blend(qdrn, T4 * T7, condmask_false2); + qbulk = vec4_blend(qbulk, -((qgate + qdrn) + T10), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T11 = vec4_blend(T11, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T11) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, 
heres[3]->BSIM3v32cgdb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec4_blend(T7, T9 * T7, condmask_false2); + T8 = vec4_blend(T8, T9 * T8, condmask_false2); + T9 = vec4_blend(T9, (2.0 * T4) * (1.0 - (3.0 * T5)), condmask_false2); + { + Vec4d val = ((T7 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T12 = vec4_blend(T12, (T7 * dAlphaz_dVb) - (T9 * dVdsat_dVb), condmask_false2); + { + Vec4d val = T4 * ((3.0 - (6.0 * T2)) - (3.0 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) + T12) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + T9 = vec4_blend(T9, (2.0 * T4) * (1.0 + T5), condmask_false2); + 
T10 = vec4_blend(T10, ((T8 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec4_blend(T11, (T8 * dAlphaz_dVb) - (T9 * dVdsat_dVb), condmask_false2); + T12 = vec4_blend(T12, T4 * (((2.0 * T2) + T5) - 1.0), condmask_false2); + T0 = vec4_blend(T0, -((T10 + T11) + T12), condmask_false2); + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + T10); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + T0); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if 
(condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + else + if (model->BSIM3v32xpart < 0.5) + { + if (1) + { + Vec4m condmask2 = Vds >= Vdsat; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, 0.4 * T2, condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + T3 = vec4_blend(T3, vec4_SIMDTOVECTOR(0.4 * Two_Third_CoxWL), condmask_true2); + { + Vec4d val = (-T3) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + 
heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + T4 = vec4_blend(T4, T3 * dVth_dVb, condmask_true2); + { + Vec4d val = -(T4 + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = 
val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + tmp = vec4_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + 
dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T6 = vec4_blend(T6, (((8.0 * Vdsat) * Vdsat) - ((6.0 * Vdsat) * Vds)) + ((1.2 * Vds) * Vds), condmask_false2); + T8 = vec4_blend(T8, T2 / T1, condmask_false2); + T7 = vec4_blend(T7, (Vds - T1) - (T8 * T6), condmask_false2); + qdrn = vec4_blend(qdrn, T4 * T7, condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + tmp = vec4_blend(tmp, T8 / T1, condmask_false2); + tmp1 = vec4_blend(tmp1, T4 * ((2.0 - ((4.0 * tmp) * T6)) + (T8 * ((16.0 * Vdsat) - (6.0 * Vds)))), condmask_false2); + { + Vec4d val = ((T7 * dAlphaz_dVg) - (tmp1 * dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T10 = vec4_blend(T10, (T7 * dAlphaz_dVb) - (tmp1 * dVdsat_dVb), condmask_false2); + { + Vec4d val = T4 * ((2.0 - (((1.0 / ((3.0 * T1) * T1)) + (2.0 * tmp)) * T6)) + (T8 * ((6.0 * Vdsat) - (2.4 * Vds)))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) + T10) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + T7 = 
vec4_blend(T7, 2.0 * (T1 + T3), condmask_false2); + qbulk = vec4_blend(qbulk, -(qgate - (T4 * T7)), condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + T0 = vec4_blend(T0, (4.0 * T4) * (1.0 - T5), condmask_false2); + T12 = vec4_blend(T12, ((((-T7) * dAlphaz_dVg) - ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) - (T0 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec4_blend(T11, (((-T7) * dAlphaz_dVb) - T10) - (T0 * dVdsat_dVb), condmask_false2); + T10 = vec4_blend(T10, (((-4.0) * T4) * ((T2 - 0.5) + (0.5 * T5))) - ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}), condmask_false2); + tmp = vec4_blend(tmp, -((T10 + T11) + T12), condmask_false2); + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + T10); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, 
heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + else + { + if (1) + { + Vec4m condmask2 = Vds >= Vdsat; + Vec4m condmask_true2 = condmask_false1 & condmask2; + Vec4m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec4_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec4_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec4_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec4_blend(qdrn, 0.5 * T2, condmask_true2); + { + Vec4d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + T2 = vec4_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + 
heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + } + { + Vec4d val = (-One_Third_CoxWL) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + } + T4 = vec4_blend(T4, One_Third_CoxWL * dVth_dVb, condmask_true2); + { + Vec4d val = -(T4 + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + T3 = vec4_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + 
heres[3]->BSIM3v32cbsb = val[3]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + { + Alphaz = vec4_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec4_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec4_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec4_blend(T3, T2 * Vds, condmask_false2); + T9 = vec4_blend(T9, vec4_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec4_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec4_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec4_blend(T5, T3 / T1, condmask_false2); + { + Vec4d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + } + tmp = vec4_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec4d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, 
heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + } + T6 = vec4_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec4_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec4_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec4_blend(T7, T1 + T3, condmask_false2); + qdrn = vec4_blend(qdrn, (-T4) * T7, condmask_false2); + qbulk = vec4_blend(qbulk, -((qgate + qdrn) + qdrn), condmask_false2); + T7 = vec4_blend(T7, T7 * T9, condmask_false2); + T0 = vec4_blend(T0, T4 * ((2.0 * T5) - 2.0), condmask_false2); + { + Vec4d val = ((T0 * dVdsat_dVg) - (T7 * dAlphaz_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + } + T12 = vec4_blend(T12, (T0 * dVdsat_dVb) - (T7 * dAlphaz_dVb), condmask_false2); + { + Vec4d val = T4 * ((1.0 - (2.0 * T2)) - T5); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) + T12) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = 
val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + (2.0 * ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + } + { + Vec4d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + } + } + } + + } + + + } + } + + } + + } + else + { + if (1) + { + Vec4m condmask0 = Vbseff < 0.0; + Vec4m condmask_true0 = 
condmask0; + Vec4m condmask_false0 = ~condmask0; + { + VbseffCV = vec4_blend(VbseffCV, Vbseff, condmask_true0); + dVbseffCV_dVb = vec4_blend(dVbseffCV_dVb, vec4_SIMDTOVECTOR(1.0), condmask_true0); + } + { + VbseffCV = vec4_blend(VbseffCV, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseffCV_dVb = vec4_blend(dVbseffCV_dVb, -dPhis_dVb, condmask_false0); + } + } + + noff = n * pParam->BSIM3v32noff; + dnoff_dVd = pParam->BSIM3v32noff * dn_dVd; + dnoff_dVb = pParam->BSIM3v32noff * dn_dVb; + T0 = Vtm * noff; + voffcv = pParam->BSIM3v32voffcv; + VgstNVt = (Vgst - voffcv) / T0; + if (1) + { + Vec4m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Vgsteff = vec4_blend(Vgsteff, Vgst - voffcv, condmask_true0); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec4m condmask1 = VgstNVt < (-EXP_THRESHOLD); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec4_blend(Vgsteff, T0 * log(1.0 + MIN_EXP), condmask_true1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, Vgsteff / noff, condmask_true1); + dVgsteff_dVb = vec4_blend(dVgsteff_dVb, dVgsteff_dVd * dnoff_dVb, condmask_true1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, dVgsteff_dVd * dnoff_dVd, condmask_true1); + } + { + ExpVgst = vec4_blend(ExpVgst, vec4_exp(VgstNVt), condmask_false1); + Vgsteff = vec4_blend(Vgsteff, T0 * vec4_log(1.0 + ExpVgst), condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dVgsteff_dVd = vec4_blend(dVgsteff_dVd, ((-dVgsteff_dVg) * (dVth_dVd + (((Vgst - voffcv) / noff) * dnoff_dVd))) + ((Vgsteff / noff) * dnoff_dVd), condmask_false1); + 
dVgsteff_dVb = vec4_blend(dVgsteff_dVb, ((-dVgsteff_dVg) * (dVth_dVb + (((Vgst - voffcv) / noff) * dnoff_dVb))) + ((Vgsteff / noff) * dnoff_dVb), condmask_false1); + dVgsteff_dVg = vec4_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_false1); + } + } + + } + + if (model->BSIM3v32capMod == 1) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = vec4_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + Arg1 = ((Vgs_eff - VbseffCV) - Vfb) - Vgsteff; + if (1) + { + Vec4m condmask0 = Arg1 <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + qgate = vec4_blend(qgate, CoxWL * Arg1, condmask_true0); + Cgg = vec4_blend(Cgg, CoxWL * (dVgs_eff_dVg - dVgsteff_dVg), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec4_blend(Cgd, (-CoxWL) * dVgsteff_dVd, condmask_true0); + Cgb = vec4_blend(Cgb, (-CoxWL) * (dVbseffCV_dVb + dVgsteff_dVb), condmask_true0); + break; + + case BSIM3v32V32: + + default: + Cgd = vec4_blend(Cgd, (-CoxWL) * (dVfb_dVd + dVgsteff_dVd), condmask_true0); + Cgb = vec4_blend(Cgb, (-CoxWL) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_true0); + + } + + } + { + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_false0); + T1 = vec4_blend(T1, vec4_sqrt((T0 * T0) + Arg1), condmask_false0); + T2 = vec4_blend(T2, (CoxWL * T0) / T1, condmask_false0); + qgate = vec4_blend(qgate, (CoxWL * 
pParam->BSIM3v32k1ox) * (T1 - T0), condmask_false0); + Cgg = vec4_blend(Cgg, T2 * (dVgs_eff_dVg - dVgsteff_dVg), condmask_false0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec4_blend(Cgd, (-T2) * dVgsteff_dVd, condmask_false0); + Cgb = vec4_blend(Cgb, (-T2) * (dVbseffCV_dVb + dVgsteff_dVb), condmask_false0); + break; + + case BSIM3v32V32: + + default: + Cgd = vec4_blend(Cgd, (-T2) * (dVfb_dVd + dVgsteff_dVd), condmask_false0); + Cgb = vec4_blend(Cgb, (-T2) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_false0); + + } + + } + } + + qbulk = -qgate; + Cbg = -Cgg; + Cbd = -Cgd; + Cbb = -Cgb; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + if (1) + { + Vec4m condmask0 = VdsatCV < Vds; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + dVdsatCV_dVg = vec4_blend(dVdsatCV_dVg, 1.0 / AbulkCV, condmask_true0); + dVdsatCV_dVb = vec4_blend(dVdsatCV_dVb, ((-VdsatCV) * dAbulkCV_dVb) / AbulkCV, condmask_true0); + T0 = vec4_blend(T0, Vgsteff - (VdsatCV / 3.0), condmask_true0); + dT0_dVg = vec4_blend(dT0_dVg, 1.0 - (dVdsatCV_dVg / 3.0), condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (-dVdsatCV_dVb) / 3.0, condmask_true0); + qgate = vec4_blend(qgate, qgate + (CoxWL * T0), condmask_true0); + Cgg1 = vec4_blend(Cgg1, CoxWL * dT0_dVg, condmask_true0); + Cgb1 = vec4_blend(Cgb1, (CoxWL * dT0_dVb) + (Cgg1 * dVgsteff_dVb), condmask_true0); + Cgd1 = vec4_blend(Cgd1, Cgg1 * dVgsteff_dVd, condmask_true0); + Cgg1 = vec4_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_true0); + Cgg = vec4_blend(Cgg, Cgg + Cgg1, condmask_true0); + Cgb = vec4_blend(Cgb, Cgb + Cgb1, condmask_true0); + Cgd = vec4_blend(Cgd, Cgd + Cgd1, condmask_true0); + T0 = vec4_blend(T0, VdsatCV - Vgsteff, condmask_true0); + dT0_dVg = 
vec4_blend(dT0_dVg, dVdsatCV_dVg - 1.0, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, dVdsatCV_dVb, condmask_true0); + qbulk = vec4_blend(qbulk, qbulk + (One_Third_CoxWL * T0), condmask_true0); + Cbg1 = vec4_blend(Cbg1, One_Third_CoxWL * dT0_dVg, condmask_true0); + Cbb1 = vec4_blend(Cbb1, (One_Third_CoxWL * dT0_dVb) + (Cbg1 * dVgsteff_dVb), condmask_true0); + Cbd1 = vec4_blend(Cbd1, Cbg1 * dVgsteff_dVd, condmask_true0); + Cbg1 = vec4_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_true0); + Cbg = vec4_blend(Cbg, Cbg + Cbg1, condmask_true0); + Cbb = vec4_blend(Cbb, Cbb + Cbb1, condmask_true0); + Cbd = vec4_blend(Cbd, Cbd + Cbd1, condmask_true0); + if (model->BSIM3v32xpart > 0.5) + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(-Two_Third_CoxWL), condmask_true0); + else + if (model->BSIM3v32xpart < 0.5) + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR((-0.4) * CoxWL), condmask_true0); + else + T0 = vec4_blend(T0, vec4_SIMDTOVECTOR(-One_Third_CoxWL), condmask_true0); + + + qsrc = vec4_blend(qsrc, T0 * Vgsteff, condmask_true0); + Csg = vec4_blend(Csg, T0 * dVgsteff_dVg, condmask_true0); + Csb = vec4_blend(Csb, T0 * dVgsteff_dVb, condmask_true0); + Csd = vec4_blend(Csd, T0 * dVgsteff_dVd, condmask_true0); + Cgb = vec4_blend(Cgb, Cgb * dVbseff_dVb, condmask_true0); + Cbb = vec4_blend(Cbb, Cbb * dVbseff_dVb, condmask_true0); + Csb = vec4_blend(Csb, Csb * dVbseff_dVb, condmask_true0); + } + { + T0 = vec4_blend(T0, AbulkCV * Vds, condmask_false0); + T1 = vec4_blend(T1, 12.0 * ((Vgsteff - (0.5 * T0)) + 1.e-20), condmask_false0); + T2 = vec4_blend(T2, Vds / T1, condmask_false0); + T3 = vec4_blend(T3, T0 * T2, condmask_false0); + dT3_dVg = vec4_blend(dT3_dVg, (((-12.0) * T2) * T2) * AbulkCV, condmask_false0); + dT3_dVd = vec4_blend(dT3_dVd, ((((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / T1) / T1) - 0.5, condmask_false0); + dT3_dVb = vec4_blend(dT3_dVb, (((12.0 * T2) * T2) * dAbulkCV_dVb) * Vgsteff, condmask_false0); + qgate = vec4_blend(qgate, qgate + (CoxWL * ((Vgsteff - (0.5 * Vds)) + T3)), 
condmask_false0); + Cgg1 = vec4_blend(Cgg1, CoxWL * (1.0 + dT3_dVg), condmask_false0); + Cgb1 = vec4_blend(Cgb1, (CoxWL * dT3_dVb) + (Cgg1 * dVgsteff_dVb), condmask_false0); + Cgd1 = vec4_blend(Cgd1, (CoxWL * dT3_dVd) + (Cgg1 * dVgsteff_dVd), condmask_false0); + Cgg1 = vec4_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_false0); + Cgg = vec4_blend(Cgg, Cgg + Cgg1, condmask_false0); + Cgb = vec4_blend(Cgb, Cgb + Cgb1, condmask_false0); + Cgd = vec4_blend(Cgd, Cgd + Cgd1, condmask_false0); + qbulk = vec4_blend(qbulk, qbulk + ((CoxWL * (1.0 - AbulkCV)) * ((0.5 * Vds) - T3)), condmask_false0); + Cbg1 = vec4_blend(Cbg1, (-CoxWL) * ((1.0 - AbulkCV) * dT3_dVg), condmask_false0); + Cbb1 = vec4_blend(Cbb1, ((-CoxWL) * (((1.0 - AbulkCV) * dT3_dVb) + (((0.5 * Vds) - T3) * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb), condmask_false0); + Cbd1 = vec4_blend(Cbd1, (((-CoxWL) * (1.0 - AbulkCV)) * dT3_dVd) + (Cbg1 * dVgsteff_dVd), condmask_false0); + Cbg1 = vec4_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_false0); + Cbg = vec4_blend(Cbg, Cbg + Cbg1, condmask_false0); + Cbb = vec4_blend(Cbb, Cbb + Cbb1, condmask_false0); + Cbd = vec4_blend(Cbd, Cbd + Cbd1, condmask_false0); + if (model->BSIM3v32xpart > 0.5) + { + T1 = vec4_blend(T1, T1 + T1, condmask_false0); + qsrc = vec4_blend(qsrc, (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)), condmask_false0); + Csg = vec4_blend(Csg, (-CoxWL) * (0.5 + (((((24.0 * T0) * Vds) / T1) / T1) * AbulkCV)), condmask_false0); + Csb = vec4_blend(Csb, ((-CoxWL) * (((0.25 * Vds) * dAbulkCV_dVb) - ((((((12.0 * T0) * Vds) / T1) / T1) * ((4.0 * Vgsteff) - T0)) * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec4_blend(Csd, ((-CoxWL) * ((0.25 * AbulkCV) - (((((12.0 * AbulkCV) * T0) / T1) / T1) * ((4.0 * Vgsteff) - T0)))) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec4_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = vec4_blend(T1, T1 / 12.0, condmask_false0); + T2 = 
vec4_blend(T2, (0.5 * CoxWL) / (T1 * T1), condmask_false0); + T3 = vec4_blend(T3, (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0), condmask_false0); + qsrc = vec4_blend(qsrc, (-T2) * T3, condmask_false0); + T4 = vec4_blend(T4, (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0), condmask_false0); + Csg = vec4_blend(Csg, (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))), condmask_false0); + Csb = vec4_blend(Csb, ((((qsrc / T1) * Vds) + ((T2 * T4) * Vds)) * dAbulkCV_dVb) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec4_blend(Csd, (((qsrc / T1) + (T2 * T4)) * AbulkCV) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec4_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + { + qsrc = vec4_blend(qsrc, (-0.5) * (qgate + qbulk), condmask_false0); + Csg = vec4_blend(Csg, (-0.5) * (Cgg1 + Cbg1), condmask_false0); + Csb = vec4_blend(Csb, (-0.5) * (Cgb1 + Cbb1), condmask_false0); + Csd = vec4_blend(Csd, (-0.5) * (Cgd1 + Cbd1), condmask_false0); + } + + + Cgb = vec4_blend(Cgb, Cgb * dVbseff_dVb, condmask_false0); + Cbb = vec4_blend(Cbb, Cbb * dVbseff_dVb, condmask_false0); + Csb = vec4_blend(Csb, Csb * dVbseff_dVb, condmask_false0); + } + } + + qdrn = -((qgate + qbulk) + qsrc); + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb 
= val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + Vec4d val = -(qgate + qbulk); + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + } + } + else + if (model->BSIM3v32capMod == 2) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = vec4_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + V3 = ((Vfb - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * Vfb; + if (1) + { + Vec4m condmask0 = Vfb <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, T0 - T2, 
condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec4_blend(T0, T0 + T2, condmask_false0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec4_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = Vfb - (0.5 * (V3 + T0)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVd = ((1.0 - T1) - T2) * dVfb_dVd; + + } + + dVfbeff_dVg = T1 * dVgs_eff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVb = (((1.0 - T1) - T2) * dVfb_dVb) - (T1 * dVbseffCV_dVb); + + } + + Qac0 = CoxWL * (Vfbeff - Vfb); + dQac0_dVg = CoxWL * dVfbeff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dQac0_dVd = CoxWL * (dVfbeff_dVd - dVfb_dVd); + + } + + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQac0_dVb = CoxWL * dVfbeff_dVb; + break; + + case BSIM3v32V32: + + default: + dQac0_dVb = CoxWL * (dVfbeff_dVb - dVfb_dVb); + + } + + T0 = vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec4_SIMDTOVECTOR(0.0); + T2 = vec4_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec4m condmask0 = T3 < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(CoxWL), condmask_true0); + } + { + T1 = vec4_blend(T1, vec4_sqrt((T0 * T0) + T3), condmask_false0); + T2 = vec4_blend(T2, (CoxWL * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWL * 
pParam->BSIM3v32k1ox) * (T1 - T0); + dQsub0_dVg = T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQsub0_dVd = (-T2) * dVgsteff_dVd; + break; + + case BSIM3v32V32: + + default: + dQsub0_dVd = (-T2) * (dVfbeff_dVd + dVgsteff_dVd); + + } + + dQsub0_dVb = (-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb); + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec4_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + dVdseffCV_dVg = T3; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + VdseffCV = vec4_blend(VdseffCV, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec4_blend(dVdseffCV_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = vec4_blend(dVdseffCV_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = 12.0 * ((Vgsteff - (0.5 * T0)) + 1e-20); + T2 = VdseffCV / T1; + T3 = T0 * T2; + T4 = 1.0 - (((12.0 * T2) * T2) * AbulkCV); + T5 = (((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / (T1 * T1)) - 0.5; + T6 = ((12.0 * T2) * T2) * Vgsteff; + qinoi = (-CoxWL) * ((Vgsteff - (0.5 * T0)) + (AbulkCV * T3)); + qgate = CoxWL * ((Vgsteff - (0.5 * VdseffCV)) + T3); + Cgg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cgd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd); + Cgb1 = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb); + Cgg1 *= 
dVgsteff_dVg; + T7 = 1.0 - AbulkCV; + qbulk = (CoxWL * T7) * ((0.5 * VdseffCV) - T3); + T4 = (-T7) * (T4 - 1.0); + T5 = (-T7) * T5; + T6 = -((T7 * T6) + ((0.5 * VdseffCV) - T3)); + Cbg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cbd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd); + Cbb1 = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb); + Cbg1 *= dVgsteff_dVg; + if (model->BSIM3v32xpart > 0.5) + { + T1 = T1 + T1; + qsrc = (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)); + T7 = ((4.0 * Vgsteff) - T0) / (T1 * T1); + T4 = -(0.5 + (((24.0 * T0) * T0) / (T1 * T1))); + T5 = -((0.25 * AbulkCV) - (((12.0 * AbulkCV) * T0) * T7)); + T6 = -((0.25 * VdseffCV) - (((12.0 * T0) * VdseffCV) * T7)); + Csg = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Csd = ((CoxWL * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = T1 / 12.0; + T2 = (0.5 * CoxWL) / (T1 * T1); + T3 = (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T2) * T3; + T7 = (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0); + T4 = (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T5 = ((qsrc / T1) + (T2 * T7)) * AbulkCV; + T6 = ((qsrc / T1) * VdseffCV) + ((T2 * T7) * VdseffCV); + Csg = T4 + (T5 * dVdseffCV_dVg); + Csd = (T5 * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb)) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + { + qsrc = (-0.5) * (qgate + qbulk); + Csg = (-0.5) * (Cgg1 + Cbg1); + Csb = (-0.5) * (Cgb1 + Cbb1); + Csd = (-0.5) * (Cgd1 + Cbd1); + } + + + qgate += Qac0 + Qsub0; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + qbulk) + qsrc); + Cgg = (dQac0_dVg + dQsub0_dVg) + Cgg1; + switch 
(model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = dQsub0_dVd + Cgd1; + break; + + case BSIM3v32V32: + + default: + Cgd = (dQac0_dVd + dQsub0_dVd) + Cgd1; + + } + + Cgb = (dQac0_dVb + dQsub0_dVb) + Cgb1; + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cbd = Cbd1 - dQsub0_dVd; + break; + + case BSIM3v32V32: + + default: + Cbd = (Cbd1 - dQac0_dVd) - dQsub0_dVd; + + } + + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = 
val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + heres[0]->BSIM3v32qinv = qinoi[0]; + heres[1]->BSIM3v32qinv = qinoi[1]; + heres[2]->BSIM3v32qinv = qinoi[2]; + heres[3]->BSIM3v32qinv = qinoi[3]; + } + } + else + if (model->BSIM3v32capMod == 3) + { + V3 = ((((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}); + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) <= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T0 = vec4_blend(T0, T0 - T2, condmask_true0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec4_blend(T0, T0 + T2, condmask_false0); + T2 = vec4_blend(T2, vec4_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec4_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb}) - (0.5 * (V3 + T0)); + dVfbeff_dVg = T1 * dVgs_eff_dVg; + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + Cox = model->BSIM3v32cox; + Tox = 1.0e8 * model->BSIM3v32tox; + T0 = ((Vgs_eff - VbseffCV) - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})) / Tox; + dT0_dVg = dVgs_eff_dVg / Tox; + dT0_dVb = (-dVbseffCV_dVb) / Tox; + tmp = T0 * pParam->BSIM3v32acde; + dTcen_dVg = (dTcen_dVb = vec4_SIMDTOVECTOR(0.0)); + if (1) + { + Vec4m condmask0 = ((-EXP_THRESHOLD) < tmp) & (tmp < EXP_THRESHOLD); + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = 
~condmask0; + { + Tcen = vec4_blend(Tcen, pParam->BSIM3v32ldeb * vec4_exp(tmp), condmask_true0); + dTcen_dVg = vec4_blend(dTcen_dVg, pParam->BSIM3v32acde * Tcen, condmask_true0); + dTcen_dVb = vec4_blend(dTcen_dVb, dTcen_dVg * dT0_dVb, condmask_true0); + dTcen_dVg = vec4_blend(dTcen_dVg, dTcen_dVg * dT0_dVg, condmask_true0); + } + if (1) + { + Vec4m condmask1 = tmp <= (-EXP_THRESHOLD); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + Tcen = vec4_blend(Tcen, vec4_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MIN_EXP), condmask_true1); + } + { + Tcen = vec4_blend(Tcen, vec4_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MAX_EXP), condmask_false1); + } + } + + } + + LINK = 1.0e-3 * model->BSIM3v32tox; + V3 = (pParam->BSIM3v32ldeb - Tcen) - LINK; + V4 = vec4_sqrt((V3 * V3) + ((4.0 * LINK) * pParam->BSIM3v32ldeb)); + Tcen = pParam->BSIM3v32ldeb - (0.5 * (V3 + V4)); + T1 = 0.5 * (1.0 + (V3 / V4)); + dTcen_dVg *= T1; + dTcen_dVb *= T1; + Ccen = EPSSI / Tcen; + T2 = Cox / (Cox + Ccen); + Coxeff = T2 * Ccen; + T3 = (-Ccen) / Tcen; + dCoxeff_dVg = (T2 * T2) * T3; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + Qac0 = CoxWLcen * (Vfbeff - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})); + QovCox = Qac0 / Coxeff; + dQac0_dVg = (CoxWLcen * dVfbeff_dVg) + (QovCox * dCoxeff_dVg); + dQac0_dVb = (CoxWLcen * dVfbeff_dVb) + (QovCox * dCoxeff_dVb); + T0 = vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec4_SIMDTOVECTOR(0.0); + T2 = vec4_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec4m condmask0 = T3 < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), condmask_true0); + T2 = vec4_blend(T2, CoxWLcen, condmask_true0); + } + { + T1 = 
vec4_blend(T1, vec4_sqrt((T0 * T0) + T3), condmask_false0); + T2 = vec4_blend(T2, (CoxWLcen * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWLcen * pParam->BSIM3v32k1ox) * (T1 - T0); + QovCox = Qsub0 / Coxeff; + dQsub0_dVg = (T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg)) + (QovCox * dCoxeff_dVg); + dQsub0_dVd = (-T2) * dVgsteff_dVd; + dQsub0_dVb = ((-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + if (pParam->BSIM3v32k1ox <= 0.0) + { + Denomi = vec4_SIMDTOVECTOR((0.25 * pParam->BSIM3v32moin) * Vtm); + T0 = vec4_SIMDTOVECTOR(0.5 * pParam->BSIM3v32sqrtPhi); + } + else + { + Denomi = vec4_SIMDTOVECTOR(((pParam->BSIM3v32moin * Vtm) * pParam->BSIM3v32k1ox) * pParam->BSIM3v32k1ox); + T0 = vec4_SIMDTOVECTOR(pParam->BSIM3v32k1ox * pParam->BSIM3v32sqrtPhi); + } + + T1 = (2.0 * T0) + Vgsteff; + DeltaPhi = Vtm * vec4_log(1.0 + ((T1 * Vgsteff) / Denomi)); + dDeltaPhi_dVg = ((2.0 * Vtm) * (T1 - T0)) / (Denomi + (T1 * Vgsteff)); + dDeltaPhi_dVd = dDeltaPhi_dVg * dVgsteff_dVd; + dDeltaPhi_dVb = dDeltaPhi_dVg * dVgsteff_dVb; + T3 = 4.0 * ((Vth - ((Vec4d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb})) - pParam->BSIM3v32phi); + Tox += Tox; + if (1) + { + Vec4m condmask0 = T3 >= 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, (Vgsteff + T3) / Tox, condmask_true0); + dT0_dVd = vec4_blend(dT0_dVd, (dVgsteff_dVd + (4.0 * dVth_dVd)) / Tox, condmask_true0); + dT0_dVb = vec4_blend(dT0_dVb, (dVgsteff_dVb + (4.0 * dVth_dVb)) / Tox, condmask_true0); + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (Vgsteff + T3) / Tox, condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec4_blend(T0, (Vgsteff + 1.0e-20) / Tox, 
condmask_false0); + dT0_dVd = vec4_blend(dT0_dVd, dVgsteff_dVd / Tox, condmask_false0); + dT0_dVb = vec4_blend(dT0_dVb, dVgsteff_dVb / Tox, condmask_false0); + break; + + case BSIM3v32V32: + + default: + T0 = vec4_blend(T0, (Vgsteff + 1.0e-20) / Tox, condmask_false0); + + } + + } + } + + tmp = vec4_pow0p7(T0, 0.7); + T1 = 1.0 + tmp; + T2 = (0.7 * tmp) / (T0 * Tox); + Tcen = 1.9e-9 / T1; + dTcen_dVg = (((-1.9e-9) * T2) / T1) / T1; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dTcen_dVd = Tox * dTcen_dVg; + dTcen_dVb = dTcen_dVd * dT0_dVb; + dTcen_dVd *= dT0_dVd; + break; + + case BSIM3v32V32: + + default: + dTcen_dVd = dTcen_dVg * ((4.0 * dVth_dVd) + dVgsteff_dVd); + dTcen_dVb = dTcen_dVg * ((4.0 * dVth_dVb) + dVgsteff_dVb); + + } + + dTcen_dVg *= dVgsteff_dVg; + Ccen = EPSSI / Tcen; + T0 = Cox / (Cox + Ccen); + Coxeff = T0 * Ccen; + T1 = (-Ccen) / Tcen; + dCoxeff_dVg = (T0 * T0) * T1; + dCoxeff_dVd = dCoxeff_dVg * dTcen_dVd; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = (Vgsteff - DeltaPhi) / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec4_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + T4 = T3 * (1.0 - dDeltaPhi_dVg); + dVdseffCV_dVg = T4; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec4m condmask0 = Vds == 0.0; + Vec4m condmask_true0 = condmask0; + { + VdseffCV = vec4_blend(VdseffCV, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec4_blend(dVdseffCV_dVg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = 
vec4_blend(dVdseffCV_dVb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = Vgsteff - DeltaPhi; + T2 = 12.0 * ((T1 - (0.5 * T0)) + 1.0e-20); + T3 = T0 / T2; + T4 = 1.0 - ((12.0 * T3) * T3); + T5 = AbulkCV * ((((6.0 * T0) * ((4.0 * T1) - T0)) / (T2 * T2)) - 0.5); + T6 = (T5 * VdseffCV) / AbulkCV; + qgate = (qinoi = CoxWLcen * (T1 - (T0 * (0.5 - T3)))); + QovCox = qgate / Coxeff; + Cgg1 = CoxWLcen * ((T4 * (1.0 - dDeltaPhi_dVg)) + (T5 * dVdseffCV_dVg)); + Cgd1 = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cgb1 = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cgg1 = (Cgg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + T7 = 1.0 - AbulkCV; + T8 = T2 * T2; + T9 = (((12.0 * T7) * T0) * T0) / (T8 * AbulkCV); + T10 = T9 * (1.0 - dDeltaPhi_dVg); + T11 = ((-T7) * T5) / AbulkCV; + T12 = -(((T9 * T1) / AbulkCV) + (VdseffCV * (0.5 - (T0 / T2)))); + qbulk = (CoxWLcen * T7) * ((0.5 * VdseffCV) - ((T0 * VdseffCV) / T2)); + QovCox = qbulk / Coxeff; + Cbg1 = CoxWLcen * (T10 + (T11 * dVdseffCV_dVg)); + Cbd1 = (((CoxWLcen * T11) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cbb1 = ((CoxWLcen * ((T11 * dVdseffCV_dVb) + (T12 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cbg1 = (Cbg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + if (model->BSIM3v32xpart > 0.5) + { + qsrc = (-CoxWLcen) * (((T1 / 2.0) + (T0 / 4.0)) - (((0.5 * T0) * T0) / T2)); + QovCox = qsrc / Coxeff; + T2 += T2; + T3 = T2 * T2; + T7 = -(0.25 - (((12.0 * T0) * ((4.0 * T1) - T0)) / T3)); + T4 = (-(0.5 + (((24.0 * T0) * T0) / T3))) * (1.0 - dDeltaPhi_dVg); + T5 = T7 * AbulkCV; + T6 = T7 * VdseffCV; + Csg = CoxWLcen * (T4 + (T5 * dVdseffCV_dVg)); + Csd = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Csb = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + 
(T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T2 = T2 / 12.0; + T3 = (0.5 * CoxWLcen) / (T2 * T2); + T4 = (T1 * ((((2.0 * T0) * T0) / 3.0) + (T1 * (T1 - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T3) * T4; + QovCox = qsrc / Coxeff; + T8 = (((4.0 / 3.0) * T1) * (T1 - T0)) + ((0.4 * T0) * T0); + T5 = (((-2.0) * qsrc) / T2) - (T3 * ((T1 * ((3.0 * T1) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T6 = AbulkCV * ((qsrc / T2) + (T3 * T8)); + T7 = (T6 * VdseffCV) / AbulkCV; + Csg = (T5 * (1.0 - dDeltaPhi_dVg)) + (T6 * dVdseffCV_dVg); + Csd = ((Csg * dVgsteff_dVd) + (T6 * dVdseffCV_dVd)) + (QovCox * dCoxeff_dVd); + Csb = (((Csg * dVgsteff_dVb) + (T6 * dVdseffCV_dVb)) + (T7 * dAbulkCV_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + { + qsrc = (-0.5) * qgate; + Csg = (-0.5) * Cgg1; + Csd = (-0.5) * Cgd1; + Csb = (-0.5) * Cgb1; + } + + + qgate += (Qac0 + Qsub0) - qbulk; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + qbulk) + qsrc); + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + Cbd = Cbd1 - dQsub0_dVd; + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgg = Cgg1 - Cbg; + Cgd = Cgd1 - Cbd; + Cgb = Cgb1 - Cbb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + } + { + Vec4d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + } + { + Vec4d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + 
heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + } + { + Vec4d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + } + { + Vec4d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + } + { + Vec4d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + } + { + Vec4d val = -qinoi; + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + } + } + + + + } + + + finished: + { + heres[0]->BSIM3v32qgate = qgate[0]; + heres[1]->BSIM3v32qgate = qgate[1]; + heres[2]->BSIM3v32qgate = qgate[2]; + heres[3]->BSIM3v32qgate = qgate[3]; + } + + { + heres[0]->BSIM3v32qbulk = qbulk[0]; + heres[1]->BSIM3v32qbulk = qbulk[1]; + heres[2]->BSIM3v32qbulk = qbulk[2]; + heres[3]->BSIM3v32qbulk = qbulk[3]; + } + { + heres[0]->BSIM3v32qdrn = qdrn[0]; + heres[1]->BSIM3v32qdrn = qdrn[1]; + heres[2]->BSIM3v32qdrn = qdrn[2]; + heres[3]->BSIM3v32qdrn = qdrn[3]; + } + { + heres[0]->BSIM3v32cd = cdrain[0]; + heres[1]->BSIM3v32cd = cdrain[1]; + heres[2]->BSIM3v32cd = cdrain[2]; + heres[3]->BSIM3v32cd = cdrain[3]; + } + if (ChargeComputationNeeded) + { + Vec4d nstate_qbs = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs}); + Vec4d 
nstate_qbd = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd}); + if (model->BSIM3v32acmMod == 0) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbd = model->BSIM3v32unitAreaTempJctCap * ((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaTempJctCap * ((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbd = model->BSIM3v32unitAreaJctCap * ((Vec4d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaJctCap * ((Vec4d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea}); + + } + + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) < pParam->BSIM3v32weff; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + czbdsw = vec4_blend(czbdsw, vec4_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdswg = vec4_blend(czbdswg, model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdswg = vec4_blend(czbdswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, 
heres[3]->BSIM3v32drainPerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdsw = vec4_blend(czbdsw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec4_blend(czbdswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdsw = vec4_blend(czbdsw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec4d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec4_blend(czbdswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + if (1) + { + Vec4m condmask0 = ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) < pParam->BSIM3v32weff; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + czbssw = vec4_blend(czbssw, vec4_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbsswg = vec4_blend(czbsswg, model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbsswg = vec4_blend(czbsswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, 
heres[3]->BSIM3v32sourcePerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbssw = vec4_blend(czbssw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec4_blend(czbsswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbssw = vec4_blend(czbssw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec4d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec4_blend(czbsswg, vec4_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + } + else + { + error = vec4_BSIM3v32_ACM_junctionCapacitances(model, heres, &czbd, &czbdsw, &czbdswg, &czbs, &czbssw, &czbsswg); + if (SIMDANY(error)) + return error; + + } + + MJ = model->BSIM3v32bulkJctBotGradingCoeff; + MJSW = model->BSIM3v32bulkJctSideGradingCoeff; + MJSWG = model->BSIM3v32bulkJctGateSideGradingCoeff; + if (1) + { + Vec4m condmask0 = vbs == 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + nstate_qbs = vec4_blend(nstate_qbs, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = (czbs + czbssw) + czbsswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + if (1) + { + Vec4m condmask1 = vbs < 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m 
condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec4m condmask2 = czbs > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + Vec4m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJ(arg, -MJ), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, ((model->BSIM3v32PhiB * czbs) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec4d val = czbs * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + { + nstate_qbs = vec4_blend(nstate_qbs, vec4_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbs = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbs = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbs = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbs = 0.0; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbssw > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSW(arg, -MJSW), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSW * czbssw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec4d val = czbssw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + } + } + } + + 
if (1) + { + Vec4m condmask2 = czbsswg > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSWG(arg, -MJSWG), condmask_true2); + + nstate_qbs = vec4_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSWG * czbsswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec4d val = czbsswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + } + } + } + + } + { + T0 = vec4_blend(T0, (czbs + czbssw) + czbsswg, condmask_false1); + T1 = vec4_blend(T1, vbs * ((((czbs * MJ) / model->BSIM3v32PhiB) + ((czbssw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbsswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbs = vec4_blend(nstate_qbs, vbs * (T0 + (0.5 * T1)), condmask_false1); + { + Vec4d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbs = val[3]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs}, nstate_qbs); + if (1) + { + Vec4m condmask0 = vbd == 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + nstate_qbd = vec4_blend(nstate_qbd, vec4_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec4d val = (czbd + czbdsw) + czbdswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true0[2]) + 
heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + if (1) + { + Vec4m condmask1 = vbd < 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec4m condmask2 = czbd > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + Vec4m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJ(arg, -MJ), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, ((model->BSIM3v32PhiB * czbd) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec4d val = czbd * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + { + nstate_qbd = vec4_blend(nstate_qbd, vec4_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbd = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbd = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbd = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbd = 0.0; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbdsw > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSW(arg, -MJSW), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSW * czbdsw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec4d val = czbdsw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += 
val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + } + } + } + + if (1) + { + Vec4m condmask2 = czbdswg > 0.0; + Vec4m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec4_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec4_blend(sarg, 1.0 / vec4_sqrt(arg), condmask_true2); + else + sarg = vec4_blend(sarg, vec4_powMJSWG(arg, -MJSWG), condmask_true2); + + nstate_qbd = vec4_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSWG * czbdswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec4d val = czbdswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + } + } + } + + } + { + T0 = vec4_blend(T0, (czbd + czbdsw) + czbdswg, condmask_false1); + T1 = vec4_blend(T1, vbd * ((((czbd * MJ) / model->BSIM3v32PhiB) + ((czbdsw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbdswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbd = vec4_blend(nstate_qbd, vbd * (T0 + (0.5 * T1)), condmask_false1); + { + Vec4d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbd = val[3]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd}, nstate_qbd); + } + + if ((heres[0]->BSIM3v32off == 0) || (!(ckt->CKTmode & MODEINITFIX))) + { + Vec4m nonconcount; + nonconcount = Check; + nonconcount = nonconcount & 1; + { + heres[0]->BSIM3v32noncon 
= nonconcount[0]; + heres[1]->BSIM3v32noncon = nonconcount[1]; + heres[2]->BSIM3v32noncon = nonconcount[2]; + heres[3]->BSIM3v32noncon = nonconcount[3]; + } + } + else + { + heres[0]->BSIM3v32noncon = 0; + heres[1]->BSIM3v32noncon = 0; + heres[2]->BSIM3v32noncon = 0; + heres[3]->BSIM3v32noncon = 0; + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vbs, heres[1]->BSIM3v32vbs, heres[2]->BSIM3v32vbs, heres[3]->BSIM3v32vbs}, vbs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vbd, heres[1]->BSIM3v32vbd, heres[2]->BSIM3v32vbd, heres[3]->BSIM3v32vbd}, vbd); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vgs, heres[1]->BSIM3v32vgs, heres[2]->BSIM3v32vgs, heres[3]->BSIM3v32vgs}, vgs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32vds, heres[1]->BSIM3v32vds, heres[2]->BSIM3v32vds, heres[3]->BSIM3v32vds}, vds); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qdef, heres[1]->BSIM3v32qdef, heres[2]->BSIM3v32qdef, heres[3]->BSIM3v32qdef}, qdef); + if (!ChargeComputationNeeded) + goto line850; + + line755: + if (heres[0]->BSIM3v32nqsMod) + { + qcheq = -(qbulk + qgate); + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})); + heres[0]->BSIM3v32cqgb = val[0]; + heres[1]->BSIM3v32cqgb = val[1]; + heres[2]->BSIM3v32cqgb = val[2]; + heres[3]->BSIM3v32cqgb = val[3]; + } + { + Vec4d val = -(((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})); + heres[0]->BSIM3v32cqdb = val[0]; + heres[1]->BSIM3v32cqdb = val[1]; + heres[2]->BSIM3v32cqdb = val[2]; + heres[3]->BSIM3v32cqdb = val[3]; + } + { + Vec4d val = -(((Vec4d 
){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})); + heres[0]->BSIM3v32cqsb = val[0]; + heres[1]->BSIM3v32cqsb = val[1]; + heres[2]->BSIM3v32cqsb = val[2]; + heres[3]->BSIM3v32cqsb = val[3]; + } + { + Vec4d val = -((((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) + ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb})) + ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb})); + heres[0]->BSIM3v32cqbb = val[0]; + heres[1]->BSIM3v32cqbb = val[1]; + heres[2]->BSIM3v32cqbb = val[2]; + heres[3]->BSIM3v32cqbb = val[3]; + } + gtau_drift = vec4_fabs(((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qcheq) * ScalingFactor; + T0 = vec4_SIMDTOVECTOR(pParam->BSIM3v32leffCV * pParam->BSIM3v32leffCV); + gtau_diff = (((16.0 * ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / T0) * ScalingFactor; + { + Vec4d val = gtau_drift + gtau_diff; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + } + } + + + if (model->BSIM3v32capMod == 0) + { + cgdo = vec4_SIMDTOVECTOR(pParam->BSIM3v32cgdo); + qgdo = pParam->BSIM3v32cgdo * vgd; + cgso = vec4_SIMDTOVECTOR(pParam->BSIM3v32cgso); + qgso = pParam->BSIM3v32cgso * vgs; + } + else + if (model->BSIM3v32capMod == 1) + { + if (1) + { + Vec4m condmask0 = vgd < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_sqrt(1.0 - ((4.0 * vgd) / pParam->BSIM3v32ckappa)), condmask_true0); + cgdo = vec4_blend(cgdo, pParam->BSIM3v32cgdo + 
((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) / T1), condmask_true0); + qgdo = vec4_blend(qgdo, (pParam->BSIM3v32cgdo * vgd) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgdl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgdo = vec4_blend(cgdo, vec4_SIMDTOVECTOR(pParam->BSIM3v32cgdo + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl)), condmask_false0); + qgdo = vec4_blend(qgdo, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) + pParam->BSIM3v32cgdo) * vgd, condmask_false0); + } + } + + if (1) + { + Vec4m condmask0 = vgs < 0.0; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + T1 = vec4_blend(T1, vec4_sqrt(1.0 - ((4.0 * vgs) / pParam->BSIM3v32ckappa)), condmask_true0); + cgso = vec4_blend(cgso, pParam->BSIM3v32cgso + ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) / T1), condmask_true0); + qgso = vec4_blend(qgso, (pParam->BSIM3v32cgso * vgs) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgsl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgso = vec4_blend(cgso, vec4_SIMDTOVECTOR(pParam->BSIM3v32cgso + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl)), condmask_false0); + qgso = vec4_blend(qgso, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) + pParam->BSIM3v32cgso) * vgs, condmask_false0); + } + } + + } + else + { + T0 = vgd + DELTA_1; + T1 = vec4_sqrt((T0 * T0) + (4.0 * DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl); + T4 = vec4_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgdo = (pParam->BSIM3v32cgdo + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgdo = ((pParam->BSIM3v32cgdo + T3) * vgd) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + T0 = vgs + DELTA_1; + T1 = vec4_sqrt((T0 * T0) + (4.0 * DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec4_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl); + T4 = vec4_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgso = 
(pParam->BSIM3v32cgso + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgso = ((pParam->BSIM3v32cgso + T3) * vgs) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + } + + + { + heres[0]->BSIM3v32cgdo = cgdo[0]; + heres[1]->BSIM3v32cgdo = cgdo[1]; + heres[2]->BSIM3v32cgdo = cgdo[2]; + heres[3]->BSIM3v32cgdo = cgdo[3]; + } + { + heres[0]->BSIM3v32cgso = cgso[0]; + heres[1]->BSIM3v32cgso = cgso[1]; + heres[2]->BSIM3v32cgso = cgso[2]; + heres[3]->BSIM3v32cgso = cgso[3]; + } + ag0 = ckt->CKTag[0]; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + ggtg = (ggtd = (ggtb = (ggts = vec4_SIMDTOVECTOR(0.0)))); + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec4_blend(gcggb, (((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + cgdo) + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec4_blend(gcgdb, (((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) - cgdo) * ag0, condmask_true0); + gcgsb = vec4_blend(gcgsb, (((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) - cgso) * ag0, condmask_true0); + gcdgb = vec4_blend(gcdgb, (((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) - cgdo) * ag0, condmask_true0); + gcddb = vec4_blend(gcddb, ((((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}) + ((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) + cgdo) * ag0, condmask_true0); + gcdsb = vec4_blend(gcdsb, ((Vec4d ){heres[0]->BSIM3v32cdsb, 
heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}) * ag0, condmask_true0); + gcsgb = vec4_blend(gcsgb, (-(((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + cgso)) * ag0, condmask_true0); + gcsdb = vec4_blend(gcsdb, (-((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))) * ag0, condmask_true0); + gcssb = vec4_blend(gcssb, ((((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) - ((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))) * ag0, condmask_true0); + gcbgb = vec4_blend(gcbgb, (((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcbdb = vec4_blend(gcbdb, (((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb}) - ((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_true0); + gcbsb = vec4_blend(gcbsb, (((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb}) - ((Vec4d 
){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_true0); + qgd = vec4_blend(qgd, qgdo, condmask_true0); + qgs = vec4_blend(qgs, qgso, condmask_true0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec4_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_true0); + qbulk = vec4_blend(qbulk, qbulk - qgb, condmask_true0); + qdrn = vec4_blend(qdrn, qdrn - qgd, condmask_true0); + qsrc = vec4_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.6), condmask_true0); + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true0); + } + else + { + if (1) + { + Vec4m condmask1 = qcheq > 0.0; + Vec4m condmask_true1 = condmask_true0 & condmask1; + Vec4m condmask_false1 = condmask_true0 & (~condmask1); + T0 = vec4_blend(T0, (((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec4_blend(T0, ((-((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec4_blend(ggtg, T0 * ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + } + ggtd = vec4_blend(ggtd, T0 * ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtd = ggtd[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtd = ggtd[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtd = ggtd[2]; + + if 
(condmask_true0[3]) + heres[3]->BSIM3v32gtd = ggtd[3]; + + } + ggts = vec4_blend(ggts, T0 * ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gts = ggts[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gts = ggts[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gts = ggts[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gts = ggts[3]; + + } + ggtb = vec4_blend(ggtb, T0 * ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtb = ggtb[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + } + gqdef = vec4_blend(gqdef, vec4_SIMDTOVECTOR(ScalingFactor * ag0), condmask_true0); + gcqgb = vec4_blend(gcqgb, ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) * ag0, condmask_true0); + gcqdb = vec4_blend(gcqdb, ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}) * ag0, condmask_true0); + gcqsb = vec4_blend(gcqsb, ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}) * ag0, condmask_true0); + gcqbb = vec4_blend(gcqbb, ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}) * ag0, condmask_true0); + gcggb = vec4_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec4_blend(gcgdb, (-cgdo) * ag0, condmask_true0); + gcgsb = vec4_blend(gcgsb, (-cgso) * ag0, condmask_true0); + gcdgb = vec4_blend(gcdgb, (-cgdo) * ag0, condmask_true0); + gcddb = vec4_blend(gcddb, (((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, 
heres[3]->BSIM3v32capbd}) + cgdo) * ag0, condmask_true0); + gcdsb = vec4_blend(gcdsb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gcsgb = vec4_blend(gcsgb, (-cgso) * ag0, condmask_true0); + gcsdb = vec4_blend(gcsdb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gcssb = vec4_blend(gcssb, (((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) * ag0, condmask_true0); + gcbgb = vec4_blend(gcbgb, vec4_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * ag0), condmask_true0); + gcbdb = vec4_blend(gcbdb, (-((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_true0); + gcbsb = vec4_blend(gcbsb, (-((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_true0); + if (1) + { + Vec4m condmask1 = vec4_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec4m condmask_true1 = condmask_true0 & condmask1; + Vec4m condmask_false1 = condmask_true0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + dxpart = vec4_blend(dxpart, qdrn / qcheq, condmask_false1); + Cdd = vec4_blend(Cdd, (Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}, condmask_false1); + Csd = vec4_blend(Csd, -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})), condmask_false1); + ddxpart_dVd = vec4_blend(ddxpart_dVd, (Cdd 
- (dxpart * (Cdd + Csd))) / qcheq, condmask_false1); + Cdg = vec4_blend(Cdg, (Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}, condmask_false1); + Csg = vec4_blend(Csg, -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})), condmask_false1); + ddxpart_dVg = vec4_blend(ddxpart_dVg, (Cdg - (dxpart * (Cdg + Csg))) / qcheq, condmask_false1); + Cds = vec4_blend(Cds, (Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}, condmask_false1); + Css = vec4_blend(Css, -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})), condmask_false1); + ddxpart_dVs = vec4_blend(ddxpart_dVs, (Cds - (dxpart * (Cds + Css))) / qcheq, condmask_false1); + ddxpart_dVb = vec4_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false1); + } + } + + sxpart = vec4_blend(sxpart, 1.0 - dxpart, condmask_true0); + dsxpart_dVd = vec4_blend(dsxpart_dVd, -ddxpart_dVd, condmask_true0); + dsxpart_dVg = vec4_blend(dsxpart_dVg, -ddxpart_dVg, condmask_true0); + dsxpart_dVs = vec4_blend(dsxpart_dVs, -ddxpart_dVs, condmask_true0); + dsxpart_dVb = vec4_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_true0); + qgd = vec4_blend(qgd, qgdo, condmask_true0); + qgs = vec4_blend(qgs, qgso, condmask_true0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec4_blend(qgate, (qgd + qgs) + qgb, 
condmask_true0); + qbulk = vec4_blend(qbulk, -qgb, condmask_true0); + qdrn = vec4_blend(qdrn, -qgd, condmask_true0); + qsrc = vec4_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + } + + } + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec4_blend(gcggb, (((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + cgdo) + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec4_blend(gcgdb, (((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) - cgdo) * ag0, condmask_false0); + gcgsb = vec4_blend(gcgsb, (((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) - cgso) * ag0, condmask_false0); + gcdgb = vec4_blend(gcdgb, (-(((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + cgdo)) * ag0, condmask_false0); + gcddb = vec4_blend(gcddb, ((((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd}) + cgdo) - ((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}))) * ag0, condmask_false0); + gcdsb = vec4_blend(gcdsb, (-((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, 
heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}))) * ag0, condmask_false0); + gcsgb = vec4_blend(gcsgb, (((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}) - cgso) * ag0, condmask_false0); + gcsdb = vec4_blend(gcsdb, ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}) * ag0, condmask_false0); + gcssb = vec4_blend(gcssb, ((((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}) + ((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) + cgso) * ag0, condmask_false0); + gcbgb = vec4_blend(gcbgb, (((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcbdb = vec4_blend(gcbdb, (((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb}) - ((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd})) * ag0, condmask_false0); + gcbsb = vec4_blend(gcbsb, (((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb}) - ((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_false0); + qgd = vec4_blend(qgd, qgdo, condmask_false0); + qgs = vec4_blend(qgs, qgso, condmask_false0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec4_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_false0); + qbulk = vec4_blend(qbulk, qbulk - qgb, condmask_false0); + qsrc = vec4_blend(qsrc, qdrn - qgs, condmask_false0); + qdrn = vec4_blend(qdrn, -((qgate + qbulk) + qsrc), condmask_false0); + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.4), condmask_false0); + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.6), condmask_false0); + } 
+ else + { + if (1) + { + Vec4m condmask1 = qcheq > 0.0; + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + T0 = vec4_blend(T0, (((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec4_blend(T0, ((-((Vec4d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec4_blend(ggtg, T0 * ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + } + ggts = vec4_blend(ggts, T0 * ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtd = ggts[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtd = ggts[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtd = ggts[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtd = ggts[3]; + + } + ggtd = vec4_blend(ggtd, T0 * ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gts = ggtd[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gts = ggtd[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gts = ggtd[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gts = ggtd[3]; + + } + ggtb = vec4_blend(ggtb, T0 * ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}), condmask_false0); + { + if (condmask_false0[0]) + 
heres[0]->BSIM3v32gtb = ggtb[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + } + gqdef = vec4_blend(gqdef, vec4_SIMDTOVECTOR(ScalingFactor * ag0), condmask_false0); + gcqgb = vec4_blend(gcqgb, ((Vec4d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb}) * ag0, condmask_false0); + gcqdb = vec4_blend(gcqdb, ((Vec4d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb}) * ag0, condmask_false0); + gcqsb = vec4_blend(gcqsb, ((Vec4d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb}) * ag0, condmask_false0); + gcqbb = vec4_blend(gcqbb, ((Vec4d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb}) * ag0, condmask_false0); + gcggb = vec4_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec4_blend(gcgdb, (-cgdo) * ag0, condmask_false0); + gcgsb = vec4_blend(gcgsb, (-cgso) * ag0, condmask_false0); + gcdgb = vec4_blend(gcdgb, (-cgdo) * ag0, condmask_false0); + gcddb = vec4_blend(gcddb, (((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd}) + cgdo) * ag0, condmask_false0); + gcdsb = vec4_blend(gcdsb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gcsgb = vec4_blend(gcsgb, (-cgso) * ag0, condmask_false0); + gcsdb = vec4_blend(gcsdb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gcssb = vec4_blend(gcssb, (((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs}) + cgso) * ag0, condmask_false0); + gcbgb = vec4_blend(gcbgb, vec4_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * ag0), condmask_false0); + gcbdb = vec4_blend(gcbdb, (-((Vec4d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, 
heres[3]->BSIM3v32capbd})) * ag0, condmask_false0); + gcbsb = vec4_blend(gcbsb, (-((Vec4d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs})) * ag0, condmask_false0); + if (1) + { + Vec4m condmask1 = vec4_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec4m condmask_true1 = condmask_false0 & condmask1; + Vec4m condmask_false1 = condmask_false0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + sxpart = vec4_blend(sxpart, vec4_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + sxpart = vec4_blend(sxpart, qdrn / qcheq, condmask_false1); + Css = vec4_blend(Css, (Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb}, condmask_false1); + Cds = vec4_blend(Cds, -((((Vec4d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb}) + ((Vec4d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb})) + ((Vec4d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb})), condmask_false1); + dsxpart_dVs = vec4_blend(dsxpart_dVs, (Css - (sxpart * (Css + Cds))) / qcheq, condmask_false1); + Csg = vec4_blend(Csg, (Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb}, condmask_false1); + Cdg = vec4_blend(Cdg, -((((Vec4d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb}) + ((Vec4d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb})) + ((Vec4d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb})), condmask_false1); + dsxpart_dVg = vec4_blend(dsxpart_dVg, (Csg - (sxpart * 
(Csg + Cdg))) / qcheq, condmask_false1); + Csd = vec4_blend(Csd, (Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb}, condmask_false1); + Cdd = vec4_blend(Cdd, -((((Vec4d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb}) + ((Vec4d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb})) + ((Vec4d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb})), condmask_false1); + dsxpart_dVd = vec4_blend(dsxpart_dVd, (Csd - (sxpart * (Csd + Cdd))) / qcheq, condmask_false1); + dsxpart_dVb = vec4_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_false1); + } + } + + dxpart = vec4_blend(dxpart, 1.0 - sxpart, condmask_false0); + ddxpart_dVd = vec4_blend(ddxpart_dVd, -dsxpart_dVd, condmask_false0); + ddxpart_dVg = vec4_blend(ddxpart_dVg, -dsxpart_dVg, condmask_false0); + ddxpart_dVs = vec4_blend(ddxpart_dVs, -dsxpart_dVs, condmask_false0); + ddxpart_dVb = vec4_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false0); + qgd = vec4_blend(qgd, qgdo, condmask_false0); + qgs = vec4_blend(qgs, qgso, condmask_false0); + qgb = vec4_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec4_blend(qgate, (qgd + qgs) + qgb, condmask_false0); + qbulk = vec4_blend(qbulk, -qgb, condmask_false0); + qsrc = vec4_blend(qsrc, -qgs, condmask_false0); + qdrn = vec4_blend(qdrn, -((qgate + qbulk) + qsrc), condmask_false0); + } + + } + } + + cqdef = (cqcheq = vec4_SIMDTOVECTOR(0.0)); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}, qgate); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}, qdrn - vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, 
heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd})); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}, (qbulk + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd})) + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}, qdef * ScalingFactor); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}, qcheq); + } + + if (ckt->CKTmode & MODEINITSMSIG) + { + goto line1000; + } + + if (!ChargeComputationNeeded) + goto line850; + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m 
){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump})); + } + + } + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd}); + if (SIMDANY(error)) + return error; + + if (heres[0]->BSIM3v32nqsMod) + { + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump}); + if (SIMDANY(error)) + return error; + + error = vec4_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec4m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq}); + if (SIMDANY(error)) + return error; + + } + + goto line860; + line850: + ceqqg = (ceqqb = (ceqqd = vec4_SIMDTOVECTOR(0.0))); + + cqcheq = (cqdef = vec4_SIMDTOVECTOR(0.0)); + gcdgb = (gcddb = (gcdsb = vec4_SIMDTOVECTOR(0.0))); + gcsgb = (gcsdb = (gcssb = vec4_SIMDTOVECTOR(0.0))); + gcggb = (gcgdb = (gcgsb = vec4_SIMDTOVECTOR(0.0))); + gcbgb = (gcbdb = (gcbsb = vec4_SIMDTOVECTOR(0.0))); + gqdef = (gcqgb = (gcqdb = (gcqsb = (gcqbb = 
vec4_SIMDTOVECTOR(0.0))))); + ggtg = (ggtd = (ggtb = (ggts = vec4_SIMDTOVECTOR(0.0)))); + dxpart = vec4_SIMDTOVECTOR(0.6); + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + dxpart = vec4_blend(dxpart, vec4_SIMDTOVECTOR(0.4), condmask_true0); + } + + sxpart = 1.0 - dxpart; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec4_SIMDTOVECTOR(0.0)))); + if (heres[0]->BSIM3v32nqsMod) + { + Vec4d val = ((((16.0 * ((Vec4d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / pParam->BSIM3v32leffCV) / pParam->BSIM3v32leffCV) * ScalingFactor; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + } + else + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + } + + goto line900; + line860: + cqgate = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg}); + + cqbulk = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb}); + cqdrn = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd}); + ceqqg = ((cqgate - (gcggb * vgb)) + (gcgdb * vbd)) + (gcgsb * vbs); + ceqqb = ((cqbulk - (gcbgb * vgb)) + (gcbdb * vbd)) + (gcbsb * vbs); + ceqqd = ((cqdrn - (gcdgb * vgb)) + (gcddb * vbd)) + (gcdsb * vbs); + if (heres[0]->BSIM3v32nqsMod) + { + T0 = ((ggtg * vgb) - (ggtd * vbd)) - (ggts * vbs); + ceqqg += T0; + T1 = qdef * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + ceqqd -= (dxpart * T0) + 
(T1 * (((ddxpart_dVg * vgb) - (ddxpart_dVd * vbd)) - (ddxpart_dVs * vbs))); + cqdef = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump}) - (gqdef * qdef); + cqcheq = (vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq}) - (((gcqgb * vgb) - (gcqdb * vbd)) - (gcqsb * vbs))) + T0; + } + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec4m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec4m ){heres[0]->BSIM3v32cqcdump, 
heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump})); + } + + } + + line900: + ; + + if (1) + { + Vec4m condmask0 = BSIM3v32mode; + Vec4m condmask_true0 = condmask0; + Vec4m condmask_false0 = ~condmask0; + { + Gm = vec4_blend(Gm, (Vec4d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm}, condmask_true0); + Gmbs = vec4_blend(Gmbs, (Vec4d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs}, condmask_true0); + FwdSum = vec4_blend(FwdSum, Gm + Gmbs, condmask_true0); + RevSum = vec4_blend(RevSum, vec4_SIMDTOVECTOR(0.0), condmask_true0); + cdreq = vec4_blend(cdreq, model->BSIM3v32type * (((cdrain - (((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) * vds)) - (Gm * vgs)) - (Gmbs * vbs)), condmask_true0); + ceqbd = vec4_blend(ceqbd, (-model->BSIM3v32type) * (((((Vec4d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub}) - (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) * vds)) - (((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}) * vgs)) - (((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}) * vbs)), condmask_true0); + ceqbs = vec4_blend(ceqbs, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbbdp = vec4_blend(gbbdp, -((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}), condmask_true0); + gbbsp = vec4_blend(gbbsp, (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) + ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs})) + ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}), 
condmask_true0); + gbdpg = vec4_blend(gbdpg, (Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}, condmask_true0); + gbdpdp = vec4_blend(gbdpdp, (Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}, condmask_true0); + gbdpb = vec4_blend(gbdpb, (Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}, condmask_true0); + gbdpsp = vec4_blend(gbdpsp, -((gbdpg + gbdpdp) + gbdpb), condmask_true0); + gbspg = vec4_blend(gbspg, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspdp = vec4_blend(gbspdp, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspb = vec4_blend(gbspb, vec4_SIMDTOVECTOR(0.0), condmask_true0); + gbspsp = vec4_blend(gbspsp, vec4_SIMDTOVECTOR(0.0), condmask_true0); + } + { + Gm = vec4_blend(Gm, -((Vec4d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm}), condmask_false0); + Gmbs = vec4_blend(Gmbs, -((Vec4d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs}), condmask_false0); + FwdSum = vec4_blend(FwdSum, vec4_SIMDTOVECTOR(0.0), condmask_false0); + RevSum = vec4_blend(RevSum, -(Gm + Gmbs), condmask_false0); + cdreq = vec4_blend(cdreq, (-model->BSIM3v32type) * (((cdrain + (((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) * vds)) + (Gm * vgd)) + (Gmbs * vbd)), condmask_false0); + ceqbs = vec4_blend(ceqbs, (-model->BSIM3v32type) * (((((Vec4d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub}) + (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) * vds)) - (((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}) * vgd)) - (((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}) 
* vbd)), condmask_false0); + ceqbd = vec4_blend(ceqbd, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbbsp = vec4_blend(gbbsp, -((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}), condmask_false0); + gbbdp = vec4_blend(gbbdp, (((Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}) + ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs})) + ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}), condmask_false0); + gbdpg = vec4_blend(gbdpg, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpsp = vec4_blend(gbdpsp, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpb = vec4_blend(gbdpb, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbdpdp = vec4_blend(gbdpdp, vec4_SIMDTOVECTOR(0.0), condmask_false0); + gbspg = vec4_blend(gbspg, (Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs}, condmask_false0); + gbspsp = vec4_blend(gbspsp, (Vec4d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds}, condmask_false0); + gbspb = vec4_blend(gbspb, (Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs}, condmask_false0); + gbspdp = vec4_blend(gbspdp, -((gbspg + gbspsp) + gbspb), condmask_false0); + } + } + + if (model->BSIM3v32type > 0) + { + ceqbs += ((Vec4d ){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs}) - (((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs); + ceqbd += ((Vec4d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, heres[3]->BSIM3v32cbd}) - (((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd); + } + else + { + ceqbs -= ((Vec4d 
){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs}) - (((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) * vbs); + ceqbd -= ((Vec4d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, heres[3]->BSIM3v32cbd}) - (((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) * vbd); + ceqqg = -ceqqg; + ceqqb = -ceqqb; + ceqqd = -ceqqd; + cqdef = -cqdef; + cqcheq = -cqcheq; + } + + m = (Vec4d ){heres[0]->BSIM3v32m, heres[1]->BSIM3v32m, heres[2]->BSIM3v32m, heres[3]->BSIM3v32m}; + { + Vec4d val = m * ceqqg; + heres[0]->BSIM3v32rhsG = val[0]; + heres[1]->BSIM3v32rhsG = val[1]; + heres[2]->BSIM3v32rhsG = val[2]; + heres[3]->BSIM3v32rhsG = val[3]; + } + { + Vec4d val = m * ((ceqbs + ceqbd) + ceqqb); + heres[0]->BSIM3v32rhsB = val[0]; + heres[1]->BSIM3v32rhsB = val[1]; + heres[2]->BSIM3v32rhsB = val[2]; + heres[3]->BSIM3v32rhsB = val[3]; + } + { + Vec4d val = m * ((ceqbd - cdreq) - ceqqd); + heres[0]->BSIM3v32rhsD = val[0]; + heres[1]->BSIM3v32rhsD = val[1]; + heres[2]->BSIM3v32rhsD = val[2]; + heres[3]->BSIM3v32rhsD = val[3]; + } + { + Vec4d val = m * ((((cdreq + ceqbs) + ceqqg) + ceqqb) + ceqqd); + heres[0]->BSIM3v32rhsS = val[0]; + heres[1]->BSIM3v32rhsS = val[1]; + heres[2]->BSIM3v32rhsS = val[2]; + heres[3]->BSIM3v32rhsS = val[3]; + } + if (heres[0]->BSIM3v32nqsMod) + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec4m ){heres[0]->BSIM3v32qNode, heres[1]->BSIM3v32qNode, heres[2]->BSIM3v32qNode, heres[3]->BSIM3v32qNode}, m * (cqcheq - cqdef)); + + T1 = qdef * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}); + heres[0]->BSIM3v32DdPt = val[0]; + heres[1]->BSIM3v32DdPt = val[1]; + 
heres[2]->BSIM3v32DdPt = val[2]; + heres[3]->BSIM3v32DdPt = val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + heres[0]->BSIM3v32SsPt = val[0]; + heres[1]->BSIM3v32SsPt = val[1]; + heres[2]->BSIM3v32SsPt = val[2]; + heres[3]->BSIM3v32SsPt = val[3]; + } + { + Vec4d val = m * (gcggb - ggtg); + heres[0]->BSIM3v32GgPt = val[0]; + heres[1]->BSIM3v32GgPt = val[1]; + heres[2]->BSIM3v32GgPt = val[2]; + heres[3]->BSIM3v32GgPt = val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) + ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) - gcbgb) - gcbdb) - gcbsb) - ((Vec4d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs})); + heres[0]->BSIM3v32BbPt = val[0]; + heres[1]->BSIM3v32BbPt = val[1]; + heres[2]->BSIM3v32BbPt = val[2]; + heres[3]->BSIM3v32BbPt = val[3]; + } + { + Vec4d val = m * (((gcggb + gcgdb) + gcgsb) + ggtb); + heres[0]->BSIM3v32GbPt = val[0]; + heres[1]->BSIM3v32GbPt = val[1]; + heres[2]->BSIM3v32GbPt = val[2]; + heres[3]->BSIM3v32GbPt = val[3]; + } + { + Vec4d val = m * (gcgdb - ggtd); + heres[0]->BSIM3v32GdpPt = val[0]; + heres[1]->BSIM3v32GdpPt = val[1]; + heres[2]->BSIM3v32GdpPt = val[2]; + heres[3]->BSIM3v32GdpPt = val[3]; + } + { + Vec4d val = m * (gcgsb - ggts); + heres[0]->BSIM3v32GspPt = val[0]; + heres[1]->BSIM3v32GspPt = val[1]; + heres[2]->BSIM3v32GspPt = val[2]; + heres[3]->BSIM3v32GspPt = val[3]; + } + { + Vec4d val = m * (gcbgb - ((Vec4d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs})); + heres[0]->BSIM3v32BgPt = val[0]; + heres[1]->BSIM3v32BgPt = val[1]; + heres[2]->BSIM3v32BgPt = val[2]; + heres[3]->BSIM3v32BgPt = val[3]; + } + { + Vec4d val = m * ((gcbdb - ((Vec4d 
){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd})) + gbbdp); + heres[0]->BSIM3v32BdpPt = val[0]; + heres[1]->BSIM3v32BdpPt = val[1]; + heres[2]->BSIM3v32BdpPt = val[2]; + heres[3]->BSIM3v32BdpPt = val[3]; + } + { + Vec4d val = m * ((gcbsb - ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) + gbbsp); + heres[0]->BSIM3v32BspPt = val[0]; + heres[1]->BSIM3v32BspPt = val[1]; + heres[2]->BSIM3v32BspPt = val[2]; + heres[3]->BSIM3v32BspPt = val[3]; + } + { + Vec4d val = m * ((((Gm + gcdgb) + (dxpart * ggtg)) + (T1 * ddxpart_dVg)) + gbdpg); + heres[0]->BSIM3v32DPgPt = val[0]; + heres[1]->BSIM3v32DPgPt = val[1]; + heres[2]->BSIM3v32DPgPt = val[2]; + heres[3]->BSIM3v32DPgPt = val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd}) - Gmbs) + gcdgb) + gcddb) + gcdsb) - (dxpart * ggtb)) - (T1 * ddxpart_dVb)) - gbdpb); + heres[0]->BSIM3v32DPbPt = val[0]; + heres[1]->BSIM3v32DPbPt = val[1]; + heres[2]->BSIM3v32DPbPt = val[2]; + heres[3]->BSIM3v32DPbPt = val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) + FwdSum) - gcdsb) - (dxpart * ggts)) - (T1 * ddxpart_dVs)) - gbdpsp); + heres[0]->BSIM3v32DPspPt = val[0]; + heres[1]->BSIM3v32DPspPt = val[1]; + heres[2]->BSIM3v32DPspPt = val[2]; + heres[3]->BSIM3v32DPspPt = val[3]; + } + { + Vec4d val = m * ((((gcsgb - Gm) + (sxpart * ggtg)) + (T1 * dsxpart_dVg)) + gbspg); + heres[0]->BSIM3v32SPgPt = val[0]; + heres[1]->BSIM3v32SPgPt = val[1]; + heres[2]->BSIM3v32SPgPt = val[2]; + heres[3]->BSIM3v32SPgPt = val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs}) + Gmbs) + gcsgb) + gcsdb) + gcssb) - (sxpart * ggtb)) - (T1 * dsxpart_dVb)) - gbspb); + heres[0]->BSIM3v32SPbPt = val[0]; + 
heres[1]->BSIM3v32SPbPt = val[1]; + heres[2]->BSIM3v32SPbPt = val[2]; + heres[3]->BSIM3v32SPbPt = val[3]; + } + { + Vec4d val = m * (((((((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds}) + RevSum) - gcsdb) - (sxpart * ggtd)) - (T1 * dsxpart_dVd)) - gbspdp); + heres[0]->BSIM3v32SPdpPt = val[0]; + heres[1]->BSIM3v32SPdpPt = val[1]; + heres[2]->BSIM3v32SPdpPt = val[2]; + heres[3]->BSIM3v32SPdpPt = val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}) + ((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds})) + ((Vec4d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd})) + RevSum) + gcddb) + (dxpart * ggtd)) + (T1 * ddxpart_dVd)) + gbdpdp); + heres[0]->BSIM3v32DPdpPt = val[0]; + heres[1]->BSIM3v32DPdpPt = val[1]; + heres[2]->BSIM3v32DPdpPt = val[2]; + heres[3]->BSIM3v32DPdpPt = val[3]; + } + { + Vec4d val = m * (((((((((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}) + ((Vec4d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds})) + ((Vec4d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs})) + FwdSum) + gcssb) + (sxpart * ggts)) + (T1 * dsxpart_dVs)) + gbspsp); + heres[0]->BSIM3v32SPspPt = val[0]; + heres[1]->BSIM3v32SPspPt = val[1]; + heres[2]->BSIM3v32SPspPt = val[2]; + heres[3]->BSIM3v32SPspPt = val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}); + heres[0]->BSIM3v32DdpPt = val[0]; + heres[1]->BSIM3v32DdpPt = val[1]; + heres[2]->BSIM3v32DdpPt = val[2]; + 
heres[3]->BSIM3v32DdpPt = val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + heres[0]->BSIM3v32SspPt = val[0]; + heres[1]->BSIM3v32SspPt = val[1]; + heres[2]->BSIM3v32SspPt = val[2]; + heres[3]->BSIM3v32SspPt = val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance}); + heres[0]->BSIM3v32DPdPt = val[0]; + heres[1]->BSIM3v32DPdPt = val[1]; + heres[2]->BSIM3v32DPdPt = val[2]; + heres[3]->BSIM3v32DPdPt = val[3]; + } + { + Vec4d val = m * ((Vec4d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance}); + heres[0]->BSIM3v32SPsPt = val[0]; + heres[1]->BSIM3v32SPsPt = val[1]; + heres[2]->BSIM3v32SPsPt = val[2]; + heres[3]->BSIM3v32SPsPt = val[3]; + } + if (heres[0]->BSIM3v32nqsMod) + { + { + Vec4d val = m * (gqdef + ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32QqPtr += val[0]; + *heres[1]->BSIM3v32QqPtr += val[1]; + *heres[2]->BSIM3v32QqPtr += val[2]; + *heres[3]->BSIM3v32QqPtr += val[3]; + } + { + Vec4d val = m * (dxpart * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32DPqPtr += val[0]; + *heres[1]->BSIM3v32DPqPtr += val[1]; + *heres[2]->BSIM3v32DPqPtr += val[2]; + *heres[3]->BSIM3v32DPqPtr += val[3]; + } + { + Vec4d val = m * (sxpart * ((Vec4d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau})); + *heres[0]->BSIM3v32SPqPtr += val[0]; + *heres[1]->BSIM3v32SPqPtr += val[1]; + *heres[2]->BSIM3v32SPqPtr += val[2]; + *heres[3]->BSIM3v32SPqPtr += val[3]; + } + { + Vec4d val = m * ((Vec4d 
){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau}); + *heres[0]->BSIM3v32GqPtr -= val[0]; + *heres[1]->BSIM3v32GqPtr -= val[1]; + *heres[2]->BSIM3v32GqPtr -= val[2]; + *heres[3]->BSIM3v32GqPtr -= val[3]; + } + { + Vec4d val = m * (ggtg - gcqgb); + *heres[0]->BSIM3v32QgPtr += val[0]; + *heres[1]->BSIM3v32QgPtr += val[1]; + *heres[2]->BSIM3v32QgPtr += val[2]; + *heres[3]->BSIM3v32QgPtr += val[3]; + } + { + Vec4d val = m * (ggtd - gcqdb); + *heres[0]->BSIM3v32QdpPtr += val[0]; + *heres[1]->BSIM3v32QdpPtr += val[1]; + *heres[2]->BSIM3v32QdpPtr += val[2]; + *heres[3]->BSIM3v32QdpPtr += val[3]; + } + { + Vec4d val = m * (ggts - gcqsb); + *heres[0]->BSIM3v32QspPtr += val[0]; + *heres[1]->BSIM3v32QspPtr += val[1]; + *heres[2]->BSIM3v32QspPtr += val[2]; + *heres[3]->BSIM3v32QspPtr += val[3]; + } + { + Vec4d val = m * (ggtb - gcqbb); + *heres[0]->BSIM3v32QbPtr += val[0]; + *heres[1]->BSIM3v32QbPtr += val[1]; + *heres[2]->BSIM3v32QbPtr += val[2]; + *heres[3]->BSIM3v32QbPtr += val[3]; + } + } + + line1000: + ; + + return OK; +} + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldseq_simd8.c b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd8.c new file mode 100644 index 000000000..e2b229486 --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldseq_simd8.c @@ -0,0 +1,7541 @@ +/**** BSIM3v3.2.4, Released by Xuemei Xi 12/21/2001 ****/ + +/********** + * Copyright 2001 Regents of the University of California. All rights reserved. + * Original File: b3ld.c of BSIM3v3.2.4 + * Author: 1991 JianHui Huang and Min-Chie Jeng. + * Modified by Mansun Chan (1995). + * Author: 1997-1999 Weidong Liu. + * Author: 2001 Xuemei Xi + * Modified by Xuemei Xi, 10/05, 12/21, 2001. 
+ * Modified by Paolo Nenzi 2002 and Dietmar Warning 2003 + * Modified by Florian Ballenegger 2020 for SIMD version generation + **********/ + + /********** + * Modified 2020 by Florian Ballenegger, Anamosic Ballenegger Design + * Distributed under the same license terms as the original code, + * see file "B3TERMS_OF_USE" + **********/ + +{ + Vec8d SourceSatCurrent; + Vec8d DrainSatCurrent; + double ag0; + Vec8d qgd; + Vec8d qgs; + Vec8d qgb; + Vec8d cbhat; + Vec8d VgstNVt; + Vec8d ExpVgst; + Vec8d cdrain; + Vec8d cdhat; + Vec8d cdreq; + Vec8d ceqbd; + Vec8d ceqbs; + Vec8d ceqqb; + Vec8d ceqqd; + Vec8d ceqqg; + double ceq; + double geq; + Vec8d czbd; + Vec8d czbdsw; + Vec8d czbdswg; + Vec8d czbs; + Vec8d czbssw; + Vec8d czbsswg; + Vec8d evbd; + Vec8d evbs; + Vec8d arg; + Vec8d sarg; + Vec8d Vfbeff; + Vec8d dVfbeff_dVg; + Vec8d dVfbeff_dVd = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d dVfbeff_dVb; + Vec8d V3; + Vec8d V4; + Vec8d gcbdb; + Vec8d gcbgb; + Vec8d gcbsb; + Vec8d gcddb; + Vec8d gcdgb; + Vec8d gcdsb; + Vec8d gcgdb; + Vec8d gcggb; + Vec8d gcgsb; + Vec8d gcsdb; + Vec8d gcsgb; + Vec8d gcssb; + double MJ; + double MJSW; + double MJSWG; + Vec8d vbd; + Vec8d vbs; + Vec8d vds; + Vec8d vgb; + Vec8d vgd; + Vec8d vgs; + Vec8d qgate = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d qbulk = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d qdrn = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d qsrc; + Vec8d qinoi; + Vec8d cqgate; + Vec8d cqbulk; + Vec8d cqdrn; + Vec8d Vds; + Vec8d Vgs; + Vec8d Vbs; + Vec8d Gmbs; + Vec8d FwdSum; + Vec8d RevSum; + Vec8d Vgs_eff; + Vec8d Vfb; + Vec8d dVfb_dVb = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d dVfb_dVd = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d Phis; + Vec8d dPhis_dVb; + Vec8d sqrtPhis; + Vec8d dsqrtPhis_dVb; + Vec8d Vth; + Vec8d dVth_dVb; + Vec8d dVth_dVd; + Vec8d Vgst; + Vec8d dVgst_dVg; + Vec8d dVgst_dVb; + Vec8d dVgs_eff_dVg; + double Nvtm; + double 
Vtm; + Vec8d n; + Vec8d dn_dVb; + Vec8d dn_dVd; + double voffcv; + Vec8d noff; + Vec8d dnoff_dVd; + Vec8d dnoff_dVb; + Vec8d ExpArg; + double V0; + Vec8d CoxWLcen; + Vec8d QovCox; + double LINK; + Vec8d DeltaPhi; + Vec8d dDeltaPhi_dVg; + Vec8d dDeltaPhi_dVd; + Vec8d dDeltaPhi_dVb; + double Cox; + double Tox; + Vec8d Tcen; + Vec8d dTcen_dVg; + Vec8d dTcen_dVd; + Vec8d dTcen_dVb; + Vec8d Ccen; + Vec8d Coxeff; + Vec8d dCoxeff_dVg; + Vec8d dCoxeff_dVd; + Vec8d dCoxeff_dVb; + Vec8d Denomi; + Vec8d dDenomi_dVg; + Vec8d dDenomi_dVd; + Vec8d dDenomi_dVb; + Vec8d ueff; + Vec8d dueff_dVg; + Vec8d dueff_dVd; + Vec8d dueff_dVb; + Vec8d Esat; + Vec8d Vdsat; + Vec8d EsatL; + Vec8d dEsatL_dVg; + Vec8d dEsatL_dVd; + Vec8d dEsatL_dVb; + Vec8d dVdsat_dVg; + Vec8d dVdsat_dVb; + Vec8d dVdsat_dVd; + Vec8d Vasat; + Vec8d dAlphaz_dVg; + Vec8d dAlphaz_dVb; + Vec8d dVasat_dVg; + Vec8d dVasat_dVb; + Vec8d dVasat_dVd; + Vec8d Va; + Vec8d dVa_dVd; + Vec8d dVa_dVg; + Vec8d dVa_dVb; + Vec8d Vbseff; + Vec8d dVbseff_dVb; + Vec8d VbseffCV; + Vec8d dVbseffCV_dVb; + Vec8d Arg1; + double One_Third_CoxWL; + double Two_Third_CoxWL; + Vec8d Alphaz; + double CoxWL; + Vec8d T0; + Vec8d dT0_dVg; + Vec8d dT0_dVd; + Vec8d dT0_dVb; + Vec8d T1; + Vec8d dT1_dVg; + Vec8d dT1_dVd; + Vec8d dT1_dVb; + Vec8d T2; + Vec8d dT2_dVg; + Vec8d dT2_dVd; + Vec8d dT2_dVb; + Vec8d T3; + Vec8d dT3_dVg; + Vec8d dT3_dVd; + Vec8d dT3_dVb; + Vec8d T4; + Vec8d T5; + Vec8d T6; + Vec8d T7; + Vec8d T8; + Vec8d T9; + Vec8d T10; + Vec8d T11; + Vec8d T12; + Vec8d tmp; + Vec8d Abulk; + Vec8d dAbulk_dVb; + Vec8d Abulk0; + Vec8d dAbulk0_dVb; + double tmpuni; + Vec8d VACLM; + Vec8d dVACLM_dVg; + Vec8d dVACLM_dVd; + Vec8d dVACLM_dVb; + Vec8d VADIBL; + Vec8d dVADIBL_dVg; + Vec8d dVADIBL_dVd; + Vec8d dVADIBL_dVb; + Vec8d Xdep; + Vec8d dXdep_dVb; + Vec8d lt1; + Vec8d dlt1_dVb; + Vec8d ltw; + Vec8d dltw_dVb; + Vec8d Delt_vth; + Vec8d dDelt_vth_dVb; + Vec8d Theta0; + Vec8d dTheta0_dVb; + double TempRatio; + Vec8d tmp1; + Vec8d tmp2; + Vec8d tmp3; + 
Vec8d tmp4; + Vec8d DIBL_Sft; + Vec8d dDIBL_Sft_dVd; + Vec8d Lambda; + Vec8d dLambda_dVg; + double a1; + double ScalingFactor; + Vec8d Vgsteff; + Vec8d dVgsteff_dVg; + Vec8d dVgsteff_dVd; + Vec8d dVgsteff_dVb; + Vec8d Vdseff; + Vec8d dVdseff_dVg; + Vec8d dVdseff_dVd; + Vec8d dVdseff_dVb; + Vec8d VdseffCV; + Vec8d dVdseffCV_dVg; + Vec8d dVdseffCV_dVd; + Vec8d dVdseffCV_dVb; + Vec8d diffVds; + Vec8d dAbulk_dVg; + Vec8d beta; + Vec8d dbeta_dVg; + Vec8d dbeta_dVd; + Vec8d dbeta_dVb; + Vec8d gche; + Vec8d dgche_dVg; + Vec8d dgche_dVd; + Vec8d dgche_dVb; + Vec8d fgche1; + Vec8d dfgche1_dVg; + Vec8d dfgche1_dVd; + Vec8d dfgche1_dVb; + Vec8d fgche2; + Vec8d dfgche2_dVg; + Vec8d dfgche2_dVd; + Vec8d dfgche2_dVb; + Vec8d Idl; + Vec8d dIdl_dVg; + Vec8d dIdl_dVd; + Vec8d dIdl_dVb; + Vec8d Idsa; + Vec8d dIdsa_dVg; + Vec8d dIdsa_dVd; + Vec8d dIdsa_dVb; + Vec8d Ids; + Vec8d Gm; + Vec8d Gds; + Vec8d Gmb; + Vec8d Isub; + Vec8d Gbd; + Vec8d Gbg; + Vec8d Gbb; + Vec8d VASCBE; + Vec8d dVASCBE_dVg; + Vec8d dVASCBE_dVd; + Vec8d dVASCBE_dVb; + Vec8d CoxWovL; + Vec8d Rds; + Vec8d dRds_dVg; + Vec8d dRds_dVb; + Vec8d WVCox; + Vec8d WVCoxRds; + Vec8d Vgst2Vtm; + Vec8d VdsatCV; + Vec8d dVdsatCV_dVg; + Vec8d dVdsatCV_dVb; + double Leff; + Vec8d Weff; + Vec8d dWeff_dVg; + Vec8d dWeff_dVb; + Vec8d AbulkCV; + Vec8d dAbulkCV_dVb; + Vec8d qgdo; + Vec8d qgso; + Vec8d cgdo; + Vec8d cgso; + Vec8d qcheq = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d qdef; + Vec8d gqdef = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d cqdef; + Vec8d cqcheq; + Vec8d gtau_diff; + Vec8d gtau_drift; + Vec8d gcqdb = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d gcqsb = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d gcqgb = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d gcqbb = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d dxpart; + Vec8d sxpart; + Vec8d ggtg; + Vec8d ggtd; + Vec8d ggts; + Vec8d ggtb; + Vec8d ddxpart_dVd; + Vec8d ddxpart_dVg; + Vec8d 
ddxpart_dVb; + Vec8d ddxpart_dVs; + Vec8d dsxpart_dVd; + Vec8d dsxpart_dVg; + Vec8d dsxpart_dVb; + Vec8d dsxpart_dVs; + Vec8d gbspsp; + Vec8d gbbdp; + Vec8d gbbsp; + Vec8d gbspg; + Vec8d gbspb; + Vec8d gbspdp; + Vec8d gbdpdp; + Vec8d gbdpg; + Vec8d gbdpb; + Vec8d gbdpsp; + Vec8d Cgg; + Vec8d Cgd; + Vec8d Cgb; + Vec8d Cdg; + Vec8d Cdd; + Vec8d Cds; + Vec8d Csg; + Vec8d Csd; + Vec8d Css; + Vec8d Csb; + Vec8d Cbg; + Vec8d Cbd; + Vec8d Cbb; + Vec8d Cgg1; + Vec8d Cgb1; + Vec8d Cgd1; + Vec8d Cbg1; + Vec8d Cbb1; + Vec8d Cbd1; + Vec8d Qac0; + Vec8d Qsub0; + Vec8d dQac0_dVg; + Vec8d dQac0_dVd = (Vec8d ){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + Vec8d dQac0_dVb; + Vec8d dQsub0_dVg; + Vec8d dQsub0_dVd; + Vec8d dQsub0_dVb; + Vec8d m; + Vec8m BSIM3v32mode; + Vec8m Check; + int ChargeComputationNeeded; + int error; + ScalingFactor = 1.0e-9; + ChargeComputationNeeded = ((ckt->CKTmode & (((MODEDCTRANCURVE | MODEAC) | MODETRAN) | MODEINITSMSIG)) || ((ckt->CKTmode & MODETRANOP) && (ckt->CKTmode & MODEUIC))) ? 
(1) : (0); + vbs = SIMDLOADDATA(0, data); + vgs = SIMDLOADDATA(1, data); + vds = SIMDLOADDATA(2, data); + qdef = SIMDLOADDATA(3, data); + cdhat = SIMDLOADDATA(4, data); + cbhat = SIMDLOADDATA(5, data); + Check = SIMDLOADDATA(6, data) > 0.5; + SIMDIFYCMD(start); + vbd = vbs - vds; + vgd = vgs - vds; + vgb = vgs - vbs; + Nvtm = model->BSIM3v32vtm * model->BSIM3v32jctEmissionCoeff; + if (model->BSIM3v32acmMod == 0) + { + SourceSatCurrent = vec8_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec8m condmask0 = (((Vec8d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea, heres[4]->BSIM3v32sourceArea, heres[5]->BSIM3v32sourceArea, heres[6]->BSIM3v32sourceArea, heres[7]->BSIM3v32sourceArea}) <= 0.0) & (((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}) <= 0.0); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + ; + } + { + SourceSatCurrent = vec8_blend(SourceSatCurrent, (((Vec8d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea, heres[4]->BSIM3v32sourceArea, heres[5]->BSIM3v32sourceArea, heres[6]->BSIM3v32sourceArea, heres[7]->BSIM3v32sourceArea}) * model->BSIM3v32jctTempSatCurDensity) + (((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + DrainSatCurrent = vec8_SIMDTOVECTOR(1.0e-14); + if (1) + { + Vec8m condmask0 = (((Vec8d ){heres[0]->BSIM3v32drainArea, 
heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea, heres[4]->BSIM3v32drainArea, heres[5]->BSIM3v32drainArea, heres[6]->BSIM3v32drainArea, heres[7]->BSIM3v32drainArea}) <= 0.0) & (((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}) <= 0.0); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + ; + } + { + DrainSatCurrent = vec8_blend(DrainSatCurrent, (((Vec8d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea, heres[4]->BSIM3v32drainArea, heres[5]->BSIM3v32drainArea, heres[6]->BSIM3v32drainArea, heres[7]->BSIM3v32drainArea}) * model->BSIM3v32jctTempSatCurDensity) + (((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}) * model->BSIM3v32jctSidewallTempSatCurDensity), condmask_false0); + } + } + + } + else + { + error = vec8_BSIM3v32_ACM_saturationCurrents(model, heres, &DrainSatCurrent, &SourceSatCurrent); + if (SIMDANY(error)) + return error; + + } + + if (1) + { + Vec8m condmask0 = SourceSatCurrent <= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[5]) + heres[5]->BSIM3v32gbs = ckt->CKTgmin; + + if 
(condmask_true0[6]) + heres[6]->BSIM3v32gbs = ckt->CKTgmin; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gbs = ckt->CKTgmin; + + } + { + Vec8d val = ((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs}) * vbs; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cbs = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cbs = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cbs = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cbs = val[7]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbs = vec8_blend(evbs, vec8_exp(vbs / Nvtm), condmask_false0); + { + Vec8d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbs = val[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gbs = val[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gbs = val[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gbs = val[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gbs = val[7]; + + } + { + Vec8d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbs = val[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32cbs = val[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32cbs = val[5]; + + if (condmask_false0[6]) 
+ heres[6]->BSIM3v32cbs = val[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32cbs = val[7]; + + } + } + else + { + if (1) + { + Vec8m condmask1 = vbs < ((Vec8d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm, heres[4]->BSIM3v32vjsm, heres[5]->BSIM3v32vjsm, heres[6]->BSIM3v32vjsm, heres[7]->BSIM3v32vjsm}); + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + evbs = vec8_blend(evbs, vec8_exp(vbs / Nvtm), condmask_true1); + { + Vec8d val = ((SourceSatCurrent * evbs) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32gbs = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32gbs = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32gbs = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32gbs = val[7]; + + } + { + Vec8d val = (SourceSatCurrent * (evbs - 1.0)) + (ckt->CKTgmin * vbs); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cbs = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cbs = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cbs = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cbs = val[7]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec8_blend(T0, ((Vec8d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm, heres[4]->BSIM3v32IsEvjsm, heres[5]->BSIM3v32IsEvjsm, 
heres[6]->BSIM3v32IsEvjsm, heres[7]->BSIM3v32IsEvjsm}) / Nvtm, condmask_false1); + { + Vec8d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32gbs = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32gbs = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32gbs = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32gbs = val[7]; + + } + { + Vec8d val = ((((Vec8d ){heres[0]->BSIM3v32IsEvjsm, heres[1]->BSIM3v32IsEvjsm, heres[2]->BSIM3v32IsEvjsm, heres[3]->BSIM3v32IsEvjsm, heres[4]->BSIM3v32IsEvjsm, heres[5]->BSIM3v32IsEvjsm, heres[6]->BSIM3v32IsEvjsm, heres[7]->BSIM3v32IsEvjsm}) - SourceSatCurrent) + (T0 * (vbs - ((Vec8d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm, heres[4]->BSIM3v32vjsm, heres[5]->BSIM3v32vjsm, heres[6]->BSIM3v32vjsm, heres[7]->BSIM3v32vjsm})))) + (ckt->CKTgmin * vbs); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32cbs = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32cbs = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32cbs = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32cbs = val[7]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec8_blend(T0, (SourceSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec8d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbs = val[2]; + + if 
(condmask_false1[3]) + heres[3]->BSIM3v32gbs = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32gbs = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32gbs = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32gbs = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32gbs = val[7]; + + } + { + Vec8d val = (model->BSIM3v32ijth + (ckt->CKTgmin * vbs)) + (T0 * (vbs - ((Vec8d ){heres[0]->BSIM3v32vjsm, heres[1]->BSIM3v32vjsm, heres[2]->BSIM3v32vjsm, heres[3]->BSIM3v32vjsm, heres[4]->BSIM3v32vjsm, heres[5]->BSIM3v32vjsm, heres[6]->BSIM3v32vjsm, heres[7]->BSIM3v32vjsm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbs = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32cbs = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32cbs = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32cbs = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32cbs = val[7]; + + } + + } + + } + } + + } + + } + } + + if (1) + { + Vec8m condmask0 = DrainSatCurrent <= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[5]) + heres[5]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[6]) + heres[6]->BSIM3v32gbd = ckt->CKTgmin; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gbd = ckt->CKTgmin; + + } + { + Vec8d val = ((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, 
heres[7]->BSIM3v32gbd}) * vbd; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cbd = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cbd = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cbd = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cbd = val[7]; + + } + } + { + if (model->BSIM3v32ijth == 0.0) + { + evbd = vec8_blend(evbd, vec8_exp(vbd / Nvtm), condmask_false0); + { + Vec8d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_false0[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gbd = val[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gbd = val[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gbd = val[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gbd = val[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gbd = val[7]; + + } + { + Vec8d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_false0[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32cbd = val[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32cbd = val[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32cbd = val[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32cbd = val[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32cbd = val[7]; + + } + } + else + { + if (1) + { + Vec8m condmask1 = vbd < ((Vec8d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm, heres[4]->BSIM3v32vjdm, heres[5]->BSIM3v32vjdm, 
heres[6]->BSIM3v32vjdm, heres[7]->BSIM3v32vjdm}); + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + evbd = vec8_blend(evbd, vec8_exp(vbd / Nvtm), condmask_true1); + { + Vec8d val = ((DrainSatCurrent * evbd) / Nvtm) + ckt->CKTgmin; + if (condmask_true1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32gbd = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32gbd = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32gbd = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32gbd = val[7]; + + } + { + Vec8d val = (DrainSatCurrent * (evbd - 1.0)) + (ckt->CKTgmin * vbd); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cbd = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cbd = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cbd = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cbd = val[7]; + + } + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec8_blend(T0, ((Vec8d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm, heres[4]->BSIM3v32IsEvjdm, heres[5]->BSIM3v32IsEvjdm, heres[6]->BSIM3v32IsEvjdm, heres[7]->BSIM3v32IsEvjdm}) / Nvtm, condmask_false1); + { + Vec8d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + 
heres[3]->BSIM3v32gbd = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32gbd = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32gbd = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32gbd = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32gbd = val[7]; + + } + { + Vec8d val = ((((Vec8d ){heres[0]->BSIM3v32IsEvjdm, heres[1]->BSIM3v32IsEvjdm, heres[2]->BSIM3v32IsEvjdm, heres[3]->BSIM3v32IsEvjdm, heres[4]->BSIM3v32IsEvjdm, heres[5]->BSIM3v32IsEvjdm, heres[6]->BSIM3v32IsEvjdm, heres[7]->BSIM3v32IsEvjdm}) - DrainSatCurrent) + (T0 * (vbd - ((Vec8d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm, heres[4]->BSIM3v32vjdm, heres[5]->BSIM3v32vjdm, heres[6]->BSIM3v32vjdm, heres[7]->BSIM3v32vjdm})))) + (ckt->CKTgmin * vbd); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32cbd = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32cbd = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32cbd = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32cbd = val[7]; + + } + break; + + case BSIM3v32V32: + + default: + T0 = vec8_blend(T0, (DrainSatCurrent + model->BSIM3v32ijth) / Nvtm, condmask_false1); + { + Vec8d val = T0 + ckt->CKTgmin; + if (condmask_false1[0]) + heres[0]->BSIM3v32gbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32gbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32gbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32gbd = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32gbd = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32gbd = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32gbd = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32gbd = val[7]; + + } + { + Vec8d val = 
(model->BSIM3v32ijth + (ckt->CKTgmin * vbd)) + (T0 * (vbd - ((Vec8d ){heres[0]->BSIM3v32vjdm, heres[1]->BSIM3v32vjdm, heres[2]->BSIM3v32vjdm, heres[3]->BSIM3v32vjdm, heres[4]->BSIM3v32vjdm, heres[5]->BSIM3v32vjdm, heres[6]->BSIM3v32vjdm, heres[7]->BSIM3v32vjdm}))); + if (condmask_false1[0]) + heres[0]->BSIM3v32cbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32cbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32cbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32cbd = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32cbd = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32cbd = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32cbd = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32cbd = val[7]; + + } + + } + + } + } + + } + + } + } + + BSIM3v32mode = vds >= 0.0; + if (1) + { + Vec8m condmask0 = BSIM3v32mode; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Vds = vec8_blend(Vds, vds, condmask_true0); + Vgs = vec8_blend(Vgs, vgs, condmask_true0); + Vbs = vec8_blend(Vbs, vbs, condmask_true0); + } + { + Vds = vec8_blend(Vds, -vds, condmask_false0); + Vgs = vec8_blend(Vgs, vgd, condmask_false0); + Vbs = vec8_blend(Vbs, vbd, condmask_false0); + } + } + + { + Vec8m modesym; + modesym = (2 * (BSIM3v32mode & 0x1)) - 1; + { + heres[0]->BSIM3v32mode = modesym[0]; + heres[1]->BSIM3v32mode = modesym[1]; + heres[2]->BSIM3v32mode = modesym[2]; + heres[3]->BSIM3v32mode = modesym[3]; + heres[4]->BSIM3v32mode = modesym[4]; + heres[5]->BSIM3v32mode = modesym[5]; + heres[6]->BSIM3v32mode = modesym[6]; + heres[7]->BSIM3v32mode = modesym[7]; + } + } + T0 = (Vbs - pParam->BSIM3v32vbsc) - 0.001; + T1 = vec8_sqrt((T0 * T0) - (0.004 * pParam->BSIM3v32vbsc)); + Vbseff = pParam->BSIM3v32vbsc + (0.5 * (T0 + T1)); + dVbseff_dVb = 0.5 * (1.0 + (T0 / T1)); + if (1) + { + Vec8m condmask0 = Vbseff < Vbs; + Vec8m condmask_true0 = condmask0; + { + Vbseff = vec8_blend(Vbseff, Vbs, condmask_true0); + } + } + + if (1) 
+ { + Vec8m condmask0 = Vbseff > 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, pParam->BSIM3v32phi / (pParam->BSIM3v32phi + Vbseff), condmask_true0); + Phis = vec8_blend(Phis, pParam->BSIM3v32phi * T0, condmask_true0); + dPhis_dVb = vec8_blend(dPhis_dVb, (-T0) * T0, condmask_true0); + sqrtPhis = vec8_blend(sqrtPhis, pParam->BSIM3v32phis3 / (pParam->BSIM3v32phi + (0.5 * Vbseff)), condmask_true0); + dsqrtPhis_dVb = vec8_blend(dsqrtPhis_dVb, (((-0.5) * sqrtPhis) * sqrtPhis) / pParam->BSIM3v32phis3, condmask_true0); + } + { + Phis = vec8_blend(Phis, pParam->BSIM3v32phi - Vbseff, condmask_false0); + dPhis_dVb = vec8_blend(dPhis_dVb, vec8_SIMDTOVECTOR(-1.0), condmask_false0); + sqrtPhis = vec8_blend(sqrtPhis, vec8_sqrt(Phis), condmask_false0); + dsqrtPhis_dVb = vec8_blend(dsqrtPhis_dVb, (-0.5) / sqrtPhis, condmask_false0); + } + } + + Xdep = (pParam->BSIM3v32Xdep0 * sqrtPhis) / pParam->BSIM3v32sqrtPhi; + dXdep_dVb = (pParam->BSIM3v32Xdep0 / pParam->BSIM3v32sqrtPhi) * dsqrtPhis_dVb; + Leff = pParam->BSIM3v32leff; + Vtm = model->BSIM3v32vtm; + T3 = vec8_sqrt(Xdep); + V0 = pParam->BSIM3v32vbi - pParam->BSIM3v32phi; + T0 = pParam->BSIM3v32dvt2 * Vbseff; + T2 = vec8_SIMDTOVECTOR(pParam->BSIM3v32dvt2); + if (1) + { + Vec8m condmask0 = T0 >= (-0.5); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, 1.0 + T0, condmask_true0); + } + { + T4 = vec8_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec8_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec8_blend(T2, (T2 * T4) * T4, condmask_false0); + } + } + + lt1 = (model->BSIM3v32factor1 * T3) * T1; + dlt1_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = pParam->BSIM3v32dvt2w * Vbseff; + if (1) + { + Vec8m condmask0 = T0 >= (-0.5); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, 1.0 + T0, condmask_true0); + T2 = 
vec8_blend(T2, vec8_SIMDTOVECTOR(pParam->BSIM3v32dvt2w), condmask_true0); + } + { + T4 = vec8_blend(T4, 1.0 / (3.0 + (8.0 * T0)), condmask_false0); + T1 = vec8_blend(T1, (1.0 + (3.0 * T0)) * T4, condmask_false0); + T2 = vec8_blend(T2, (pParam->BSIM3v32dvt2w * T4) * T4, condmask_false0); + } + } + + ltw = (model->BSIM3v32factor1 * T3) * T1; + dltw_dVb = model->BSIM3v32factor1 * ((((0.5 / T3) * T1) * dXdep_dVb) + (T3 * T2)); + T0 = (((-0.5) * pParam->BSIM3v32dvt1) * Leff) / lt1; + if (1) + { + Vec8m condmask0 = T0 > (-EXP_THRESHOLD); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, vec8_exp(T0), condmask_true0); + Theta0 = vec8_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec8_blend(dT1_dVb, (((-T0) / lt1) * T1) * dlt1_dVb, condmask_true0); + dTheta0_dVb = vec8_blend(dTheta0_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec8_blend(T1, vec8_SIMDTOVECTOR(MIN_EXP), condmask_false0); + Theta0 = vec8_blend(Theta0, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dTheta0_dVb = vec8_blend(dTheta0_dVb, vec8_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + Delt_vth = (pParam->BSIM3v32dvt0 * Theta0) * V0; + dDelt_vth_dVb = (pParam->BSIM3v32dvt0 * dTheta0_dVb) * V0; + T0 = ((((-0.5) * pParam->BSIM3v32dvt1w) * pParam->BSIM3v32weff) * Leff) / ltw; + if (1) + { + Vec8m condmask0 = T0 > (-EXP_THRESHOLD); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, vec8_exp(T0), condmask_true0); + T2 = vec8_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_true0); + dT1_dVb = vec8_blend(dT1_dVb, (((-T0) / ltw) * T1) * dltw_dVb, condmask_true0); + dT2_dVb = vec8_blend(dT2_dVb, (1.0 + (4.0 * T1)) * dT1_dVb, condmask_true0); + } + { + T1 = vec8_blend(T1, vec8_SIMDTOVECTOR(MIN_EXP), condmask_false0); + T2 = vec8_blend(T2, T1 * (1.0 + (2.0 * T1)), condmask_false0); + dT2_dVb = vec8_blend(dT2_dVb, vec8_SIMDTOVECTOR(0.0), condmask_false0); + } + } + + T0 = 
pParam->BSIM3v32dvt0w * T2; + T2 = T0 * V0; + dT2_dVb = (pParam->BSIM3v32dvt0w * dT2_dVb) * V0; + TempRatio = (ckt->CKTtemp / model->BSIM3v32tnom) - 1.0; + T0 = vec8_SIMDTOVECTOR(sqrt(1.0 + (pParam->BSIM3v32nlx / Leff))); + T1 = ((pParam->BSIM3v32k1ox * (T0 - 1.0)) * pParam->BSIM3v32sqrtPhi) + (((pParam->BSIM3v32kt1 + (pParam->BSIM3v32kt1l / Leff)) + (pParam->BSIM3v32kt2 * Vbseff)) * TempRatio); + tmp2 = vec8_SIMDTOVECTOR((model->BSIM3v32tox * pParam->BSIM3v32phi) / (pParam->BSIM3v32weff + pParam->BSIM3v32w0)); + T3 = pParam->BSIM3v32eta0 + (pParam->BSIM3v32etab * Vbseff); + if (1) + { + Vec8m condmask0 = T3 < 1.0e-4; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T9 = vec8_blend(T9, 1.0 / (3.0 - (2.0e4 * T3)), condmask_true0); + T3 = vec8_blend(T3, (2.0e-4 - T3) * T9, condmask_true0); + T4 = vec8_blend(T4, T9 * T9, condmask_true0); + } + { + T4 = vec8_blend(T4, vec8_SIMDTOVECTOR(1.0), condmask_false0); + } + } + + dDIBL_Sft_dVd = T3 * pParam->BSIM3v32theta0vb0; + DIBL_Sft = dDIBL_Sft_dVd * Vds; + Vth = ((((((((model->BSIM3v32type * ((Vec8d ){heres[0]->BSIM3v32vth0, heres[1]->BSIM3v32vth0, heres[2]->BSIM3v32vth0, heres[3]->BSIM3v32vth0, heres[4]->BSIM3v32vth0, heres[5]->BSIM3v32vth0, heres[6]->BSIM3v32vth0, heres[7]->BSIM3v32vth0})) - (pParam->BSIM3v32k1 * pParam->BSIM3v32sqrtPhi)) + (pParam->BSIM3v32k1ox * sqrtPhis)) - (pParam->BSIM3v32k2ox * Vbseff)) - Delt_vth) - T2) + ((pParam->BSIM3v32k3 + (pParam->BSIM3v32k3b * Vbseff)) * tmp2)) + T1) - DIBL_Sft; + { + heres[0]->BSIM3v32von = Vth[0]; + heres[1]->BSIM3v32von = Vth[1]; + heres[2]->BSIM3v32von = Vth[2]; + heres[3]->BSIM3v32von = Vth[3]; + heres[4]->BSIM3v32von = Vth[4]; + heres[5]->BSIM3v32von = Vth[5]; + heres[6]->BSIM3v32von = Vth[6]; + heres[7]->BSIM3v32von = Vth[7]; + } + dVth_dVb = ((((((pParam->BSIM3v32k1ox * dsqrtPhis_dVb) - pParam->BSIM3v32k2ox) - dDelt_vth_dVb) - dT2_dVb) + (pParam->BSIM3v32k3b * tmp2)) - (((pParam->BSIM3v32etab * Vds) * pParam->BSIM3v32theta0vb0) * T4)) 
+ (pParam->BSIM3v32kt2 * TempRatio); + dVth_dVd = -dDIBL_Sft_dVd; + tmp2 = (pParam->BSIM3v32nfactor * EPSSI) / Xdep; + tmp3 = (pParam->BSIM3v32cdsc + (pParam->BSIM3v32cdscb * Vbseff)) + (pParam->BSIM3v32cdscd * Vds); + tmp4 = ((tmp2 + (tmp3 * Theta0)) + pParam->BSIM3v32cit) / model->BSIM3v32cox; + if (1) + { + Vec8m condmask0 = tmp4 >= (-0.5); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + n = vec8_blend(n, 1.0 + tmp4, condmask_true0); + dn_dVb = vec8_blend(dn_dVb, (((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox, condmask_true0); + dn_dVd = vec8_blend(dn_dVd, (pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox, condmask_true0); + } + { + T0 = vec8_blend(T0, 1.0 / (3.0 + (8.0 * tmp4)), condmask_false0); + n = vec8_blend(n, (1.0 + (3.0 * tmp4)) * T0, condmask_false0); + T0 = vec8_blend(T0, T0 * T0, condmask_false0); + dn_dVb = vec8_blend(dn_dVb, ((((((-tmp2) / Xdep) * dXdep_dVb) + (tmp3 * dTheta0_dVb)) + (pParam->BSIM3v32cdscb * Theta0)) / model->BSIM3v32cox) * T0, condmask_false0); + dn_dVd = vec8_blend(dn_dVd, ((pParam->BSIM3v32cdscd * Theta0) / model->BSIM3v32cox) * T0, condmask_false0); + } + } + + T0 = ((Vec8d ){heres[0]->BSIM3v32vfb, heres[1]->BSIM3v32vfb, heres[2]->BSIM3v32vfb, heres[3]->BSIM3v32vfb, heres[4]->BSIM3v32vfb, heres[5]->BSIM3v32vfb, heres[6]->BSIM3v32vfb, heres[7]->BSIM3v32vfb}) + pParam->BSIM3v32phi; + Vgs_eff = Vgs; + dVgs_eff_dVg = vec8_SIMDTOVECTOR(1.0); + if ((pParam->BSIM3v32ngate > 1.e18) && (pParam->BSIM3v32ngate < 1.e25)) + if (1) + { + Vec8m condmask0 = Vgs > T0; + Vec8m condmask_true0 = condmask0; + { + T1 = vec8_blend(T1, vec8_SIMDTOVECTOR((((1.0e6 * Charge_q) * EPSSI) * pParam->BSIM3v32ngate) / (model->BSIM3v32cox * model->BSIM3v32cox)), condmask_true0); + T4 = vec8_blend(T4, vec8_sqrt(1.0 + ((2.0 * (Vgs - T0)) / T1)), condmask_true0); + T2 = vec8_blend(T2, T1 * (T4 - 1.0), condmask_true0); + T3 = vec8_blend(T3, ((0.5 * T2) * T2) / T1, 
condmask_true0); + T7 = vec8_blend(T7, (1.12 - T3) - 0.05, condmask_true0); + T6 = vec8_blend(T6, vec8_sqrt((T7 * T7) + 0.224), condmask_true0); + T5 = vec8_blend(T5, 1.12 - (0.5 * (T7 + T6)), condmask_true0); + Vgs_eff = vec8_blend(Vgs_eff, Vgs - T5, condmask_true0); + dVgs_eff_dVg = vec8_blend(dVgs_eff_dVg, 1.0 - ((0.5 - (0.5 / T4)) * (1.0 + (T7 / T6))), condmask_true0); + } + } + + + Vgst = Vgs_eff - Vth; + T10 = (2.0 * n) * Vtm; + VgstNVt = Vgst / T10; + ExpArg = ((2.0 * pParam->BSIM3v32voff) - Vgst) / T10; + T0 = VgstNVt; + if (1) + { + Vec8m condmask0 = ExpArg > EXP_THRESHOLD; + Vec8m condmask_true0 = condmask0; + T0 = vec8_blend(T0, (Vgst - pParam->BSIM3v32voff) / (n * Vtm), condmask_true0); + } + + ExpVgst = vec8_exp(T0); + if (1) + { + Vec8m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Vgsteff = vec8_blend(Vgsteff, Vgst, condmask_true0); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec8m condmask1 = ExpArg > EXP_THRESHOLD; + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec8_blend(Vgsteff, ((Vtm * pParam->BSIM3v32cdep0) / model->BSIM3v32cox) * ExpVgst, condmask_true1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, Vgsteff / (n * Vtm), condmask_true1); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, (-dVgsteff_dVg) * (dVth_dVd + ((T0 * Vtm) * dn_dVd)), condmask_true1); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, (-dVgsteff_dVg) * (dVth_dVb + ((T0 * Vtm) * dn_dVb)), condmask_true1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_true1); + } + { + T1 = vec8_blend(T1, T10 * vec8_log(1.0 + ExpVgst), condmask_false1); + dT1_dVg = vec8_blend(dT1_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dT1_dVb = 
vec8_blend(dT1_dVb, ((-dT1_dVg) * (dVth_dVb + ((Vgst / n) * dn_dVb))) + ((T1 / n) * dn_dVb), condmask_false1); + dT1_dVd = vec8_blend(dT1_dVd, ((-dT1_dVg) * (dVth_dVd + ((Vgst / n) * dn_dVd))) + ((T1 / n) * dn_dVd), condmask_false1); + dT2_dVg = vec8_blend(dT2_dVg, ((-model->BSIM3v32cox) / (Vtm * pParam->BSIM3v32cdep0)) * vec8_exp(ExpArg), condmask_false1); + T2 = vec8_blend(T2, 1.0 - (T10 * dT2_dVg), condmask_false1); + dT2_dVd = vec8_blend(dT2_dVd, ((-dT2_dVg) * (dVth_dVd - (((2.0 * Vtm) * ExpArg) * dn_dVd))) + (((T2 - 1.0) / n) * dn_dVd), condmask_false1); + dT2_dVb = vec8_blend(dT2_dVb, ((-dT2_dVg) * (dVth_dVb - (((2.0 * Vtm) * ExpArg) * dn_dVb))) + (((T2 - 1.0) / n) * dn_dVb), condmask_false1); + Vgsteff = vec8_blend(Vgsteff, T1 / T2, condmask_false1); + T3 = vec8_blend(T3, T2 * T2, condmask_false1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, (((T2 * dT1_dVg) - (T1 * dT2_dVg)) / T3) * dVgs_eff_dVg, condmask_false1); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, ((T2 * dT1_dVd) - (T1 * dT2_dVd)) / T3, condmask_false1); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, ((T2 * dT1_dVb) - (T1 * dT2_dVb)) / T3, condmask_false1); + } + } + + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vgsteff = Vgsteff[0]; + heres[1]->BSIM3v32Vgsteff = Vgsteff[1]; + heres[2]->BSIM3v32Vgsteff = Vgsteff[2]; + heres[3]->BSIM3v32Vgsteff = Vgsteff[3]; + heres[4]->BSIM3v32Vgsteff = Vgsteff[4]; + heres[5]->BSIM3v32Vgsteff = Vgsteff[5]; + heres[6]->BSIM3v32Vgsteff = Vgsteff[6]; + heres[7]->BSIM3v32Vgsteff = Vgsteff[7]; + } + } + + T9 = sqrtPhis - pParam->BSIM3v32sqrtPhi; + Weff = pParam->BSIM3v32weff - (2.0 * ((pParam->BSIM3v32dwg * Vgsteff) + (pParam->BSIM3v32dwb * T9))); + dWeff_dVg = vec8_SIMDTOVECTOR((-2.0) * pParam->BSIM3v32dwg); + dWeff_dVb = ((-2.0) * pParam->BSIM3v32dwb) * dsqrtPhis_dVb; + if (1) + { + Vec8m condmask0 = Weff < 2.0e-8; + Vec8m condmask_true0 = condmask0; + { + T0 = vec8_blend(T0, 1.0 / (6.0e-8 - (2.0 * Weff)), condmask_true0); + Weff = 
vec8_blend(Weff, (2.0e-8 * (4.0e-8 - Weff)) * T0, condmask_true0); + T0 = vec8_blend(T0, T0 * (T0 * 4.0e-16), condmask_true0); + dWeff_dVg = vec8_blend(dWeff_dVg, dWeff_dVg * T0, condmask_true0); + dWeff_dVb = vec8_blend(dWeff_dVb, dWeff_dVb * T0, condmask_true0); + } + } + + T0 = (pParam->BSIM3v32prwg * Vgsteff) + (pParam->BSIM3v32prwb * T9); + if (1) + { + Vec8m condmask0 = T0 >= (-0.9); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Rds = vec8_blend(Rds, pParam->BSIM3v32rds0 * (1.0 + T0), condmask_true0); + dRds_dVg = vec8_blend(dRds_dVg, vec8_SIMDTOVECTOR(pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg), condmask_true0); + dRds_dVb = vec8_blend(dRds_dVb, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb, condmask_true0); + } + { + T1 = vec8_blend(T1, 1.0 / (17.0 + (20.0 * T0)), condmask_false0); + Rds = vec8_blend(Rds, (pParam->BSIM3v32rds0 * (0.8 + T0)) * T1, condmask_false0); + T1 = vec8_blend(T1, T1 * T1, condmask_false0); + dRds_dVg = vec8_blend(dRds_dVg, (pParam->BSIM3v32rds0 * pParam->BSIM3v32prwg) * T1, condmask_false0); + dRds_dVb = vec8_blend(dRds_dVb, ((pParam->BSIM3v32rds0 * pParam->BSIM3v32prwb) * dsqrtPhis_dVb) * T1, condmask_false0); + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32rds = Rds[0]; + heres[1]->BSIM3v32rds = Rds[1]; + heres[2]->BSIM3v32rds = Rds[2]; + heres[3]->BSIM3v32rds = Rds[3]; + heres[4]->BSIM3v32rds = Rds[4]; + heres[5]->BSIM3v32rds = Rds[5]; + heres[6]->BSIM3v32rds = Rds[6]; + heres[7]->BSIM3v32rds = Rds[7]; + } + } + + T1 = (0.5 * pParam->BSIM3v32k1ox) / sqrtPhis; + dT1_dVb = ((-T1) / sqrtPhis) * dsqrtPhis_dVb; + T9 = vec8_sqrt(pParam->BSIM3v32xj * Xdep); + tmp1 = Leff + (2.0 * T9); + T5 = Leff / tmp1; + tmp2 = pParam->BSIM3v32a0 * T5; + tmp3 = vec8_SIMDTOVECTOR(pParam->BSIM3v32weff + pParam->BSIM3v32b1); + tmp4 = pParam->BSIM3v32b0 / tmp3; + T2 = tmp2 + tmp4; + dT2_dVb = (((-T9) / tmp1) / Xdep) * dXdep_dVb; + T6 = T5 * T5; + T7 = T5 * T6; + Abulk0 
= 1.0 + (T1 * T2); + dAbulk0_dVb = ((T1 * tmp2) * dT2_dVb) + (T2 * dT1_dVb); + T8 = (pParam->BSIM3v32ags * pParam->BSIM3v32a0) * T7; + dAbulk_dVg = (-T1) * T8; + Abulk = Abulk0 + (dAbulk_dVg * Vgsteff); + dAbulk_dVb = dAbulk0_dVb - ((T8 * Vgsteff) * (dT1_dVb + ((3.0 * T1) * dT2_dVb))); + if (1) + { + Vec8m condmask0 = Abulk0 < 0.1; + Vec8m condmask_true0 = condmask0; + { + T9 = vec8_blend(T9, 1.0 / (3.0 - (20.0 * Abulk0)), condmask_true0); + Abulk0 = vec8_blend(Abulk0, (0.2 - Abulk0) * T9, condmask_true0); + dAbulk0_dVb = vec8_blend(dAbulk0_dVb, dAbulk0_dVb * (T9 * T9), condmask_true0); + } + } + + if (1) + { + Vec8m condmask0 = Abulk < 0.1; + Vec8m condmask_true0 = condmask0; + { + T9 = vec8_blend(T9, 1.0 / (3.0 - (20.0 * Abulk)), condmask_true0); + Abulk = vec8_blend(Abulk, (0.2 - Abulk) * T9, condmask_true0); + if (model->BSIM3v32intVersion > BSIM3v32V32) + { + T10 = vec8_blend(T10, T9 * T9, condmask_true0); + dAbulk_dVb = vec8_blend(dAbulk_dVb, dAbulk_dVb * T10, condmask_true0); + dAbulk_dVg = vec8_blend(dAbulk_dVg, dAbulk_dVg * T10, condmask_true0); + } + else + { + dAbulk_dVb = vec8_blend(dAbulk_dVb, dAbulk_dVb * (T9 * T9), condmask_true0); + } + + } + } + + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Abulk = Abulk[0]; + heres[1]->BSIM3v32Abulk = Abulk[1]; + heres[2]->BSIM3v32Abulk = Abulk[2]; + heres[3]->BSIM3v32Abulk = Abulk[3]; + heres[4]->BSIM3v32Abulk = Abulk[4]; + heres[5]->BSIM3v32Abulk = Abulk[5]; + heres[6]->BSIM3v32Abulk = Abulk[6]; + heres[7]->BSIM3v32Abulk = Abulk[7]; + } + } + + T2 = pParam->BSIM3v32keta * Vbseff; + if (1) + { + Vec8m condmask0 = T2 >= (-0.9); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, 1.0 / (1.0 + T2), condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T0) * T0, condmask_true0); + } + { + T1 = vec8_blend(T1, 1.0 / (0.8 + T2), condmask_false0); + T0 = vec8_blend(T0, (17.0 + (20.0 * T2)) * T1, condmask_false0); + dT0_dVb 
= vec8_blend(dT0_dVb, ((-pParam->BSIM3v32keta) * T1) * T1, condmask_false0); + } + } + + dAbulk_dVg *= T0; + dAbulk_dVb = (dAbulk_dVb * T0) + (Abulk * dT0_dVb); + dAbulk0_dVb = (dAbulk0_dVb * T0) + (Abulk0 * dT0_dVb); + Abulk *= T0; + Abulk0 *= T0; + if (model->BSIM3v32mobMod == 1) + { + T0 = (Vgsteff + Vth) + Vth; + T2 = pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T5 = T3 * (T2 + (pParam->BSIM3v32ub * T3)); + dDenomi_dVg = (T2 + ((2.0 * pParam->BSIM3v32ub) * T3)) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) * dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T3); + } + else + if (model->BSIM3v32mobMod == 2) + { + T5 = (Vgsteff / model->BSIM3v32tox) * ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + ((pParam->BSIM3v32ub * Vgsteff) / model->BSIM3v32tox)); + dDenomi_dVg = ((pParam->BSIM3v32ua + (pParam->BSIM3v32uc * Vbseff)) + (((2.0 * pParam->BSIM3v32ub) * Vgsteff) / model->BSIM3v32tox)) / model->BSIM3v32tox; + dDenomi_dVd = vec8_SIMDTOVECTOR(0.0); + dDenomi_dVb = (Vgsteff * pParam->BSIM3v32uc) / model->BSIM3v32tox; + } + else + { + T0 = (Vgsteff + Vth) + Vth; + T2 = 1.0 + (pParam->BSIM3v32uc * Vbseff); + T3 = T0 / model->BSIM3v32tox; + T4 = T3 * (pParam->BSIM3v32ua + (pParam->BSIM3v32ub * T3)); + T5 = T4 * T2; + dDenomi_dVg = ((pParam->BSIM3v32ua + ((2.0 * pParam->BSIM3v32ub) * T3)) * T2) / model->BSIM3v32tox; + dDenomi_dVd = (dDenomi_dVg * 2.0) * dVth_dVd; + dDenomi_dVb = ((dDenomi_dVg * 2.0) * dVth_dVb) + (pParam->BSIM3v32uc * T4); + } + + + if (1) + { + Vec8m condmask0 = T5 >= (-0.8); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Denomi = vec8_blend(Denomi, 1.0 + T5, condmask_true0); + } + { + T9 = vec8_blend(T9, 1.0 / (7.0 + (10.0 * T5)), condmask_false0); + Denomi = vec8_blend(Denomi, (0.6 + T5) * T9, condmask_false0); + T9 = vec8_blend(T9, T9 * T9, condmask_false0); + dDenomi_dVg = vec8_blend(dDenomi_dVg, dDenomi_dVg * T9, 
condmask_false0); + dDenomi_dVd = vec8_blend(dDenomi_dVd, dDenomi_dVd * T9, condmask_false0); + dDenomi_dVb = vec8_blend(dDenomi_dVb, dDenomi_dVb * T9, condmask_false0); + } + } + + { + Vec8d val = ueff = ((Vec8d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp, heres[4]->BSIM3v32u0temp, heres[5]->BSIM3v32u0temp, heres[6]->BSIM3v32u0temp, heres[7]->BSIM3v32u0temp}) / Denomi; + heres[0]->BSIM3v32ueff = val[0]; + heres[1]->BSIM3v32ueff = val[1]; + heres[2]->BSIM3v32ueff = val[2]; + heres[3]->BSIM3v32ueff = val[3]; + heres[4]->BSIM3v32ueff = val[4]; + heres[5]->BSIM3v32ueff = val[5]; + heres[6]->BSIM3v32ueff = val[6]; + heres[7]->BSIM3v32ueff = val[7]; + } + T9 = (-ueff) / Denomi; + dueff_dVg = T9 * dDenomi_dVg; + dueff_dVd = T9 * dDenomi_dVd; + dueff_dVb = T9 * dDenomi_dVb; + WVCox = (Weff * pParam->BSIM3v32vsattemp) * model->BSIM3v32cox; + WVCoxRds = WVCox * Rds; + Esat = (2.0 * pParam->BSIM3v32vsattemp) / ueff; + EsatL = Esat * Leff; + T0 = (-EsatL) / ueff; + dEsatL_dVg = T0 * dueff_dVg; + dEsatL_dVd = T0 * dueff_dVd; + dEsatL_dVb = T0 * dueff_dVb; + a1 = pParam->BSIM3v32a1; + if (a1 == 0.0) + { + Lambda = vec8_SIMDTOVECTOR(pParam->BSIM3v32a2); + dLambda_dVg = vec8_SIMDTOVECTOR(0.0); + } + else + if (a1 > 0.0) + { + T0 = vec8_SIMDTOVECTOR(1.0 - pParam->BSIM3v32a2); + T1 = (T0 - (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec8_sqrt((T1 * T1) + (0.0004 * T0)); + Lambda = (pParam->BSIM3v32a2 + T0) - (0.5 * (T1 + T2)); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + else + { + T1 = (pParam->BSIM3v32a2 + (pParam->BSIM3v32a1 * Vgsteff)) - 0.0001; + T2 = vec8_sqrt((T1 * T1) + (0.0004 * pParam->BSIM3v32a2)); + Lambda = 0.5 * (T1 + T2); + dLambda_dVg = (0.5 * pParam->BSIM3v32a1) * (1.0 + (T1 / T2)); + } + + + Vgst2Vtm = Vgsteff + (2.0 * Vtm); + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + Vec8d val = Abulk / Vgst2Vtm; + heres[0]->BSIM3v32AbovVgst2Vtm = val[0]; + 
heres[1]->BSIM3v32AbovVgst2Vtm = val[1]; + heres[2]->BSIM3v32AbovVgst2Vtm = val[2]; + heres[3]->BSIM3v32AbovVgst2Vtm = val[3]; + heres[4]->BSIM3v32AbovVgst2Vtm = val[4]; + heres[5]->BSIM3v32AbovVgst2Vtm = val[5]; + heres[6]->BSIM3v32AbovVgst2Vtm = val[6]; + heres[7]->BSIM3v32AbovVgst2Vtm = val[7]; + } + } + + if (1) + { + Vec8m condmask0 = Rds > 0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + tmp2 = vec8_blend(tmp2, (dRds_dVg / Rds) + (dWeff_dVg / Weff), condmask_true0); + tmp3 = vec8_blend(tmp3, (dRds_dVb / Rds) + (dWeff_dVb / Weff), condmask_true0); + } + { + tmp2 = vec8_blend(tmp2, dWeff_dVg / Weff, condmask_false0); + tmp3 = vec8_blend(tmp3, dWeff_dVb / Weff, condmask_false0); + } + } + + if (1) + { + Vec8m condmask0 = (Rds == 0.0) & (Lambda == 1.0); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, 1.0 / ((Abulk * EsatL) + Vgst2Vtm), condmask_true0); + tmp1 = vec8_blend(tmp1, vec8_SIMDTOVECTOR(0.0), condmask_true0); + T1 = vec8_blend(T1, T0 * T0, condmask_true0); + T2 = vec8_blend(T2, Vgst2Vtm * T0, condmask_true0); + T3 = vec8_blend(T3, EsatL * Vgst2Vtm, condmask_true0); + Vdsat = vec8_blend(Vdsat, T3 * T0, condmask_true0); + dT0_dVg = vec8_blend(dT0_dVg, (-(((Abulk * dEsatL_dVg) + (EsatL * dAbulk_dVg)) + 1.0)) * T1, condmask_true0); + dT0_dVd = vec8_blend(dT0_dVd, (-(Abulk * dEsatL_dVd)) * T1, condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, (-((Abulk * dEsatL_dVb) + (dAbulk_dVb * EsatL))) * T1, condmask_true0); + dVdsat_dVg = vec8_blend(dVdsat_dVg, ((T3 * dT0_dVg) + (T2 * dEsatL_dVg)) + (EsatL * T0), condmask_true0); + dVdsat_dVd = vec8_blend(dVdsat_dVd, (T3 * dT0_dVd) + (T2 * dEsatL_dVd), condmask_true0); + dVdsat_dVb = vec8_blend(dVdsat_dVb, (T3 * dT0_dVb) + (T2 * dEsatL_dVb), condmask_true0); + } + { + tmp1 = vec8_blend(tmp1, dLambda_dVg / (Lambda * Lambda), condmask_false0); + T9 = vec8_blend(T9, Abulk * WVCoxRds, condmask_false0); + T8 = vec8_blend(T8, Abulk * T9, 
condmask_false0); + T7 = vec8_blend(T7, Vgst2Vtm * T9, condmask_false0); + T6 = vec8_blend(T6, Vgst2Vtm * WVCoxRds, condmask_false0); + T0 = vec8_blend(T0, (2.0 * Abulk) * ((T9 - 1.0) + (1.0 / Lambda)), condmask_false0); + dT0_dVg = vec8_blend(dT0_dVg, 2.0 * (((T8 * tmp2) - (Abulk * tmp1)) + ((((2.0 * T9) + (1.0 / Lambda)) - 1.0) * dAbulk_dVg)), condmask_false0); + dT0_dVb = vec8_blend(dT0_dVb, 2.0 * ((T8 * (((2.0 / Abulk) * dAbulk_dVb) + tmp3)) + (((1.0 / Lambda) - 1.0) * dAbulk_dVb)), condmask_false0); + dT0_dVd = vec8_blend(dT0_dVd, vec8_SIMDTOVECTOR(0.0), condmask_false0); + T1 = vec8_blend(T1, ((Vgst2Vtm * ((2.0 / Lambda) - 1.0)) + (Abulk * EsatL)) + (3.0 * T7), condmask_false0); + dT1_dVg = vec8_blend(dT1_dVg, (((((2.0 / Lambda) - 1.0) - ((2.0 * Vgst2Vtm) * tmp1)) + (Abulk * dEsatL_dVg)) + (EsatL * dAbulk_dVg)) + (3.0 * ((T9 + (T7 * tmp2)) + (T6 * dAbulk_dVg))), condmask_false0); + dT1_dVb = vec8_blend(dT1_dVb, ((Abulk * dEsatL_dVb) + (EsatL * dAbulk_dVb)) + (3.0 * ((T6 * dAbulk_dVb) + (T7 * tmp3))), condmask_false0); + dT1_dVd = vec8_blend(dT1_dVd, Abulk * dEsatL_dVd, condmask_false0); + T2 = vec8_blend(T2, Vgst2Vtm * (EsatL + (2.0 * T6)), condmask_false0); + dT2_dVg = vec8_blend(dT2_dVg, (EsatL + (Vgst2Vtm * dEsatL_dVg)) + (T6 * (4.0 + ((2.0 * Vgst2Vtm) * tmp2))), condmask_false0); + dT2_dVb = vec8_blend(dT2_dVb, Vgst2Vtm * (dEsatL_dVb + ((2.0 * T6) * tmp3)), condmask_false0); + dT2_dVd = vec8_blend(dT2_dVd, Vgst2Vtm * dEsatL_dVd, condmask_false0); + T3 = vec8_blend(T3, vec8_sqrt((T1 * T1) - ((2.0 * T0) * T2)), condmask_false0); + Vdsat = vec8_blend(Vdsat, (T1 - T3) / T0, condmask_false0); + dT3_dVg = vec8_blend(dT3_dVg, ((T1 * dT1_dVg) - (2.0 * ((T0 * dT2_dVg) + (T2 * dT0_dVg)))) / T3, condmask_false0); + dT3_dVd = vec8_blend(dT3_dVd, ((T1 * dT1_dVd) - (2.0 * ((T0 * dT2_dVd) + (T2 * dT0_dVd)))) / T3, condmask_false0); + dT3_dVb = vec8_blend(dT3_dVb, ((T1 * dT1_dVb) - (2.0 * ((T0 * dT2_dVb) + (T2 * dT0_dVb)))) / T3, condmask_false0); + dVdsat_dVg = 
vec8_blend(dVdsat_dVg, ((dT1_dVg - ((((T1 * dT1_dVg) - (dT0_dVg * T2)) - (T0 * dT2_dVg)) / T3)) - (Vdsat * dT0_dVg)) / T0, condmask_false0); + dVdsat_dVb = vec8_blend(dVdsat_dVb, ((dT1_dVb - ((((T1 * dT1_dVb) - (dT0_dVb * T2)) - (T0 * dT2_dVb)) / T3)) - (Vdsat * dT0_dVb)) / T0, condmask_false0); + dVdsat_dVd = vec8_blend(dVdsat_dVd, (dT1_dVd - (((T1 * dT1_dVd) - (T0 * dT2_dVd)) / T3)) / T0, condmask_false0); + } + } + + { + heres[0]->BSIM3v32vdsat = Vdsat[0]; + heres[1]->BSIM3v32vdsat = Vdsat[1]; + heres[2]->BSIM3v32vdsat = Vdsat[2]; + heres[3]->BSIM3v32vdsat = Vdsat[3]; + heres[4]->BSIM3v32vdsat = Vdsat[4]; + heres[5]->BSIM3v32vdsat = Vdsat[5]; + heres[6]->BSIM3v32vdsat = Vdsat[6]; + heres[7]->BSIM3v32vdsat = Vdsat[7]; + } + T1 = (Vdsat - Vds) - pParam->BSIM3v32delta; + dT1_dVg = dVdsat_dVg; + dT1_dVd = dVdsat_dVd - 1.0; + dT1_dVb = dVdsat_dVb; + T2 = vec8_sqrt((T1 * T1) + ((4.0 * pParam->BSIM3v32delta) * Vdsat)); + T0 = T1 / T2; + T3 = (2.0 * pParam->BSIM3v32delta) / T2; + dT2_dVg = (T0 * dT1_dVg) + (T3 * dVdsat_dVg); + dT2_dVd = (T0 * dT1_dVd) + (T3 * dVdsat_dVd); + dT2_dVb = (T0 * dT1_dVb) + (T3 * dVdsat_dVb); + Vdseff = Vdsat - (0.5 * (T1 + T2)); + dVdseff_dVg = dVdsat_dVg - (0.5 * (dT1_dVg + dT2_dVg)); + dVdseff_dVd = dVdsat_dVd - (0.5 * (dT1_dVd + dT2_dVd)); + dVdseff_dVb = dVdsat_dVb - (0.5 * (dT1_dVb + dT2_dVb)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec8m condmask0 = Vds == 0.0; + Vec8m condmask_true0 = condmask0; + { + Vdseff = vec8_blend(Vdseff, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVg = vec8_blend(dVdseff_dVg, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseff_dVb = vec8_blend(dVdseff_dVb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + tmp4 = 1.0 - (((0.5 * Abulk) * Vdsat) / Vgst2Vtm); + T9 = WVCoxRds * Vgsteff; + T8 = T9 / Vgst2Vtm; + T0 = (EsatL + Vdsat) + ((2.0 * T9) * tmp4); + T7 = 
(2.0 * WVCoxRds) * tmp4; + dT0_dVg = ((dEsatL_dVg + dVdsat_dVg) + (T7 * (1.0 + (tmp2 * Vgsteff)))) - (T8 * (((Abulk * dVdsat_dVg) - ((Abulk * Vdsat) / Vgst2Vtm)) + (Vdsat * dAbulk_dVg))); + dT0_dVb = ((dEsatL_dVb + dVdsat_dVb) + ((T7 * tmp3) * Vgsteff)) - (T8 * ((dAbulk_dVb * Vdsat) + (Abulk * dVdsat_dVb))); + dT0_dVd = (dEsatL_dVd + dVdsat_dVd) - ((T8 * Abulk) * dVdsat_dVd); + T9 = WVCoxRds * Abulk; + T1 = ((2.0 / Lambda) - 1.0) + T9; + dT1_dVg = ((-2.0) * tmp1) + (WVCoxRds * ((Abulk * tmp2) + dAbulk_dVg)); + dT1_dVb = (dAbulk_dVb * WVCoxRds) + (T9 * tmp3); + Vasat = T0 / T1; + dVasat_dVg = (dT0_dVg - (Vasat * dT1_dVg)) / T1; + dVasat_dVb = (dT0_dVb - (Vasat * dT1_dVb)) / T1; + dVasat_dVd = dT0_dVd / T1; + if (1) + { + Vec8m condmask0 = Vdseff > Vds; + Vec8m condmask_true0 = condmask0; + Vdseff = vec8_blend(Vdseff, Vds, condmask_true0); + } + + diffVds = Vds - Vdseff; + if (model->BSIM3v32intVersion > BSIM3v32V323) + { + { + heres[0]->BSIM3v32Vdseff = Vdseff[0]; + heres[1]->BSIM3v32Vdseff = Vdseff[1]; + heres[2]->BSIM3v32Vdseff = Vdseff[2]; + heres[3]->BSIM3v32Vdseff = Vdseff[3]; + heres[4]->BSIM3v32Vdseff = Vdseff[4]; + heres[5]->BSIM3v32Vdseff = Vdseff[5]; + heres[6]->BSIM3v32Vdseff = Vdseff[6]; + heres[7]->BSIM3v32Vdseff = Vdseff[7]; + } + } + + VACLM = vec8_SIMDTOVECTOR(MAX_EXP); + dVACLM_dVd = (dVACLM_dVg = (dVACLM_dVb = vec8_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pclm > 0.0) + if (1) + { + Vec8m condmask0 = diffVds > 1.0e-10; + Vec8m condmask_true0 = condmask0; + { + T0 = vec8_blend(T0, 1.0 / ((pParam->BSIM3v32pclm * Abulk) * pParam->BSIM3v32litl), condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, ((-T0) / Abulk) * dAbulk_dVb, condmask_true0); + dT0_dVg = vec8_blend(dT0_dVg, ((-T0) / Abulk) * dAbulk_dVg, condmask_true0); + T2 = vec8_blend(T2, Vgsteff / EsatL, condmask_true0); + T1 = vec8_blend(T1, Leff * (Abulk + T2), condmask_true0); + dT1_dVg = vec8_blend(dT1_dVg, Leff * (((1.0 - (T2 * dEsatL_dVg)) / EsatL) + dAbulk_dVg), condmask_true0); + dT1_dVb = 
vec8_blend(dT1_dVb, Leff * (dAbulk_dVb - ((T2 * dEsatL_dVb) / EsatL)), condmask_true0); + dT1_dVd = vec8_blend(dT1_dVd, ((-T2) * dEsatL_dVd) / Esat, condmask_true0); + T9 = vec8_blend(T9, T0 * T1, condmask_true0); + VACLM = vec8_blend(VACLM, T9 * diffVds, condmask_true0); + dVACLM_dVg = vec8_blend(dVACLM_dVg, (((T0 * dT1_dVg) * diffVds) - (T9 * dVdseff_dVg)) + ((T1 * diffVds) * dT0_dVg), condmask_true0); + dVACLM_dVb = vec8_blend(dVACLM_dVb, (((dT0_dVb * T1) + (T0 * dT1_dVb)) * diffVds) - (T9 * dVdseff_dVb), condmask_true0); + dVACLM_dVd = vec8_blend(dVACLM_dVd, ((T0 * dT1_dVd) * diffVds) + (T9 * (1.0 - dVdseff_dVd)), condmask_true0); + } + } + + + if (pParam->BSIM3v32thetaRout > 0.0) + { + T8 = Abulk * Vdsat; + T0 = Vgst2Vtm * T8; + dT0_dVg = (((Vgst2Vtm * Abulk) * dVdsat_dVg) + T8) + ((Vgst2Vtm * Vdsat) * dAbulk_dVg); + dT0_dVb = Vgst2Vtm * ((dAbulk_dVb * Vdsat) + (Abulk * dVdsat_dVb)); + dT0_dVd = (Vgst2Vtm * Abulk) * dVdsat_dVd; + T1 = Vgst2Vtm + T8; + dT1_dVg = (1.0 + (Abulk * dVdsat_dVg)) + (Vdsat * dAbulk_dVg); + dT1_dVb = (Abulk * dVdsat_dVb) + (dAbulk_dVb * Vdsat); + dT1_dVd = Abulk * dVdsat_dVd; + T9 = T1 * T1; + T2 = vec8_SIMDTOVECTOR(pParam->BSIM3v32thetaRout); + VADIBL = (Vgst2Vtm - (T0 / T1)) / T2; + dVADIBL_dVg = ((1.0 - (dT0_dVg / T1)) + ((T0 * dT1_dVg) / T9)) / T2; + dVADIBL_dVb = (((-dT0_dVb) / T1) + ((T0 * dT1_dVb) / T9)) / T2; + dVADIBL_dVd = (((-dT0_dVd) / T1) + ((T0 * dT1_dVd) / T9)) / T2; + T7 = pParam->BSIM3v32pdiblb * Vbseff; + if (1) + { + Vec8m condmask0 = T7 >= (-0.9); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T3 = vec8_blend(T3, 1.0 / (1.0 + T7), condmask_true0); + VADIBL = vec8_blend(VADIBL, VADIBL * T3, condmask_true0); + dVADIBL_dVg = vec8_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_true0); + dVADIBL_dVb = vec8_blend(dVADIBL_dVb, (dVADIBL_dVb - (VADIBL * pParam->BSIM3v32pdiblb)) * T3, condmask_true0); + dVADIBL_dVd = vec8_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_true0); + } + { + T4 = 
vec8_blend(T4, 1.0 / (0.8 + T7), condmask_false0); + T3 = vec8_blend(T3, (17.0 + (20.0 * T7)) * T4, condmask_false0); + dVADIBL_dVg = vec8_blend(dVADIBL_dVg, dVADIBL_dVg * T3, condmask_false0); + dVADIBL_dVb = vec8_blend(dVADIBL_dVb, (dVADIBL_dVb * T3) - (((VADIBL * pParam->BSIM3v32pdiblb) * T4) * T4), condmask_false0); + dVADIBL_dVd = vec8_blend(dVADIBL_dVd, dVADIBL_dVd * T3, condmask_false0); + VADIBL = vec8_blend(VADIBL, VADIBL * T3, condmask_false0); + } + } + + } + else + { + VADIBL = vec8_SIMDTOVECTOR(MAX_EXP); + dVADIBL_dVd = (dVADIBL_dVg = (dVADIBL_dVb = vec8_SIMDTOVECTOR(0.0))); + } + + T8 = pParam->BSIM3v32pvag / EsatL; + T9 = T8 * Vgsteff; + if (1) + { + Vec8m condmask0 = T9 > (-0.9); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, 1.0 + T9, condmask_true0); + dT0_dVg = vec8_blend(dT0_dVg, T8 * (1.0 - ((Vgsteff * dEsatL_dVg) / EsatL)), condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, ((-T9) * dEsatL_dVb) / EsatL, condmask_true0); + dT0_dVd = vec8_blend(dT0_dVd, ((-T9) * dEsatL_dVd) / EsatL, condmask_true0); + } + { + T1 = vec8_blend(T1, 1.0 / (17.0 + (20.0 * T9)), condmask_false0); + T0 = vec8_blend(T0, (0.8 + T9) * T1, condmask_false0); + T1 = vec8_blend(T1, T1 * T1, condmask_false0); + dT0_dVg = vec8_blend(dT0_dVg, (T8 * (1.0 - ((Vgsteff * dEsatL_dVg) / EsatL))) * T1, condmask_false0); + T9 = vec8_blend(T9, T9 * (T1 / EsatL), condmask_false0); + dT0_dVb = vec8_blend(dT0_dVb, (-T9) * dEsatL_dVb, condmask_false0); + dT0_dVd = vec8_blend(dT0_dVd, (-T9) * dEsatL_dVd, condmask_false0); + } + } + + tmp1 = VACLM * VACLM; + tmp2 = VADIBL * VADIBL; + tmp3 = VACLM + VADIBL; + T1 = (VACLM * VADIBL) / tmp3; + tmp3 *= tmp3; + dT1_dVg = ((tmp1 * dVADIBL_dVg) + (tmp2 * dVACLM_dVg)) / tmp3; + dT1_dVd = ((tmp1 * dVADIBL_dVd) + (tmp2 * dVACLM_dVd)) / tmp3; + dT1_dVb = ((tmp1 * dVADIBL_dVb) + (tmp2 * dVACLM_dVb)) / tmp3; + Va = Vasat + (T0 * T1); + dVa_dVg = (dVasat_dVg + (T1 * dT0_dVg)) + (T0 * dT1_dVg); + dVa_dVd = 
(dVasat_dVd + (T1 * dT0_dVd)) + (T0 * dT1_dVd); + dVa_dVb = (dVasat_dVb + (T1 * dT0_dVb)) + (T0 * dT1_dVb); + dVASCBE_dVg = (dVASCBE_dVd = (dVASCBE_dVb = vec8_SIMDTOVECTOR(0.0))); + if (pParam->BSIM3v32pscbe2 > 0.0) + { + if (1) + { + Vec8m condmask0 = diffVds > ((pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / EXP_THRESHOLD); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, (pParam->BSIM3v32pscbe1 * pParam->BSIM3v32litl) / diffVds, condmask_true0); + VASCBE = vec8_blend(VASCBE, (Leff * vec8_exp(T0)) / pParam->BSIM3v32pscbe2, condmask_true0); + T1 = vec8_blend(T1, (T0 * VASCBE) / diffVds, condmask_true0); + dVASCBE_dVg = vec8_blend(dVASCBE_dVg, T1 * dVdseff_dVg, condmask_true0); + dVASCBE_dVd = vec8_blend(dVASCBE_dVd, (-T1) * (1.0 - dVdseff_dVd), condmask_true0); + dVASCBE_dVb = vec8_blend(dVASCBE_dVb, T1 * dVdseff_dVb, condmask_true0); + } + { + VASCBE = vec8_blend(VASCBE, vec8_SIMDTOVECTOR((MAX_EXP * Leff) / pParam->BSIM3v32pscbe2), condmask_false0); + } + } + + } + else + { + VASCBE = vec8_SIMDTOVECTOR(MAX_EXP); + } + + CoxWovL = (model->BSIM3v32cox * Weff) / Leff; + beta = ueff * CoxWovL; + dbeta_dVg = (CoxWovL * dueff_dVg) + ((beta * dWeff_dVg) / Weff); + dbeta_dVd = CoxWovL * dueff_dVd; + dbeta_dVb = (CoxWovL * dueff_dVb) + ((beta * dWeff_dVb) / Weff); + T0 = 1.0 - (((0.5 * Abulk) * Vdseff) / Vgst2Vtm); + dT0_dVg = ((-0.5) * (((Abulk * dVdseff_dVg) - ((Abulk * Vdseff) / Vgst2Vtm)) + (Vdseff * dAbulk_dVg))) / Vgst2Vtm; + dT0_dVd = (((-0.5) * Abulk) * dVdseff_dVd) / Vgst2Vtm; + dT0_dVb = ((-0.5) * ((Abulk * dVdseff_dVb) + (dAbulk_dVb * Vdseff))) / Vgst2Vtm; + fgche1 = Vgsteff * T0; + dfgche1_dVg = (Vgsteff * dT0_dVg) + T0; + dfgche1_dVd = Vgsteff * dT0_dVd; + dfgche1_dVb = Vgsteff * dT0_dVb; + T9 = Vdseff / EsatL; + fgche2 = 1.0 + T9; + dfgche2_dVg = (dVdseff_dVg - (T9 * dEsatL_dVg)) / EsatL; + dfgche2_dVd = (dVdseff_dVd - (T9 * dEsatL_dVd)) / EsatL; + dfgche2_dVb = (dVdseff_dVb - (T9 * dEsatL_dVb)) / EsatL; + 
gche = (beta * fgche1) / fgche2; + dgche_dVg = (((beta * dfgche1_dVg) + (fgche1 * dbeta_dVg)) - (gche * dfgche2_dVg)) / fgche2; + dgche_dVd = (((beta * dfgche1_dVd) + (fgche1 * dbeta_dVd)) - (gche * dfgche2_dVd)) / fgche2; + dgche_dVb = (((beta * dfgche1_dVb) + (fgche1 * dbeta_dVb)) - (gche * dfgche2_dVb)) / fgche2; + T0 = 1.0 + (gche * Rds); + T9 = Vdseff / T0; + Idl = gche * T9; + dIdl_dVg = (((gche * dVdseff_dVg) + (T9 * dgche_dVg)) / T0) - (((Idl * gche) / T0) * dRds_dVg); + dIdl_dVd = ((gche * dVdseff_dVd) + (T9 * dgche_dVd)) / T0; + dIdl_dVb = (((gche * dVdseff_dVb) + (T9 * dgche_dVb)) - ((Idl * dRds_dVb) * gche)) / T0; + T9 = diffVds / Va; + T0 = 1.0 + T9; + Idsa = Idl * T0; + dIdsa_dVg = (T0 * dIdl_dVg) - ((Idl * (dVdseff_dVg + (T9 * dVa_dVg))) / Va); + dIdsa_dVd = (T0 * dIdl_dVd) + ((Idl * ((1.0 - dVdseff_dVd) - (T9 * dVa_dVd))) / Va); + dIdsa_dVb = (T0 * dIdl_dVb) - ((Idl * (dVdseff_dVb + (T9 * dVa_dVb))) / Va); + T9 = diffVds / VASCBE; + T0 = 1.0 + T9; + Ids = Idsa * T0; + Gm = (T0 * dIdsa_dVg) - ((Idsa * (dVdseff_dVg + (T9 * dVASCBE_dVg))) / VASCBE); + Gds = (T0 * dIdsa_dVd) + ((Idsa * ((1.0 - dVdseff_dVd) - (T9 * dVASCBE_dVd))) / VASCBE); + Gmb = (T0 * dIdsa_dVb) - ((Idsa * (dVdseff_dVb + (T9 * dVASCBE_dVb))) / VASCBE); + Gds += Gm * dVgsteff_dVd; + Gmb += Gm * dVgsteff_dVb; + Gm *= dVgsteff_dVg; + Gmb *= dVbseff_dVb; + tmpuni = pParam->BSIM3v32alpha0 + (pParam->BSIM3v32alpha1 * Leff); + if ((tmpuni <= 0.0) || (pParam->BSIM3v32beta0 <= 0.0)) + { + Isub = (Gbd = (Gbb = (Gbg = vec8_SIMDTOVECTOR(0.0)))); + } + else + { + T2 = vec8_SIMDTOVECTOR(tmpuni / Leff); + if (1) + { + Vec8m condmask0 = diffVds > (pParam->BSIM3v32beta0 / EXP_THRESHOLD); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, (-pParam->BSIM3v32beta0) / diffVds, condmask_true0); + T1 = vec8_blend(T1, (T2 * diffVds) * vec8_exp(T0), condmask_true0); + T3 = vec8_blend(T3, (T1 / diffVds) * (T0 - 1.0), condmask_true0); + dT1_dVg = 
vec8_blend(dT1_dVg, T3 * dVdseff_dVg, condmask_true0); + dT1_dVd = vec8_blend(dT1_dVd, T3 * (dVdseff_dVd - 1.0), condmask_true0); + dT1_dVb = vec8_blend(dT1_dVb, T3 * dVdseff_dVb, condmask_true0); + } + { + T3 = vec8_blend(T3, T2 * MIN_EXP, condmask_false0); + T1 = vec8_blend(T1, T3 * diffVds, condmask_false0); + dT1_dVg = vec8_blend(dT1_dVg, (-T3) * dVdseff_dVg, condmask_false0); + dT1_dVd = vec8_blend(dT1_dVd, T3 * (1.0 - dVdseff_dVd), condmask_false0); + dT1_dVb = vec8_blend(dT1_dVb, (-T3) * dVdseff_dVb, condmask_false0); + } + } + + Isub = T1 * Idsa; + Gbg = (T1 * dIdsa_dVg) + (Idsa * dT1_dVg); + Gbd = (T1 * dIdsa_dVd) + (Idsa * dT1_dVd); + Gbb = (T1 * dIdsa_dVb) + (Idsa * dT1_dVb); + Gbd += Gbg * dVgsteff_dVd; + Gbb += Gbg * dVgsteff_dVb; + Gbg *= dVgsteff_dVg; + Gbb *= dVbseff_dVb; + } + + cdrain = Ids; + { + heres[0]->BSIM3v32gds = Gds[0]; + heres[1]->BSIM3v32gds = Gds[1]; + heres[2]->BSIM3v32gds = Gds[2]; + heres[3]->BSIM3v32gds = Gds[3]; + heres[4]->BSIM3v32gds = Gds[4]; + heres[5]->BSIM3v32gds = Gds[5]; + heres[6]->BSIM3v32gds = Gds[6]; + heres[7]->BSIM3v32gds = Gds[7]; + } + { + heres[0]->BSIM3v32gm = Gm[0]; + heres[1]->BSIM3v32gm = Gm[1]; + heres[2]->BSIM3v32gm = Gm[2]; + heres[3]->BSIM3v32gm = Gm[3]; + heres[4]->BSIM3v32gm = Gm[4]; + heres[5]->BSIM3v32gm = Gm[5]; + heres[6]->BSIM3v32gm = Gm[6]; + heres[7]->BSIM3v32gm = Gm[7]; + } + { + heres[0]->BSIM3v32gmbs = Gmb[0]; + heres[1]->BSIM3v32gmbs = Gmb[1]; + heres[2]->BSIM3v32gmbs = Gmb[2]; + heres[3]->BSIM3v32gmbs = Gmb[3]; + heres[4]->BSIM3v32gmbs = Gmb[4]; + heres[5]->BSIM3v32gmbs = Gmb[5]; + heres[6]->BSIM3v32gmbs = Gmb[6]; + heres[7]->BSIM3v32gmbs = Gmb[7]; + } + { + heres[0]->BSIM3v32gbbs = Gbb[0]; + heres[1]->BSIM3v32gbbs = Gbb[1]; + heres[2]->BSIM3v32gbbs = Gbb[2]; + heres[3]->BSIM3v32gbbs = Gbb[3]; + heres[4]->BSIM3v32gbbs = Gbb[4]; + heres[5]->BSIM3v32gbbs = Gbb[5]; + heres[6]->BSIM3v32gbbs = Gbb[6]; + heres[7]->BSIM3v32gbbs = Gbb[7]; + } + { + heres[0]->BSIM3v32gbgs = Gbg[0]; + 
heres[1]->BSIM3v32gbgs = Gbg[1]; + heres[2]->BSIM3v32gbgs = Gbg[2]; + heres[3]->BSIM3v32gbgs = Gbg[3]; + heres[4]->BSIM3v32gbgs = Gbg[4]; + heres[5]->BSIM3v32gbgs = Gbg[5]; + heres[6]->BSIM3v32gbgs = Gbg[6]; + heres[7]->BSIM3v32gbgs = Gbg[7]; + } + { + heres[0]->BSIM3v32gbds = Gbd[0]; + heres[1]->BSIM3v32gbds = Gbd[1]; + heres[2]->BSIM3v32gbds = Gbd[2]; + heres[3]->BSIM3v32gbds = Gbd[3]; + heres[4]->BSIM3v32gbds = Gbd[4]; + heres[5]->BSIM3v32gbds = Gbd[5]; + heres[6]->BSIM3v32gbds = Gbd[6]; + heres[7]->BSIM3v32gbds = Gbd[7]; + } + { + heres[0]->BSIM3v32csub = Isub[0]; + heres[1]->BSIM3v32csub = Isub[1]; + heres[2]->BSIM3v32csub = Isub[2]; + heres[3]->BSIM3v32csub = Isub[3]; + heres[4]->BSIM3v32csub = Isub[4]; + heres[5]->BSIM3v32csub = Isub[5]; + heres[6]->BSIM3v32csub = Isub[6]; + heres[7]->BSIM3v32csub = Isub[7]; + } + CoxWL = (model->BSIM3v32cox * pParam->BSIM3v32weffCV) * pParam->BSIM3v32leffCV; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + if ((model->BSIM3v32xpart < 0) | (!ChargeComputationNeeded)) + { + qgate = vec8_SIMDTOVECTOR(0.0); + qdrn = vec8_SIMDTOVECTOR(0.0); + qsrc = vec8_SIMDTOVECTOR(0.0); + qbulk = vec8_SIMDTOVECTOR(0.0); + { + heres[0]->BSIM3v32cggb = 0.0; + heres[1]->BSIM3v32cggb = 0.0; + heres[2]->BSIM3v32cggb = 0.0; + heres[3]->BSIM3v32cggb = 0.0; + heres[4]->BSIM3v32cggb = 0.0; + heres[5]->BSIM3v32cggb = 0.0; + heres[6]->BSIM3v32cggb = 0.0; + heres[7]->BSIM3v32cggb = 0.0; + } + { + heres[0]->BSIM3v32cgsb = 0.0; + heres[1]->BSIM3v32cgsb = 0.0; + heres[2]->BSIM3v32cgsb = 0.0; + heres[3]->BSIM3v32cgsb = 0.0; + heres[4]->BSIM3v32cgsb = 0.0; + heres[5]->BSIM3v32cgsb = 0.0; + heres[6]->BSIM3v32cgsb = 0.0; + heres[7]->BSIM3v32cgsb = 0.0; + } + { + heres[0]->BSIM3v32cgdb = 0.0; + heres[1]->BSIM3v32cgdb = 0.0; + heres[2]->BSIM3v32cgdb = 0.0; + heres[3]->BSIM3v32cgdb = 0.0; + heres[4]->BSIM3v32cgdb = 0.0; + heres[5]->BSIM3v32cgdb = 0.0; + heres[6]->BSIM3v32cgdb = 0.0; + heres[7]->BSIM3v32cgdb = 0.0; + } + { + 
heres[0]->BSIM3v32cdgb = 0.0; + heres[1]->BSIM3v32cdgb = 0.0; + heres[2]->BSIM3v32cdgb = 0.0; + heres[3]->BSIM3v32cdgb = 0.0; + heres[4]->BSIM3v32cdgb = 0.0; + heres[5]->BSIM3v32cdgb = 0.0; + heres[6]->BSIM3v32cdgb = 0.0; + heres[7]->BSIM3v32cdgb = 0.0; + } + { + heres[0]->BSIM3v32cdsb = 0.0; + heres[1]->BSIM3v32cdsb = 0.0; + heres[2]->BSIM3v32cdsb = 0.0; + heres[3]->BSIM3v32cdsb = 0.0; + heres[4]->BSIM3v32cdsb = 0.0; + heres[5]->BSIM3v32cdsb = 0.0; + heres[6]->BSIM3v32cdsb = 0.0; + heres[7]->BSIM3v32cdsb = 0.0; + } + { + heres[0]->BSIM3v32cddb = 0.0; + heres[1]->BSIM3v32cddb = 0.0; + heres[2]->BSIM3v32cddb = 0.0; + heres[3]->BSIM3v32cddb = 0.0; + heres[4]->BSIM3v32cddb = 0.0; + heres[5]->BSIM3v32cddb = 0.0; + heres[6]->BSIM3v32cddb = 0.0; + heres[7]->BSIM3v32cddb = 0.0; + } + { + heres[0]->BSIM3v32cbgb = 0.0; + heres[1]->BSIM3v32cbgb = 0.0; + heres[2]->BSIM3v32cbgb = 0.0; + heres[3]->BSIM3v32cbgb = 0.0; + heres[4]->BSIM3v32cbgb = 0.0; + heres[5]->BSIM3v32cbgb = 0.0; + heres[6]->BSIM3v32cbgb = 0.0; + heres[7]->BSIM3v32cbgb = 0.0; + } + { + heres[0]->BSIM3v32cbsb = 0.0; + heres[1]->BSIM3v32cbsb = 0.0; + heres[2]->BSIM3v32cbsb = 0.0; + heres[3]->BSIM3v32cbsb = 0.0; + heres[4]->BSIM3v32cbsb = 0.0; + heres[5]->BSIM3v32cbsb = 0.0; + heres[6]->BSIM3v32cbsb = 0.0; + heres[7]->BSIM3v32cbsb = 0.0; + } + { + heres[0]->BSIM3v32cbdb = 0.0; + heres[1]->BSIM3v32cbdb = 0.0; + heres[2]->BSIM3v32cbdb = 0.0; + heres[3]->BSIM3v32cbdb = 0.0; + heres[4]->BSIM3v32cbdb = 0.0; + heres[5]->BSIM3v32cbdb = 0.0; + heres[6]->BSIM3v32cbdb = 0.0; + heres[7]->BSIM3v32cbdb = 0.0; + } + { + heres[0]->BSIM3v32cqdb = 0.0; + heres[1]->BSIM3v32cqdb = 0.0; + heres[2]->BSIM3v32cqdb = 0.0; + heres[3]->BSIM3v32cqdb = 0.0; + heres[4]->BSIM3v32cqdb = 0.0; + heres[5]->BSIM3v32cqdb = 0.0; + heres[6]->BSIM3v32cqdb = 0.0; + heres[7]->BSIM3v32cqdb = 0.0; + } + { + heres[0]->BSIM3v32cqsb = 0.0; + heres[1]->BSIM3v32cqsb = 0.0; + heres[2]->BSIM3v32cqsb = 0.0; + heres[3]->BSIM3v32cqsb = 0.0; + heres[4]->BSIM3v32cqsb 
= 0.0; + heres[5]->BSIM3v32cqsb = 0.0; + heres[6]->BSIM3v32cqsb = 0.0; + heres[7]->BSIM3v32cqsb = 0.0; + } + { + heres[0]->BSIM3v32cqgb = 0.0; + heres[1]->BSIM3v32cqgb = 0.0; + heres[2]->BSIM3v32cqgb = 0.0; + heres[3]->BSIM3v32cqgb = 0.0; + heres[4]->BSIM3v32cqgb = 0.0; + heres[5]->BSIM3v32cqgb = 0.0; + heres[6]->BSIM3v32cqgb = 0.0; + heres[7]->BSIM3v32cqgb = 0.0; + } + { + heres[0]->BSIM3v32cqbb = 0.0; + heres[1]->BSIM3v32cqbb = 0.0; + heres[2]->BSIM3v32cqbb = 0.0; + heres[3]->BSIM3v32cqbb = 0.0; + heres[4]->BSIM3v32cqbb = 0.0; + heres[5]->BSIM3v32cqbb = 0.0; + heres[6]->BSIM3v32cqbb = 0.0; + heres[7]->BSIM3v32cqbb = 0.0; + } + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + heres[4]->BSIM3v32gtau = 0.0; + heres[5]->BSIM3v32gtau = 0.0; + heres[6]->BSIM3v32gtau = 0.0; + heres[7]->BSIM3v32gtau = 0.0; + } + goto finished; + } + else + if (model->BSIM3v32capMod == 0) + { + if (1) + { + Vec8m condmask0 = Vbseff < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Vbseff = vec8_blend(Vbseff, Vbs, condmask_true0); + dVbseff_dVb = vec8_blend(dVbseff_dVb, vec8_SIMDTOVECTOR(1.0), condmask_true0); + } + { + Vbseff = vec8_blend(Vbseff, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseff_dVb = vec8_blend(dVbseff_dVb, -dPhis_dVb, condmask_false0); + } + } + + Vfb = vec8_SIMDTOVECTOR(pParam->BSIM3v32vfbcv); + Vth = (Vfb + pParam->BSIM3v32phi) + (pParam->BSIM3v32k1ox * sqrtPhis); + Vgst = Vgs_eff - Vth; + dVth_dVb = pParam->BSIM3v32k1ox * dsqrtPhis_dVb; + dVgst_dVb = -dVth_dVb; + dVgst_dVg = dVgs_eff_dVg; + Arg1 = (Vgs_eff - Vbseff) - Vfb; + if (1) + { + Vec8m condmask0 = Arg1 <= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + qgate = vec8_blend(qgate, CoxWL * Arg1, condmask_true0); + qbulk = vec8_blend(qbulk, -qgate, condmask_true0); + qdrn = vec8_blend(qdrn, vec8_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec8d val = CoxWL 
* dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cgdb = 0.0; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cgdb = 0.0; + + } + { + Vec8d val = CoxWL * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true0[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cdgb = 0.0; + + if (condmask_true0[7]) + 
heres[7]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cddb = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cddb = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cddb = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cddb = 0.0; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cdsb = 0.0; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cdsb = 0.0; + + } + { + Vec8d val = (-CoxWL) * dVgs_eff_dVg; + if (condmask_true0[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cbdb = 0.0; + + if (condmask_true0[7]) + 
heres[7]->BSIM3v32cbdb = 0.0; + + } + { + Vec8d val = -((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}); + if (condmask_true0[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + if (condmask_true0[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true0[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true0[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true0[3]) + heres[3]->BSIM3v32qinv = 0.0; + + if (condmask_true0[4]) + heres[4]->BSIM3v32qinv = 0.0; + + if (condmask_true0[5]) + heres[5]->BSIM3v32qinv = 0.0; + + if (condmask_true0[6]) + heres[6]->BSIM3v32qinv = 0.0; + + if (condmask_true0[7]) + heres[7]->BSIM3v32qinv = 0.0; + + } + } + if (1) + { + Vec8m condmask1 = Vgst <= 0.0; + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + T1 = vec8_blend(T1, vec8_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_true1); + T2 = vec8_blend(T2, vec8_sqrt((T1 * T1) + Arg1), condmask_true1); + qgate = vec8_blend(qgate, (CoxWL * pParam->BSIM3v32k1ox) * (T2 - T1), condmask_true1); + qbulk = vec8_blend(qbulk, -qgate, condmask_true1); + qdrn = vec8_blend(qdrn, vec8_SIMDTOVECTOR(0.0), condmask_true1); + T0 = vec8_blend(T0, (CoxWL * T1) / T2, condmask_true1); + { + Vec8d val = T0 * dVgs_eff_dVg; + if (condmask_true1[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if 
(condmask_true1[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cgdb = 0.0; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cgdb = 0.0; + + } + { + Vec8d val = T0 * (dVbseff_dVb - dVgs_eff_dVg); + if (condmask_true1[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cdgb = 0.0; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cddb = 
0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cddb = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cddb = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cddb = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cddb = 0.0; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cdsb = 0.0; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cdsb = 0.0; + + } + { + Vec8d val = -((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cbdb = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cbdb = 0.0; + + if 
(condmask_true1[7]) + heres[7]->BSIM3v32cbdb = 0.0; + + } + { + Vec8d val = -((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}); + if (condmask_true1[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true1[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true1[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true1[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_true1[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_true1[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_true1[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_true1[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + if (condmask_true1[0]) + heres[0]->BSIM3v32qinv = 0.0; + + if (condmask_true1[1]) + heres[1]->BSIM3v32qinv = 0.0; + + if (condmask_true1[2]) + heres[2]->BSIM3v32qinv = 0.0; + + if (condmask_true1[3]) + heres[3]->BSIM3v32qinv = 0.0; + + if (condmask_true1[4]) + heres[4]->BSIM3v32qinv = 0.0; + + if (condmask_true1[5]) + heres[5]->BSIM3v32qinv = 0.0; + + if (condmask_true1[6]) + heres[6]->BSIM3v32qinv = 0.0; + + if (condmask_true1[7]) + heres[7]->BSIM3v32qinv = 0.0; + + } + } + { + AbulkCV = vec8_blend(AbulkCV, Abulk0 * pParam->BSIM3v32abulkCVfactor, condmask_false1); + dAbulkCV_dVb = vec8_blend(dAbulkCV_dVb, pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb, condmask_false1); + Vdsat = vec8_blend(Vdsat, Vgst / AbulkCV, condmask_false1); + dVdsat_dVg = vec8_blend(dVdsat_dVg, dVgs_eff_dVg / AbulkCV, condmask_false1); + dVdsat_dVb = vec8_blend(dVdsat_dVb, (-((Vdsat * dAbulkCV_dVb) + dVth_dVb)) / AbulkCV, condmask_false1); + if (model->BSIM3v32xpart > 0.5) + { + if (1) + { + Vec8m condmask2 = Vdsat <= Vds; + Vec8m condmask_true2 = condmask_false1 & condmask2; + Vec8m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec8_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec8_blend(qgate, CoxWL 
* (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec8_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec8_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec8_blend(qdrn, vec8_SIMDTOVECTOR(0.0), condmask_true2); + { + Vec8d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + T2 = vec8_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgdb = 0.0; + + if 
(condmask_true2[6]) + heres[6]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgdb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdgb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdgb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cddb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cddb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cddb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cddb = 0.0; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdsb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdsb = 0.0; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = 
val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + T3 = vec8_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbdb = 0.0; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32qinv = val[4]; + + if (condmask_true2[5]) + 
heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + { + Alphaz = vec8_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec8_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec8_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec8_blend(T3, T2 * Vds, condmask_false2); + T9 = vec8_blend(T9, vec8_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec8_blend(T4, T9 * Alphaz, condmask_false2); + T7 = vec8_blend(T7, ((2.0 * Vds) - T1) - (3.0 * T3), condmask_false2); + T8 = vec8_blend(T8, (T3 - T1) - (2.0 * Vds), condmask_false2); + qgate = vec8_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T10 = vec8_blend(T10, T4 * T8, condmask_false2); + qdrn = vec8_blend(qdrn, T4 * T7, condmask_false2); + qbulk = vec8_blend(qbulk, -((qgate + qdrn) + T10), condmask_false2); + T5 = vec8_blend(T5, T3 / T1, condmask_false2); + { + Vec8d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + T11 = vec8_blend(T11, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec8d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + if (condmask_false2[4]) + 
heres[4]->BSIM3v32cgdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cgdb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgdb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + T11) + ((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + T6 = vec8_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec8_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec8_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec8_blend(T7, T9 * T7, condmask_false2); + T8 = vec8_blend(T8, T9 * T8, condmask_false2); + T9 = vec8_blend(T9, (2.0 * T4) * (1.0 - (3.0 * T5)), condmask_false2); + { + Vec8d val = ((T7 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdgb = val[4]; + + if 
(condmask_false2[5]) + heres[5]->BSIM3v32cdgb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdgb = val[7]; + + } + T12 = vec8_blend(T12, (T7 * dAlphaz_dVb) - (T9 * dVdsat_dVb), condmask_false2); + { + Vec8d val = T4 * ((3.0 - (6.0 * T2)) - (3.0 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cddb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cddb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cddb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cddb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}) + T12) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cdsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdsb = val[7]; + + } + T9 = vec8_blend(T9, (2.0 * T4) * (1.0 + T5), condmask_false2); + T10 = vec8_blend(T10, ((T8 * dAlphaz_dVg) - (T9 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec8_blend(T11, (T8 * dAlphaz_dVb) - (T9 * 
dVdsat_dVb), condmask_false2); + T12 = vec8_blend(T12, T4 * (((2.0 * T2) + T5) - 1.0), condmask_false2); + T0 = vec8_blend(T0, -((T10 + T11) + T12), condmask_false2); + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + T10); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbdb = val[6]; + + if 
(condmask_false2[7]) + heres[7]->BSIM3v32cbdb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb})) + T0); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32qinv = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + } + + } + else + if (model->BSIM3v32xpart < 0.5) + { + if (1) + { + Vec8m condmask2 = Vds >= Vdsat; + Vec8m condmask_true2 = condmask_false1 & condmask2; + Vec8m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec8_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec8_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec8_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk 
= vec8_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec8_blend(qdrn, 0.4 * T2, condmask_true2); + { + Vec8d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + T2 = vec8_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + T2); + if (condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgdb = 0.0; + + } + T3 = vec8_blend(T3, 
vec8_SIMDTOVECTOR(0.4 * Two_Third_CoxWL), condmask_true2); + { + Vec8d val = (-T3) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdgb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdgb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdgb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdgb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cddb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cddb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cddb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cddb = 0.0; + + } + T4 = vec8_blend(T4, T3 * dVth_dVb, condmask_true2); + { + Vec8d val = -(T4 + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdsb = val[7]; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, 
heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + T3 = vec8_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbdb = 0.0; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_true2[0]) + 
heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32qinv = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + { + Alphaz = vec8_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec8_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec8_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec8_blend(T3, T2 * Vds, condmask_false2); + T9 = vec8_blend(T9, vec8_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec8_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec8_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec8_blend(T5, T3 / T1, condmask_false2); + { + Vec8d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + tmp = vec8_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec8d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + if (condmask_false2[4]) + 
heres[4]->BSIM3v32cgdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cgdb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgdb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + T6 = vec8_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec8_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec8_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T6 = vec8_blend(T6, (((8.0 * Vdsat) * Vdsat) - ((6.0 * Vdsat) * Vds)) + ((1.2 * Vds) * Vds), condmask_false2); + T8 = vec8_blend(T8, T2 / T1, condmask_false2); + T7 = vec8_blend(T7, (Vds - T1) - (T8 * T6), condmask_false2); + qdrn = vec8_blend(qdrn, T4 * T7, condmask_false2); + T7 = vec8_blend(T7, T7 * T9, condmask_false2); + tmp = vec8_blend(tmp, T8 / T1, condmask_false2); + tmp1 = vec8_blend(tmp1, T4 * ((2.0 - ((4.0 * tmp) * T6)) + (T8 * ((16.0 * Vdsat) - (6.0 * Vds)))), condmask_false2); + { + Vec8d val = ((T7 * dAlphaz_dVg) - (tmp1 * dVdsat_dVg)) * dVgs_eff_dVg; + 
if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdgb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cdgb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdgb = val[7]; + + } + T10 = vec8_blend(T10, (T7 * dAlphaz_dVb) - (tmp1 * dVdsat_dVb), condmask_false2); + { + Vec8d val = T4 * ((2.0 - (((1.0 / ((3.0 * T1) * T1)) + (2.0 * tmp)) * T6)) + (T8 * ((6.0 * Vdsat) - (2.4 * Vds)))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cddb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cddb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cddb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cddb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}) + T10) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdsb = val[4]; + + if (condmask_false2[5]) 
+ heres[5]->BSIM3v32cdsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdsb = val[7]; + + } + T7 = vec8_blend(T7, 2.0 * (T1 + T3), condmask_false2); + qbulk = vec8_blend(qbulk, -(qgate - (T4 * T7)), condmask_false2); + T7 = vec8_blend(T7, T7 * T9, condmask_false2); + T0 = vec8_blend(T0, (4.0 * T4) * (1.0 - T5), condmask_false2); + T12 = vec8_blend(T12, ((((-T7) * dAlphaz_dVg) - ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) - (T0 * dVdsat_dVg)) * dVgs_eff_dVg, condmask_false2); + T11 = vec8_blend(T11, (((-T7) * dAlphaz_dVb) - T10) - (T0 * dVdsat_dVb), condmask_false2); + T10 = vec8_blend(T10, (((-4.0) * T4) * ((T2 - 0.5) + (0.5 * T5))) - ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}), condmask_false2); + tmp = vec8_blend(tmp, -((T10 + T11) + T12), condmask_false2); + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + T12); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if 
(condmask_false2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})) + T10); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbdb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbdb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_false2[7]) + 
heres[7]->BSIM3v32cbsb = val[7]; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32qinv = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + } + + } + else + { + if (1) + { + Vec8m condmask2 = Vds >= Vdsat; + Vec8m condmask_true2 = condmask_false1 & condmask2; + Vec8m condmask_false2 = condmask_false1 & (~condmask2); + { + T1 = vec8_blend(T1, Vdsat / 3.0, condmask_true2); + qgate = vec8_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - T1), condmask_true2); + T2 = vec8_blend(T2, (-Two_Third_CoxWL) * Vgst, condmask_true2); + qbulk = vec8_blend(qbulk, -(qgate + T2), condmask_true2); + qdrn = vec8_blend(qdrn, 0.5 * T2, condmask_true2); + { + Vec8d val = (One_Third_CoxWL * (3.0 - dVdsat_dVg)) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + T2 = vec8_blend(T2, (-One_Third_CoxWL) * dVdsat_dVb, condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + T2); + if 
(condmask_true2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cgdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cgdb = 0.0; + + } + { + Vec8d val = (-One_Third_CoxWL) * dVgs_eff_dVg; + if (condmask_true2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdgb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdgb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdgb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdgb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cddb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cddb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cddb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cddb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cddb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cddb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cddb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cddb = 
0.0; + + } + T4 = vec8_blend(T4, One_Third_CoxWL * dVth_dVb, condmask_true2); + { + Vec8d val = -(T4 + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})); + if (condmask_true2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cdsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cdsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cdsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cdsb = val[7]; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) - (Two_Third_CoxWL * dVgs_eff_dVg)); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + T3 = vec8_blend(T3, -(T2 + (Two_Third_CoxWL * dVth_dVb)), condmask_true2); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb}) + T3); + if (condmask_true2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + 
+ if (condmask_true2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + if (condmask_true2[0]) + heres[0]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[1]) + heres[1]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[2]) + heres[2]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[3]) + heres[3]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[4]) + heres[4]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[5]) + heres[5]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[6]) + heres[6]->BSIM3v32cbdb = 0.0; + + if (condmask_true2[7]) + heres[7]->BSIM3v32cbdb = 0.0; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_true2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32qinv = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + { + Alphaz = vec8_blend(Alphaz, Vgst / Vdsat, condmask_false2); + T1 = vec8_blend(T1, (2.0 * Vdsat) - Vds, condmask_false2); + T2 = vec8_blend(T2, Vds / (3.0 * T1), condmask_false2); + T3 = vec8_blend(T3, T2 * Vds, condmask_false2); + T9 = vec8_blend(T9, vec8_SIMDTOVECTOR(0.25 * CoxWL), condmask_false2); + T4 = vec8_blend(T4, T9 * Alphaz, condmask_false2); + qgate = vec8_blend(qgate, CoxWL * (((Vgs_eff - Vfb) - pParam->BSIM3v32phi) - (0.5 * (Vds - T3))), condmask_false2); + T5 = vec8_blend(T5, T3 / T1, condmask_false2); + { + Vec8d val = (CoxWL * (1.0 - (T5 * dVdsat_dVg))) * dVgs_eff_dVg; + if 
(condmask_false2[0]) + heres[0]->BSIM3v32cggb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cggb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cggb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cggb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cggb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cggb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cggb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cggb = val[7]; + + } + tmp = vec8_blend(tmp, ((-CoxWL) * T5) * dVdsat_dVb, condmask_false2); + { + Vec8d val = CoxWL * ((T2 - 0.5) + (0.5 * T5)); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgdb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cgdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cgdb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgdb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb})) + tmp); + if (condmask_false2[0]) + heres[0]->BSIM3v32cgsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cgsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cgsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cgsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cgsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cgsb = val[5]; + + if (condmask_false2[6]) + 
heres[6]->BSIM3v32cgsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cgsb = val[7]; + + } + T6 = vec8_blend(T6, 1.0 / Vdsat, condmask_false2); + dAlphaz_dVg = vec8_blend(dAlphaz_dVg, T6 * (1.0 - (Alphaz * dVdsat_dVg)), condmask_false2); + dAlphaz_dVb = vec8_blend(dAlphaz_dVb, (-T6) * (dVth_dVb + (Alphaz * dVdsat_dVb)), condmask_false2); + T7 = vec8_blend(T7, T1 + T3, condmask_false2); + qdrn = vec8_blend(qdrn, (-T4) * T7, condmask_false2); + qbulk = vec8_blend(qbulk, -((qgate + qdrn) + qdrn), condmask_false2); + T7 = vec8_blend(T7, T7 * T9, condmask_false2); + T0 = vec8_blend(T0, T4 * ((2.0 * T5) - 2.0), condmask_false2); + { + Vec8d val = ((T0 * dVdsat_dVg) - (T7 * dAlphaz_dVg)) * dVgs_eff_dVg; + if (condmask_false2[0]) + heres[0]->BSIM3v32cdgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdgb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cdgb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdgb = val[7]; + + } + T12 = vec8_blend(T12, (T0 * dVdsat_dVb) - (T7 * dAlphaz_dVb), condmask_false2); + { + Vec8d val = T4 * ((1.0 - (2.0 * T2)) - T5); + if (condmask_false2[0]) + heres[0]->BSIM3v32cddb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cddb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cddb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cddb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cddb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cddb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cddb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cddb = val[7]; + + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, 
heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}) + T12) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})); + if (condmask_false2[0]) + heres[0]->BSIM3v32cdsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cdsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cdsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cdsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cdsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cdsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cdsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cdsb = val[7]; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + (2.0 * ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbgb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbgb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbgb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbgb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbgb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbgb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbgb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbgb = val[7]; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + (2.0 * 
((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbdb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbdb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbdb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbdb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbdb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbdb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbdb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbdb = val[7]; + + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + (2.0 * ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}))); + if (condmask_false2[0]) + heres[0]->BSIM3v32cbsb = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32cbsb = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32cbsb = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32cbsb = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32cbsb = val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32cbsb = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32cbsb = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32cbsb = val[7]; + + } + { + Vec8d val = -(qgate + qbulk); + if (condmask_false2[0]) + heres[0]->BSIM3v32qinv = val[0]; + + if (condmask_false2[1]) + heres[1]->BSIM3v32qinv = val[1]; + + if (condmask_false2[2]) + heres[2]->BSIM3v32qinv = val[2]; + + if (condmask_false2[3]) + heres[3]->BSIM3v32qinv = val[3]; + + if (condmask_false2[4]) + heres[4]->BSIM3v32qinv 
= val[4]; + + if (condmask_false2[5]) + heres[5]->BSIM3v32qinv = val[5]; + + if (condmask_false2[6]) + heres[6]->BSIM3v32qinv = val[6]; + + if (condmask_false2[7]) + heres[7]->BSIM3v32qinv = val[7]; + + } + } + } + + } + + + } + } + + } + + } + else + { + if (1) + { + Vec8m condmask0 = Vbseff < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + VbseffCV = vec8_blend(VbseffCV, Vbseff, condmask_true0); + dVbseffCV_dVb = vec8_blend(dVbseffCV_dVb, vec8_SIMDTOVECTOR(1.0), condmask_true0); + } + { + VbseffCV = vec8_blend(VbseffCV, pParam->BSIM3v32phi - Phis, condmask_false0); + dVbseffCV_dVb = vec8_blend(dVbseffCV_dVb, -dPhis_dVb, condmask_false0); + } + } + + noff = n * pParam->BSIM3v32noff; + dnoff_dVd = pParam->BSIM3v32noff * dn_dVd; + dnoff_dVb = pParam->BSIM3v32noff * dn_dVb; + T0 = Vtm * noff; + voffcv = pParam->BSIM3v32voffcv; + VgstNVt = (Vgst - voffcv) / T0; + if (1) + { + Vec8m condmask0 = VgstNVt > EXP_THRESHOLD; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Vgsteff = vec8_blend(Vgsteff, Vgst - voffcv, condmask_true0); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, dVgs_eff_dVg, condmask_true0); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, -dVth_dVd, condmask_true0); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, -dVth_dVb, condmask_true0); + } + if (1) + { + Vec8m condmask1 = VgstNVt < (-EXP_THRESHOLD); + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + Vgsteff = vec8_blend(Vgsteff, T0 * log(1.0 + MIN_EXP), condmask_true1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, vec8_SIMDTOVECTOR(0.0), condmask_true1); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, Vgsteff / noff, condmask_true1); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, dVgsteff_dVd * dnoff_dVb, condmask_true1); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, dVgsteff_dVd * dnoff_dVd, condmask_true1); + } + { + ExpVgst = vec8_blend(ExpVgst, vec8_exp(VgstNVt), condmask_false1); + Vgsteff = 
vec8_blend(Vgsteff, T0 * vec8_log(1.0 + ExpVgst), condmask_false1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, ExpVgst / (1.0 + ExpVgst), condmask_false1); + dVgsteff_dVd = vec8_blend(dVgsteff_dVd, ((-dVgsteff_dVg) * (dVth_dVd + (((Vgst - voffcv) / noff) * dnoff_dVd))) + ((Vgsteff / noff) * dnoff_dVd), condmask_false1); + dVgsteff_dVb = vec8_blend(dVgsteff_dVb, ((-dVgsteff_dVg) * (dVth_dVb + (((Vgst - voffcv) / noff) * dnoff_dVb))) + ((Vgsteff / noff) * dnoff_dVb), condmask_false1); + dVgsteff_dVg = vec8_blend(dVgsteff_dVg, dVgsteff_dVg * dVgs_eff_dVg, condmask_false1); + } + } + + } + + if (model->BSIM3v32capMod == 1) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = vec8_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + Arg1 = ((Vgs_eff - VbseffCV) - Vfb) - Vgsteff; + if (1) + { + Vec8m condmask0 = Arg1 <= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + qgate = vec8_blend(qgate, CoxWL * Arg1, condmask_true0); + Cgg = vec8_blend(Cgg, CoxWL * (dVgs_eff_dVg - dVgsteff_dVg), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec8_blend(Cgd, (-CoxWL) * dVgsteff_dVd, condmask_true0); + Cgb = vec8_blend(Cgb, (-CoxWL) * (dVbseffCV_dVb + dVgsteff_dVb), 
condmask_true0); + break; + + case BSIM3v32V32: + + default: + Cgd = vec8_blend(Cgd, (-CoxWL) * (dVfb_dVd + dVgsteff_dVd), condmask_true0); + Cgb = vec8_blend(Cgb, (-CoxWL) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_true0); + + } + + } + { + T0 = vec8_blend(T0, vec8_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox), condmask_false0); + T1 = vec8_blend(T1, vec8_sqrt((T0 * T0) + Arg1), condmask_false0); + T2 = vec8_blend(T2, (CoxWL * T0) / T1, condmask_false0); + qgate = vec8_blend(qgate, (CoxWL * pParam->BSIM3v32k1ox) * (T1 - T0), condmask_false0); + Cgg = vec8_blend(Cgg, T2 * (dVgs_eff_dVg - dVgsteff_dVg), condmask_false0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = vec8_blend(Cgd, (-T2) * dVgsteff_dVd, condmask_false0); + Cgb = vec8_blend(Cgb, (-T2) * (dVbseffCV_dVb + dVgsteff_dVb), condmask_false0); + break; + + case BSIM3v32V32: + + default: + Cgd = vec8_blend(Cgd, (-T2) * (dVfb_dVd + dVgsteff_dVd), condmask_false0); + Cgb = vec8_blend(Cgb, (-T2) * ((dVfb_dVb + dVbseffCV_dVb) + dVgsteff_dVb), condmask_false0); + + } + + } + } + + qbulk = -qgate; + Cbg = -Cgg; + Cbd = -Cgd; + Cbb = -Cgb; + One_Third_CoxWL = CoxWL / 3.0; + Two_Third_CoxWL = 2.0 * One_Third_CoxWL; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + if (1) + { + Vec8m condmask0 = VdsatCV < Vds; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + dVdsatCV_dVg = vec8_blend(dVdsatCV_dVg, 1.0 / AbulkCV, condmask_true0); + dVdsatCV_dVb = vec8_blend(dVdsatCV_dVb, ((-VdsatCV) * dAbulkCV_dVb) / AbulkCV, condmask_true0); + T0 = vec8_blend(T0, Vgsteff - (VdsatCV / 3.0), condmask_true0); + dT0_dVg = vec8_blend(dT0_dVg, 1.0 - (dVdsatCV_dVg / 3.0), condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, (-dVdsatCV_dVb) / 3.0, condmask_true0); + qgate = vec8_blend(qgate, qgate + (CoxWL * T0), condmask_true0); + Cgg1 = 
vec8_blend(Cgg1, CoxWL * dT0_dVg, condmask_true0); + Cgb1 = vec8_blend(Cgb1, (CoxWL * dT0_dVb) + (Cgg1 * dVgsteff_dVb), condmask_true0); + Cgd1 = vec8_blend(Cgd1, Cgg1 * dVgsteff_dVd, condmask_true0); + Cgg1 = vec8_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_true0); + Cgg = vec8_blend(Cgg, Cgg + Cgg1, condmask_true0); + Cgb = vec8_blend(Cgb, Cgb + Cgb1, condmask_true0); + Cgd = vec8_blend(Cgd, Cgd + Cgd1, condmask_true0); + T0 = vec8_blend(T0, VdsatCV - Vgsteff, condmask_true0); + dT0_dVg = vec8_blend(dT0_dVg, dVdsatCV_dVg - 1.0, condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, dVdsatCV_dVb, condmask_true0); + qbulk = vec8_blend(qbulk, qbulk + (One_Third_CoxWL * T0), condmask_true0); + Cbg1 = vec8_blend(Cbg1, One_Third_CoxWL * dT0_dVg, condmask_true0); + Cbb1 = vec8_blend(Cbb1, (One_Third_CoxWL * dT0_dVb) + (Cbg1 * dVgsteff_dVb), condmask_true0); + Cbd1 = vec8_blend(Cbd1, Cbg1 * dVgsteff_dVd, condmask_true0); + Cbg1 = vec8_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_true0); + Cbg = vec8_blend(Cbg, Cbg + Cbg1, condmask_true0); + Cbb = vec8_blend(Cbb, Cbb + Cbb1, condmask_true0); + Cbd = vec8_blend(Cbd, Cbd + Cbd1, condmask_true0); + if (model->BSIM3v32xpart > 0.5) + T0 = vec8_blend(T0, vec8_SIMDTOVECTOR(-Two_Third_CoxWL), condmask_true0); + else + if (model->BSIM3v32xpart < 0.5) + T0 = vec8_blend(T0, vec8_SIMDTOVECTOR((-0.4) * CoxWL), condmask_true0); + else + T0 = vec8_blend(T0, vec8_SIMDTOVECTOR(-One_Third_CoxWL), condmask_true0); + + + qsrc = vec8_blend(qsrc, T0 * Vgsteff, condmask_true0); + Csg = vec8_blend(Csg, T0 * dVgsteff_dVg, condmask_true0); + Csb = vec8_blend(Csb, T0 * dVgsteff_dVb, condmask_true0); + Csd = vec8_blend(Csd, T0 * dVgsteff_dVd, condmask_true0); + Cgb = vec8_blend(Cgb, Cgb * dVbseff_dVb, condmask_true0); + Cbb = vec8_blend(Cbb, Cbb * dVbseff_dVb, condmask_true0); + Csb = vec8_blend(Csb, Csb * dVbseff_dVb, condmask_true0); + } + { + T0 = vec8_blend(T0, AbulkCV * Vds, condmask_false0); + T1 = vec8_blend(T1, 12.0 * ((Vgsteff - (0.5 * T0)) + 1.e-20), 
condmask_false0); + T2 = vec8_blend(T2, Vds / T1, condmask_false0); + T3 = vec8_blend(T3, T0 * T2, condmask_false0); + dT3_dVg = vec8_blend(dT3_dVg, (((-12.0) * T2) * T2) * AbulkCV, condmask_false0); + dT3_dVd = vec8_blend(dT3_dVd, ((((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / T1) / T1) - 0.5, condmask_false0); + dT3_dVb = vec8_blend(dT3_dVb, (((12.0 * T2) * T2) * dAbulkCV_dVb) * Vgsteff, condmask_false0); + qgate = vec8_blend(qgate, qgate + (CoxWL * ((Vgsteff - (0.5 * Vds)) + T3)), condmask_false0); + Cgg1 = vec8_blend(Cgg1, CoxWL * (1.0 + dT3_dVg), condmask_false0); + Cgb1 = vec8_blend(Cgb1, (CoxWL * dT3_dVb) + (Cgg1 * dVgsteff_dVb), condmask_false0); + Cgd1 = vec8_blend(Cgd1, (CoxWL * dT3_dVd) + (Cgg1 * dVgsteff_dVd), condmask_false0); + Cgg1 = vec8_blend(Cgg1, Cgg1 * dVgsteff_dVg, condmask_false0); + Cgg = vec8_blend(Cgg, Cgg + Cgg1, condmask_false0); + Cgb = vec8_blend(Cgb, Cgb + Cgb1, condmask_false0); + Cgd = vec8_blend(Cgd, Cgd + Cgd1, condmask_false0); + qbulk = vec8_blend(qbulk, qbulk + ((CoxWL * (1.0 - AbulkCV)) * ((0.5 * Vds) - T3)), condmask_false0); + Cbg1 = vec8_blend(Cbg1, (-CoxWL) * ((1.0 - AbulkCV) * dT3_dVg), condmask_false0); + Cbb1 = vec8_blend(Cbb1, ((-CoxWL) * (((1.0 - AbulkCV) * dT3_dVb) + (((0.5 * Vds) - T3) * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb), condmask_false0); + Cbd1 = vec8_blend(Cbd1, (((-CoxWL) * (1.0 - AbulkCV)) * dT3_dVd) + (Cbg1 * dVgsteff_dVd), condmask_false0); + Cbg1 = vec8_blend(Cbg1, Cbg1 * dVgsteff_dVg, condmask_false0); + Cbg = vec8_blend(Cbg, Cbg + Cbg1, condmask_false0); + Cbb = vec8_blend(Cbb, Cbb + Cbb1, condmask_false0); + Cbd = vec8_blend(Cbd, Cbd + Cbd1, condmask_false0); + if (model->BSIM3v32xpart > 0.5) + { + T1 = vec8_blend(T1, T1 + T1, condmask_false0); + qsrc = vec8_blend(qsrc, (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)), condmask_false0); + Csg = vec8_blend(Csg, (-CoxWL) * (0.5 + (((((24.0 * T0) * Vds) / T1) / T1) * AbulkCV)), condmask_false0); + Csb = vec8_blend(Csb, ((-CoxWL) * (((0.25 * 
Vds) * dAbulkCV_dVb) - ((((((12.0 * T0) * Vds) / T1) / T1) * ((4.0 * Vgsteff) - T0)) * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec8_blend(Csd, ((-CoxWL) * ((0.25 * AbulkCV) - (((((12.0 * AbulkCV) * T0) / T1) / T1) * ((4.0 * Vgsteff) - T0)))) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec8_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = vec8_blend(T1, T1 / 12.0, condmask_false0); + T2 = vec8_blend(T2, (0.5 * CoxWL) / (T1 * T1), condmask_false0); + T3 = vec8_blend(T3, (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0), condmask_false0); + qsrc = vec8_blend(qsrc, (-T2) * T3, condmask_false0); + T4 = vec8_blend(T4, (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0), condmask_false0); + Csg = vec8_blend(Csg, (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))), condmask_false0); + Csb = vec8_blend(Csb, ((((qsrc / T1) * Vds) + ((T2 * T4) * Vds)) * dAbulkCV_dVb) + (Csg * dVgsteff_dVb), condmask_false0); + Csd = vec8_blend(Csd, (((qsrc / T1) + (T2 * T4)) * AbulkCV) + (Csg * dVgsteff_dVd), condmask_false0); + Csg = vec8_blend(Csg, Csg * dVgsteff_dVg, condmask_false0); + } + else + { + qsrc = vec8_blend(qsrc, (-0.5) * (qgate + qbulk), condmask_false0); + Csg = vec8_blend(Csg, (-0.5) * (Cgg1 + Cbg1), condmask_false0); + Csb = vec8_blend(Csb, (-0.5) * (Cgb1 + Cbb1), condmask_false0); + Csd = vec8_blend(Csd, (-0.5) * (Cgd1 + Cbd1), condmask_false0); + } + + + Cgb = vec8_blend(Cgb, Cgb * dVbseff_dVb, condmask_false0); + Cbb = vec8_blend(Cbb, Cbb * dVbseff_dVb, condmask_false0); + Csb = vec8_blend(Csb, Csb * dVbseff_dVb, condmask_false0); + } + } + + qdrn = -((qgate + qbulk) + qsrc); + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + heres[4]->BSIM3v32cggb = 
Cgg[4]; + heres[5]->BSIM3v32cggb = Cgg[5]; + heres[6]->BSIM3v32cggb = Cgg[6]; + heres[7]->BSIM3v32cggb = Cgg[7]; + } + { + Vec8d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + heres[4]->BSIM3v32cgsb = val[4]; + heres[5]->BSIM3v32cgsb = val[5]; + heres[6]->BSIM3v32cgsb = val[6]; + heres[7]->BSIM3v32cgsb = val[7]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + heres[4]->BSIM3v32cgdb = Cgd[4]; + heres[5]->BSIM3v32cgdb = Cgd[5]; + heres[6]->BSIM3v32cgdb = Cgd[6]; + heres[7]->BSIM3v32cgdb = Cgd[7]; + } + { + Vec8d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + heres[4]->BSIM3v32cdgb = val[4]; + heres[5]->BSIM3v32cdgb = val[5]; + heres[6]->BSIM3v32cdgb = val[6]; + heres[7]->BSIM3v32cdgb = val[7]; + } + { + Vec8d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + heres[4]->BSIM3v32cdsb = val[4]; + heres[5]->BSIM3v32cdsb = val[5]; + heres[6]->BSIM3v32cdsb = val[6]; + heres[7]->BSIM3v32cdsb = val[7]; + } + { + Vec8d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + heres[4]->BSIM3v32cddb = val[4]; + heres[5]->BSIM3v32cddb = val[5]; + heres[6]->BSIM3v32cddb = val[6]; + heres[7]->BSIM3v32cddb = val[7]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + heres[4]->BSIM3v32cbgb = Cbg[4]; + heres[5]->BSIM3v32cbgb = Cbg[5]; + heres[6]->BSIM3v32cbgb = Cbg[6]; + 
heres[7]->BSIM3v32cbgb = Cbg[7]; + } + { + Vec8d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + heres[4]->BSIM3v32cbsb = val[4]; + heres[5]->BSIM3v32cbsb = val[5]; + heres[6]->BSIM3v32cbsb = val[6]; + heres[7]->BSIM3v32cbsb = val[7]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + heres[4]->BSIM3v32cbdb = Cbd[4]; + heres[5]->BSIM3v32cbdb = Cbd[5]; + heres[6]->BSIM3v32cbdb = Cbd[6]; + heres[7]->BSIM3v32cbdb = Cbd[7]; + } + { + Vec8d val = -(qgate + qbulk); + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + heres[4]->BSIM3v32qinv = val[4]; + heres[5]->BSIM3v32qinv = val[5]; + heres[6]->BSIM3v32qinv = val[6]; + heres[7]->BSIM3v32qinv = val[7]; + } + } + else + if (model->BSIM3v32capMod == 2) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Vfb = (Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}; + break; + + case BSIM3v32V32: + Vfb = (Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}; + dVfb_dVb = (dVfb_dVd = vec8_SIMDTOVECTOR(0.0)); + break; + + default: + Vfb = (Vth - pParam->BSIM3v32phi) - (pParam->BSIM3v32k1ox * sqrtPhis); + dVfb_dVb = dVth_dVb - (pParam->BSIM3v32k1ox * dsqrtPhis_dVb); + dVfb_dVd = dVth_dVd; + + } + + V3 = ((Vfb - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * Vfb; + if (1) + { + Vec8m condmask0 = Vfb <= 0.0; + Vec8m condmask_true0 = 
condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, T0 - T2, condmask_true0); + T2 = vec8_blend(T2, vec8_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec8_blend(T0, T0 + T2, condmask_false0); + T2 = vec8_blend(T2, vec8_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec8_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = Vfb - (0.5 * (V3 + T0)); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVd = ((1.0 - T1) - T2) * dVfb_dVd; + + } + + dVfbeff_dVg = T1 * dVgs_eff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + break; + + case BSIM3v32V32: + + default: + dVfbeff_dVb = (((1.0 - T1) - T2) * dVfb_dVb) - (T1 * dVbseffCV_dVb); + + } + + Qac0 = CoxWL * (Vfbeff - Vfb); + dQac0_dVg = CoxWL * dVfbeff_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + break; + + case BSIM3v32V32: + + default: + dQac0_dVd = CoxWL * (dVfbeff_dVd - dVfb_dVd); + + } + + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQac0_dVb = CoxWL * dVfbeff_dVb; + break; + + case BSIM3v32V32: + + default: + dQac0_dVb = CoxWL * (dVfbeff_dVb - dVfb_dVb); + + } + + T0 = vec8_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec8_SIMDTOVECTOR(0.0); + T2 = vec8_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec8m condmask0 = T3 < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), condmask_true0); + T2 = vec8_blend(T2, vec8_SIMDTOVECTOR(CoxWL), condmask_true0); + } + { + T1 = vec8_blend(T1, vec8_sqrt((T0 * T0) + T3), condmask_false0); + T2 = 
vec8_blend(T2, (CoxWL * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWL * pParam->BSIM3v32k1ox) * (T1 - T0); + dQsub0_dVg = T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dQsub0_dVd = (-T2) * dVgsteff_dVd; + break; + + case BSIM3v32V32: + + default: + dQsub0_dVd = (-T2) * (dVfbeff_dVd + dVgsteff_dVd); + + } + + dQsub0_dVb = (-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb); + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = Vgsteff / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec8_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + dVdseffCV_dVg = T3; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec8m condmask0 = Vds == 0.0; + Vec8m condmask_true0 = condmask0; + { + VdseffCV = vec8_blend(VdseffCV, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec8_blend(dVdseffCV_dVg, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = vec8_blend(dVdseffCV_dVb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = 12.0 * ((Vgsteff - (0.5 * T0)) + 1e-20); + T2 = VdseffCV / T1; + T3 = T0 * T2; + T4 = 1.0 - (((12.0 * T2) * T2) * AbulkCV); + T5 = (((6.0 * T0) * ((4.0 * Vgsteff) - T0)) / (T1 * T1)) - 0.5; + T6 = ((12.0 * T2) * T2) * Vgsteff; + qinoi = (-CoxWL) * ((Vgsteff - (0.5 * T0)) + (AbulkCV * T3)); + qgate = CoxWL * ((Vgsteff - (0.5 * VdseffCV)) + T3); + Cgg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cgd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd); + Cgb1 = (CoxWL * ((T5 * 
dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb); + Cgg1 *= dVgsteff_dVg; + T7 = 1.0 - AbulkCV; + qbulk = (CoxWL * T7) * ((0.5 * VdseffCV) - T3); + T4 = (-T7) * (T4 - 1.0); + T5 = (-T7) * T5; + T6 = -((T7 * T6) + ((0.5 * VdseffCV) - T3)); + Cbg1 = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Cbd1 = ((CoxWL * T5) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd); + Cbb1 = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb); + Cbg1 *= dVgsteff_dVg; + if (model->BSIM3v32xpart > 0.5) + { + T1 = T1 + T1; + qsrc = (-CoxWL) * (((0.5 * Vgsteff) + (0.25 * T0)) - ((T0 * T0) / T1)); + T7 = ((4.0 * Vgsteff) - T0) / (T1 * T1); + T4 = -(0.5 + (((24.0 * T0) * T0) / (T1 * T1))); + T5 = -((0.25 * AbulkCV) - (((12.0 * AbulkCV) * T0) * T7)); + T6 = -((0.25 * VdseffCV) - (((12.0 * T0) * VdseffCV) * T7)); + Csg = CoxWL * (T4 + (T5 * dVdseffCV_dVg)); + Csd = ((CoxWL * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = (CoxWL * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + if (model->BSIM3v32xpart < 0.5) + { + T1 = T1 / 12.0; + T2 = (0.5 * CoxWL) / (T1 * T1); + T3 = (Vgsteff * ((((2.0 * T0) * T0) / 3.0) + (Vgsteff * (Vgsteff - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T2) * T3; + T7 = (((4.0 / 3.0) * Vgsteff) * (Vgsteff - T0)) + ((0.4 * T0) * T0); + T4 = (((-2.0) * qsrc) / T1) - (T2 * ((Vgsteff * ((3.0 * Vgsteff) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T5 = ((qsrc / T1) + (T2 * T7)) * AbulkCV; + T6 = ((qsrc / T1) * VdseffCV) + ((T2 * T7) * VdseffCV); + Csg = T4 + (T5 * dVdseffCV_dVg); + Csd = (T5 * dVdseffCV_dVd) + (Csg * dVgsteff_dVd); + Csb = ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb)) + (Csg * dVgsteff_dVb); + Csg *= dVgsteff_dVg; + } + else + { + qsrc = (-0.5) * (qgate + qbulk); + Csg = (-0.5) * (Cgg1 + Cbg1); + Csb = (-0.5) * (Cgb1 + Cbb1); + Csd = (-0.5) * (Cgd1 + Cbd1); + } + + + qgate += Qac0 + Qsub0; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + 
qbulk) + qsrc); + Cgg = (dQac0_dVg + dQsub0_dVg) + Cgg1; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cgd = dQsub0_dVd + Cgd1; + break; + + case BSIM3v32V32: + + default: + Cgd = (dQac0_dVd + dQsub0_dVd) + Cgd1; + + } + + Cgb = (dQac0_dVb + dQsub0_dVb) + Cgb1; + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + Cbd = Cbd1 - dQsub0_dVd; + break; + + case BSIM3v32V32: + + default: + Cbd = (Cbd1 - dQac0_dVd) - dQsub0_dVd; + + } + + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + heres[4]->BSIM3v32cggb = Cgg[4]; + heres[5]->BSIM3v32cggb = Cgg[5]; + heres[6]->BSIM3v32cggb = Cgg[6]; + heres[7]->BSIM3v32cggb = Cgg[7]; + } + { + Vec8d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + heres[4]->BSIM3v32cgsb = val[4]; + heres[5]->BSIM3v32cgsb = val[5]; + heres[6]->BSIM3v32cgsb = val[6]; + heres[7]->BSIM3v32cgsb = val[7]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + heres[4]->BSIM3v32cgdb = Cgd[4]; + heres[5]->BSIM3v32cgdb = Cgd[5]; + heres[6]->BSIM3v32cgdb = Cgd[6]; + heres[7]->BSIM3v32cgdb = Cgd[7]; + } + { + Vec8d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + heres[4]->BSIM3v32cdgb = val[4]; + heres[5]->BSIM3v32cdgb = val[5]; + heres[6]->BSIM3v32cdgb = val[6]; + heres[7]->BSIM3v32cdgb = val[7]; + } + { + Vec8d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + 
Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + heres[4]->BSIM3v32cdsb = val[4]; + heres[5]->BSIM3v32cdsb = val[5]; + heres[6]->BSIM3v32cdsb = val[6]; + heres[7]->BSIM3v32cdsb = val[7]; + } + { + Vec8d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + heres[4]->BSIM3v32cddb = val[4]; + heres[5]->BSIM3v32cddb = val[5]; + heres[6]->BSIM3v32cddb = val[6]; + heres[7]->BSIM3v32cddb = val[7]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + heres[4]->BSIM3v32cbgb = Cbg[4]; + heres[5]->BSIM3v32cbgb = Cbg[5]; + heres[6]->BSIM3v32cbgb = Cbg[6]; + heres[7]->BSIM3v32cbgb = Cbg[7]; + } + { + Vec8d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + heres[4]->BSIM3v32cbsb = val[4]; + heres[5]->BSIM3v32cbsb = val[5]; + heres[6]->BSIM3v32cbsb = val[6]; + heres[7]->BSIM3v32cbsb = val[7]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + heres[4]->BSIM3v32cbdb = Cbd[4]; + heres[5]->BSIM3v32cbdb = Cbd[5]; + heres[6]->BSIM3v32cbdb = Cbd[6]; + heres[7]->BSIM3v32cbdb = Cbd[7]; + } + { + heres[0]->BSIM3v32qinv = qinoi[0]; + heres[1]->BSIM3v32qinv = qinoi[1]; + heres[2]->BSIM3v32qinv = qinoi[2]; + heres[3]->BSIM3v32qinv = qinoi[3]; + heres[4]->BSIM3v32qinv = qinoi[4]; + heres[5]->BSIM3v32qinv = qinoi[5]; + heres[6]->BSIM3v32qinv = qinoi[6]; + heres[7]->BSIM3v32qinv = qinoi[7]; + } + } + else + if (model->BSIM3v32capMod == 3) + { + V3 = ((((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, 
heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}) - Vgs_eff) + VbseffCV) - DELTA_3; + T0 = V3 * V3; + T2 = (4.0 * DELTA_3) * ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}); + if (1) + { + Vec8m condmask0 = ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}) <= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T0 = vec8_blend(T0, T0 - T2, condmask_true0); + T2 = vec8_blend(T2, vec8_SIMDTOVECTOR(-DELTA_3), condmask_true0); + } + { + T0 = vec8_blend(T0, T0 + T2, condmask_false0); + T2 = vec8_blend(T2, vec8_SIMDTOVECTOR(DELTA_3), condmask_false0); + } + } + + T0 = vec8_sqrt(T0); + T2 = T2 / T0; + T1 = 0.5 * (1.0 + (V3 / T0)); + Vfbeff = ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb}) - (0.5 * (V3 + T0)); + dVfbeff_dVg = T1 * dVgs_eff_dVg; + dVfbeff_dVb = (-T1) * dVbseffCV_dVb; + Cox = model->BSIM3v32cox; + Tox = 1.0e8 * model->BSIM3v32tox; + T0 = ((Vgs_eff - VbseffCV) - ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb})) / Tox; + dT0_dVg = dVgs_eff_dVg / Tox; + dT0_dVb = (-dVbseffCV_dVb) / Tox; + tmp = T0 * pParam->BSIM3v32acde; + dTcen_dVg = (dTcen_dVb = vec8_SIMDTOVECTOR(0.0)); + if (1) + { + Vec8m condmask0 = ((-EXP_THRESHOLD) < tmp) & (tmp < EXP_THRESHOLD); + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Tcen = 
vec8_blend(Tcen, pParam->BSIM3v32ldeb * vec8_exp(tmp), condmask_true0); + dTcen_dVg = vec8_blend(dTcen_dVg, pParam->BSIM3v32acde * Tcen, condmask_true0); + dTcen_dVb = vec8_blend(dTcen_dVb, dTcen_dVg * dT0_dVb, condmask_true0); + dTcen_dVg = vec8_blend(dTcen_dVg, dTcen_dVg * dT0_dVg, condmask_true0); + } + if (1) + { + Vec8m condmask1 = tmp <= (-EXP_THRESHOLD); + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + Tcen = vec8_blend(Tcen, vec8_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MIN_EXP), condmask_true1); + } + { + Tcen = vec8_blend(Tcen, vec8_SIMDTOVECTOR(pParam->BSIM3v32ldeb * MAX_EXP), condmask_false1); + } + } + + } + + LINK = 1.0e-3 * model->BSIM3v32tox; + V3 = (pParam->BSIM3v32ldeb - Tcen) - LINK; + V4 = vec8_sqrt((V3 * V3) + ((4.0 * LINK) * pParam->BSIM3v32ldeb)); + Tcen = pParam->BSIM3v32ldeb - (0.5 * (V3 + V4)); + T1 = 0.5 * (1.0 + (V3 / V4)); + dTcen_dVg *= T1; + dTcen_dVb *= T1; + Ccen = EPSSI / Tcen; + T2 = Cox / (Cox + Ccen); + Coxeff = T2 * Ccen; + T3 = (-Ccen) / Tcen; + dCoxeff_dVg = (T2 * T2) * T3; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + Qac0 = CoxWLcen * (Vfbeff - ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb})); + QovCox = Qac0 / Coxeff; + dQac0_dVg = (CoxWLcen * dVfbeff_dVg) + (QovCox * dCoxeff_dVg); + dQac0_dVb = (CoxWLcen * dVfbeff_dVb) + (QovCox * dCoxeff_dVb); + T0 = vec8_SIMDTOVECTOR(0.5 * pParam->BSIM3v32k1ox); + T3 = ((Vgs_eff - Vfbeff) - VbseffCV) - Vgsteff; + if (pParam->BSIM3v32k1ox == 0.0) + { + T1 = vec8_SIMDTOVECTOR(0.0); + T2 = vec8_SIMDTOVECTOR(0.0); + } + else + if (1) + { + Vec8m condmask0 = T3 < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, T0 + (T3 / pParam->BSIM3v32k1ox), 
condmask_true0); + T2 = vec8_blend(T2, CoxWLcen, condmask_true0); + } + { + T1 = vec8_blend(T1, vec8_sqrt((T0 * T0) + T3), condmask_false0); + T2 = vec8_blend(T2, (CoxWLcen * T0) / T1, condmask_false0); + } + } + + + Qsub0 = (CoxWLcen * pParam->BSIM3v32k1ox) * (T1 - T0); + QovCox = Qsub0 / Coxeff; + dQsub0_dVg = (T2 * ((dVgs_eff_dVg - dVfbeff_dVg) - dVgsteff_dVg)) + (QovCox * dCoxeff_dVg); + dQsub0_dVd = (-T2) * dVgsteff_dVd; + dQsub0_dVb = ((-T2) * ((dVfbeff_dVb + dVbseffCV_dVb) + dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + if (pParam->BSIM3v32k1ox <= 0.0) + { + Denomi = vec8_SIMDTOVECTOR((0.25 * pParam->BSIM3v32moin) * Vtm); + T0 = vec8_SIMDTOVECTOR(0.5 * pParam->BSIM3v32sqrtPhi); + } + else + { + Denomi = vec8_SIMDTOVECTOR(((pParam->BSIM3v32moin * Vtm) * pParam->BSIM3v32k1ox) * pParam->BSIM3v32k1ox); + T0 = vec8_SIMDTOVECTOR(pParam->BSIM3v32k1ox * pParam->BSIM3v32sqrtPhi); + } + + T1 = (2.0 * T0) + Vgsteff; + DeltaPhi = Vtm * vec8_log(1.0 + ((T1 * Vgsteff) / Denomi)); + dDeltaPhi_dVg = ((2.0 * Vtm) * (T1 - T0)) / (Denomi + (T1 * Vgsteff)); + dDeltaPhi_dVd = dDeltaPhi_dVg * dVgsteff_dVd; + dDeltaPhi_dVb = dDeltaPhi_dVg * dVgsteff_dVb; + T3 = 4.0 * ((Vth - ((Vec8d ){heres[0]->BSIM3v32vfbzb, heres[1]->BSIM3v32vfbzb, heres[2]->BSIM3v32vfbzb, heres[3]->BSIM3v32vfbzb, heres[4]->BSIM3v32vfbzb, heres[5]->BSIM3v32vfbzb, heres[6]->BSIM3v32vfbzb, heres[7]->BSIM3v32vfbzb})) - pParam->BSIM3v32phi); + Tox += Tox; + if (1) + { + Vec8m condmask0 = T3 >= 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec8_blend(T0, (Vgsteff + T3) / Tox, condmask_true0); + dT0_dVd = vec8_blend(dT0_dVd, (dVgsteff_dVd + (4.0 * dVth_dVd)) / Tox, condmask_true0); + dT0_dVb = vec8_blend(dT0_dVb, (dVgsteff_dVb + (4.0 * dVth_dVb)) / Tox, condmask_true0); + break; + + case BSIM3v32V32: + + default: + T0 = vec8_blend(T0, (Vgsteff + T3) / Tox, 
condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + T0 = vec8_blend(T0, (Vgsteff + 1.0e-20) / Tox, condmask_false0); + dT0_dVd = vec8_blend(dT0_dVd, dVgsteff_dVd / Tox, condmask_false0); + dT0_dVb = vec8_blend(dT0_dVb, dVgsteff_dVb / Tox, condmask_false0); + break; + + case BSIM3v32V32: + + default: + T0 = vec8_blend(T0, (Vgsteff + 1.0e-20) / Tox, condmask_false0); + + } + + } + } + + tmp = vec8_exp(0.7 * vec8_log(T0)); + T1 = 1.0 + tmp; + T2 = (0.7 * tmp) / (T0 * Tox); + Tcen = 1.9e-9 / T1; + dTcen_dVg = (((-1.9e-9) * T2) / T1) / T1; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + dTcen_dVd = Tox * dTcen_dVg; + dTcen_dVb = dTcen_dVd * dT0_dVb; + dTcen_dVd *= dT0_dVd; + break; + + case BSIM3v32V32: + + default: + dTcen_dVd = dTcen_dVg * ((4.0 * dVth_dVd) + dVgsteff_dVd); + dTcen_dVb = dTcen_dVg * ((4.0 * dVth_dVb) + dVgsteff_dVb); + + } + + dTcen_dVg *= dVgsteff_dVg; + Ccen = EPSSI / Tcen; + T0 = Cox / (Cox + Ccen); + Coxeff = T0 * Ccen; + T1 = (-Ccen) / Tcen; + dCoxeff_dVg = (T0 * T0) * T1; + dCoxeff_dVd = dCoxeff_dVg * dTcen_dVd; + dCoxeff_dVb = dCoxeff_dVg * dTcen_dVb; + dCoxeff_dVg *= dTcen_dVg; + CoxWLcen = (CoxWL * Coxeff) / Cox; + AbulkCV = Abulk0 * pParam->BSIM3v32abulkCVfactor; + dAbulkCV_dVb = pParam->BSIM3v32abulkCVfactor * dAbulk0_dVb; + VdsatCV = (Vgsteff - DeltaPhi) / AbulkCV; + V4 = (VdsatCV - Vds) - DELTA_4; + T0 = vec8_sqrt((V4 * V4) + ((4.0 * DELTA_4) * VdsatCV)); + VdseffCV = VdsatCV - (0.5 * (V4 + T0)); + T1 = 0.5 * (1.0 + (V4 / T0)); + T2 = DELTA_4 / T0; + T3 = ((1.0 - T1) - T2) / AbulkCV; + T4 = T3 * (1.0 - dDeltaPhi_dVg); + dVdseffCV_dVg = T4; + dVdseffCV_dVd = T1; + dVdseffCV_dVb = ((-T3) * VdsatCV) * dAbulkCV_dVb; + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + + case BSIM3v32V322: + if (1) + { + Vec8m condmask0 = Vds == 0.0; + Vec8m condmask_true0 = 
condmask0; + { + VdseffCV = vec8_blend(VdseffCV, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVg = vec8_blend(dVdseffCV_dVg, vec8_SIMDTOVECTOR(0.0), condmask_true0); + dVdseffCV_dVb = vec8_blend(dVdseffCV_dVb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + } + } + + break; + + case BSIM3v32V32: + + default: + break; + + } + + T0 = AbulkCV * VdseffCV; + T1 = Vgsteff - DeltaPhi; + T2 = 12.0 * ((T1 - (0.5 * T0)) + 1.0e-20); + T3 = T0 / T2; + T4 = 1.0 - ((12.0 * T3) * T3); + T5 = AbulkCV * ((((6.0 * T0) * ((4.0 * T1) - T0)) / (T2 * T2)) - 0.5); + T6 = (T5 * VdseffCV) / AbulkCV; + qgate = (qinoi = CoxWLcen * (T1 - (T0 * (0.5 - T3)))); + QovCox = qgate / Coxeff; + Cgg1 = CoxWLcen * ((T4 * (1.0 - dDeltaPhi_dVg)) + (T5 * dVdseffCV_dVg)); + Cgd1 = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Cgg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cgb1 = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Cgg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cgg1 = (Cgg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + T7 = 1.0 - AbulkCV; + T8 = T2 * T2; + T9 = (((12.0 * T7) * T0) * T0) / (T8 * AbulkCV); + T10 = T9 * (1.0 - dDeltaPhi_dVg); + T11 = ((-T7) * T5) / AbulkCV; + T12 = -(((T9 * T1) / AbulkCV) + (VdseffCV * (0.5 - (T0 / T2)))); + qbulk = (CoxWLcen * T7) * ((0.5 * VdseffCV) - ((T0 * VdseffCV) / T2)); + QovCox = qbulk / Coxeff; + Cbg1 = CoxWLcen * (T10 + (T11 * dVdseffCV_dVg)); + Cbd1 = (((CoxWLcen * T11) * dVdseffCV_dVd) + (Cbg1 * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Cbb1 = ((CoxWLcen * ((T11 * dVdseffCV_dVb) + (T12 * dAbulkCV_dVb))) + (Cbg1 * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Cbg1 = (Cbg1 * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + if (model->BSIM3v32xpart > 0.5) + { + qsrc = (-CoxWLcen) * (((T1 / 2.0) + (T0 / 4.0)) - (((0.5 * T0) * T0) / T2)); + QovCox = qsrc / Coxeff; + T2 += T2; + T3 = T2 * T2; + T7 = -(0.25 - (((12.0 * T0) * ((4.0 * T1) - T0)) / T3)); + T4 = (-(0.5 + (((24.0 * T0) * T0) / T3))) * (1.0 - dDeltaPhi_dVg); + T5 = T7 * AbulkCV; + T6 = T7 * 
VdseffCV; + Csg = CoxWLcen * (T4 + (T5 * dVdseffCV_dVg)); + Csd = (((CoxWLcen * T5) * dVdseffCV_dVd) + (Csg * dVgsteff_dVd)) + (QovCox * dCoxeff_dVd); + Csb = ((CoxWLcen * ((T5 * dVdseffCV_dVb) + (T6 * dAbulkCV_dVb))) + (Csg * dVgsteff_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + if (model->BSIM3v32xpart < 0.5) + { + T2 = T2 / 12.0; + T3 = (0.5 * CoxWLcen) / (T2 * T2); + T4 = (T1 * ((((2.0 * T0) * T0) / 3.0) + (T1 * (T1 - ((4.0 * T0) / 3.0))))) - ((((2.0 * T0) * T0) * T0) / 15.0); + qsrc = (-T3) * T4; + QovCox = qsrc / Coxeff; + T8 = (((4.0 / 3.0) * T1) * (T1 - T0)) + ((0.4 * T0) * T0); + T5 = (((-2.0) * qsrc) / T2) - (T3 * ((T1 * ((3.0 * T1) - ((8.0 * T0) / 3.0))) + (((2.0 * T0) * T0) / 3.0))); + T6 = AbulkCV * ((qsrc / T2) + (T3 * T8)); + T7 = (T6 * VdseffCV) / AbulkCV; + Csg = (T5 * (1.0 - dDeltaPhi_dVg)) + (T6 * dVdseffCV_dVg); + Csd = ((Csg * dVgsteff_dVd) + (T6 * dVdseffCV_dVd)) + (QovCox * dCoxeff_dVd); + Csb = (((Csg * dVgsteff_dVb) + (T6 * dVdseffCV_dVb)) + (T7 * dAbulkCV_dVb)) + (QovCox * dCoxeff_dVb); + Csg = (Csg * dVgsteff_dVg) + (QovCox * dCoxeff_dVg); + } + else + { + qsrc = (-0.5) * qgate; + Csg = (-0.5) * Cgg1; + Csd = (-0.5) * Cgd1; + Csb = (-0.5) * Cgb1; + } + + + qgate += (Qac0 + Qsub0) - qbulk; + qbulk -= Qac0 + Qsub0; + qdrn = -((qgate + qbulk) + qsrc); + Cbg = (Cbg1 - dQac0_dVg) - dQsub0_dVg; + Cbd = Cbd1 - dQsub0_dVd; + Cbb = (Cbb1 - dQac0_dVb) - dQsub0_dVb; + Cgg = Cgg1 - Cbg; + Cgd = Cgd1 - Cbd; + Cgb = Cgb1 - Cbb; + Cgb *= dVbseff_dVb; + Cbb *= dVbseff_dVb; + Csb *= dVbseff_dVb; + { + heres[0]->BSIM3v32cggb = Cgg[0]; + heres[1]->BSIM3v32cggb = Cgg[1]; + heres[2]->BSIM3v32cggb = Cgg[2]; + heres[3]->BSIM3v32cggb = Cgg[3]; + heres[4]->BSIM3v32cggb = Cgg[4]; + heres[5]->BSIM3v32cggb = Cgg[5]; + heres[6]->BSIM3v32cggb = Cgg[6]; + heres[7]->BSIM3v32cggb = Cgg[7]; + } + { + Vec8d val = -((Cgg + Cgd) + Cgb); + heres[0]->BSIM3v32cgsb = val[0]; + heres[1]->BSIM3v32cgsb = val[1]; + 
heres[2]->BSIM3v32cgsb = val[2]; + heres[3]->BSIM3v32cgsb = val[3]; + heres[4]->BSIM3v32cgsb = val[4]; + heres[5]->BSIM3v32cgsb = val[5]; + heres[6]->BSIM3v32cgsb = val[6]; + heres[7]->BSIM3v32cgsb = val[7]; + } + { + heres[0]->BSIM3v32cgdb = Cgd[0]; + heres[1]->BSIM3v32cgdb = Cgd[1]; + heres[2]->BSIM3v32cgdb = Cgd[2]; + heres[3]->BSIM3v32cgdb = Cgd[3]; + heres[4]->BSIM3v32cgdb = Cgd[4]; + heres[5]->BSIM3v32cgdb = Cgd[5]; + heres[6]->BSIM3v32cgdb = Cgd[6]; + heres[7]->BSIM3v32cgdb = Cgd[7]; + } + { + Vec8d val = -((Cgg + Cbg) + Csg); + heres[0]->BSIM3v32cdgb = val[0]; + heres[1]->BSIM3v32cdgb = val[1]; + heres[2]->BSIM3v32cdgb = val[2]; + heres[3]->BSIM3v32cdgb = val[3]; + heres[4]->BSIM3v32cdgb = val[4]; + heres[5]->BSIM3v32cdgb = val[5]; + heres[6]->BSIM3v32cdgb = val[6]; + heres[7]->BSIM3v32cdgb = val[7]; + } + { + Vec8d val = (((((((Cgg + Cgd) + Cgb) + Cbg) + Cbd) + Cbb) + Csg) + Csd) + Csb; + heres[0]->BSIM3v32cdsb = val[0]; + heres[1]->BSIM3v32cdsb = val[1]; + heres[2]->BSIM3v32cdsb = val[2]; + heres[3]->BSIM3v32cdsb = val[3]; + heres[4]->BSIM3v32cdsb = val[4]; + heres[5]->BSIM3v32cdsb = val[5]; + heres[6]->BSIM3v32cdsb = val[6]; + heres[7]->BSIM3v32cdsb = val[7]; + } + { + Vec8d val = -((Cgd + Cbd) + Csd); + heres[0]->BSIM3v32cddb = val[0]; + heres[1]->BSIM3v32cddb = val[1]; + heres[2]->BSIM3v32cddb = val[2]; + heres[3]->BSIM3v32cddb = val[3]; + heres[4]->BSIM3v32cddb = val[4]; + heres[5]->BSIM3v32cddb = val[5]; + heres[6]->BSIM3v32cddb = val[6]; + heres[7]->BSIM3v32cddb = val[7]; + } + { + heres[0]->BSIM3v32cbgb = Cbg[0]; + heres[1]->BSIM3v32cbgb = Cbg[1]; + heres[2]->BSIM3v32cbgb = Cbg[2]; + heres[3]->BSIM3v32cbgb = Cbg[3]; + heres[4]->BSIM3v32cbgb = Cbg[4]; + heres[5]->BSIM3v32cbgb = Cbg[5]; + heres[6]->BSIM3v32cbgb = Cbg[6]; + heres[7]->BSIM3v32cbgb = Cbg[7]; + } + { + Vec8d val = -((Cbg + Cbd) + Cbb); + heres[0]->BSIM3v32cbsb = val[0]; + heres[1]->BSIM3v32cbsb = val[1]; + heres[2]->BSIM3v32cbsb = val[2]; + heres[3]->BSIM3v32cbsb = val[3]; + 
heres[4]->BSIM3v32cbsb = val[4]; + heres[5]->BSIM3v32cbsb = val[5]; + heres[6]->BSIM3v32cbsb = val[6]; + heres[7]->BSIM3v32cbsb = val[7]; + } + { + heres[0]->BSIM3v32cbdb = Cbd[0]; + heres[1]->BSIM3v32cbdb = Cbd[1]; + heres[2]->BSIM3v32cbdb = Cbd[2]; + heres[3]->BSIM3v32cbdb = Cbd[3]; + heres[4]->BSIM3v32cbdb = Cbd[4]; + heres[5]->BSIM3v32cbdb = Cbd[5]; + heres[6]->BSIM3v32cbdb = Cbd[6]; + heres[7]->BSIM3v32cbdb = Cbd[7]; + } + { + Vec8d val = -qinoi; + heres[0]->BSIM3v32qinv = val[0]; + heres[1]->BSIM3v32qinv = val[1]; + heres[2]->BSIM3v32qinv = val[2]; + heres[3]->BSIM3v32qinv = val[3]; + heres[4]->BSIM3v32qinv = val[4]; + heres[5]->BSIM3v32qinv = val[5]; + heres[6]->BSIM3v32qinv = val[6]; + heres[7]->BSIM3v32qinv = val[7]; + } + } + + + + } + + + finished: + { + heres[0]->BSIM3v32qgate = qgate[0]; + heres[1]->BSIM3v32qgate = qgate[1]; + heres[2]->BSIM3v32qgate = qgate[2]; + heres[3]->BSIM3v32qgate = qgate[3]; + heres[4]->BSIM3v32qgate = qgate[4]; + heres[5]->BSIM3v32qgate = qgate[5]; + heres[6]->BSIM3v32qgate = qgate[6]; + heres[7]->BSIM3v32qgate = qgate[7]; + } + + { + heres[0]->BSIM3v32qbulk = qbulk[0]; + heres[1]->BSIM3v32qbulk = qbulk[1]; + heres[2]->BSIM3v32qbulk = qbulk[2]; + heres[3]->BSIM3v32qbulk = qbulk[3]; + heres[4]->BSIM3v32qbulk = qbulk[4]; + heres[5]->BSIM3v32qbulk = qbulk[5]; + heres[6]->BSIM3v32qbulk = qbulk[6]; + heres[7]->BSIM3v32qbulk = qbulk[7]; + } + { + heres[0]->BSIM3v32qdrn = qdrn[0]; + heres[1]->BSIM3v32qdrn = qdrn[1]; + heres[2]->BSIM3v32qdrn = qdrn[2]; + heres[3]->BSIM3v32qdrn = qdrn[3]; + heres[4]->BSIM3v32qdrn = qdrn[4]; + heres[5]->BSIM3v32qdrn = qdrn[5]; + heres[6]->BSIM3v32qdrn = qdrn[6]; + heres[7]->BSIM3v32qdrn = qdrn[7]; + } + { + heres[0]->BSIM3v32cd = cdrain[0]; + heres[1]->BSIM3v32cd = cdrain[1]; + heres[2]->BSIM3v32cd = cdrain[2]; + heres[3]->BSIM3v32cd = cdrain[3]; + heres[4]->BSIM3v32cd = cdrain[4]; + heres[5]->BSIM3v32cd = cdrain[5]; + heres[6]->BSIM3v32cd = cdrain[6]; + heres[7]->BSIM3v32cd = cdrain[7]; + } + if 
(ChargeComputationNeeded) + { + Vec8d nstate_qbs = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs, heres[4]->BSIM3v32qbs, heres[5]->BSIM3v32qbs, heres[6]->BSIM3v32qbs, heres[7]->BSIM3v32qbs}); + Vec8d nstate_qbd = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd, heres[4]->BSIM3v32qbd, heres[5]->BSIM3v32qbd, heres[6]->BSIM3v32qbd, heres[7]->BSIM3v32qbd}); + if (model->BSIM3v32acmMod == 0) + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbd = model->BSIM3v32unitAreaTempJctCap * ((Vec8d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea, heres[4]->BSIM3v32drainArea, heres[5]->BSIM3v32drainArea, heres[6]->BSIM3v32drainArea, heres[7]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaTempJctCap * ((Vec8d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea, heres[4]->BSIM3v32sourceArea, heres[5]->BSIM3v32sourceArea, heres[6]->BSIM3v32sourceArea, heres[7]->BSIM3v32sourceArea}); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbd = model->BSIM3v32unitAreaJctCap * ((Vec8d ){heres[0]->BSIM3v32drainArea, heres[1]->BSIM3v32drainArea, heres[2]->BSIM3v32drainArea, heres[3]->BSIM3v32drainArea, heres[4]->BSIM3v32drainArea, heres[5]->BSIM3v32drainArea, heres[6]->BSIM3v32drainArea, heres[7]->BSIM3v32drainArea}); + czbs = model->BSIM3v32unitAreaJctCap * ((Vec8d ){heres[0]->BSIM3v32sourceArea, heres[1]->BSIM3v32sourceArea, heres[2]->BSIM3v32sourceArea, heres[3]->BSIM3v32sourceArea, heres[4]->BSIM3v32sourceArea, heres[5]->BSIM3v32sourceArea, heres[6]->BSIM3v32sourceArea, heres[7]->BSIM3v32sourceArea}); + + } + + if (1) + { + Vec8m condmask0 = ((Vec8d ){heres[0]->BSIM3v32drainPerimeter, 
heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}) < pParam->BSIM3v32weff; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + czbdsw = vec8_blend(czbdsw, vec8_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdswg = vec8_blend(czbdswg, model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdswg = vec8_blend(czbdswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbdsw = vec8_blend(czbdsw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec8_blend(czbdswg, vec8_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case 
BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbdsw = vec8_blend(czbdsw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec8d ){heres[0]->BSIM3v32drainPerimeter, heres[1]->BSIM3v32drainPerimeter, heres[2]->BSIM3v32drainPerimeter, heres[3]->BSIM3v32drainPerimeter, heres[4]->BSIM3v32drainPerimeter, heres[5]->BSIM3v32drainPerimeter, heres[6]->BSIM3v32drainPerimeter, heres[7]->BSIM3v32drainPerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbdswg = vec8_blend(czbdswg, vec8_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + if (1) + { + Vec8m condmask0 = ((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}) < pParam->BSIM3v32weff; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + czbssw = vec8_blend(czbssw, vec8_SIMDTOVECTOR(0.0), condmask_true0); + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbsswg = vec8_blend(czbsswg, model->BSIM3v32unitLengthGateSidewallTempJctCap * ((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}), condmask_true0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbsswg = vec8_blend(czbsswg, model->BSIM3v32unitLengthGateSidewallJctCap * ((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, 
heres[7]->BSIM3v32sourcePerimeter}), condmask_true0); + + } + + } + { + switch (model->BSIM3v32intVersion) + { + case BSIM3v32V324: + + case BSIM3v32V323: + czbssw = vec8_blend(czbssw, model->BSIM3v32unitLengthSidewallTempJctCap * (((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec8_blend(czbsswg, vec8_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallTempJctCap * pParam->BSIM3v32weff), condmask_false0); + break; + + case BSIM3v32V322: + + case BSIM3v32V32: + + default: + czbssw = vec8_blend(czbssw, model->BSIM3v32unitLengthSidewallJctCap * (((Vec8d ){heres[0]->BSIM3v32sourcePerimeter, heres[1]->BSIM3v32sourcePerimeter, heres[2]->BSIM3v32sourcePerimeter, heres[3]->BSIM3v32sourcePerimeter, heres[4]->BSIM3v32sourcePerimeter, heres[5]->BSIM3v32sourcePerimeter, heres[6]->BSIM3v32sourcePerimeter, heres[7]->BSIM3v32sourcePerimeter}) - pParam->BSIM3v32weff), condmask_false0); + czbsswg = vec8_blend(czbsswg, vec8_SIMDTOVECTOR(model->BSIM3v32unitLengthGateSidewallJctCap * pParam->BSIM3v32weff), condmask_false0); + + } + + } + } + + } + else + { + error = vec8_BSIM3v32_ACM_junctionCapacitances(model, heres, &czbd, &czbdsw, &czbdswg, &czbs, &czbssw, &czbsswg); + if (SIMDANY(error)) + return error; + + } + + MJ = model->BSIM3v32bulkJctBotGradingCoeff; + MJSW = model->BSIM3v32bulkJctSideGradingCoeff; + MJSWG = model->BSIM3v32bulkJctGateSideGradingCoeff; + if (1) + { + Vec8m condmask0 = vbs == 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + nstate_qbs = vec8_blend(nstate_qbs, vec8_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec8d val = (czbs + czbssw) + czbsswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true0[1]) + 
heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32capbs = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32capbs = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32capbs = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32capbs = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32capbs = val[7]; + + } + } + if (1) + { + Vec8m condmask1 = vbs < 0.0; + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec8m condmask2 = czbs > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + Vec8m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec8_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJ) * vec8_log(arg)), condmask_true2); + + nstate_qbs = vec8_blend(nstate_qbs, ((model->BSIM3v32PhiB * czbs) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec8d val = czbs * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32capbs = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbs = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbs = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbs = val[7]; + + } + } + { + nstate_qbs = vec8_blend(nstate_qbs, vec8_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbs = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbs = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbs = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbs = 0.0; + + if 
(condmask_false2[4]) + heres[4]->BSIM3v32capbs = 0.0; + + if (condmask_false2[5]) + heres[5]->BSIM3v32capbs = 0.0; + + if (condmask_false2[6]) + heres[6]->BSIM3v32capbs = 0.0; + + if (condmask_false2[7]) + heres[7]->BSIM3v32capbs = 0.0; + + } + } + } + + if (1) + { + Vec8m condmask2 = czbssw > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec8_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJSW) * vec8_log(arg)), condmask_true2); + + nstate_qbs = vec8_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSW * czbssw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec8d val = czbssw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32capbs += val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbs += val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbs += val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbs += val[7]; + + } + } + } + + if (1) + { + Vec8m condmask2 = czbsswg > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec8_blend(arg, 1.0 - (vbs / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJSWG) * vec8_log(arg)), condmask_true2); + + nstate_qbs = vec8_blend(nstate_qbs, nstate_qbs + (((model->BSIM3v32PhiBSWG * czbsswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec8d val = czbsswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbs += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbs += val[1]; + + if (condmask_true2[2]) 
+ heres[2]->BSIM3v32capbs += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbs += val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32capbs += val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbs += val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbs += val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbs += val[7]; + + } + } + } + + } + { + T0 = vec8_blend(T0, (czbs + czbssw) + czbsswg, condmask_false1); + T1 = vec8_blend(T1, vbs * ((((czbs * MJ) / model->BSIM3v32PhiB) + ((czbssw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbsswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbs = vec8_blend(nstate_qbs, vbs * (T0 + (0.5 * T1)), condmask_false1); + { + Vec8d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbs = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbs = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32capbs = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbs = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32capbs = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32capbs = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32capbs = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32capbs = val[7]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs, heres[4]->BSIM3v32qbs, heres[5]->BSIM3v32qbs, heres[6]->BSIM3v32qbs, heres[7]->BSIM3v32qbs}, nstate_qbs); + if (1) + { + Vec8m condmask0 = vbd == 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + nstate_qbd = vec8_blend(nstate_qbd, vec8_SIMDTOVECTOR(0.0), condmask_true0); + { + Vec8d val = (czbd + czbdsw) + czbdswg; + if (condmask_true0[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true0[3]) + 
heres[3]->BSIM3v32capbd = val[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32capbd = val[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32capbd = val[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32capbd = val[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32capbd = val[7]; + + } + } + if (1) + { + Vec8m condmask1 = vbd < 0.0; + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + if (1) + { + Vec8m condmask2 = czbd > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + Vec8m condmask_false2 = condmask_true1 & (~condmask2); + { + arg = vec8_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiB), condmask_true2); + if (MJ == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJ) * vec8_log(arg)), condmask_true2); + + nstate_qbd = vec8_blend(nstate_qbd, ((model->BSIM3v32PhiB * czbd) * (1.0 - (arg * sarg))) / (1.0 - MJ), condmask_true2); + { + Vec8d val = czbd * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd = val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32capbd = val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbd = val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbd = val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbd = val[7]; + + } + } + { + nstate_qbd = vec8_blend(nstate_qbd, vec8_SIMDTOVECTOR(0.0), condmask_false2); + { + if (condmask_false2[0]) + heres[0]->BSIM3v32capbd = 0.0; + + if (condmask_false2[1]) + heres[1]->BSIM3v32capbd = 0.0; + + if (condmask_false2[2]) + heres[2]->BSIM3v32capbd = 0.0; + + if (condmask_false2[3]) + heres[3]->BSIM3v32capbd = 0.0; + + if (condmask_false2[4]) + heres[4]->BSIM3v32capbd = 0.0; + + if (condmask_false2[5]) + heres[5]->BSIM3v32capbd = 0.0; + + if 
(condmask_false2[6]) + heres[6]->BSIM3v32capbd = 0.0; + + if (condmask_false2[7]) + heres[7]->BSIM3v32capbd = 0.0; + + } + } + } + + if (1) + { + Vec8m condmask2 = czbdsw > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec8_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSW), condmask_true2); + if (MJSW == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJSW) * vec8_log(arg)), condmask_true2); + + nstate_qbd = vec8_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSW * czbdsw) * (1.0 - (arg * sarg))) / (1.0 - MJSW)), condmask_true2); + { + Vec8d val = czbdsw * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + if (condmask_true2[4]) + heres[4]->BSIM3v32capbd += val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbd += val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbd += val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbd += val[7]; + + } + } + } + + if (1) + { + Vec8m condmask2 = czbdswg > 0.0; + Vec8m condmask_true2 = condmask_true1 & condmask2; + { + arg = vec8_blend(arg, 1.0 - (vbd / model->BSIM3v32PhiBSWG), condmask_true2); + if (MJSWG == 0.5) + sarg = vec8_blend(sarg, 1.0 / vec8_sqrt(arg), condmask_true2); + else + sarg = vec8_blend(sarg, vec8_exp((-MJSWG) * vec8_log(arg)), condmask_true2); + + nstate_qbd = vec8_blend(nstate_qbd, nstate_qbd + (((model->BSIM3v32PhiBSWG * czbdswg) * (1.0 - (arg * sarg))) / (1.0 - MJSWG)), condmask_true2); + { + Vec8d val = czbdswg * sarg; + if (condmask_true2[0]) + heres[0]->BSIM3v32capbd += val[0]; + + if (condmask_true2[1]) + heres[1]->BSIM3v32capbd += val[1]; + + if (condmask_true2[2]) + heres[2]->BSIM3v32capbd += val[2]; + + if (condmask_true2[3]) + heres[3]->BSIM3v32capbd += val[3]; + + if 
(condmask_true2[4]) + heres[4]->BSIM3v32capbd += val[4]; + + if (condmask_true2[5]) + heres[5]->BSIM3v32capbd += val[5]; + + if (condmask_true2[6]) + heres[6]->BSIM3v32capbd += val[6]; + + if (condmask_true2[7]) + heres[7]->BSIM3v32capbd += val[7]; + + } + } + } + + } + { + T0 = vec8_blend(T0, (czbd + czbdsw) + czbdswg, condmask_false1); + T1 = vec8_blend(T1, vbd * ((((czbd * MJ) / model->BSIM3v32PhiB) + ((czbdsw * MJSW) / model->BSIM3v32PhiBSW)) + ((czbdswg * MJSWG) / model->BSIM3v32PhiBSWG)), condmask_false1); + nstate_qbd = vec8_blend(nstate_qbd, vbd * (T0 + (0.5 * T1)), condmask_false1); + { + Vec8d val = T0 + T1; + if (condmask_false1[0]) + heres[0]->BSIM3v32capbd = val[0]; + + if (condmask_false1[1]) + heres[1]->BSIM3v32capbd = val[1]; + + if (condmask_false1[2]) + heres[2]->BSIM3v32capbd = val[2]; + + if (condmask_false1[3]) + heres[3]->BSIM3v32capbd = val[3]; + + if (condmask_false1[4]) + heres[4]->BSIM3v32capbd = val[4]; + + if (condmask_false1[5]) + heres[5]->BSIM3v32capbd = val[5]; + + if (condmask_false1[6]) + heres[6]->BSIM3v32capbd = val[6]; + + if (condmask_false1[7]) + heres[7]->BSIM3v32capbd = val[7]; + + } + } + } + + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd, heres[4]->BSIM3v32qbd, heres[5]->BSIM3v32qbd, heres[6]->BSIM3v32qbd, heres[7]->BSIM3v32qbd}, nstate_qbd); + } + + if ((heres[0]->BSIM3v32off == 0) || (!(ckt->CKTmode & MODEINITFIX))) + { + Vec8m nonconcount; + nonconcount = Check; + ckt->CKTnoncon += vec8_SIMDCOUNT(nonconcount); + } + + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32vbs, heres[1]->BSIM3v32vbs, heres[2]->BSIM3v32vbs, heres[3]->BSIM3v32vbs, heres[4]->BSIM3v32vbs, heres[5]->BSIM3v32vbs, heres[6]->BSIM3v32vbs, heres[7]->BSIM3v32vbs}, vbs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32vbd, heres[1]->BSIM3v32vbd, heres[2]->BSIM3v32vbd, heres[3]->BSIM3v32vbd, heres[4]->BSIM3v32vbd, 
heres[5]->BSIM3v32vbd, heres[6]->BSIM3v32vbd, heres[7]->BSIM3v32vbd}, vbd); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32vgs, heres[1]->BSIM3v32vgs, heres[2]->BSIM3v32vgs, heres[3]->BSIM3v32vgs, heres[4]->BSIM3v32vgs, heres[5]->BSIM3v32vgs, heres[6]->BSIM3v32vgs, heres[7]->BSIM3v32vgs}, vgs); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32vds, heres[1]->BSIM3v32vds, heres[2]->BSIM3v32vds, heres[3]->BSIM3v32vds, heres[4]->BSIM3v32vds, heres[5]->BSIM3v32vds, heres[6]->BSIM3v32vds, heres[7]->BSIM3v32vds}, vds); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qdef, heres[1]->BSIM3v32qdef, heres[2]->BSIM3v32qdef, heres[3]->BSIM3v32qdef, heres[4]->BSIM3v32qdef, heres[5]->BSIM3v32qdef, heres[6]->BSIM3v32qdef, heres[7]->BSIM3v32qdef}, qdef); + if (!ChargeComputationNeeded) + goto line850; + + line755: + if (heres[0]->BSIM3v32nqsMod) + { + qcheq = -(qbulk + qgate); + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb})); + heres[0]->BSIM3v32cqgb = val[0]; + heres[1]->BSIM3v32cqgb = val[1]; + heres[2]->BSIM3v32cqgb = val[2]; + heres[3]->BSIM3v32cqgb = val[3]; + heres[4]->BSIM3v32cqgb = val[4]; + heres[5]->BSIM3v32cqgb = val[5]; + heres[6]->BSIM3v32cqgb = val[6]; + heres[7]->BSIM3v32cqgb = val[7]; + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, 
heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb})); + heres[0]->BSIM3v32cqdb = val[0]; + heres[1]->BSIM3v32cqdb = val[1]; + heres[2]->BSIM3v32cqdb = val[2]; + heres[3]->BSIM3v32cqdb = val[3]; + heres[4]->BSIM3v32cqdb = val[4]; + heres[5]->BSIM3v32cqdb = val[5]; + heres[6]->BSIM3v32cqdb = val[6]; + heres[7]->BSIM3v32cqdb = val[7]; + } + { + Vec8d val = -(((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb})); + heres[0]->BSIM3v32cqsb = val[0]; + heres[1]->BSIM3v32cqsb = val[1]; + heres[2]->BSIM3v32cqsb = val[2]; + heres[3]->BSIM3v32cqsb = val[3]; + heres[4]->BSIM3v32cqsb = val[4]; + heres[5]->BSIM3v32cqsb = val[5]; + heres[6]->BSIM3v32cqsb = val[6]; + heres[7]->BSIM3v32cqsb = val[7]; + } + { + Vec8d val = -((((Vec8d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb, heres[4]->BSIM3v32cqgb, heres[5]->BSIM3v32cqgb, heres[6]->BSIM3v32cqgb, heres[7]->BSIM3v32cqgb}) + ((Vec8d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb, heres[4]->BSIM3v32cqdb, heres[5]->BSIM3v32cqdb, heres[6]->BSIM3v32cqdb, heres[7]->BSIM3v32cqdb})) + ((Vec8d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb, heres[4]->BSIM3v32cqsb, heres[5]->BSIM3v32cqsb, heres[6]->BSIM3v32cqsb, heres[7]->BSIM3v32cqsb})); + heres[0]->BSIM3v32cqbb = val[0]; + heres[1]->BSIM3v32cqbb = val[1]; + heres[2]->BSIM3v32cqbb = val[2]; + heres[3]->BSIM3v32cqbb = val[3]; + heres[4]->BSIM3v32cqbb = val[4]; + heres[5]->BSIM3v32cqbb = val[5]; + heres[6]->BSIM3v32cqbb = val[6]; + 
heres[7]->BSIM3v32cqbb = val[7]; + } + gtau_drift = vec8_fabs(((Vec8d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst, heres[4]->BSIM3v32tconst, heres[5]->BSIM3v32tconst, heres[6]->BSIM3v32tconst, heres[7]->BSIM3v32tconst}) * qcheq) * ScalingFactor; + T0 = vec8_SIMDTOVECTOR(pParam->BSIM3v32leffCV * pParam->BSIM3v32leffCV); + gtau_diff = (((16.0 * ((Vec8d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp, heres[4]->BSIM3v32u0temp, heres[5]->BSIM3v32u0temp, heres[6]->BSIM3v32u0temp, heres[7]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / T0) * ScalingFactor; + { + Vec8d val = gtau_drift + gtau_diff; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + heres[4]->BSIM3v32gtau = val[4]; + heres[5]->BSIM3v32gtau = val[5]; + heres[6]->BSIM3v32gtau = val[6]; + heres[7]->BSIM3v32gtau = val[7]; + } + } + + + if (model->BSIM3v32capMod == 0) + { + cgdo = vec8_SIMDTOVECTOR(pParam->BSIM3v32cgdo); + qgdo = pParam->BSIM3v32cgdo * vgd; + cgso = vec8_SIMDTOVECTOR(pParam->BSIM3v32cgso); + qgso = pParam->BSIM3v32cgso * vgs; + } + else + if (model->BSIM3v32capMod == 1) + { + if (1) + { + Vec8m condmask0 = vgd < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, vec8_sqrt(1.0 - ((4.0 * vgd) / pParam->BSIM3v32ckappa)), condmask_true0); + cgdo = vec8_blend(cgdo, pParam->BSIM3v32cgdo + ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) / T1), condmask_true0); + qgdo = vec8_blend(qgdo, (pParam->BSIM3v32cgdo * vgd) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgdl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgdo = vec8_blend(cgdo, vec8_SIMDTOVECTOR(pParam->BSIM3v32cgdo + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl)), condmask_false0); + qgdo = vec8_blend(qgdo, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl) + 
pParam->BSIM3v32cgdo) * vgd, condmask_false0); + } + } + + if (1) + { + Vec8m condmask0 = vgs < 0.0; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + T1 = vec8_blend(T1, vec8_sqrt(1.0 - ((4.0 * vgs) / pParam->BSIM3v32ckappa)), condmask_true0); + cgso = vec8_blend(cgso, pParam->BSIM3v32cgso + ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) / T1), condmask_true0); + qgso = vec8_blend(qgso, (pParam->BSIM3v32cgso * vgs) - ((((pParam->BSIM3v32weffCV * 0.5) * pParam->BSIM3v32cgsl) * pParam->BSIM3v32ckappa) * (T1 - 1.0)), condmask_true0); + } + { + cgso = vec8_blend(cgso, vec8_SIMDTOVECTOR(pParam->BSIM3v32cgso + (pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl)), condmask_false0); + qgso = vec8_blend(qgso, ((pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl) + pParam->BSIM3v32cgso) * vgs, condmask_false0); + } + } + + } + else + { + T0 = vgd + DELTA_1; + T1 = vec8_sqrt((T0 * T0) + (4.0 * DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec8_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgdl); + T4 = vec8_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgdo = (pParam->BSIM3v32cgdo + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgdo = ((pParam->BSIM3v32cgdo + T3) * vgd) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + T0 = vgs + DELTA_1; + T1 = vec8_sqrt((T0 * T0) + (4.0 * DELTA_1)); + T2 = 0.5 * (T0 - T1); + T3 = vec8_SIMDTOVECTOR(pParam->BSIM3v32weffCV * pParam->BSIM3v32cgsl); + T4 = vec8_sqrt(1.0 - ((4.0 * T2) / pParam->BSIM3v32ckappa)); + cgso = (pParam->BSIM3v32cgso + T3) - ((T3 * (1.0 - (1.0 / T4))) * (0.5 - ((0.5 * T0) / T1))); + qgso = ((pParam->BSIM3v32cgso + T3) * vgs) - (T3 * (T2 + ((0.5 * pParam->BSIM3v32ckappa) * (T4 - 1.0)))); + } + + + { + heres[0]->BSIM3v32cgdo = cgdo[0]; + heres[1]->BSIM3v32cgdo = cgdo[1]; + heres[2]->BSIM3v32cgdo = cgdo[2]; + heres[3]->BSIM3v32cgdo = cgdo[3]; + heres[4]->BSIM3v32cgdo = cgdo[4]; + heres[5]->BSIM3v32cgdo = cgdo[5]; + heres[6]->BSIM3v32cgdo = cgdo[6]; + 
heres[7]->BSIM3v32cgdo = cgdo[7]; + } + { + heres[0]->BSIM3v32cgso = cgso[0]; + heres[1]->BSIM3v32cgso = cgso[1]; + heres[2]->BSIM3v32cgso = cgso[2]; + heres[3]->BSIM3v32cgso = cgso[3]; + heres[4]->BSIM3v32cgso = cgso[4]; + heres[5]->BSIM3v32cgso = cgso[5]; + heres[6]->BSIM3v32cgso = cgso[6]; + heres[7]->BSIM3v32cgso = cgso[7]; + } + ag0 = ckt->CKTag[0]; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec8_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec8_SIMDTOVECTOR(0.0)))); + ggtg = (ggtd = (ggtb = (ggts = vec8_SIMDTOVECTOR(0.0)))); + if (1) + { + Vec8m condmask0 = BSIM3v32mode; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec8_blend(gcggb, (((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + cgdo) + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec8_blend(gcgdb, (((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) - cgdo) * ag0, condmask_true0); + gcgsb = vec8_blend(gcgsb, (((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) - cgso) * ag0, condmask_true0); + gcdgb = vec8_blend(gcdgb, (((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}) - cgdo) * ag0, condmask_true0); + gcddb = vec8_blend(gcddb, ((((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, 
heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}) + ((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd})) + cgdo) * ag0, condmask_true0); + gcdsb = vec8_blend(gcdsb, ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}) * ag0, condmask_true0); + gcsgb = vec8_blend(gcsgb, (-(((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb})) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + cgso)) * ag0, condmask_true0); + gcsdb = vec8_blend(gcsdb, (-((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb})) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}))) * ag0, condmask_true0); + gcssb = vec8_blend(gcssb, ((((Vec8d 
){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs}) + cgso) - ((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb})) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}))) * ag0, condmask_true0); + gcbgb = vec8_blend(gcbgb, (((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcbdb = vec8_blend(gcbdb, (((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb}) - ((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd})) * ag0, condmask_true0); + gcbsb = vec8_blend(gcbsb, (((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb}) - ((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, 
heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs})) * ag0, condmask_true0); + qgd = vec8_blend(qgd, qgdo, condmask_true0); + qgs = vec8_blend(qgs, qgso, condmask_true0); + qgb = vec8_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec8_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_true0); + qbulk = vec8_blend(qbulk, qbulk - qgb, condmask_true0); + qdrn = vec8_blend(qdrn, qdrn - qgd, condmask_true0); + qsrc = vec8_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + sxpart = vec8_blend(sxpart, vec8_SIMDTOVECTOR(0.6), condmask_true0); + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.4), condmask_true0); + } + else + { + if (1) + { + Vec8m condmask1 = qcheq > 0.0; + Vec8m condmask_true1 = condmask_true0 & condmask1; + Vec8m condmask_false1 = condmask_true0 & (~condmask1); + T0 = vec8_blend(T0, (((Vec8d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst, heres[4]->BSIM3v32tconst, heres[5]->BSIM3v32tconst, heres[6]->BSIM3v32tconst, heres[7]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec8_blend(T0, ((-((Vec8d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst, heres[4]->BSIM3v32tconst, heres[5]->BSIM3v32tconst, heres[6]->BSIM3v32tconst, heres[7]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec8_blend(ggtg, T0 * ((Vec8d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb, heres[4]->BSIM3v32cqgb, heres[5]->BSIM3v32cqgb, heres[6]->BSIM3v32cqgb, heres[7]->BSIM3v32cqgb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gtg = ggtg[4]; + + if (condmask_true0[5]) + 
heres[5]->BSIM3v32gtg = ggtg[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32gtg = ggtg[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gtg = ggtg[7]; + + } + ggtd = vec8_blend(ggtd, T0 * ((Vec8d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb, heres[4]->BSIM3v32cqdb, heres[5]->BSIM3v32cqdb, heres[6]->BSIM3v32cqdb, heres[7]->BSIM3v32cqdb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtd = ggtd[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gtd = ggtd[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtd = ggtd[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtd = ggtd[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gtd = ggtd[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32gtd = ggtd[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32gtd = ggtd[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gtd = ggtd[7]; + + } + ggts = vec8_blend(ggts, T0 * ((Vec8d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb, heres[4]->BSIM3v32cqsb, heres[5]->BSIM3v32cqsb, heres[6]->BSIM3v32cqsb, heres[7]->BSIM3v32cqsb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gts = ggts[0]; + + if (condmask_true0[1]) + heres[1]->BSIM3v32gts = ggts[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gts = ggts[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gts = ggts[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gts = ggts[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32gts = ggts[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32gts = ggts[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gts = ggts[7]; + + } + ggtb = vec8_blend(ggtb, T0 * ((Vec8d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb, heres[4]->BSIM3v32cqbb, heres[5]->BSIM3v32cqbb, heres[6]->BSIM3v32cqbb, heres[7]->BSIM3v32cqbb}), condmask_true0); + { + if (condmask_true0[0]) + heres[0]->BSIM3v32gtb = ggtb[0]; + + if 
(condmask_true0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_true0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_true0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + if (condmask_true0[4]) + heres[4]->BSIM3v32gtb = ggtb[4]; + + if (condmask_true0[5]) + heres[5]->BSIM3v32gtb = ggtb[5]; + + if (condmask_true0[6]) + heres[6]->BSIM3v32gtb = ggtb[6]; + + if (condmask_true0[7]) + heres[7]->BSIM3v32gtb = ggtb[7]; + + } + gqdef = vec8_blend(gqdef, vec8_SIMDTOVECTOR(ScalingFactor * ag0), condmask_true0); + gcqgb = vec8_blend(gcqgb, ((Vec8d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb, heres[4]->BSIM3v32cqgb, heres[5]->BSIM3v32cqgb, heres[6]->BSIM3v32cqgb, heres[7]->BSIM3v32cqgb}) * ag0, condmask_true0); + gcqdb = vec8_blend(gcqdb, ((Vec8d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb, heres[4]->BSIM3v32cqdb, heres[5]->BSIM3v32cqdb, heres[6]->BSIM3v32cqdb, heres[7]->BSIM3v32cqdb}) * ag0, condmask_true0); + gcqsb = vec8_blend(gcqsb, ((Vec8d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb, heres[4]->BSIM3v32cqsb, heres[5]->BSIM3v32cqsb, heres[6]->BSIM3v32cqsb, heres[7]->BSIM3v32cqsb}) * ag0, condmask_true0); + gcqbb = vec8_blend(gcqbb, ((Vec8d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb, heres[4]->BSIM3v32cqbb, heres[5]->BSIM3v32cqbb, heres[6]->BSIM3v32cqbb, heres[7]->BSIM3v32cqbb}) * ag0, condmask_true0); + gcggb = vec8_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_true0); + gcgdb = vec8_blend(gcgdb, (-cgdo) * ag0, condmask_true0); + gcgsb = vec8_blend(gcgsb, (-cgso) * ag0, condmask_true0); + gcdgb = vec8_blend(gcdgb, (-cgdo) * ag0, condmask_true0); + gcddb = vec8_blend(gcddb, (((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, 
heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd}) + cgdo) * ag0, condmask_true0); + gcdsb = vec8_blend(gcdsb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gcsgb = vec8_blend(gcsgb, (-cgso) * ag0, condmask_true0); + gcsdb = vec8_blend(gcsdb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gcssb = vec8_blend(gcssb, (((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs}) + cgso) * ag0, condmask_true0); + gcbgb = vec8_blend(gcbgb, vec8_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * ag0), condmask_true0); + gcbdb = vec8_blend(gcbdb, (-((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd})) * ag0, condmask_true0); + gcbsb = vec8_blend(gcbsb, (-((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs})) * ag0, condmask_true0); + if (1) + { + Vec8m condmask1 = vec8_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec8m condmask_true1 = condmask_true0 & condmask1; + Vec8m condmask_false1 = condmask_true0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + dxpart = vec8_blend(dxpart, qdrn / qcheq, condmask_false1); + Cdd = vec8_blend(Cdd, (Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}, 
condmask_false1); + Csd = vec8_blend(Csd, -((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})) + ((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb})), condmask_false1); + ddxpart_dVd = vec8_blend(ddxpart_dVd, (Cdd - (dxpart * (Cdd + Csd))) / qcheq, condmask_false1); + Cdg = vec8_blend(Cdg, (Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}, condmask_false1); + Csg = vec8_blend(Csg, -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + ((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb})), condmask_false1); + ddxpart_dVg = vec8_blend(ddxpart_dVg, (Cdg - (dxpart * (Cdg + Csg))) / qcheq, condmask_false1); + Cds = vec8_blend(Cds, (Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, 
heres[7]->BSIM3v32cdsb}, condmask_false1); + Css = vec8_blend(Css, -((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb})) + ((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb})), condmask_false1); + ddxpart_dVs = vec8_blend(ddxpart_dVs, (Cds - (dxpart * (Cds + Css))) / qcheq, condmask_false1); + ddxpart_dVb = vec8_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false1); + } + } + + sxpart = vec8_blend(sxpart, 1.0 - dxpart, condmask_true0); + dsxpart_dVd = vec8_blend(dsxpart_dVd, -ddxpart_dVd, condmask_true0); + dsxpart_dVg = vec8_blend(dsxpart_dVg, -ddxpart_dVg, condmask_true0); + dsxpart_dVs = vec8_blend(dsxpart_dVs, -ddxpart_dVs, condmask_true0); + dsxpart_dVb = vec8_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_true0); + qgd = vec8_blend(qgd, qgdo, condmask_true0); + qgs = vec8_blend(qgs, qgso, condmask_true0); + qgb = vec8_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_true0); + qgate = vec8_blend(qgate, (qgd + qgs) + qgb, condmask_true0); + qbulk = vec8_blend(qbulk, -qgb, condmask_true0); + qdrn = vec8_blend(qdrn, -qgd, condmask_true0); + qsrc = vec8_blend(qsrc, -((qgate + qbulk) + qdrn), condmask_true0); + } + + } + { + if (heres[0]->BSIM3v32nqsMod == 0) + { + gcggb = vec8_blend(gcggb, (((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + cgdo) + cgso) + 
pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec8_blend(gcgdb, (((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) - cgdo) * ag0, condmask_false0); + gcgsb = vec8_blend(gcgsb, (((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) - cgso) * ag0, condmask_false0); + gcdgb = vec8_blend(gcdgb, (-(((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb})) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + cgdo)) * ag0, condmask_false0); + gcddb = vec8_blend(gcddb, ((((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd}) + cgdo) - ((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb})) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, 
heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}))) * ag0, condmask_false0); + gcdsb = vec8_blend(gcdsb, (-((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb})) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}))) * ag0, condmask_false0); + gcsgb = vec8_blend(gcsgb, (((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}) - cgso) * ag0, condmask_false0); + gcsdb = vec8_blend(gcsdb, ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}) * ag0, condmask_false0); + gcssb = vec8_blend(gcssb, ((((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}) + ((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs})) + cgso) * ag0, condmask_false0); + gcbgb = vec8_blend(gcbgb, (((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, 
heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb}) - pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcbdb = vec8_blend(gcbdb, (((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb}) - ((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd})) * ag0, condmask_false0); + gcbsb = vec8_blend(gcbsb, (((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb}) - ((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs})) * ag0, condmask_false0); + qgd = vec8_blend(qgd, qgdo, condmask_false0); + qgs = vec8_blend(qgs, qgso, condmask_false0); + qgb = vec8_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec8_blend(qgate, qgate + ((qgd + qgs) + qgb), condmask_false0); + qbulk = vec8_blend(qbulk, qbulk - qgb, condmask_false0); + qsrc = vec8_blend(qsrc, qdrn - qgs, condmask_false0); + qdrn = vec8_blend(qdrn, -((qgate + qbulk) + qsrc), condmask_false0); + sxpart = vec8_blend(sxpart, vec8_SIMDTOVECTOR(0.4), condmask_false0); + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.6), condmask_false0); + } + else + { + if (1) + { + Vec8m condmask1 = qcheq > 0.0; + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + T0 = vec8_blend(T0, (((Vec8d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst, heres[4]->BSIM3v32tconst, 
heres[5]->BSIM3v32tconst, heres[6]->BSIM3v32tconst, heres[7]->BSIM3v32tconst}) * qdef) * ScalingFactor, condmask_true1); + T0 = vec8_blend(T0, ((-((Vec8d ){heres[0]->BSIM3v32tconst, heres[1]->BSIM3v32tconst, heres[2]->BSIM3v32tconst, heres[3]->BSIM3v32tconst, heres[4]->BSIM3v32tconst, heres[5]->BSIM3v32tconst, heres[6]->BSIM3v32tconst, heres[7]->BSIM3v32tconst})) * qdef) * ScalingFactor, condmask_false1); + } + + ggtg = vec8_blend(ggtg, T0 * ((Vec8d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb, heres[4]->BSIM3v32cqgb, heres[5]->BSIM3v32cqgb, heres[6]->BSIM3v32cqgb, heres[7]->BSIM3v32cqgb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtg = ggtg[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtg = ggtg[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtg = ggtg[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtg = ggtg[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gtg = ggtg[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gtg = ggtg[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gtg = ggtg[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gtg = ggtg[7]; + + } + ggts = vec8_blend(ggts, T0 * ((Vec8d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb, heres[4]->BSIM3v32cqdb, heres[5]->BSIM3v32cqdb, heres[6]->BSIM3v32cqdb, heres[7]->BSIM3v32cqdb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtd = ggts[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtd = ggts[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtd = ggts[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtd = ggts[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gtd = ggts[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gtd = ggts[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gtd = ggts[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gtd = ggts[7]; + + } + ggtd = vec8_blend(ggtd, T0 * ((Vec8d 
){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb, heres[4]->BSIM3v32cqsb, heres[5]->BSIM3v32cqsb, heres[6]->BSIM3v32cqsb, heres[7]->BSIM3v32cqsb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gts = ggtd[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gts = ggtd[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gts = ggtd[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gts = ggtd[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gts = ggtd[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gts = ggtd[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gts = ggtd[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gts = ggtd[7]; + + } + ggtb = vec8_blend(ggtb, T0 * ((Vec8d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb, heres[4]->BSIM3v32cqbb, heres[5]->BSIM3v32cqbb, heres[6]->BSIM3v32cqbb, heres[7]->BSIM3v32cqbb}), condmask_false0); + { + if (condmask_false0[0]) + heres[0]->BSIM3v32gtb = ggtb[0]; + + if (condmask_false0[1]) + heres[1]->BSIM3v32gtb = ggtb[1]; + + if (condmask_false0[2]) + heres[2]->BSIM3v32gtb = ggtb[2]; + + if (condmask_false0[3]) + heres[3]->BSIM3v32gtb = ggtb[3]; + + if (condmask_false0[4]) + heres[4]->BSIM3v32gtb = ggtb[4]; + + if (condmask_false0[5]) + heres[5]->BSIM3v32gtb = ggtb[5]; + + if (condmask_false0[6]) + heres[6]->BSIM3v32gtb = ggtb[6]; + + if (condmask_false0[7]) + heres[7]->BSIM3v32gtb = ggtb[7]; + + } + gqdef = vec8_blend(gqdef, vec8_SIMDTOVECTOR(ScalingFactor * ag0), condmask_false0); + gcqgb = vec8_blend(gcqgb, ((Vec8d ){heres[0]->BSIM3v32cqgb, heres[1]->BSIM3v32cqgb, heres[2]->BSIM3v32cqgb, heres[3]->BSIM3v32cqgb, heres[4]->BSIM3v32cqgb, heres[5]->BSIM3v32cqgb, heres[6]->BSIM3v32cqgb, heres[7]->BSIM3v32cqgb}) * ag0, condmask_false0); + gcqdb = vec8_blend(gcqdb, ((Vec8d ){heres[0]->BSIM3v32cqsb, heres[1]->BSIM3v32cqsb, heres[2]->BSIM3v32cqsb, heres[3]->BSIM3v32cqsb, heres[4]->BSIM3v32cqsb, 
heres[5]->BSIM3v32cqsb, heres[6]->BSIM3v32cqsb, heres[7]->BSIM3v32cqsb}) * ag0, condmask_false0); + gcqsb = vec8_blend(gcqsb, ((Vec8d ){heres[0]->BSIM3v32cqdb, heres[1]->BSIM3v32cqdb, heres[2]->BSIM3v32cqdb, heres[3]->BSIM3v32cqdb, heres[4]->BSIM3v32cqdb, heres[5]->BSIM3v32cqdb, heres[6]->BSIM3v32cqdb, heres[7]->BSIM3v32cqdb}) * ag0, condmask_false0); + gcqbb = vec8_blend(gcqbb, ((Vec8d ){heres[0]->BSIM3v32cqbb, heres[1]->BSIM3v32cqbb, heres[2]->BSIM3v32cqbb, heres[3]->BSIM3v32cqbb, heres[4]->BSIM3v32cqbb, heres[5]->BSIM3v32cqbb, heres[6]->BSIM3v32cqbb, heres[7]->BSIM3v32cqbb}) * ag0, condmask_false0); + gcggb = vec8_blend(gcggb, ((cgdo + cgso) + pParam->BSIM3v32cgbo) * ag0, condmask_false0); + gcgdb = vec8_blend(gcgdb, (-cgdo) * ag0, condmask_false0); + gcgsb = vec8_blend(gcgsb, (-cgso) * ag0, condmask_false0); + gcdgb = vec8_blend(gcdgb, (-cgdo) * ag0, condmask_false0); + gcddb = vec8_blend(gcddb, (((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd}) + cgdo) * ag0, condmask_false0); + gcdsb = vec8_blend(gcdsb, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gcsgb = vec8_blend(gcsgb, (-cgso) * ag0, condmask_false0); + gcsdb = vec8_blend(gcsdb, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gcssb = vec8_blend(gcssb, (((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs}) + cgso) * ag0, condmask_false0); + gcbgb = vec8_blend(gcbgb, vec8_SIMDTOVECTOR((-pParam->BSIM3v32cgbo) * ag0), condmask_false0); + gcbdb = vec8_blend(gcbdb, (-((Vec8d ){heres[0]->BSIM3v32capbd, heres[1]->BSIM3v32capbd, heres[2]->BSIM3v32capbd, heres[3]->BSIM3v32capbd, heres[4]->BSIM3v32capbd, heres[5]->BSIM3v32capbd, heres[6]->BSIM3v32capbd, heres[7]->BSIM3v32capbd})) * ag0, condmask_false0); + 
gcbsb = vec8_blend(gcbsb, (-((Vec8d ){heres[0]->BSIM3v32capbs, heres[1]->BSIM3v32capbs, heres[2]->BSIM3v32capbs, heres[3]->BSIM3v32capbs, heres[4]->BSIM3v32capbs, heres[5]->BSIM3v32capbs, heres[6]->BSIM3v32capbs, heres[7]->BSIM3v32capbs})) * ag0, condmask_false0); + if (1) + { + Vec8m condmask1 = vec8_fabs(qcheq) <= (1.0e-5 * CoxWL); + Vec8m condmask_true1 = condmask_false0 & condmask1; + Vec8m condmask_false1 = condmask_false0 & (~condmask1); + { + if (model->BSIM3v32xpart < 0.5) + { + sxpart = vec8_blend(sxpart, vec8_SIMDTOVECTOR(0.4), condmask_true1); + } + else + if (model->BSIM3v32xpart > 0.5) + { + sxpart = vec8_blend(sxpart, vec8_SIMDTOVECTOR(0.0), condmask_true1); + } + else + { + sxpart = vec8_blend(sxpart, vec8_SIMDTOVECTOR(0.5), condmask_true1); + } + + + } + { + sxpart = vec8_blend(sxpart, qdrn / qcheq, condmask_false1); + Css = vec8_blend(Css, (Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb}, condmask_false1); + Cds = vec8_blend(Cds, -((((Vec8d ){heres[0]->BSIM3v32cgdb, heres[1]->BSIM3v32cgdb, heres[2]->BSIM3v32cgdb, heres[3]->BSIM3v32cgdb, heres[4]->BSIM3v32cgdb, heres[5]->BSIM3v32cgdb, heres[6]->BSIM3v32cgdb, heres[7]->BSIM3v32cgdb}) + ((Vec8d ){heres[0]->BSIM3v32cddb, heres[1]->BSIM3v32cddb, heres[2]->BSIM3v32cddb, heres[3]->BSIM3v32cddb, heres[4]->BSIM3v32cddb, heres[5]->BSIM3v32cddb, heres[6]->BSIM3v32cddb, heres[7]->BSIM3v32cddb})) + ((Vec8d ){heres[0]->BSIM3v32cbdb, heres[1]->BSIM3v32cbdb, heres[2]->BSIM3v32cbdb, heres[3]->BSIM3v32cbdb, heres[4]->BSIM3v32cbdb, heres[5]->BSIM3v32cbdb, heres[6]->BSIM3v32cbdb, heres[7]->BSIM3v32cbdb})), condmask_false1); + dsxpart_dVs = vec8_blend(dsxpart_dVs, (Css - (sxpart * (Css + Cds))) / qcheq, condmask_false1); + Csg = vec8_blend(Csg, (Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, 
heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb}, condmask_false1); + Cdg = vec8_blend(Cdg, -((((Vec8d ){heres[0]->BSIM3v32cggb, heres[1]->BSIM3v32cggb, heres[2]->BSIM3v32cggb, heres[3]->BSIM3v32cggb, heres[4]->BSIM3v32cggb, heres[5]->BSIM3v32cggb, heres[6]->BSIM3v32cggb, heres[7]->BSIM3v32cggb}) + ((Vec8d ){heres[0]->BSIM3v32cdgb, heres[1]->BSIM3v32cdgb, heres[2]->BSIM3v32cdgb, heres[3]->BSIM3v32cdgb, heres[4]->BSIM3v32cdgb, heres[5]->BSIM3v32cdgb, heres[6]->BSIM3v32cdgb, heres[7]->BSIM3v32cdgb})) + ((Vec8d ){heres[0]->BSIM3v32cbgb, heres[1]->BSIM3v32cbgb, heres[2]->BSIM3v32cbgb, heres[3]->BSIM3v32cbgb, heres[4]->BSIM3v32cbgb, heres[5]->BSIM3v32cbgb, heres[6]->BSIM3v32cbgb, heres[7]->BSIM3v32cbgb})), condmask_false1); + dsxpart_dVg = vec8_blend(dsxpart_dVg, (Csg - (sxpart * (Csg + Cdg))) / qcheq, condmask_false1); + Csd = vec8_blend(Csd, (Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb}, condmask_false1); + Cdd = vec8_blend(Cdd, -((((Vec8d ){heres[0]->BSIM3v32cgsb, heres[1]->BSIM3v32cgsb, heres[2]->BSIM3v32cgsb, heres[3]->BSIM3v32cgsb, heres[4]->BSIM3v32cgsb, heres[5]->BSIM3v32cgsb, heres[6]->BSIM3v32cgsb, heres[7]->BSIM3v32cgsb}) + ((Vec8d ){heres[0]->BSIM3v32cdsb, heres[1]->BSIM3v32cdsb, heres[2]->BSIM3v32cdsb, heres[3]->BSIM3v32cdsb, heres[4]->BSIM3v32cdsb, heres[5]->BSIM3v32cdsb, heres[6]->BSIM3v32cdsb, heres[7]->BSIM3v32cdsb})) + ((Vec8d ){heres[0]->BSIM3v32cbsb, heres[1]->BSIM3v32cbsb, heres[2]->BSIM3v32cbsb, heres[3]->BSIM3v32cbsb, heres[4]->BSIM3v32cbsb, heres[5]->BSIM3v32cbsb, heres[6]->BSIM3v32cbsb, heres[7]->BSIM3v32cbsb})), condmask_false1); + dsxpart_dVd = vec8_blend(dsxpart_dVd, (Csd - (sxpart * (Csd + Cdd))) / qcheq, condmask_false1); + dsxpart_dVb = vec8_blend(dsxpart_dVb, -((dsxpart_dVd + dsxpart_dVg) + dsxpart_dVs), condmask_false1); + } + } + + dxpart 
= vec8_blend(dxpart, 1.0 - sxpart, condmask_false0); + ddxpart_dVd = vec8_blend(ddxpart_dVd, -dsxpart_dVd, condmask_false0); + ddxpart_dVg = vec8_blend(ddxpart_dVg, -dsxpart_dVg, condmask_false0); + ddxpart_dVs = vec8_blend(ddxpart_dVs, -dsxpart_dVs, condmask_false0); + ddxpart_dVb = vec8_blend(ddxpart_dVb, -((ddxpart_dVd + ddxpart_dVg) + ddxpart_dVs), condmask_false0); + qgd = vec8_blend(qgd, qgdo, condmask_false0); + qgs = vec8_blend(qgs, qgso, condmask_false0); + qgb = vec8_blend(qgb, pParam->BSIM3v32cgbo * vgb, condmask_false0); + qgate = vec8_blend(qgate, (qgd + qgs) + qgb, condmask_false0); + qbulk = vec8_blend(qbulk, -qgb, condmask_false0); + qsrc = vec8_blend(qsrc, -qgs, condmask_false0); + qdrn = vec8_blend(qdrn, -((qgate + qbulk) + qsrc), condmask_false0); + } + + } + } + + cqdef = (cqcheq = vec8_SIMDTOVECTOR(0.0)); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg, heres[4]->BSIM3v32qg, heres[5]->BSIM3v32qg, heres[6]->BSIM3v32qg, heres[7]->BSIM3v32qg}, qgate); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd, heres[4]->BSIM3v32qd, heres[5]->BSIM3v32qd, heres[6]->BSIM3v32qd, heres[7]->BSIM3v32qd}, qdrn - vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd, heres[4]->BSIM3v32qbd, heres[5]->BSIM3v32qbd, heres[6]->BSIM3v32qbd, heres[7]->BSIM3v32qbd})); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb, heres[4]->BSIM3v32qb, heres[5]->BSIM3v32qb, heres[6]->BSIM3v32qb, heres[7]->BSIM3v32qb}, (qbulk + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbd, heres[1]->BSIM3v32qbd, heres[2]->BSIM3v32qbd, heres[3]->BSIM3v32qbd, heres[4]->BSIM3v32qbd, heres[5]->BSIM3v32qbd, heres[6]->BSIM3v32qbd, 
heres[7]->BSIM3v32qbd})) + vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qbs, heres[1]->BSIM3v32qbs, heres[2]->BSIM3v32qbs, heres[3]->BSIM3v32qbs, heres[4]->BSIM3v32qbs, heres[5]->BSIM3v32qbs, heres[6]->BSIM3v32qbs, heres[7]->BSIM3v32qbs})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump, heres[4]->BSIM3v32qcdump, heres[5]->BSIM3v32qcdump, heres[6]->BSIM3v32qcdump, heres[7]->BSIM3v32qcdump}, qdef * ScalingFactor); + vec4_BSIM3v32_StateStore(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq, heres[4]->BSIM3v32qcheq, heres[5]->BSIM3v32qcheq, heres[6]->BSIM3v32qcheq, heres[7]->BSIM3v32qcheq}, qcheq); + } + + if (ckt->CKTmode & MODEINITSMSIG) + { + goto line1000; + } + + if (!ChargeComputationNeeded) + goto line850; + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb, heres[4]->BSIM3v32qb, heres[5]->BSIM3v32qb, heres[6]->BSIM3v32qb, heres[7]->BSIM3v32qb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb, heres[4]->BSIM3v32qb, heres[5]->BSIM3v32qb, heres[6]->BSIM3v32qb, heres[7]->BSIM3v32qb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg, heres[4]->BSIM3v32qg, heres[5]->BSIM3v32qg, heres[6]->BSIM3v32qg, heres[7]->BSIM3v32qg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg, heres[4]->BSIM3v32qg, heres[5]->BSIM3v32qg, heres[6]->BSIM3v32qg, heres[7]->BSIM3v32qg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32qd, 
heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd, heres[4]->BSIM3v32qd, heres[5]->BSIM3v32qd, heres[6]->BSIM3v32qd, heres[7]->BSIM3v32qd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd, heres[4]->BSIM3v32qd, heres[5]->BSIM3v32qd, heres[6]->BSIM3v32qd, heres[7]->BSIM3v32qd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq, heres[4]->BSIM3v32qcheq, heres[5]->BSIM3v32qcheq, heres[6]->BSIM3v32qcheq, heres[7]->BSIM3v32qcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq, heres[4]->BSIM3v32qcheq, heres[5]->BSIM3v32qcheq, heres[6]->BSIM3v32qcheq, heres[7]->BSIM3v32qcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump, heres[4]->BSIM3v32qcdump, heres[5]->BSIM3v32qcdump, heres[6]->BSIM3v32qcdump, heres[7]->BSIM3v32qcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump, heres[4]->BSIM3v32qcdump, heres[5]->BSIM3v32qcdump, heres[6]->BSIM3v32qcdump, heres[7]->BSIM3v32qcdump})); + } + + } + + error = vec8_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec8m ){heres[0]->BSIM3v32qb, heres[1]->BSIM3v32qb, heres[2]->BSIM3v32qb, heres[3]->BSIM3v32qb, heres[4]->BSIM3v32qb, heres[5]->BSIM3v32qb, heres[6]->BSIM3v32qb, heres[7]->BSIM3v32qb}); + if (SIMDANY(error)) + return error; + + error = vec8_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec8m ){heres[0]->BSIM3v32qg, heres[1]->BSIM3v32qg, heres[2]->BSIM3v32qg, heres[3]->BSIM3v32qg, heres[4]->BSIM3v32qg, heres[5]->BSIM3v32qg, heres[6]->BSIM3v32qg, heres[7]->BSIM3v32qg}); + if (SIMDANY(error)) + 
return error; + + error = vec8_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec8m ){heres[0]->BSIM3v32qd, heres[1]->BSIM3v32qd, heres[2]->BSIM3v32qd, heres[3]->BSIM3v32qd, heres[4]->BSIM3v32qd, heres[5]->BSIM3v32qd, heres[6]->BSIM3v32qd, heres[7]->BSIM3v32qd}); + if (SIMDANY(error)) + return error; + + if (heres[0]->BSIM3v32nqsMod) + { + error = vec8_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec8m ){heres[0]->BSIM3v32qcdump, heres[1]->BSIM3v32qcdump, heres[2]->BSIM3v32qcdump, heres[3]->BSIM3v32qcdump, heres[4]->BSIM3v32qcdump, heres[5]->BSIM3v32qcdump, heres[6]->BSIM3v32qcdump, heres[7]->BSIM3v32qcdump}); + if (SIMDANY(error)) + return error; + + error = vec8_NIintegrate(ckt, &geq, &ceq, 0.0, (Vec8m ){heres[0]->BSIM3v32qcheq, heres[1]->BSIM3v32qcheq, heres[2]->BSIM3v32qcheq, heres[3]->BSIM3v32qcheq, heres[4]->BSIM3v32qcheq, heres[5]->BSIM3v32qcheq, heres[6]->BSIM3v32qcheq, heres[7]->BSIM3v32qcheq}); + if (SIMDANY(error)) + return error; + + } + + goto line860; + line850: + ceqqg = (ceqqb = (ceqqd = vec8_SIMDTOVECTOR(0.0))); + + cqcheq = (cqdef = vec8_SIMDTOVECTOR(0.0)); + gcdgb = (gcddb = (gcdsb = vec8_SIMDTOVECTOR(0.0))); + gcsgb = (gcsdb = (gcssb = vec8_SIMDTOVECTOR(0.0))); + gcggb = (gcgdb = (gcgsb = vec8_SIMDTOVECTOR(0.0))); + gcbgb = (gcbdb = (gcbsb = vec8_SIMDTOVECTOR(0.0))); + gqdef = (gcqgb = (gcqdb = (gcqsb = (gcqbb = vec8_SIMDTOVECTOR(0.0))))); + ggtg = (ggtd = (ggtb = (ggts = vec8_SIMDTOVECTOR(0.0)))); + dxpart = vec8_SIMDTOVECTOR(0.6); + if (1) + { + Vec8m condmask0 = BSIM3v32mode; + Vec8m condmask_true0 = condmask0; + dxpart = vec8_blend(dxpart, vec8_SIMDTOVECTOR(0.4), condmask_true0); + } + + sxpart = 1.0 - dxpart; + ddxpart_dVd = (ddxpart_dVg = (ddxpart_dVb = (ddxpart_dVs = vec8_SIMDTOVECTOR(0.0)))); + dsxpart_dVd = (dsxpart_dVg = (dsxpart_dVb = (dsxpart_dVs = vec8_SIMDTOVECTOR(0.0)))); + if (heres[0]->BSIM3v32nqsMod) + { + Vec8d val = ((((16.0 * ((Vec8d ){heres[0]->BSIM3v32u0temp, heres[1]->BSIM3v32u0temp, heres[2]->BSIM3v32u0temp, heres[3]->BSIM3v32u0temp, 
heres[4]->BSIM3v32u0temp, heres[5]->BSIM3v32u0temp, heres[6]->BSIM3v32u0temp, heres[7]->BSIM3v32u0temp})) * model->BSIM3v32vtm) / pParam->BSIM3v32leffCV) / pParam->BSIM3v32leffCV) * ScalingFactor; + heres[0]->BSIM3v32gtau = val[0]; + heres[1]->BSIM3v32gtau = val[1]; + heres[2]->BSIM3v32gtau = val[2]; + heres[3]->BSIM3v32gtau = val[3]; + heres[4]->BSIM3v32gtau = val[4]; + heres[5]->BSIM3v32gtau = val[5]; + heres[6]->BSIM3v32gtau = val[6]; + heres[7]->BSIM3v32gtau = val[7]; + } + else + { + heres[0]->BSIM3v32gtau = 0.0; + heres[1]->BSIM3v32gtau = 0.0; + heres[2]->BSIM3v32gtau = 0.0; + heres[3]->BSIM3v32gtau = 0.0; + heres[4]->BSIM3v32gtau = 0.0; + heres[5]->BSIM3v32gtau = 0.0; + heres[6]->BSIM3v32gtau = 0.0; + heres[7]->BSIM3v32gtau = 0.0; + } + + goto line900; + line860: + cqgate = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg, heres[4]->BSIM3v32cqg, heres[5]->BSIM3v32cqg, heres[6]->BSIM3v32cqg, heres[7]->BSIM3v32cqg}); + + cqbulk = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb, heres[4]->BSIM3v32cqb, heres[5]->BSIM3v32cqb, heres[6]->BSIM3v32cqb, heres[7]->BSIM3v32cqb}); + cqdrn = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd, heres[4]->BSIM3v32cqd, heres[5]->BSIM3v32cqd, heres[6]->BSIM3v32cqd, heres[7]->BSIM3v32cqd}); + ceqqg = ((cqgate - (gcggb * vgb)) + (gcgdb * vbd)) + (gcgsb * vbs); + ceqqb = ((cqbulk - (gcbgb * vgb)) + (gcbdb * vbd)) + (gcbsb * vbs); + ceqqd = ((cqdrn - (gcdgb * vgb)) + (gcddb * vbd)) + (gcdsb * vbs); + if (heres[0]->BSIM3v32nqsMod) + { + T0 = ((ggtg * vgb) - (ggtd * vbd)) - (ggts * vbs); + ceqqg += T0; + T1 = qdef * ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, 
heres[5]->BSIM3v32gtau, heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau}); + ceqqd -= (dxpart * T0) + (T1 * (((ddxpart_dVg * vgb) - (ddxpart_dVd * vbd)) - (ddxpart_dVs * vbs))); + cqdef = vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump, heres[4]->BSIM3v32cqcdump, heres[5]->BSIM3v32cqcdump, heres[6]->BSIM3v32cqcdump, heres[7]->BSIM3v32cqcdump}) - (gqdef * qdef); + cqcheq = (vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq, heres[4]->BSIM3v32cqcheq, heres[5]->BSIM3v32cqcheq, heres[6]->BSIM3v32cqcheq, heres[7]->BSIM3v32cqcheq}) - (((gcqgb * vgb) - (gcqdb * vbd)) - (gcqsb * vbs))) + T0; + } + + if (ckt->CKTmode & MODEINITTRAN) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb, heres[4]->BSIM3v32cqb, heres[5]->BSIM3v32cqb, heres[6]->BSIM3v32cqb, heres[7]->BSIM3v32cqb}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqb, heres[1]->BSIM3v32cqb, heres[2]->BSIM3v32cqb, heres[3]->BSIM3v32cqb, heres[4]->BSIM3v32cqb, heres[5]->BSIM3v32cqb, heres[6]->BSIM3v32cqb, heres[7]->BSIM3v32cqb})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg, heres[4]->BSIM3v32cqg, heres[5]->BSIM3v32cqg, heres[6]->BSIM3v32cqg, heres[7]->BSIM3v32cqg}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqg, heres[1]->BSIM3v32cqg, heres[2]->BSIM3v32cqg, heres[3]->BSIM3v32cqg, heres[4]->BSIM3v32cqg, heres[5]->BSIM3v32cqg, heres[6]->BSIM3v32cqg, heres[7]->BSIM3v32cqg})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd, heres[4]->BSIM3v32cqd, heres[5]->BSIM3v32cqd, 
heres[6]->BSIM3v32cqd, heres[7]->BSIM3v32cqd}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqd, heres[1]->BSIM3v32cqd, heres[2]->BSIM3v32cqd, heres[3]->BSIM3v32cqd, heres[4]->BSIM3v32cqd, heres[5]->BSIM3v32cqd, heres[6]->BSIM3v32cqd, heres[7]->BSIM3v32cqd})); + if (heres[0]->BSIM3v32nqsMod) + { + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq, heres[4]->BSIM3v32cqcheq, heres[5]->BSIM3v32cqcheq, heres[6]->BSIM3v32cqcheq, heres[7]->BSIM3v32cqcheq}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqcheq, heres[1]->BSIM3v32cqcheq, heres[2]->BSIM3v32cqcheq, heres[3]->BSIM3v32cqcheq, heres[4]->BSIM3v32cqcheq, heres[5]->BSIM3v32cqcheq, heres[6]->BSIM3v32cqcheq, heres[7]->BSIM3v32cqcheq})); + vec4_BSIM3v32_StateStore(ckt->CKTstate1, (Vec8m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump, heres[4]->BSIM3v32cqcdump, heres[5]->BSIM3v32cqcdump, heres[6]->BSIM3v32cqcdump, heres[7]->BSIM3v32cqcdump}, vec4_BSIM3v32_StateAccess(ckt->CKTstate0, (Vec8m ){heres[0]->BSIM3v32cqcdump, heres[1]->BSIM3v32cqcdump, heres[2]->BSIM3v32cqcdump, heres[3]->BSIM3v32cqcdump, heres[4]->BSIM3v32cqcdump, heres[5]->BSIM3v32cqcdump, heres[6]->BSIM3v32cqcdump, heres[7]->BSIM3v32cqcdump})); + } + + } + + line900: + ; + + if (1) + { + Vec8m condmask0 = BSIM3v32mode; + Vec8m condmask_true0 = condmask0; + Vec8m condmask_false0 = ~condmask0; + { + Gm = vec8_blend(Gm, (Vec8d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm, heres[4]->BSIM3v32gm, heres[5]->BSIM3v32gm, heres[6]->BSIM3v32gm, heres[7]->BSIM3v32gm}, condmask_true0); + Gmbs = vec8_blend(Gmbs, (Vec8d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs, heres[4]->BSIM3v32gmbs, heres[5]->BSIM3v32gmbs, heres[6]->BSIM3v32gmbs, heres[7]->BSIM3v32gmbs}, 
condmask_true0); + FwdSum = vec8_blend(FwdSum, Gm + Gmbs, condmask_true0); + RevSum = vec8_blend(RevSum, vec8_SIMDTOVECTOR(0.0), condmask_true0); + cdreq = vec8_blend(cdreq, model->BSIM3v32type * (((cdrain - (((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds}) * vds)) - (Gm * vgs)) - (Gmbs * vbs)), condmask_true0); + ceqbd = vec8_blend(ceqbd, (-model->BSIM3v32type) * (((((Vec8d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub, heres[4]->BSIM3v32csub, heres[5]->BSIM3v32csub, heres[6]->BSIM3v32csub, heres[7]->BSIM3v32csub}) - (((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}) * vds)) - (((Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs}) * vgs)) - (((Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}) * vbs)), condmask_true0); + ceqbs = vec8_blend(ceqbs, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gbbdp = vec8_blend(gbbdp, -((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}), condmask_true0); + gbbsp = vec8_blend(gbbsp, (((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}) + ((Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, 
heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs})) + ((Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}), condmask_true0); + gbdpg = vec8_blend(gbdpg, (Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs}, condmask_true0); + gbdpdp = vec8_blend(gbdpdp, (Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}, condmask_true0); + gbdpb = vec8_blend(gbdpb, (Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}, condmask_true0); + gbdpsp = vec8_blend(gbdpsp, -((gbdpg + gbdpdp) + gbdpb), condmask_true0); + gbspg = vec8_blend(gbspg, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gbspdp = vec8_blend(gbspdp, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gbspb = vec8_blend(gbspb, vec8_SIMDTOVECTOR(0.0), condmask_true0); + gbspsp = vec8_blend(gbspsp, vec8_SIMDTOVECTOR(0.0), condmask_true0); + } + { + Gm = vec8_blend(Gm, -((Vec8d ){heres[0]->BSIM3v32gm, heres[1]->BSIM3v32gm, heres[2]->BSIM3v32gm, heres[3]->BSIM3v32gm, heres[4]->BSIM3v32gm, heres[5]->BSIM3v32gm, heres[6]->BSIM3v32gm, heres[7]->BSIM3v32gm}), condmask_false0); + Gmbs = vec8_blend(Gmbs, -((Vec8d ){heres[0]->BSIM3v32gmbs, heres[1]->BSIM3v32gmbs, heres[2]->BSIM3v32gmbs, heres[3]->BSIM3v32gmbs, heres[4]->BSIM3v32gmbs, heres[5]->BSIM3v32gmbs, heres[6]->BSIM3v32gmbs, heres[7]->BSIM3v32gmbs}), condmask_false0); + FwdSum = vec8_blend(FwdSum, 
vec8_SIMDTOVECTOR(0.0), condmask_false0); + RevSum = vec8_blend(RevSum, -(Gm + Gmbs), condmask_false0); + cdreq = vec8_blend(cdreq, (-model->BSIM3v32type) * (((cdrain + (((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds}) * vds)) + (Gm * vgd)) + (Gmbs * vbd)), condmask_false0); + ceqbs = vec8_blend(ceqbs, (-model->BSIM3v32type) * (((((Vec8d ){heres[0]->BSIM3v32csub, heres[1]->BSIM3v32csub, heres[2]->BSIM3v32csub, heres[3]->BSIM3v32csub, heres[4]->BSIM3v32csub, heres[5]->BSIM3v32csub, heres[6]->BSIM3v32csub, heres[7]->BSIM3v32csub}) + (((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}) * vds)) - (((Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs}) * vgd)) - (((Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}) * vbd)), condmask_false0); + ceqbd = vec8_blend(ceqbd, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gbbsp = vec8_blend(gbbsp, -((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}), condmask_false0); + gbbdp = vec8_blend(gbbdp, (((Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}) + ((Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, 
heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs})) + ((Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}), condmask_false0); + gbdpg = vec8_blend(gbdpg, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gbdpsp = vec8_blend(gbdpsp, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gbdpb = vec8_blend(gbdpb, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gbdpdp = vec8_blend(gbdpdp, vec8_SIMDTOVECTOR(0.0), condmask_false0); + gbspg = vec8_blend(gbspg, (Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs}, condmask_false0); + gbspsp = vec8_blend(gbspsp, (Vec8d ){heres[0]->BSIM3v32gbds, heres[1]->BSIM3v32gbds, heres[2]->BSIM3v32gbds, heres[3]->BSIM3v32gbds, heres[4]->BSIM3v32gbds, heres[5]->BSIM3v32gbds, heres[6]->BSIM3v32gbds, heres[7]->BSIM3v32gbds}, condmask_false0); + gbspb = vec8_blend(gbspb, (Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs}, condmask_false0); + gbspdp = vec8_blend(gbspdp, -((gbspg + gbspsp) + gbspb), condmask_false0); + } + } + + if (model->BSIM3v32type > 0) + { + ceqbs += ((Vec8d ){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs, heres[4]->BSIM3v32cbs, heres[5]->BSIM3v32cbs, heres[6]->BSIM3v32cbs, heres[7]->BSIM3v32cbs}) - (((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs}) * vbs); + ceqbd += ((Vec8d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, 
heres[3]->BSIM3v32cbd, heres[4]->BSIM3v32cbd, heres[5]->BSIM3v32cbd, heres[6]->BSIM3v32cbd, heres[7]->BSIM3v32cbd}) - (((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd}) * vbd); + } + else + { + ceqbs -= ((Vec8d ){heres[0]->BSIM3v32cbs, heres[1]->BSIM3v32cbs, heres[2]->BSIM3v32cbs, heres[3]->BSIM3v32cbs, heres[4]->BSIM3v32cbs, heres[5]->BSIM3v32cbs, heres[6]->BSIM3v32cbs, heres[7]->BSIM3v32cbs}) - (((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs}) * vbs); + ceqbd -= ((Vec8d ){heres[0]->BSIM3v32cbd, heres[1]->BSIM3v32cbd, heres[2]->BSIM3v32cbd, heres[3]->BSIM3v32cbd, heres[4]->BSIM3v32cbd, heres[5]->BSIM3v32cbd, heres[6]->BSIM3v32cbd, heres[7]->BSIM3v32cbd}) - (((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd}) * vbd); + ceqqg = -ceqqg; + ceqqb = -ceqqb; + ceqqd = -ceqqd; + cqdef = -cqdef; + cqcheq = -cqcheq; + } + + m = (Vec8d ){heres[0]->BSIM3v32m, heres[1]->BSIM3v32m, heres[2]->BSIM3v32m, heres[3]->BSIM3v32m, heres[4]->BSIM3v32m, heres[5]->BSIM3v32m, heres[6]->BSIM3v32m, heres[7]->BSIM3v32m}; + vec4_BSIM3v32_StateSub(ckt->CKTrhs, (Vec8m ){heres[0]->BSIM3v32gNode, heres[1]->BSIM3v32gNode, heres[2]->BSIM3v32gNode, heres[3]->BSIM3v32gNode, heres[4]->BSIM3v32gNode, heres[5]->BSIM3v32gNode, heres[6]->BSIM3v32gNode, heres[7]->BSIM3v32gNode}, m * ceqqg); + vec4_BSIM3v32_StateSub(ckt->CKTrhs, (Vec8m ){heres[0]->BSIM3v32bNode, heres[1]->BSIM3v32bNode, heres[2]->BSIM3v32bNode, heres[3]->BSIM3v32bNode, heres[4]->BSIM3v32bNode, heres[5]->BSIM3v32bNode, heres[6]->BSIM3v32bNode, heres[7]->BSIM3v32bNode}, m * ((ceqbs + ceqbd) + ceqqb)); + 
vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec8m ){heres[0]->BSIM3v32dNodePrime, heres[1]->BSIM3v32dNodePrime, heres[2]->BSIM3v32dNodePrime, heres[3]->BSIM3v32dNodePrime, heres[4]->BSIM3v32dNodePrime, heres[5]->BSIM3v32dNodePrime, heres[6]->BSIM3v32dNodePrime, heres[7]->BSIM3v32dNodePrime}, m * ((ceqbd - cdreq) - ceqqd)); + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec8m ){heres[0]->BSIM3v32sNodePrime, heres[1]->BSIM3v32sNodePrime, heres[2]->BSIM3v32sNodePrime, heres[3]->BSIM3v32sNodePrime, heres[4]->BSIM3v32sNodePrime, heres[5]->BSIM3v32sNodePrime, heres[6]->BSIM3v32sNodePrime, heres[7]->BSIM3v32sNodePrime}, m * ((((cdreq + ceqbs) + ceqqg) + ceqqb) + ceqqd)); + if (heres[0]->BSIM3v32nqsMod) + vec4_BSIM3v32_StateAdd(ckt->CKTrhs, (Vec8m ){heres[0]->BSIM3v32qNode, heres[1]->BSIM3v32qNode, heres[2]->BSIM3v32qNode, heres[3]->BSIM3v32qNode, heres[4]->BSIM3v32qNode, heres[5]->BSIM3v32qNode, heres[6]->BSIM3v32qNode, heres[7]->BSIM3v32qNode}, m * (cqcheq - cqdef)); + + T1 = qdef * ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, heres[5]->BSIM3v32gtau, heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau}); + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance, heres[4]->BSIM3v32drainConductance, heres[5]->BSIM3v32drainConductance, heres[6]->BSIM3v32drainConductance, heres[7]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DdPtr += val[0]; + *heres[1]->BSIM3v32DdPtr += val[1]; + *heres[2]->BSIM3v32DdPtr += val[2]; + *heres[3]->BSIM3v32DdPtr += val[3]; + *heres[4]->BSIM3v32DdPtr += val[4]; + *heres[5]->BSIM3v32DdPtr += val[5]; + *heres[6]->BSIM3v32DdPtr += val[6]; + *heres[7]->BSIM3v32DdPtr += val[7]; + } + { + Vec8d val = m * (gcggb - ggtg); + *heres[0]->BSIM3v32GgPtr += val[0]; + *heres[1]->BSIM3v32GgPtr += val[1]; + *heres[2]->BSIM3v32GgPtr += val[2]; + *heres[3]->BSIM3v32GgPtr += 
val[3]; + *heres[4]->BSIM3v32GgPtr += val[4]; + *heres[5]->BSIM3v32GgPtr += val[5]; + *heres[6]->BSIM3v32GgPtr += val[6]; + *heres[7]->BSIM3v32GgPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance, heres[4]->BSIM3v32sourceConductance, heres[5]->BSIM3v32sourceConductance, heres[6]->BSIM3v32sourceConductance, heres[7]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SsPtr += val[0]; + *heres[1]->BSIM3v32SsPtr += val[1]; + *heres[2]->BSIM3v32SsPtr += val[2]; + *heres[3]->BSIM3v32SsPtr += val[3]; + *heres[4]->BSIM3v32SsPtr += val[4]; + *heres[5]->BSIM3v32SsPtr += val[5]; + *heres[6]->BSIM3v32SsPtr += val[6]; + *heres[7]->BSIM3v32SsPtr += val[7]; + } + { + Vec8d val = m * (((((((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd}) + ((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs})) - gcbgb) - gcbdb) - gcbsb) - ((Vec8d ){heres[0]->BSIM3v32gbbs, heres[1]->BSIM3v32gbbs, heres[2]->BSIM3v32gbbs, heres[3]->BSIM3v32gbbs, heres[4]->BSIM3v32gbbs, heres[5]->BSIM3v32gbbs, heres[6]->BSIM3v32gbbs, heres[7]->BSIM3v32gbbs})); + *heres[0]->BSIM3v32BbPtr += val[0]; + *heres[1]->BSIM3v32BbPtr += val[1]; + *heres[2]->BSIM3v32BbPtr += val[2]; + *heres[3]->BSIM3v32BbPtr += val[3]; + *heres[4]->BSIM3v32BbPtr += val[4]; + *heres[5]->BSIM3v32BbPtr += val[5]; + *heres[6]->BSIM3v32BbPtr += val[6]; + *heres[7]->BSIM3v32BbPtr += val[7]; + } + { + Vec8d val = m * (((((((((Vec8d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance, heres[4]->BSIM3v32drainConductance, 
heres[5]->BSIM3v32drainConductance, heres[6]->BSIM3v32drainConductance, heres[7]->BSIM3v32drainConductance}) + ((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds})) + ((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd})) + RevSum) + gcddb) + (dxpart * ggtd)) + (T1 * ddxpart_dVd)) + gbdpdp); + *heres[0]->BSIM3v32DPdpPtr += val[0]; + *heres[1]->BSIM3v32DPdpPtr += val[1]; + *heres[2]->BSIM3v32DPdpPtr += val[2]; + *heres[3]->BSIM3v32DPdpPtr += val[3]; + *heres[4]->BSIM3v32DPdpPtr += val[4]; + *heres[5]->BSIM3v32DPdpPtr += val[5]; + *heres[6]->BSIM3v32DPdpPtr += val[6]; + *heres[7]->BSIM3v32DPdpPtr += val[7]; + } + { + Vec8d val = m * (((((((((Vec8d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance, heres[4]->BSIM3v32sourceConductance, heres[5]->BSIM3v32sourceConductance, heres[6]->BSIM3v32sourceConductance, heres[7]->BSIM3v32sourceConductance}) + ((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds})) + ((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs})) + FwdSum) + gcssb) + (sxpart * ggts)) + (T1 * dsxpart_dVs)) + gbspsp); + *heres[0]->BSIM3v32SPspPtr += val[0]; + *heres[1]->BSIM3v32SPspPtr += val[1]; + *heres[2]->BSIM3v32SPspPtr += val[2]; + *heres[3]->BSIM3v32SPspPtr += val[3]; + *heres[4]->BSIM3v32SPspPtr += val[4]; + *heres[5]->BSIM3v32SPspPtr += val[5]; + *heres[6]->BSIM3v32SPspPtr += val[6]; + 
*heres[7]->BSIM3v32SPspPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance, heres[4]->BSIM3v32drainConductance, heres[5]->BSIM3v32drainConductance, heres[6]->BSIM3v32drainConductance, heres[7]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DdpPtr -= val[0]; + *heres[1]->BSIM3v32DdpPtr -= val[1]; + *heres[2]->BSIM3v32DdpPtr -= val[2]; + *heres[3]->BSIM3v32DdpPtr -= val[3]; + *heres[4]->BSIM3v32DdpPtr -= val[4]; + *heres[5]->BSIM3v32DdpPtr -= val[5]; + *heres[6]->BSIM3v32DdpPtr -= val[6]; + *heres[7]->BSIM3v32DdpPtr -= val[7]; + } + { + Vec8d val = m * (((gcggb + gcgdb) + gcgsb) + ggtb); + *heres[0]->BSIM3v32GbPtr -= val[0]; + *heres[1]->BSIM3v32GbPtr -= val[1]; + *heres[2]->BSIM3v32GbPtr -= val[2]; + *heres[3]->BSIM3v32GbPtr -= val[3]; + *heres[4]->BSIM3v32GbPtr -= val[4]; + *heres[5]->BSIM3v32GbPtr -= val[5]; + *heres[6]->BSIM3v32GbPtr -= val[6]; + *heres[7]->BSIM3v32GbPtr -= val[7]; + } + { + Vec8d val = m * (gcgdb - ggtd); + *heres[0]->BSIM3v32GdpPtr += val[0]; + *heres[1]->BSIM3v32GdpPtr += val[1]; + *heres[2]->BSIM3v32GdpPtr += val[2]; + *heres[3]->BSIM3v32GdpPtr += val[3]; + *heres[4]->BSIM3v32GdpPtr += val[4]; + *heres[5]->BSIM3v32GdpPtr += val[5]; + *heres[6]->BSIM3v32GdpPtr += val[6]; + *heres[7]->BSIM3v32GdpPtr += val[7]; + } + { + Vec8d val = m * (gcgsb - ggts); + *heres[0]->BSIM3v32GspPtr += val[0]; + *heres[1]->BSIM3v32GspPtr += val[1]; + *heres[2]->BSIM3v32GspPtr += val[2]; + *heres[3]->BSIM3v32GspPtr += val[3]; + *heres[4]->BSIM3v32GspPtr += val[4]; + *heres[5]->BSIM3v32GspPtr += val[5]; + *heres[6]->BSIM3v32GspPtr += val[6]; + *heres[7]->BSIM3v32GspPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance, heres[4]->BSIM3v32sourceConductance, 
heres[5]->BSIM3v32sourceConductance, heres[6]->BSIM3v32sourceConductance, heres[7]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SspPtr -= val[0]; + *heres[1]->BSIM3v32SspPtr -= val[1]; + *heres[2]->BSIM3v32SspPtr -= val[2]; + *heres[3]->BSIM3v32SspPtr -= val[3]; + *heres[4]->BSIM3v32SspPtr -= val[4]; + *heres[5]->BSIM3v32SspPtr -= val[5]; + *heres[6]->BSIM3v32SspPtr -= val[6]; + *heres[7]->BSIM3v32SspPtr -= val[7]; + } + { + Vec8d val = m * (gcbgb - ((Vec8d ){heres[0]->BSIM3v32gbgs, heres[1]->BSIM3v32gbgs, heres[2]->BSIM3v32gbgs, heres[3]->BSIM3v32gbgs, heres[4]->BSIM3v32gbgs, heres[5]->BSIM3v32gbgs, heres[6]->BSIM3v32gbgs, heres[7]->BSIM3v32gbgs})); + *heres[0]->BSIM3v32BgPtr += val[0]; + *heres[1]->BSIM3v32BgPtr += val[1]; + *heres[2]->BSIM3v32BgPtr += val[2]; + *heres[3]->BSIM3v32BgPtr += val[3]; + *heres[4]->BSIM3v32BgPtr += val[4]; + *heres[5]->BSIM3v32BgPtr += val[5]; + *heres[6]->BSIM3v32BgPtr += val[6]; + *heres[7]->BSIM3v32BgPtr += val[7]; + } + { + Vec8d val = m * ((gcbdb - ((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd})) + gbbdp); + *heres[0]->BSIM3v32BdpPtr += val[0]; + *heres[1]->BSIM3v32BdpPtr += val[1]; + *heres[2]->BSIM3v32BdpPtr += val[2]; + *heres[3]->BSIM3v32BdpPtr += val[3]; + *heres[4]->BSIM3v32BdpPtr += val[4]; + *heres[5]->BSIM3v32BdpPtr += val[5]; + *heres[6]->BSIM3v32BdpPtr += val[6]; + *heres[7]->BSIM3v32BdpPtr += val[7]; + } + { + Vec8d val = m * ((gcbsb - ((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs})) + gbbsp); + *heres[0]->BSIM3v32BspPtr += val[0]; + *heres[1]->BSIM3v32BspPtr += val[1]; + *heres[2]->BSIM3v32BspPtr += val[2]; + *heres[3]->BSIM3v32BspPtr += val[3]; + *heres[4]->BSIM3v32BspPtr += val[4]; + *heres[5]->BSIM3v32BspPtr += val[5]; 
+ *heres[6]->BSIM3v32BspPtr += val[6]; + *heres[7]->BSIM3v32BspPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32drainConductance, heres[1]->BSIM3v32drainConductance, heres[2]->BSIM3v32drainConductance, heres[3]->BSIM3v32drainConductance, heres[4]->BSIM3v32drainConductance, heres[5]->BSIM3v32drainConductance, heres[6]->BSIM3v32drainConductance, heres[7]->BSIM3v32drainConductance}); + *heres[0]->BSIM3v32DPdPtr -= val[0]; + *heres[1]->BSIM3v32DPdPtr -= val[1]; + *heres[2]->BSIM3v32DPdPtr -= val[2]; + *heres[3]->BSIM3v32DPdPtr -= val[3]; + *heres[4]->BSIM3v32DPdPtr -= val[4]; + *heres[5]->BSIM3v32DPdPtr -= val[5]; + *heres[6]->BSIM3v32DPdPtr -= val[6]; + *heres[7]->BSIM3v32DPdPtr -= val[7]; + } + { + Vec8d val = m * ((((Gm + gcdgb) + (dxpart * ggtg)) + (T1 * ddxpart_dVg)) + gbdpg); + *heres[0]->BSIM3v32DPgPtr += val[0]; + *heres[1]->BSIM3v32DPgPtr += val[1]; + *heres[2]->BSIM3v32DPgPtr += val[2]; + *heres[3]->BSIM3v32DPgPtr += val[3]; + *heres[4]->BSIM3v32DPgPtr += val[4]; + *heres[5]->BSIM3v32DPgPtr += val[5]; + *heres[6]->BSIM3v32DPgPtr += val[6]; + *heres[7]->BSIM3v32DPgPtr += val[7]; + } + { + Vec8d val = m * (((((((((Vec8d ){heres[0]->BSIM3v32gbd, heres[1]->BSIM3v32gbd, heres[2]->BSIM3v32gbd, heres[3]->BSIM3v32gbd, heres[4]->BSIM3v32gbd, heres[5]->BSIM3v32gbd, heres[6]->BSIM3v32gbd, heres[7]->BSIM3v32gbd}) - Gmbs) + gcdgb) + gcddb) + gcdsb) - (dxpart * ggtb)) - (T1 * ddxpart_dVb)) - gbdpb); + *heres[0]->BSIM3v32DPbPtr -= val[0]; + *heres[1]->BSIM3v32DPbPtr -= val[1]; + *heres[2]->BSIM3v32DPbPtr -= val[2]; + *heres[3]->BSIM3v32DPbPtr -= val[3]; + *heres[4]->BSIM3v32DPbPtr -= val[4]; + *heres[5]->BSIM3v32DPbPtr -= val[5]; + *heres[6]->BSIM3v32DPbPtr -= val[6]; + *heres[7]->BSIM3v32DPbPtr -= val[7]; + } + { + Vec8d val = m * (((((((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds}) + FwdSum) - gcdsb) - 
(dxpart * ggts)) - (T1 * ddxpart_dVs)) - gbdpsp); + *heres[0]->BSIM3v32DPspPtr -= val[0]; + *heres[1]->BSIM3v32DPspPtr -= val[1]; + *heres[2]->BSIM3v32DPspPtr -= val[2]; + *heres[3]->BSIM3v32DPspPtr -= val[3]; + *heres[4]->BSIM3v32DPspPtr -= val[4]; + *heres[5]->BSIM3v32DPspPtr -= val[5]; + *heres[6]->BSIM3v32DPspPtr -= val[6]; + *heres[7]->BSIM3v32DPspPtr -= val[7]; + } + { + Vec8d val = m * ((((gcsgb - Gm) + (sxpart * ggtg)) + (T1 * dsxpart_dVg)) + gbspg); + *heres[0]->BSIM3v32SPgPtr += val[0]; + *heres[1]->BSIM3v32SPgPtr += val[1]; + *heres[2]->BSIM3v32SPgPtr += val[2]; + *heres[3]->BSIM3v32SPgPtr += val[3]; + *heres[4]->BSIM3v32SPgPtr += val[4]; + *heres[5]->BSIM3v32SPgPtr += val[5]; + *heres[6]->BSIM3v32SPgPtr += val[6]; + *heres[7]->BSIM3v32SPgPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32sourceConductance, heres[1]->BSIM3v32sourceConductance, heres[2]->BSIM3v32sourceConductance, heres[3]->BSIM3v32sourceConductance, heres[4]->BSIM3v32sourceConductance, heres[5]->BSIM3v32sourceConductance, heres[6]->BSIM3v32sourceConductance, heres[7]->BSIM3v32sourceConductance}); + *heres[0]->BSIM3v32SPsPtr -= val[0]; + *heres[1]->BSIM3v32SPsPtr -= val[1]; + *heres[2]->BSIM3v32SPsPtr -= val[2]; + *heres[3]->BSIM3v32SPsPtr -= val[3]; + *heres[4]->BSIM3v32SPsPtr -= val[4]; + *heres[5]->BSIM3v32SPsPtr -= val[5]; + *heres[6]->BSIM3v32SPsPtr -= val[6]; + *heres[7]->BSIM3v32SPsPtr -= val[7]; + } + { + Vec8d val = m * (((((((((Vec8d ){heres[0]->BSIM3v32gbs, heres[1]->BSIM3v32gbs, heres[2]->BSIM3v32gbs, heres[3]->BSIM3v32gbs, heres[4]->BSIM3v32gbs, heres[5]->BSIM3v32gbs, heres[6]->BSIM3v32gbs, heres[7]->BSIM3v32gbs}) + Gmbs) + gcsgb) + gcsdb) + gcssb) - (sxpart * ggtb)) - (T1 * dsxpart_dVb)) - gbspb); + *heres[0]->BSIM3v32SPbPtr -= val[0]; + *heres[1]->BSIM3v32SPbPtr -= val[1]; + *heres[2]->BSIM3v32SPbPtr -= val[2]; + *heres[3]->BSIM3v32SPbPtr -= val[3]; + *heres[4]->BSIM3v32SPbPtr -= val[4]; + *heres[5]->BSIM3v32SPbPtr -= val[5]; + *heres[6]->BSIM3v32SPbPtr -= 
val[6]; + *heres[7]->BSIM3v32SPbPtr -= val[7]; + } + { + Vec8d val = m * (((((((Vec8d ){heres[0]->BSIM3v32gds, heres[1]->BSIM3v32gds, heres[2]->BSIM3v32gds, heres[3]->BSIM3v32gds, heres[4]->BSIM3v32gds, heres[5]->BSIM3v32gds, heres[6]->BSIM3v32gds, heres[7]->BSIM3v32gds}) + RevSum) - gcsdb) - (sxpart * ggtd)) - (T1 * dsxpart_dVd)) - gbspdp); + *heres[0]->BSIM3v32SPdpPtr -= val[0]; + *heres[1]->BSIM3v32SPdpPtr -= val[1]; + *heres[2]->BSIM3v32SPdpPtr -= val[2]; + *heres[3]->BSIM3v32SPdpPtr -= val[3]; + *heres[4]->BSIM3v32SPdpPtr -= val[4]; + *heres[5]->BSIM3v32SPdpPtr -= val[5]; + *heres[6]->BSIM3v32SPdpPtr -= val[6]; + *heres[7]->BSIM3v32SPdpPtr -= val[7]; + } + if (heres[0]->BSIM3v32nqsMod) + { + { + Vec8d val = m * (gqdef + ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, heres[5]->BSIM3v32gtau, heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau})); + *heres[0]->BSIM3v32QqPtr += val[0]; + *heres[1]->BSIM3v32QqPtr += val[1]; + *heres[2]->BSIM3v32QqPtr += val[2]; + *heres[3]->BSIM3v32QqPtr += val[3]; + *heres[4]->BSIM3v32QqPtr += val[4]; + *heres[5]->BSIM3v32QqPtr += val[5]; + *heres[6]->BSIM3v32QqPtr += val[6]; + *heres[7]->BSIM3v32QqPtr += val[7]; + } + { + Vec8d val = m * (dxpart * ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, heres[5]->BSIM3v32gtau, heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau})); + *heres[0]->BSIM3v32DPqPtr += val[0]; + *heres[1]->BSIM3v32DPqPtr += val[1]; + *heres[2]->BSIM3v32DPqPtr += val[2]; + *heres[3]->BSIM3v32DPqPtr += val[3]; + *heres[4]->BSIM3v32DPqPtr += val[4]; + *heres[5]->BSIM3v32DPqPtr += val[5]; + *heres[6]->BSIM3v32DPqPtr += val[6]; + *heres[7]->BSIM3v32DPqPtr += val[7]; + } + { + Vec8d val = m * (sxpart * ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, heres[5]->BSIM3v32gtau, 
heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau})); + *heres[0]->BSIM3v32SPqPtr += val[0]; + *heres[1]->BSIM3v32SPqPtr += val[1]; + *heres[2]->BSIM3v32SPqPtr += val[2]; + *heres[3]->BSIM3v32SPqPtr += val[3]; + *heres[4]->BSIM3v32SPqPtr += val[4]; + *heres[5]->BSIM3v32SPqPtr += val[5]; + *heres[6]->BSIM3v32SPqPtr += val[6]; + *heres[7]->BSIM3v32SPqPtr += val[7]; + } + { + Vec8d val = m * ((Vec8d ){heres[0]->BSIM3v32gtau, heres[1]->BSIM3v32gtau, heres[2]->BSIM3v32gtau, heres[3]->BSIM3v32gtau, heres[4]->BSIM3v32gtau, heres[5]->BSIM3v32gtau, heres[6]->BSIM3v32gtau, heres[7]->BSIM3v32gtau}); + *heres[0]->BSIM3v32GqPtr -= val[0]; + *heres[1]->BSIM3v32GqPtr -= val[1]; + *heres[2]->BSIM3v32GqPtr -= val[2]; + *heres[3]->BSIM3v32GqPtr -= val[3]; + *heres[4]->BSIM3v32GqPtr -= val[4]; + *heres[5]->BSIM3v32GqPtr -= val[5]; + *heres[6]->BSIM3v32GqPtr -= val[6]; + *heres[7]->BSIM3v32GqPtr -= val[7]; + } + { + Vec8d val = m * (ggtg - gcqgb); + *heres[0]->BSIM3v32QgPtr += val[0]; + *heres[1]->BSIM3v32QgPtr += val[1]; + *heres[2]->BSIM3v32QgPtr += val[2]; + *heres[3]->BSIM3v32QgPtr += val[3]; + *heres[4]->BSIM3v32QgPtr += val[4]; + *heres[5]->BSIM3v32QgPtr += val[5]; + *heres[6]->BSIM3v32QgPtr += val[6]; + *heres[7]->BSIM3v32QgPtr += val[7]; + } + { + Vec8d val = m * (ggtd - gcqdb); + *heres[0]->BSIM3v32QdpPtr += val[0]; + *heres[1]->BSIM3v32QdpPtr += val[1]; + *heres[2]->BSIM3v32QdpPtr += val[2]; + *heres[3]->BSIM3v32QdpPtr += val[3]; + *heres[4]->BSIM3v32QdpPtr += val[4]; + *heres[5]->BSIM3v32QdpPtr += val[5]; + *heres[6]->BSIM3v32QdpPtr += val[6]; + *heres[7]->BSIM3v32QdpPtr += val[7]; + } + { + Vec8d val = m * (ggts - gcqsb); + *heres[0]->BSIM3v32QspPtr += val[0]; + *heres[1]->BSIM3v32QspPtr += val[1]; + *heres[2]->BSIM3v32QspPtr += val[2]; + *heres[3]->BSIM3v32QspPtr += val[3]; + *heres[4]->BSIM3v32QspPtr += val[4]; + *heres[5]->BSIM3v32QspPtr += val[5]; + *heres[6]->BSIM3v32QspPtr += val[6]; + *heres[7]->BSIM3v32QspPtr += val[7]; + } + { + Vec8d val = m * (ggtb - gcqbb); + 
*heres[0]->BSIM3v32QbPtr += val[0]; + *heres[1]->BSIM3v32QbPtr += val[1]; + *heres[2]->BSIM3v32QbPtr += val[2]; + *heres[3]->BSIM3v32QbPtr += val[3]; + *heres[4]->BSIM3v32QbPtr += val[4]; + *heres[5]->BSIM3v32QbPtr += val[5]; + *heres[6]->BSIM3v32QbPtr += val[6]; + *heres[7]->BSIM3v32QbPtr += val[7]; + } + } + + line1000: + ; + + return OK; +} + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldsimd.c b/src/spicelib/devices/bsim3v32/b3v32ldsimd.c new file mode 100644 index 000000000..cb53d7545 --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldsimd.c @@ -0,0 +1,347 @@ +/******************************************************************************* + * Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design + ******************************************************************************* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include +#include + +#include "ngspice/ngspice.h" +#include "ngspice/cktdefs.h" +#include "bsim3v32def.h" +#include "b3v32acm.h" +#include "ngspice/trandefs.h" +#include "ngspice/const.h" +#include "ngspice/sperror.h" +#include "ngspice/devdefs.h" +#include "ngspice/suffix.h" + +#define MAX_EXP 5.834617425e14 +#define MIN_EXP 1.713908431e-15 +#define EXP_THRESHOLD 34.0 +#define EPSOX 3.453133e-11 +#define EPSSI 1.03594e-10 +#define Charge_q 1.60219e-19 +#define DELTA_1 0.02 +#define DELTA_2 0.02 +#define DELTA_3 0.02 +#define DELTA_4 0.02 + +#define USEX86INTRINSICS 1 + +typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD))); +typedef long int Vec4m __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD))); + + +#define SIMDANY(err) (err!=0) +#define SIMDIFYCMD(cmd) /* empty */ +#define SIMDifySaveScope(sc) /* empty */ + +#define vec4_pow0p7(x,p) vec4_mypow(x,p) +#define vec4_powMJ(x,p) vec4_mypow(x,p) +#define vec4_powMJSW(x,p) vec4_mypow(x,p) +#define vec4_powMJSWG(x,p) vec4_mypow(x,p) + +#if USEX86INTRINSICS==1 +/* libmvec prototypes */ +/* Caution: those libmvec functions are not as precise as std libm */ +__m256d _ZGVdN4v_exp(__m256d x); +__m256d _ZGVdN4v_log(__m256d x); + +#define vec4_MAX(a,b) _mm256_max_pd(a,b) +#define vec4_exp(a) _ZGVdN4v_exp(a) 
+#define vec4_log(a) _ZGVdN4v_log(a) +#define vec4_sqrt(a) _mm256_sqrt_pd(a) + + +static inline Vec4d vec4_blend(Vec4d fa, Vec4d tr, Vec4m mask) +{ + return _mm256_blendv_pd(fa,tr, (Vec4d) mask); +} + +static inline Vec4d vec4_fabs(Vec4d x) +{ + return vec4_blend(x,-x,x<0); +} + +#else +/* vector-libm prototypes */ +Vec4d vec4_exp_vectorlibm(Vec4d x); /* defined in vec4_exp.c */ +Vec4d vec4_log_vectorlibm(Vec4d x); /* defined in vec4_log.c */ +#define vec4_exp(a) vec4_exp_vectorlibm(a) +#define vec4_log(a) vec4_log_vectorlibm(a) +static inline Vec4d vec4_MAX(Vec4d a, Vec4d b) +{ + return vec4_blend(a,b,apParam; /* same of all NSIMD instances */ + +#if NSIMD==4 +#ifdef USE_OMP + #pragma message "Use OMP SIMD4 version" + #include "b3v32ldseq_simd4_omp.c" +#else + #include "b3v32ldseq_simd4.c" +#endif +#elif NSIMD==8 +#ifdef USE_OMP + #pragma message "Use OMP SIMD8 version" + #include "b3v32ldseq_simd8_omp.c" +#else + #include "b3v32ldseq_simd8.c" +#endif +#else +#error Unsupported value for NSIMD +#endif + + return(OK); + +} + diff --git a/src/spicelib/devices/bsim3v32/b3v32ldsimd8.c b/src/spicelib/devices/bsim3v32/b3v32ldsimd8.c new file mode 100644 index 000000000..86751196c --- /dev/null +++ b/src/spicelib/devices/bsim3v32/b3v32ldsimd8.c @@ -0,0 +1,274 @@ +/******************************************************************************* + * Copyright 2020 Florian Ballenegger, Anamosic Ballenegger Design + ******************************************************************************* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ **************************** +/* draft version, not tested, not even compiled */ + +#include +#include +#include + +#include "ngspice/ngspice.h" +#include "ngspice/cktdefs.h" +#include "bsim3v32def.h" +#include "b3v32acm.h" +#include "ngspice/trandefs.h" +#include "ngspice/const.h" +#include "ngspice/sperror.h" +#include "ngspice/devdefs.h" +#include "ngspice/suffix.h" + +#define MAX_EXP 5.834617425e14 +#define MIN_EXP 1.713908431e-15 +#define EXP_THRESHOLD 34.0 +#define EPSOX 3.453133e-11 +#define EPSSI 1.03594e-10 +#define Charge_q 1.60219e-19 +#define DELTA_1 0.02 +#define DELTA_2 0.02 +#define DELTA_3 0.02 +#define DELTA_4 0.02 + +#define NSIMD 8 +#define USEX86INTRINSICS 1 + +typedef double Vec8d __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD))); +typedef long int Vec8m __attribute__ ((vector_size (sizeof(double)*NSIMD), aligned (sizeof(double)*NSIMD))); + + +#define SIMDANY(err) (err!=0) +#define SIMDIFYCMD(cmd) /* empty */ +#define SIMDifySaveScope(sc) /* empty */ +#define SIMDVECTORMACRO(val) ((Vec8d) {val,val,val,val}) +#define SIMDVECTOR(val) vec8_SIMDTOVECTOR(val) +#define SIMDVECTORMASK(val) vec8_SIMDTOVECTORMASK(val) + + +#ifdef USEX86INTRINSICS +#define vec8_MAX(a,b) _mm512_max_pd(a,b) +#define vec8_exp(a) _mm512_exp_pd(a) +#define vec8_log(a) _mm512_log_pd(a) +#define vec8_sqrt(a) _mm512_sqrt_pd(a) + +static inline Vec8d vec8_blend(Vec8d fa, Vec8d tr, Vec8m mask) +{ + /* mask follow gcc vector extension comparison results false=0 true=-1 */ + /* so we can't use the new _mm512_mask_blendv_pd intrinsics */ + #define SAFER + #ifdef SAFER + /* support mask = false:0, true: 1 or -1 */ + return (Vec8d) _mm512_ternarylogic_epi64( + _mm512_castpd_si512(fa), + _mm512_castpd_si512(tr), + _mm512_srai_epi64(_mm512_castpd_si512(mask), 63), + 0xd8); + #else + /* support only mask 0 or -1, mask=1 will fail, but should be OK with this code */ + return (Vec8d) _mm512_ternarylogic_epi64( + _mm512_castpd_si512(fa), + 
_mm512_castpd_si512(tr), + _mm512_castpd_si512(mask), + 0xd8); + #endif + + +} +static inline Vec8d vec8_fabs(Vec8d x) +{ + return (Vec8d) _mm512_abs_pd(x); +} +#else +#error X86 AVX512 instrinsics required for using SIMD8 version +#endif + + +/* some debug utils functions */ +void vec8_printd(const char* msg, const char* name, Vec8d vecd) +{ + printf("%s %s %g %g %g %g\n",msg,name,vecd[0],vecd[1],vecd[2],vecd[3]); +} + +void vec8_printm(const char* msg, const char* name, Vec8m vecm) +{ + printf("%s %s %ld %ld %ld %ld\n",msg,name,vecm[0],vecm[1],vecm[2],vecm[3]); +} + +void vec8_CheckCollisions(Vec8m stateindexes, const char* msg) +{ + for(int i=0;igen.GENname, model->gen.GENmodName); + struct bsim3v32SizeDependParam *pParam; + pParam = heres[0]->pParam; /* same of all NSIMD instances */ + +#if 1 + #include "b3v32ldseq_simd8.c" +#endif + + return(OK); + +} + diff --git a/src/spicelib/devices/bsim3v32/vec4_exp.c b/src/spicelib/devices/bsim3v32/vec4_exp.c new file mode 100644 index 000000000..51019351e --- /dev/null +++ b/src/spicelib/devices/bsim3v32/vec4_exp.c @@ -0,0 +1,654 @@ +/* This program implements a show-case vector (vectorizable) double + precision exponential with a 4 ulp error bound. + + Author: Christoph Lauter, + + Sorbonne Université - LIP6 - PEQUAN team. + + This program uses code generated using Sollya and Metalibm; see the + licences and exception texts below. + + This program is + + Copyright 2014-2018 Christoph Lauter Sorbonne Université + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + 3. 
Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + + This code was generated using non-trivial code generation commands + of the Metalibm software program. + + Before using, modifying and/or integrating this code into other + software, review the copyright and license status of this + generated code. In particular, see the exception below. + + This generated program is partly or entirely based on a program + generated using non-trivial code generation commands of the Sollya + software program. See the copyright notice and exception text + referring to that Sollya-generated part of this program generated + with Metalibm below. + + Metalibm is + + Copyright 2008-2013 by + + Laboratoire de l'Informatique du Parallélisme, + UMR CNRS - ENS Lyon - UCB Lyon 1 - INRIA 5668 + + and by + + Laboratoire d'Informatique de Paris 6, equipe PEQUAN, + UPMC Universite Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France. 
+ + Contributors: Christoph Quirin Lauter + (UPMC LIP6 PEQUAN formerly LIP/ENS Lyon) + christoph.lauter@lip6.fr + + and + + Olga Kupriianova + (UPMC LIP6 PEQUAN) + olga.kupriianova@lip6.fr + + Metalibm was formerly developed by the Arenaire project at Ecole + Normale Superieure de Lyon and is now developed by Equipe PEQUAN + at Universite Pierre et Marie Curie Paris 6. + + The Metalibm software program is free software; you can + redistribute it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) + any later version. + + Metalibm is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the Metalibm program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. + + This generated program is distributed WITHOUT ANY WARRANTY; without + even the implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. + + As a special exception, you may create a larger work that contains + part or all of this software generated using Metalibm and + distribute that work under terms of your choice, so long as that + work isn't itself a numerical code generator using the skeleton of + this code or a modified version thereof as a code skeleton. + Alternatively, if you modify or redistribute this generated code + itself, or its skeleton, you may (at your option) remove this + special exception, which will cause this generated code and its + skeleton and the resulting Metalibm output files to be licensed + under the General Public licence (version 2) without this special + exception. 
+ + This special exception was added by the Metalibm copyright holders + on November 20th 2013. + +*/ + + + +/* + This code was generated using non-trivial code generation commands of + the Sollya software program. + + Before using, modifying and/or integrating this code into other + software, review the copyright and license status of this generated + code. In particular, see the exception below. + + Sollya is + + Copyright 2006-2013 by + + Laboratoire de l'Informatique du Parallelisme, UMR CNRS - ENS Lyon - + UCB Lyon 1 - INRIA 5668, + + Laboratoire d'Informatique de Paris 6, equipe PEQUAN, UPMC Universite + Paris 06 - CNRS - UMR 7606 - LIP6, Paris, France + + and by + + Centre de recherche INRIA Sophia-Antipolis Mediterranee, equipe APICS, + Sophia Antipolis, France. + + Contributors Ch. Lauter, S. Chevillard, M. Joldes + + christoph.lauter@ens-lyon.org + sylvain.chevillard@ens-lyon.org + joldes@lass.fr + + The Sollya software is a computer program whose purpose is to provide + an environment for safe floating-point code development. It is + particularily targeted to the automatized implementation of + mathematical floating-point libraries (libm). Amongst other features, + it offers a certified infinity norm, an automatic polynomial + implementer and a fast Remez algorithm. + + The Sollya software is governed by the CeCILL-C license under French + law and abiding by the rules of distribution of free software. You + can use, modify and/ or redistribute the software under the terms of + the CeCILL-C license as circulated by CEA, CNRS and INRIA at the + following URL "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided + only with a limited warranty and the software's author, the holder of + the economic rights, and the successive licensors have only limited + liability. 
+ + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards + their requirements in conditions enabling the security of their + systems and/or data to be ensured and, more generally, to use and + operate it in the same conditions as regards security. + + The fact that you are presently reading this means that you have had + knowledge of the CeCILL-C license and that you accept its terms. + + The Sollya program is distributed WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + PURPOSE. + + This generated program is distributed WITHOUT ANY WARRANTY; without + even the implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. + + As a special exception, you may create a larger work that contains + part or all of this software generated using Sollya and distribute + that work under terms of your choice, so long as that work isn't + itself a numerical code generator using the skeleton of this code or a + modified version thereof as a code skeleton. Alternatively, if you + modify or redistribute this generated code itself, or its skeleton, + you may (at your option) remove this special exception, which will + cause this generated code and its skeleton and the resulting Sollya + output files to be licensed under the CeCILL-C licence without this + special exception. + + This special exception was added by the Sollya copyright holders in + version 4.1 of Sollya. 
+ +*/ + +#include + +#define VECTOR_LENGTH 4 +#define RESTRICT restrict +#define CONST const + +typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*VECTOR_LENGTH), + aligned (sizeof(double)*VECTOR_LENGTH))); + +/* Two caster types */ +typedef union _dblcast { + double d; + uint64_t i; +} dblcast; + +typedef union { + int64_t l; + double d; +} db_number; + +/* Macro implementations of some double-double operations */ +#define Add12(s, r, a, b) \ + {double _z, _a=a, _b=b; \ + s = _a + _b; \ + _z = s - _a; \ + r = _b - _z; } + +#define Mul12(rh,rl,u,v) \ + { \ + CONST double c = 134217729.; /* 2^27 +1 */ \ + double up, u1, u2, vp, v1, v2; \ + double _u =u, _v=v; \ + \ + up = _u*c; vp = _v*c; \ + u1 = (_u-up)+up; v1 = (_v-vp)+vp; \ + u2 = _u-u1; v2 = _v-v1; \ + \ + *rh = _u*_v; \ + *rl = (((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2); \ + } + +#define Mul122(resh,resl,a,bh,bl) \ + { \ + double _t1, _t2, _t3, _t4; \ + \ + Mul12(&_t1,&_t2,(a),(bh)); \ + _t3 = (a) * (bl); \ + _t4 = _t2 + _t3; \ + Add12((*(resh)),(*(resl)),_t1,_t4); \ + } + +#define Mul22(zh,zl,xh,xl,yh,yl) \ + { \ + double mh, ml; \ + \ + CONST double c = 134217729.; \ + double up, u1, u2, vp, v1, v2; \ + \ + up = (xh)*c; vp = (yh)*c; \ + u1 = ((xh)-up)+up; v1 = ((yh)-vp)+vp; \ + u2 = (xh)-u1; v2 = (yh)-v1; \ + \ + mh = (xh)*(yh); \ + ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2); \ + \ + ml += (xh)*(yl) + (xl)*(yh); \ + *zh = mh+ml; \ + *zl = mh - (*zh) + ml; \ + } + +/* Need fabs */ +double fabs(double); + + +/* Some constants */ +#define LOG2_E 1.442695040888963407359924681001892137426645954153 +#define LOG_2_HI 0.693147180559890330187045037746429443359375 +#define LOG_2_LO 5.4979230187083711552420206887059365096458163346682e-14 +#define SHIFTER 6755399441055744.0 + +/* A metalibm generated function for the callout */ +#define f_approx_exp_arg_red_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00 +#define f_approx_exp_arg_red_coeff_1h 
1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00 +#define f_approx_exp_arg_red_coeff_2h 5.00000000000032307490016592055326327681541442871093750000000000000000000000000000e-01 +#define f_approx_exp_arg_red_coeff_3h 1.66666666664336909908783468381443526595830917358398437500000000000000000000000000e-01 +#define f_approx_exp_arg_red_coeff_4h 4.16666661063678778198493546369718387722969055175781250000000000000000000000000000e-02 +#define f_approx_exp_arg_red_coeff_5h 8.33337739276391979703628720699271070770919322967529296875000000000000000000000000e-03 +#define f_approx_exp_arg_red_coeff_6h 1.39156772666044516173489142829566844739019870758056640625000000000000000000000000e-03 + + +static inline void f_approx_exp_arg_red(double * RESTRICT f_approx_exp_arg_red_resh, double * RESTRICT f_approx_exp_arg_red_resm, double x) { + + + + + double f_approx_exp_arg_red_t_1_0h; + double f_approx_exp_arg_red_t_2_0h; + double f_approx_exp_arg_red_t_3_0h; + double f_approx_exp_arg_red_t_4_0h; + double f_approx_exp_arg_red_t_5_0h; + double f_approx_exp_arg_red_t_6_0h; + double f_approx_exp_arg_red_t_7_0h; + double f_approx_exp_arg_red_t_8_0h; + double f_approx_exp_arg_red_t_9_0h; + double f_approx_exp_arg_red_t_10_0h; + double f_approx_exp_arg_red_t_11_0h; + double f_approx_exp_arg_red_t_12_0h; + double f_approx_exp_arg_red_t_13_0h, f_approx_exp_arg_red_t_13_0m; + + + + f_approx_exp_arg_red_t_1_0h = f_approx_exp_arg_red_coeff_6h; + f_approx_exp_arg_red_t_2_0h = f_approx_exp_arg_red_t_1_0h * x; + f_approx_exp_arg_red_t_3_0h = f_approx_exp_arg_red_coeff_5h + f_approx_exp_arg_red_t_2_0h; + f_approx_exp_arg_red_t_4_0h = f_approx_exp_arg_red_t_3_0h * x; + f_approx_exp_arg_red_t_5_0h = f_approx_exp_arg_red_coeff_4h + f_approx_exp_arg_red_t_4_0h; + f_approx_exp_arg_red_t_6_0h = f_approx_exp_arg_red_t_5_0h * x; + f_approx_exp_arg_red_t_7_0h = f_approx_exp_arg_red_coeff_3h + f_approx_exp_arg_red_t_6_0h; + f_approx_exp_arg_red_t_8_0h = 
f_approx_exp_arg_red_t_7_0h * x; + f_approx_exp_arg_red_t_9_0h = f_approx_exp_arg_red_coeff_2h + f_approx_exp_arg_red_t_8_0h; + f_approx_exp_arg_red_t_10_0h = f_approx_exp_arg_red_t_9_0h * x; + f_approx_exp_arg_red_t_11_0h = f_approx_exp_arg_red_coeff_1h + f_approx_exp_arg_red_t_10_0h; + f_approx_exp_arg_red_t_12_0h = f_approx_exp_arg_red_t_11_0h * x; + Add12(f_approx_exp_arg_red_t_13_0h,f_approx_exp_arg_red_t_13_0m,f_approx_exp_arg_red_coeff_0h,f_approx_exp_arg_red_t_12_0h); + *f_approx_exp_arg_red_resh = f_approx_exp_arg_red_t_13_0h; *f_approx_exp_arg_red_resm = f_approx_exp_arg_red_t_13_0m; + + +} + +static CONST double f_approx_twoPower_Index_Hi[32] = { + 1, + 1.021897148654116627, + 1.0442737824274137548, + 1.067140400676823697, + 1.0905077326652576897, + 1.114386742595892432, + 1.1387886347566915646, + 1.1637248587775774755, + 1.1892071150027210269, + 1.2152473599804689552, + 1.241857812073484002, + 1.2690509571917332199, + 1.2968395546510096406, + 1.3252366431597413232, + 1.3542555469368926513, + 1.3839098819638320226, + 1.4142135623730951455, + 1.4451808069770466503, + 1.4768261459394993462, + 1.5091644275934228414, + 1.542210825407940744, + 1.5759808451078864966, + 1.6104903319492542835, + 1.6457554781539649458, + 1.681792830507429004, + 1.718619298122477934, + 1.7562521603732994535, + 1.794709075003107168, + 1.8340080864093424307, + 1.8741676341102999626, + 1.9152065613971474, + 1.9571441241754001794 +}; + +static CONST double f_approx_twoPower_Index_Mi[32] = { + 0, + 5.109225028973443893e-17, + 8.551889705537964892e-17, + -7.899853966841582122e-17, + -3.046782079812471147e-17, + 1.0410278456845570955e-16, + 8.912812676025407777e-17, + 3.8292048369240934987e-17, + 3.982015231465646111e-17, + -7.71263069268148813e-17, + 4.658027591836936791e-17, + 2.667932131342186095e-18, + 2.5382502794888314959e-17, + -2.858731210038861373e-17, + 7.700948379802989461e-17, + -6.770511658794786287e-17, + -9.66729331345291345e-17, + -3.023758134993987319e-17, + 
-3.4839945568927957958e-17, + -1.016455327754295039e-16, + 7.949834809697620856e-17, + -1.013691647127830398e-17, + 2.470719256979788785e-17, + -1.0125679913674772604e-16, + 8.19901002058149652e-17, + -1.851380418263110988e-17, + 2.960140695448873307e-17, + 1.822745842791208677e-17, + 3.283107224245627203e-17, + -6.122763413004142561e-17, + -1.0619946056195962638e-16, + 8.960767791036667767e-17 +}; + +#define f_approx_argred_log2_of_base_times_two_to_w 4.616624130844682838e1 +#define f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_hi -2.1660849392498290195e-2 +#define f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_mi -7.24702129326968612e-19 +#define f_approx_argred_shifter 6755399441055744.0 +#define f_approx_argred_w 5 +#define f_approx_argred_idx_mask 31ull +#define f_approx_argred_lower_32_bits 0xffffffffull + +static inline void scalar_exp_callout_inner(double * RESTRICT res_resh, double * RESTRICT res_resm, double xh) { + double zh; + double poly_resh, poly_resm; + + double t; + double shifted_t; + double mAsDouble; + db_number argRedCaster; + int mAsInt; + int E; + int E1; + int E2; + int idx; + double rescaled_m_hi; + double rescaled_m_mi; + double table_hi; + double table_mi; + double tableTimesPoly_hi; + double tableTimesPoly_mi; + db_number twoE1; + db_number twoE2; + double twoE1tablePoly_hi; + double twoE1tablePoly_mi; + + + t = xh * f_approx_argred_log2_of_base_times_two_to_w; + shifted_t = t + f_approx_argred_shifter; + mAsDouble = shifted_t - f_approx_argred_shifter; + argRedCaster.d = shifted_t; + mAsInt = (int) (argRedCaster.l & f_approx_argred_lower_32_bits); + E = mAsInt >> f_approx_argred_w; + E1 = E >> 1; + E2 = E - E1; + idx = mAsInt & f_approx_argred_idx_mask; + Mul122(&rescaled_m_hi, &rescaled_m_mi, mAsDouble, f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_hi, f_approx_argred_minus_logbase_of_2_times_two_to_minus_w_mi); + zh = (xh + rescaled_m_hi) + rescaled_m_mi; + + f_approx_exp_arg_red(&poly_resh, &poly_resm, 
zh); + + table_hi = f_approx_twoPower_Index_Hi[idx]; + table_mi = f_approx_twoPower_Index_Mi[idx]; + Mul22(&tableTimesPoly_hi,&tableTimesPoly_mi,table_hi,table_mi,poly_resh,poly_resm); + twoE1.l = E1 + 1023ll; + twoE1.l <<= 52; + twoE2.l = E2 + 1023ll; + twoE2.l <<= 52; + twoE1tablePoly_hi = twoE1.d * tableTimesPoly_hi; + twoE1tablePoly_mi = twoE1.d * tableTimesPoly_mi; + *res_resh = twoE2.d * twoE1tablePoly_hi; + *res_resm = twoE2.d * twoE1tablePoly_mi; + +} + +/* A scalar exponential for the callout */ +static inline double scalar_exp_callout(double x) { + dblcast xdb, xAbsdb; + double yh, yl, twoM600, two600; + + xdb.d = x; + xAbsdb.i = xdb.i & 0x7fffffffffffffffull; + if (xAbsdb.i >= 0x7ff0000000000000ull) { + /* If we are here, we have an Inf or a Nan */ + if (xAbsdb.i == 0x7ff0000000000000ull) { + /* Here, the input is an Inf */ + if (xdb.i >> 63) { + /* x = -Inf, return 0 */ + return 0.0; + } + /* x = +Inf, return +Inf */ + return x; + } + + /* Here, the input is a NaN */ + return 1.0 + x; + } + + /* Here, the input is real. + + Start by checking if we have evident under- or overflow. + + We have evident underflow if x <= -746.0 + and evident overflow if x >= 711.0. + */ + if (x <= -746.0) { + /* Return a completely underflowed result */ + twoM600 = 2.4099198651028841177407500347125089364310049545099e-181; + + return twoM600 * twoM600; + } + if (x >= 711.0) { + /* Return a completely overflowed result */ + two600 = 4.1495155688809929585124078636911611510124462322424e180; + + return two600 * two600; + } + + /* Here, the input will not provoke any huge overflow or underflow + but there might still be some under- or overflow. + + Now check if x is that small in magnitude that returning 1.0 + x + suffices to well approximate the exponential (up to a relative + error of 2^-53). This is surely the case when abs(x) <= 0.75 * + 2^-26. + */ + if (fabs(x) <= 1.11758708953857421875e-8) { + return 1.0 + x; + } + + /* Here, the input is real. 
There might still be some slight under- + or overflow on output. + + Just use a metalibm generated function. + + */ + scalar_exp_callout_inner(&yh, &yl, x); + + return yh + yl; +} + +/* A vector exponential callout */ +static inline Vec4d vector_exp_callout(Vec4d x) { + int i; + Vec4d y; + for (i=0;i + +/* Two caster types */ +typedef union _dblcast { + double d; + uint64_t i; +} dblcast; + +typedef union { + int64_t l; + double d; +} db_number; + +/* Compiler tricks and hints */ +#define INLINE inline +#define RESTRICT restrict +#define STATIC static +#define CONST const + +/* Vector length */ +#define VECTOR_LENGTH 4 +typedef double Vec4d __attribute__ ((vector_size (sizeof(double)*VECTOR_LENGTH), + aligned (sizeof(double)*VECTOR_LENGTH))); + +/* Macro implementations of some double-double operations */ +#define Add12(s, r, a, b) \ + {double _z, _a=a, _b=b; \ + s = _a + _b; \ + _z = s - _a; \ + r = _b - _z; } + +#define Mul22(zh,zl,xh,xl,yh,yl) \ + { \ + double mh, ml; \ + \ + const double c = 134217729.; \ + double up, u1, u2, vp, v1, v2; \ + \ + up = (xh)*c; vp = (yh)*c; \ + u1 = ((xh)-up)+up; v1 = ((yh)-vp)+vp; \ + u2 = (xh)-u1; v2 = (yh)-v1; \ + \ + mh = (xh)*(yh); \ + ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2); \ + \ + ml += (xh)*(yl) + (xl)*(yh); \ + *zh = mh+ml; \ + *zl = mh - (*zh) + ml; \ + } + +#define Mul122(resh,resl,a,bh,bl) \ + { \ + double _t1, _t2, _t3, _t4; \ + \ + Mul12(&_t1,&_t2,(a),(bh)); \ + _t3 = (a) * (bl); \ + _t4 = _t2 + _t3; \ + Add12((*(resh)),(*(resl)),_t1,_t4); \ + } + +#define Add22(zh,zl,xh,xl,yh,yl) \ + do { \ + double _r,_s; \ + _r = (xh)+(yh); \ + _s = ((((xh)-_r) +(yh)) + (yl)) + (xl); \ + *zh = _r+_s; \ + *zl = (_r - (*zh)) + _s; \ + } while(0) + +#define Mul12(rh,rl,u,v) \ + { \ + const double c = 134217729.; /* 2^27 +1 */ \ + double up, u1, u2, vp, v1, v2; \ + double _u =u, _v=v; \ + \ + up = _u*c; vp = _v*c; \ + u1 = (_u-up)+up; v1 = (_v-vp)+vp; \ + u2 = _u-u1; v2 = _v-v1; \ + \ + *rh = _u*_v; \ + *rl = 
(((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2); \ + } + + +/* Need fabs */ +double fabs(double); + + +/* Some constants */ + +#define LOG_TWO_HI 0.693147180559890330187045037746429443359375 +#define LOG_TWO_LO 5.4979230187083711552420206887059365096458163346682e-14 + + +/* A metalibm generated function for the callout */ +#define f_approx_log_arg_red_coeff_1h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00 +#define f_approx_log_arg_red_coeff_2h -4.99999999999998390176614293523016385734081268310546875000000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_3h 3.33333333333923731434111914495588280260562896728515625000000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_4h -2.50000000052116866378071335930144414305686950683593750000000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_5h 1.99999988486698782041983690760389436036348342895507812500000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_6h -1.66666258081627438603078417145297862589359283447265625000000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_7h 1.42921894210221167575980416586389765143394470214843750000000000000000000000000000e-01 +#define f_approx_log_arg_red_coeff_8h -1.25915254741829296669664017827017232775688171386718750000000000000000000000000000e-01 + + +STATIC INLINE void f_approx_log_arg_red(double * RESTRICT f_approx_log_arg_red_resh, double * RESTRICT f_approx_log_arg_red_resm, double xh, double xm) { + + + + + double f_approx_log_arg_red_t_1_0h; + double f_approx_log_arg_red_t_2_0h; + double f_approx_log_arg_red_t_3_0h; + double f_approx_log_arg_red_t_4_0h; + double f_approx_log_arg_red_t_5_0h; + double f_approx_log_arg_red_t_6_0h; + double f_approx_log_arg_red_t_7_0h; + double f_approx_log_arg_red_t_8_0h; + double f_approx_log_arg_red_t_9_0h; + double f_approx_log_arg_red_t_10_0h; + double f_approx_log_arg_red_t_11_0h; + double f_approx_log_arg_red_t_12_0h; + double f_approx_log_arg_red_t_13_0h; + 
double f_approx_log_arg_red_t_14_0h; + double f_approx_log_arg_red_t_15_0h, f_approx_log_arg_red_t_15_0m; + double f_approx_log_arg_red_t_16_0h, f_approx_log_arg_red_t_16_0m; + + + + f_approx_log_arg_red_t_1_0h = f_approx_log_arg_red_coeff_8h; + f_approx_log_arg_red_t_2_0h = f_approx_log_arg_red_t_1_0h * xh; + f_approx_log_arg_red_t_3_0h = f_approx_log_arg_red_coeff_7h + f_approx_log_arg_red_t_2_0h; + f_approx_log_arg_red_t_4_0h = f_approx_log_arg_red_t_3_0h * xh; + f_approx_log_arg_red_t_5_0h = f_approx_log_arg_red_coeff_6h + f_approx_log_arg_red_t_4_0h; + f_approx_log_arg_red_t_6_0h = f_approx_log_arg_red_t_5_0h * xh; + f_approx_log_arg_red_t_7_0h = f_approx_log_arg_red_coeff_5h + f_approx_log_arg_red_t_6_0h; + f_approx_log_arg_red_t_8_0h = f_approx_log_arg_red_t_7_0h * xh; + f_approx_log_arg_red_t_9_0h = f_approx_log_arg_red_coeff_4h + f_approx_log_arg_red_t_8_0h; + f_approx_log_arg_red_t_10_0h = f_approx_log_arg_red_t_9_0h * xh; + f_approx_log_arg_red_t_11_0h = f_approx_log_arg_red_coeff_3h + f_approx_log_arg_red_t_10_0h; + f_approx_log_arg_red_t_12_0h = f_approx_log_arg_red_t_11_0h * xh; + f_approx_log_arg_red_t_13_0h = f_approx_log_arg_red_coeff_2h + f_approx_log_arg_red_t_12_0h; + f_approx_log_arg_red_t_14_0h = f_approx_log_arg_red_t_13_0h * xh; + Add12(f_approx_log_arg_red_t_15_0h,f_approx_log_arg_red_t_15_0m,f_approx_log_arg_red_coeff_1h,f_approx_log_arg_red_t_14_0h); + Mul22(&f_approx_log_arg_red_t_16_0h,&f_approx_log_arg_red_t_16_0m,f_approx_log_arg_red_t_15_0h,f_approx_log_arg_red_t_15_0m,xh,xm); + *f_approx_log_arg_red_resh = f_approx_log_arg_red_t_16_0h; *f_approx_log_arg_red_resm = f_approx_log_arg_red_t_16_0m; + + +} + + +#define f_approx_tablewidth 5 +#define f_approx_maxindex 14 +#define f_approx_rcpr_log_two_of_base_hi 0.69314718055994528622676398299518041312694549560546875 +#define f_approx_rcpr_log_two_of_base_mi 2.3190468138462995584177710797133615750739959242786823734316925538223586045205593109130859375e-17 + +static const double 
f_approx_log_rcpr_tbl_hi[33] = { + 0, + 3.17486983145802981187699742804397828876972198486328125e-2, + 6.453852113757117814341057737692608498036861419677734375e-2, + 8.985632912186104770402295116582536138594150543212890625e-2, + 0.1158318155251217007606356901305844075977802276611328125, + 0.142500062607283040083672176479012705385684967041015625, + 0.169899036795397473387225772967212833464145660400390625, + 0.1980699137620937910764240541539038531482219696044921875, + 0.2270574506353460753071971112149185501039028167724609375, + 0.2468600779315257842672082233548280782997608184814453125, + 0.2670627852490452536216025691828690469264984130859375, + 0.2981533723190763485177967595518566668033599853515625, + 0.319430770766361227241958431477542035281658172607421875, + 0.34117075740276714412857472780160605907440185546875, + -0.329753286372467979692402195723843760788440704345703125, + -0.30702503529491187439504074063734151422977447509765625, + -0.28376817313064461867355703361681662499904632568359375, + -0.27193371548364175804834985683555714786052703857421875, + -0.2478361639045812692128123444490483961999416351318359375, + -0.223143551314209764857565687634632922708988189697265625, + -0.2105647691073496419189581274622469209134578704833984375, + -0.1849223384940119896402421773018431849777698516845703125, + -0.17185025692665922836255276706651784479618072509765625, + -0.1451820098444978890395162807180895470082759857177734375, + -0.1315763577887192614657152489598956890404224395751953125, + -0.1177830356563834557359626842298894189298152923583984375, + -0.10379679368164355934833764649738441221415996551513671875, + -7.522342123758753162920953627690323628485202789306640625e-2, + -6.062462181643483993820353816772694699466228485107421875e-2, + -4.58095360312942012637194011404062621295452117919921875e-2, + -3.077165866675368732785500469617545604705810546875e-2, + -1.5504186535965254478686148331689764745533466339111328125e-2, + 0 +}; + + +static const double f_approx_log_rcpr_tbl_mi[33] = { 
+ 0, + 3.03822630846808578765259986229142635550407126467467068542394059704747633077204227447509765625e-18, + -6.4704866616929329974161813916713618427728286285169519154170103547585313208401203155517578125e-18, + -6.2737601636895940223772151595043522169967894903434509935868934604741298244334757328033447265625e-19, + 4.33848436980809595557198228135728192959103146527353490891076859270469867624342441558837890625e-18, + -9.9263882342257491397106905651454915981827472977916566876377402195430477149784564971923828125e-18, + -4.8680087644390707941393631766999763543363602831990049994714819803220962057821452617645263671875e-19, + 3.74284348246143901356926696786621497402944711010920782190414257684096810407936573028564453125e-18, + 9.551415762738488431492098722158984238118586922020904206309666051311069168150424957275390625e-18, + 1.3617433717483680171009009478499574446783469284919833308666881066528731025755405426025390625e-17, + -7.3289153273201694886198949831953541788954485227476805253576941368010011501610279083251953125e-18, + -1.72069586744586603715170366469832022772114935873187524517646806998527608811855316162109375e-17, + 1.3542568572648110745997524461078410815028703905694095442624469427528310916386544704437255859375e-18, + -1.936679006260286699473802044740827141118261398825169117277056329839979298412799835205078125e-17, + -2.122020616196946023332814001844389995179410458238009572207172226399052306078374385833740234375e-18, + 1.231991620010196428468632499036271595368677926845939196720536301654647104442119598388671875e-17, + 2.0326655811266561230291019136542876238402571524729010865595313362064189277589321136474609375e-17, + -7.8331963769744201243220009945333356568337002449775477268267831476578066940419375896453857421875e-19, + 1.24322095787025231818185093190325423423584424116919953939852661051190807484090328216552734375e-17, + 9.091270597324799048711045191818233254271755021066504787174977764152572490274906158447265625e-18, + 
4.24940531472989532850360049655226441340213720053550945643383585093033616431057453155517578125e-18, + -3.023661415357406426577090417003710240867302228907377570354952922571101225912570953369140625e-18, + 6.0224538210113704760318352588172818979944380808860641962620974254605243913829326629638671875e-18, + -8.2424187830224753896228153425798328521705177161500548155270706729425000958144664764404296875e-18, + -1.112300087972958802991298461231701795529693224825161512736571012283093295991420745849609375e-17, + 1.197168574759367729935408317875380291366461975031726568119427867031845380552113056182861328125e-18, + -5.47772415726659012592706002045618002605660904524354816447218041730593540705740451812744140625e-18, + 5.93060419629324071708218111258442537327230935598090626192924190718258614651858806610107421875e-18, + -2.642402593872693418157455274069099088532417945381102798718675472855466068722307682037353515625e-18, + -1.90295986647425706325531188416869176372485943199669260195161513138373265974223613739013671875e-18, + -1.0431732029005967805059792190367890366163673586242621564579291515428849379532039165496826171875e-18, + 3.27832102289242912962985506573138544887782756899054594813824881072150674299336969852447509765625e-19, + 0 +}; + + +static const double f_approx_rcpr_tbl[33] = { + 1.0, + 0.96875, + 0.9375, + 0.9140625, + 0.890625, + 0.8671875, + 0.84375, + 0.8203125, + 0.796875, + 0.78125, + 0.765625, + 0.7421875, + 0.7265625, + 0.7109375, + 0.6953125, + 0.6796875, + 0.6640625, + 0.65625, + 0.640625, + 0.625, + 0.6171875, + 0.6015625, + 0.59375, + 0.578125, + 0.5703125, + 0.5625, + 0.5546875, + 0.5390625, + 0.53125, + 0.5234375, + 0.515625, + 0.5078125, + 0.5 +}; + +STATIC INLINE void scalar_log_callout_inner(double * RESTRICT res_resh, double * RESTRICT res_resm, double xh) { + + db_number argRedCaster; + int E; + int index; + double ed; + double m; + double r; + double zh; + double zm; + double mrh, mrl; + double temp; + double polyHi; + double polyMi; + double tableHi; + 
double tableMi; + double scaledExpoHi; + double scaledExpoMi; + double logMHi; + double logMMi; + + + argRedCaster.d = xh; + E = 0; + if (argRedCaster.l < 0x0010000000000000) { + argRedCaster.d *= 9007199254740992.0; + E -= 53; + } + E += (int) ((argRedCaster.l >> 52) - 1023ll); + index = (int) ((argRedCaster.l & 0x000fffffffffffffull) >> (52 - f_approx_tablewidth - 1)); + index = (index + 1) >> 1; + if (index >= f_approx_maxindex) E++; + ed = (double) E; + argRedCaster.l = (argRedCaster.l & 0x800fffffffffffffull) | 0x3ff0000000000000ull; + m = argRedCaster.d; + r = f_approx_rcpr_tbl[index]; + Mul12(&mrh,&mrl,m,r); + temp = mrh - 1.0; + Add12(zh,zm,temp,mrl); + + f_approx_log_arg_red(&polyHi, &polyMi, zh, zm); + + + tableHi = f_approx_log_rcpr_tbl_hi[index]; + tableMi = f_approx_log_rcpr_tbl_mi[index]; + Mul122(&scaledExpoHi,&scaledExpoMi,ed,f_approx_rcpr_log_two_of_base_hi,f_approx_rcpr_log_two_of_base_mi); + Add22(&logMHi,&logMMi,tableHi,tableMi,polyHi,polyMi); + Add22(res_resh,res_resm,scaledExpoHi,scaledExpoMi,logMHi,logMMi); + + +} + + +/* A scalar logarithm for the callout */ +STATIC INLINE double scalar_log_callout(double x) { + dblcast xdb; + double yh, yl; + double temp; + + /* Check for special inputs: x less than the smallest positive + subnormal, x Inf or NaN + */ + xdb.d = x; + if ((xdb.i == 0x0ull) || (xdb.i >= 0x7ff0000000000000ull)) { + /* Here, we have a special case to handle + + The input is either +/-0, negative, +/-Inf or +/- NaN. + + */ + if ((xdb.i & 0x7fffffffffffffffull) >= 0x7ff0000000000000ull) { + /* The input is either Inf or NaN */ + if ((xdb.i & 0x7fffffffffffffffull) > 0x7ff0000000000000ull) { + /* The input is NaN. Return the quietized NaN */ + return 1.0 + x; + } + /* The input is +Inf or -Inf */ + if ((xdb.i & 0x8000000000000000ull) == 0x0ull) { + /* The input is +Inf. Return log(+Inf) = + Inf. */ + return x; + } + /* The input is -Inf. 
Let the case fall through */ + } + + /* The input is +/- 0, -Inf or a negative real number */ + if (x == 0.0) { + /* The input is +/-0. Return -Inf and raise the division-by-zero + exception. + */ + temp = 1.0 - 1.0; /* temp = +0.0 or -0.0 */ + temp = temp * temp; /* temp = +0.0 */ + return -1.0 / temp; /* Return -Inf and raise div-by-zero. */ + } + + /* The input is -Inf or a negative real number. + + Return NaN and raise the invalid exception. + + */ + temp = 0.0; + return temp / temp; /* Return NaN and raise invalid. */ + } + + /* Here the input is a positive subnormal or normal + + Just call a Metalibm generated function. + + */ + scalar_log_callout_inner(&yh, &yl, x); + + /* Return the result */ + return yh + yl; +} + +/* A vector logarithm callout */ +STATIC INLINE Vec4d vector_log_callout(Vec4d x) { + int i; + Vec4d y; + for (i=0;i= 0x0020000000000000ull) && (xdb.i < 0x7ff0000000000000ull)); + } + + /* Perform a callout if we cannot handle the input in one slot */ + if (okaySlots != VECTOR_LENGTH) { + return vector_log_callout(x); + } + + /* Here, the input is real, and far enough from the subnormal + range + */ + for (i=0;i>= 52; + tui2 >>= 52; + tui3 = tui2 - tui1; + tui3 <<= 52; + E = ((int) tui2) - 1023; + eDouble = (double) E; + xdb.i = ((xdb.i & 0x000fffffffffffffull) | 0x3ff0000000000000ull) - tui3; + m = xdb.d; /* 2^E * m = x exactly, 0.75 <= m < 1.5 */ + r = m - 1.0; /* exact: Sterbenz */ + vector_log_poly(&p, r); + elog2h = eDouble * LOG_TWO_HI; /* exact: trailing zeros */ + elog2l = eDouble * LOG_TWO_LO; + t1h = elog2h + p; + t2 = t1h - elog2h; + t1l = p - t2; /* exact: t1h + t1l = elog2h + p */ + t3 = elog2l + t1l; + y[i] = t1h + t3; + } + return y; +} diff --git a/visualc/vngspice.vcxproj b/visualc/vngspice.vcxproj index 157b44b85..0ee0c8d48 100644 --- a/visualc/vngspice.vcxproj +++ b/visualc/vngspice.vcxproj @@ -667,13 +667,15 @@ Default MultiThreaded true - true + + Level4 ProgramDatabase CompileAsC true + /openmp:experimental 
%(AdditionalOptions) psapi.lib;%(AdditionalDependencies) @@ -1111,6 +1113,7 @@ + @@ -1846,11 +1849,19 @@ + + + + + + + + @@ -1862,6 +1873,8 @@ + +