Moved Truncation Error Calculation into GPU for CUSPICE

This commit is contained in:
Francesco Lannutti 2014-05-12 08:55:25 +02:00 committed by rlar
parent e668ce5791
commit dcb62b8259
28 changed files with 719 additions and 35 deletions

View File

@ -33,29 +33,33 @@ int cuCKTrhsOldUpdateDtoH (CKTcircuit *) ;
int cuCKTsetup (CKTcircuit *) ;
int cuCKTsystemDtoH (CKTcircuit *) ;
int cuCKTstatesFlush (CKTcircuit *) ;
int cuCKTstatesUpdateDtoH (CKTcircuit *) ;
int cuCKTstate0UpdateHtoD (CKTcircuit *) ;
int cuCKTstate0UpdateDtoH (CKTcircuit *) ;
int cuCKTstate01copy (CKTcircuit *) ;
int cuCKTstatesCircularBuffer (CKTcircuit *) ;
int cuCKTstate123copy (CKTcircuit *) ;
int cuCKTdeltaOldUpdateHtoD (CKTcircuit *) ;
int cuCKTtrunc (CKTcircuit *, double, double *) ;
int cuBSIM4v7destroy (GENmodel *) ;
int cuBSIM4v7getic (GENmodel *) ;
int cuBSIM4v7load (GENmodel *, CKTcircuit *) ;
int cuBSIM4v7setup (GENmodel *) ;
int cuBSIM4v7temp (GENmodel *) ;
int cuBSIM4v7trunc (GENmodel *, CKTcircuit *, double *) ;
int cuCAPdestroy (GENmodel *) ;
int cuCAPgetic (GENmodel *) ;
int cuCAPload (GENmodel *, CKTcircuit *) ;
int cuCAPsetup (GENmodel *) ;
int cuCAPtemp (GENmodel *) ;
int cuCAPtrunc (GENmodel *, CKTcircuit *, double *) ;
int cuINDdestroy (GENmodel *) ;
int cuINDload (GENmodel *, CKTcircuit *) ;
int cuINDsetup (GENmodel *) ;
int cuINDtemp (GENmodel *) ;
int cuINDtrunc (GENmodel *, CKTcircuit *, double *) ;
int cuISRCdestroy (GENmodel *) ;
int cuISRCload (GENmodel *, CKTcircuit *) ;

View File

@ -306,6 +306,7 @@ struct CKTcircuit {
#ifdef USE_CUSPICE
double *(d_CKTstates[8]);
double **dD_CKTstates;
#define d_CKTstate0 d_CKTstates[0]
#define d_CKTstate1 d_CKTstates[1]
#define d_CKTstate2 d_CKTstates[2]
@ -315,6 +316,8 @@ struct CKTcircuit {
#define d_CKTstate6 d_CKTstates[6]
#define d_CKTstate7 d_CKTstates[7]
double *d_CKTdeltaOld;
double *d_CKTrhsOld;
int *d_CKTnoncon;
int d_MatrixSize;
@ -341,6 +344,11 @@ struct CKTcircuit {
int *d_CKTtopologyMatrixCSRpRHS;
int *d_CKTtopologyMatrixCSRjRHS;
double *d_CKTtopologyMatrixCSRxRHS;
int total_n_timeSteps;
double *CKTtimeSteps;
double *d_CKTtimeSteps;
double *d_CKTtimeStepsOut;
#endif
};

View File

@ -58,7 +58,7 @@ CKTcircuit *ckt
)
{
int i ;
long unsigned int m, mRHS, n, nz, TopologyNNZ, TopologyNNZRHS, size1, size2 ;
long unsigned int m, mRHS, n, nz, TopologyNNZ, TopologyNNZRHS, size1, size2, size3 ;
cudaError_t status ;
n = (long unsigned int)ckt->CKTmatrix->CKTkluN ;
@ -74,6 +74,7 @@ CKTcircuit *ckt
size1 = (long unsigned int)(ckt->d_MatrixSize + 1) ;
size2 = (long unsigned int)ckt->CKTnumStates ;
size3 = (long unsigned int)ckt->total_n_timeSteps ;
/* Topology Matrix Handling */
status = cudaMalloc ((void **)&(ckt->CKTmatrix->d_CKTrhs), (n + 1) * sizeof(double)) ;
@ -141,5 +142,22 @@ CKTcircuit *ckt
CUDAMALLOCCHECK (ckt->d_CKTstates[i], size2, double, status)
}
/* Truncation Error */
status = cudaMalloc ((void **)&(ckt->dD_CKTstates), 8 * sizeof(double *)) ;
CUDAMALLOCCHECK (ckt->dD_CKTstates, 8, double *, status)
status = cudaMemcpy (ckt->dD_CKTstates, ckt->d_CKTstates, 8 * sizeof(double *), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (ckt->dD_CKTstates, 8, double *, status)
status = cudaMalloc ((void **)&(ckt->d_CKTdeltaOld), 7 * sizeof(double)) ;
CUDAMALLOCCHECK (ckt->d_CKTdeltaOld, 7, double, status)
// ckt->CKTtimeSteps = (double *) malloc (size3 * sizeof(double)) ;
status = cudaMalloc ((void **)&(ckt->d_CKTtimeSteps), size3 * sizeof(double)) ;
CUDAMALLOCCHECK (ckt->d_CKTtimeSteps, size3, double, status)
status = cudaMalloc ((void **)&(ckt->d_CKTtimeStepsOut), size3 * sizeof(double)) ;
CUDAMALLOCCHECK (ckt->d_CKTtimeStepsOut, size3, double, status)
return (OK) ;
}

View File

@ -39,30 +39,6 @@
return (E_NOMEM) ; \
}
int
cuCKTstatesUpdateDtoH
(
CKTcircuit *ckt
)
{
int i ;
long unsigned int size ;
cudaError_t status ;
size = (long unsigned int)ckt->CKTnumStates ;
for (i = 0 ; i < 8 ; i++)
{
if (ckt->CKTstates[i] != NULL)
{
status = cudaMemcpy (ckt->CKTstates[i], ckt->d_CKTstates[i], size * sizeof(double), cudaMemcpyDeviceToHost) ;
CUDAMEMCPYCHECK (ckt->CKTstates[i], size, double, status)
}
}
return (OK) ;
}
int
cuCKTstatesFlush
(
@ -162,3 +138,17 @@ CKTcircuit *ckt
return (OK) ;
}
int
cuCKTdeltaOldUpdateHtoD
(
CKTcircuit *ckt
)
{
cudaError_t status ;
status = cudaMemcpy (ckt->d_CKTdeltaOld, ckt->CKTdeltaOld, 7 * sizeof(double), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (ckt->d_CKTdeltaOld, 7, double, status)
return (OK) ;
}

View File

@ -0,0 +1,99 @@
/**********
Copyright 1990 Regents of the University of California. All rights reserved.
Author: 1985 Thomas L. Quarles
**********/
extern "C"
__device__
static
int
cuCKTterr
(
int qcap, double **CKTstates, double *d_CKTdeltaOld,
double d_CKTdelta, int d_CKTorder, int d_CKTintegrateMethod,
double d_CKTabsTol, double d_CKTrelTol, double d_CKTchgTol, double d_CKTtrTol,
//Return Value
double *timeStep
)
{
#define ccap (qcap+1)
/* known integration methods */
#define TRAPEZOIDAL 1
#define GEAR 2
#define MAX(a,b) ((a) > (b) ? (a) : (b))
double volttol, chargetol, tol, del ;
double diff [8] ;
double deltmp [8] ;
double factor = 0 ;
int i, j ;
double gearCoeff [] = {
.5,
.2222222222,
.1363636364,
.096,
.07299270073,
.05830903790
} ;
double trapCoeff [] = {
.5,
.08333333333
} ;
volttol = d_CKTabsTol + d_CKTrelTol * MAX (fabs (CKTstates [0] [ccap]), fabs (CKTstates [1] [ccap])) ;
chargetol = MAX (fabs (CKTstates [0] [qcap]), fabs (CKTstates [1] [qcap])) ;
chargetol = d_CKTrelTol * MAX (chargetol, d_CKTchgTol) / d_CKTdelta ;
tol = MAX (volttol, chargetol) ;
/* now divided differences */
for (i = d_CKTorder + 1 ; i >= 0 ; i--)
{
diff [i] = CKTstates [i] [qcap] ;
}
for (i = 0 ; i <= d_CKTorder ; i++)
{
deltmp [i] = d_CKTdeltaOld [i] ;
}
j = d_CKTorder ;
for (;;)
{
for (i = 0 ; i <= j ; i++)
{
diff [i] = (diff [i] - diff [i + 1]) / deltmp [i] ;
}
if (--j < 0)
break ;
for (i = 0 ; i <= j ; i++)
{
deltmp [i] = deltmp [i + 1] + d_CKTdeltaOld [i] ;
}
}
switch (d_CKTintegrateMethod)
{
case GEAR:
factor = gearCoeff [d_CKTorder - 1] ;
break ;
case TRAPEZOIDAL:
factor = trapCoeff [d_CKTorder - 1] ;
break ;
}
del = d_CKTtrTol * tol / MAX (d_CKTabsTol, factor * fabs (diff [0])) ;
if (d_CKTorder == 2)
{
del = sqrt (del) ;
} else if (d_CKTorder > 2) {
del = exp (log (del) / d_CKTorder) ;
}
*timeStep = del ;
return 0 ;
}

View File

@ -0,0 +1,135 @@
/**********
Copyright 2014 - NGSPICE Software
Author: 2014 Francesco Lannutti
**********/
#include "ngspice/config.h"
#include "ngspice/cktdefs.h"
#include "cuda_runtime_api.h"
#include "ngspice/macros.h"
/* cudaMemcpy MACRO to check it for errors --> CUDAMEMCPYCHECK(name of pointer, dimension, type, status) */
#define CUDAMEMCPYCHECK(a, b, c, d) \
if (d != cudaSuccess) \
{ \
fprintf (stderr, "cuCKTtrunc routine...\n") ; \
fprintf (stderr, "Error: cudaMemcpy failed on %s size of %d bytes\n", #a, (int)(b * sizeof(c))) ; \
fprintf (stderr, "Error: %s = %d, %s\n", #d, d, cudaGetErrorString (d)) ; \
return (E_NOMEM) ; \
}
extern "C"
__global__ void cuCKTtrunc_kernel
(
double *, double *, int
) ;
extern "C"
int
cuCKTtrunc
(
CKTcircuit *ckt, double timetemp, double *timeStep
)
{
long unsigned int size ;
double timetempGPU ;
int thread_x, thread_y, block_x ;
cudaError_t status ;
/* Determining how many blocks should exist in the kernel */
thread_x = 1 ;
thread_y = 256 ;
if (ckt->total_n_timeSteps % thread_y != 0)
block_x = (int)((ckt->total_n_timeSteps + thread_y - 1) / thread_y) ;
else
block_x = ckt->total_n_timeSteps / thread_y ;
dim3 thread (thread_x, thread_y) ;
/* Kernel launch */
status = cudaGetLastError () ; // clear error status
cuCKTtrunc_kernel <<< block_x, thread, thread_y * sizeof(double) >>> (ckt->d_CKTtimeSteps, ckt->d_CKTtimeStepsOut, ckt->total_n_timeSteps) ;
cudaDeviceSynchronize () ;
cuCKTtrunc_kernel <<< 1, thread, thread_y * sizeof(double) >>> (ckt->d_CKTtimeStepsOut, ckt->d_CKTtimeSteps, block_x) ;
cudaDeviceSynchronize () ;
status = cudaGetLastError () ; // check for launch error
if (status != cudaSuccess)
{
fprintf (stderr, "Kernel launch failure in cuCKTtrunc\n\n") ;
return (E_NOMEM) ;
}
/* Copy back the reduction result */
size = (long unsigned int)(1) ;
status = cudaMemcpy (&timetempGPU, ckt->d_CKTtimeSteps, size * sizeof(double), cudaMemcpyDeviceToHost) ;
CUDAMEMCPYCHECK (&timetempGPU, size, double, status)
/* Final Comparison */
if (timetempGPU < timetemp)
{
timetemp = timetempGPU ;
}
if (2 * *timeStep < timetemp)
{
*timeStep = 2 * *timeStep ;
} else {
*timeStep = timetemp ;
}
return 0 ;
}
extern "C"
__global__
void
cuCKTtrunc_kernel
(
double *g_idata, double *g_odata, int n
)
{
extern __shared__ double sdata [] ;
unsigned int i, tid ;
tid = threadIdx.y ;
// i = blockIdx.x * (blockDim.y * 2) + tid ;
i = blockIdx.x * blockDim.y + tid ;
if (i < n)
{
// sdata [tid] = MIN (g_idata [i], g_idata [i + blockDim.y]) ;
sdata [tid] = g_idata [i] ;
}
__syncthreads () ;
if ((tid < 128) && (i + 128 < n))
{
sdata [tid] = MIN (sdata [tid], sdata [tid + 128]) ;
}
__syncthreads () ;
if ((tid < 64) && (i + 64 < n))
{
sdata [tid] = MIN (sdata [tid], sdata [tid + 64]) ;
}
__syncthreads () ;
if ((tid < 32) && (i + 32 < n))
{
sdata [tid] = MIN (sdata [tid], sdata [tid + 32]) ;
sdata [tid] = MIN (sdata [tid], sdata [tid + 16]) ;
sdata [tid] = MIN (sdata [tid], sdata [tid + 8]) ;
sdata [tid] = MIN (sdata [tid], sdata [tid + 4]) ;
sdata [tid] = MIN (sdata [tid], sdata [tid + 2]) ;
sdata [tid] = MIN (sdata [tid], sdata [tid + 1]) ;
}
if (tid == 0)
{
g_odata [blockIdx.x] = sdata [0] ;
}
}

View File

@ -111,6 +111,8 @@ AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(top_srcdir)/src/include -I$(top_srcdir)/src/spi
AM_CFLAGS = $(STATIC)
if USE_CUSPICE_WANTED
.cu.lo:
$(AM_V_GEN)$(top_srcdir)/src/libtool_wrapper_for_cuda.tcl $@ $(AM_CFLAGS) $(NVCC) $(CUDA_CFLAGS) $(AM_CPPFLAGS) -c $<
libckt_la_SOURCES += \
CUSPICE/cucktflush.c \
@ -118,7 +120,8 @@ libckt_la_SOURCES += \
CUSPICE/cucktrhsoldupdate.c \
CUSPICE/cucktsetup.c \
CUSPICE/cucktstatesupdate.c \
CUSPICE/cucktsystem.c
CUSPICE/cucktsystem.c \
CUSPICE/cuckttrunc.cu
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
AM_LDFLAGS = $(CUDA_LIBS) -lcusparse

View File

@ -110,6 +110,8 @@ CKTsetup(CKTcircuit *ckt)
ckt->total_n_valuesRHS = 0 ;
ckt->total_n_PtrRHS = 0 ;
ckt->total_n_timeSteps = 0 ;
#endif
int i;

View File

@ -15,6 +15,9 @@ Author: 1985 Thomas L. Quarles
#include "ngspice/devdefs.h"
#include "ngspice/sperror.h"
#ifdef USE_CUSPICE
#include "ngspice/CUSPICE/CUSPICE.h"
#endif
int
CKTtrunc (CKTcircuit *ckt, double *timeStep)
@ -58,7 +61,16 @@ CKTtrunc (CKTcircuit *ckt, double *timeStep)
}
}
#ifdef USE_CUSPICE
int status ;
status = cuCKTtrunc (ckt, HUGE, timeStep) ;
if (status != 0)
return (E_NOMEM) ;
#else
*timeStep = MIN (2 * *timeStep, timetemp) ;
#endif
ckt->CKTstat->STATtranTruncTime += SPfrontEnd->IFseconds () - startTime ;
return (OK) ;

View File

@ -884,7 +884,7 @@ resume:
}
#ifdef USE_CUSPICE
status = cuCKTstatesUpdateDtoH (ckt) ;
status = cuCKTdeltaOldUpdateHtoD (ckt) ;
if (status != 0)
return (E_NOMEM) ;
#endif

View File

@ -73,6 +73,12 @@ GENmodel *inModel
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
/* DOUBLE */
model->BSIM4v7paramCPU.BSIM4v7gbsRWArray = (double *) malloc (size * sizeof(double)) ;
status = cudaMalloc ((void **)&(model->BSIM4v7paramGPU.d_BSIM4v7gbsRWArray), size * sizeof(double)) ;

View File

@ -0,0 +1,137 @@
/**********
Copyright 2014 - NGSPICE Software
Author: 2014 Francesco Lannutti
**********/
#include "ngspice/config.h"
#include "CUSPICE/cucktterr.cuh"
#include "bsim4v7def.h"
extern "C"
__global__ void cuBSIM4v7trunc_kernel (BSIM4v7paramGPUstruct, int, double **, double *, double,
int, int, double, double, double, double, double *, int *) ;
extern "C"
int
cuBSIM4v7trunc
(
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
)
{
(void)timeStep ;
BSIM4v7model *model = (BSIM4v7model *)inModel ;
int thread_x, thread_y, block_x ;
cudaError_t status ;
/* loop through all the BSIM4v7 models */
for ( ; model != NULL ; model = BSIM4v7nextModel(model))
{
/* Determining how many blocks should exist in the kernel */
thread_x = 1 ;
thread_y = 256 ;
if (model->n_instances % thread_y != 0)
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
else
block_x = model->n_instances / thread_y ;
dim3 thread (thread_x, thread_y) ;
/* Kernel launch */
status = cudaGetLastError () ; // clear error status
cuBSIM4v7trunc_kernel <<< block_x, thread >>> (model->BSIM4v7paramGPU, model->n_instances,
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
cudaDeviceSynchronize () ;
status = cudaGetLastError () ; // check for launch error
if (status != cudaSuccess)
{
fprintf (stderr, "Kernel launch failure in the Trunc BSIM4v7 Model\n\n") ;
return (E_NOMEM) ;
}
}
return (OK) ;
}
extern "C"
__global__
void
cuBSIM4v7trunc_kernel
(
BSIM4v7paramGPUstruct BSIM4v7entry, int n_instances, double **CKTstates,
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
double *CKTtimeSteps, int *PositionVector_timeSteps
)
{
int instance_ID, i ;
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
if (instance_ID < n_instances)
{
if (threadIdx.x == 0)
{
i = 0 ;
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 11, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 13, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 15, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
if (BSIM4v7entry.d_BSIM4v7trnqsModArray [instance_ID])
{
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 25, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
}
if (BSIM4v7entry.d_BSIM4v7rbodyModArray [instance_ID])
{
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 19, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 21, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
}
if (BSIM4v7entry.d_BSIM4v7rgateModArray [instance_ID] == 3)
{
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 17, CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
i++ ;
}
}
}
return ;
}

View File

@ -47,9 +47,10 @@ libbsim4v7_la_SOURCES += \
CUSPICE/cubsim4v7getic.c \
CUSPICE/cubsim4v7load.cu \
CUSPICE/cubsim4v7setup.c \
CUSPICE/cubsim4v7temp.c
CUSPICE/cubsim4v7temp.c \
CUSPICE/cubsim4v7trunc.cu
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
endif
MAINTAINERCLEANFILES = Makefile.in

View File

@ -2587,7 +2587,7 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
}
#ifdef USE_CUSPICE
int i, j, jRHS, l, lRHS, status ;
int i, j, jRHS, l, lRHS, lTimeSteps, status ;
/* Counting the instances */
for (model = (BSIM4v7model *)inModel ; model != NULL ; model = BSIM4v7nextModel(model))
@ -2612,15 +2612,22 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Position Vector Allocation for the RHS */
model->PositionVectorRHS = TMALLOC (int, model->n_instances) ;
/* Position Vector Allocation for timeSteps */
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
model->offset = ckt->total_n_values ;
model->offsetRHS = ckt->total_n_valuesRHS ;
model->offset_timeSteps = ckt->total_n_timeSteps ;
i = 0 ;
j = 0 ;
jRHS = 0 ;
l = 0 ;
lRHS = 0 ;
lTimeSteps = 0 ;
/* loop through all the instances of the model */
for (here = BSIM4v7instances(model); here != NULL ; here = BSIM4v7nextInstance(here))
@ -2631,6 +2638,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Position Vector Assignment for the RHS */
model->PositionVectorRHS [i] = model->offsetRHS + lRHS ;
/* Position Vector Assignment for timeSteps */
model->PositionVector_timeSteps [i] = model->offset_timeSteps + lTimeSteps ;
/* For the Matrix */
if (here->BSIM4v7rgateMod == 1)
@ -2779,6 +2789,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Different Values for the CKTloadOutput */
l += 14 ;
/* Different TimeSteps */
lTimeSteps += 1 ;
} else {
/* m * (gcggb - ggtg + gIgtotg) */
if ((here->BSIM4v7gNodePrime != 0) && (here->BSIM4v7gNodePrime != 0))
@ -2911,6 +2924,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Different Values for the CKTloadOutput */
l += 16 ;
/* Different TimeSteps */
lTimeSteps += 3 ;
if (here->BSIM4v7rbodyMod)
{
@ -2988,6 +3004,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Different Values for the CKTloadOutput */
l += 12 ;
/* Different TimeSteps */
lTimeSteps += 2 ;
}
@ -3027,6 +3046,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
/* Different Values for the CKTloadOutput */
l += 8 ;
/* Different TimeSteps */
lTimeSteps += 1 ;
}
@ -3137,6 +3159,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
model->n_PtrRHS = jRHS ;
ckt->total_n_PtrRHS += model->n_PtrRHS ;
model->n_timeSteps = lTimeSteps ;
ckt->total_n_timeSteps += model->n_timeSteps ;
}
/* loop through all the BSIM4v7 models */

View File

@ -3347,6 +3347,11 @@ typedef struct sBSIM4v7model
int *PositionVectorRHS;
int *d_PositionVectorRHS;
int offset_timeSteps;
int n_timeSteps;
int *PositionVector_timeSteps;
int *d_PositionVector_timeSteps;
int n_instances;
#endif

View File

@ -45,7 +45,11 @@ SPICEdev BSIM4v7info = {
.DEVunsetup = BSIM4v7unsetup,
.DEVpzSetup = BSIM4v7setup,
.DEVtemperature = BSIM4v7temp,
#ifdef USE_CUSPICE
.DEVtrunc = cuBSIM4v7trunc,
#else
.DEVtrunc = BSIM4v7trunc,
#endif
.DEVfindBranch = NULL,
.DEVacLoad = BSIM4v7acLoad,
.DEVaccept = NULL,

View File

@ -73,6 +73,12 @@ GENmodel *inModel
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
/* DOUBLE */
model->CAPparamCPU.CAPinitCondArray = (double *) malloc (size * sizeof(double)) ;
status = cudaMalloc ((void **)&(model->CAPparamGPU.d_CAPinitCondArray), size * sizeof(double)) ;

View File

@ -0,0 +1,89 @@
/**********
Copyright 2014 - NGSPICE Software
Author: 2014 Francesco Lannutti
**********/
#include "ngspice/config.h"
#include "CUSPICE/cucktterr.cuh"
#include "capdefs.h"
extern "C"
__global__ void cuCAPtrunc_kernel (CAPparamGPUstruct, int, double **, double *, double,
int, int, double, double, double, double, double *, int *) ;
extern "C"
int
cuCAPtrunc
(
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
)
{
(void)timeStep ;
CAPmodel *model = (CAPmodel *)inModel ;
int thread_x, thread_y, block_x ;
cudaError_t status ;
/* loop through all the capacitor models */
for ( ; model != NULL ; model = CAPnextModel(model))
{
/* Determining how many blocks should exist in the kernel */
thread_x = 1 ;
thread_y = 256 ;
if (model->n_instances % thread_y != 0)
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
else
block_x = model->n_instances / thread_y ;
dim3 thread (thread_x, thread_y) ;
/* Kernel launch */
status = cudaGetLastError () ; // clear error status
cuCAPtrunc_kernel <<< block_x, thread >>> (model->CAPparamGPU, model->n_instances,
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
cudaDeviceSynchronize () ;
status = cudaGetLastError () ; // check for launch error
if (status != cudaSuccess)
{
fprintf (stderr, "Kernel launch failure in the Trunc Capacitor Model\n\n") ;
return (E_NOMEM) ;
}
}
return (OK) ;
}
extern "C"
__global__
void
cuCAPtrunc_kernel
(
CAPparamGPUstruct CAPentry, int n_instances, double **CKTstates,
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
double *CKTtimeSteps, int *PositionVector_timeSteps
)
{
int instance_ID ;
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
if (instance_ID < n_instances)
{
if (threadIdx.x == 0)
{
cuCKTterr (CAPentry.d_CAPstateArray [instance_ID], CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID]])) ;
}
}
return ;
}

View File

@ -48,9 +48,10 @@ libcap_la_SOURCES += \
CUSPICE/cucapgetic.c \
CUSPICE/cucapload.cu \
CUSPICE/cucapsetup.c \
CUSPICE/cucaptemp.c
CUSPICE/cucaptemp.c \
CUSPICE/cucaptrunc.cu
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
endif
MAINTAINERCLEANFILES = Makefile.in

View File

@ -165,6 +165,11 @@ typedef struct sCAPmodel { /* model structure for a capacitor */
int *PositionVectorRHS ;
int *d_PositionVectorRHS ;
int offset_timeSteps ;
int n_timeSteps ;
int *PositionVector_timeSteps ;
int *d_PositionVector_timeSteps ;
int n_instances ;
#endif

View File

@ -45,7 +45,11 @@ SPICEdev CAPinfo = {
.DEVunsetup = NULL,
.DEVpzSetup = CAPsetup,
.DEVtemperature = CAPtemp,
#ifdef USE_CUSPICE
.DEVtrunc = cuCAPtrunc,
#else
.DEVtrunc = CAPtrunc,
#endif
.DEVfindBranch = NULL,
.DEVacLoad = CAPacLoad,
.DEVaccept = NULL,

View File

@ -195,6 +195,18 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
for (j = 0 ; j < model->n_instances; j++)
model->PositionVectorRHS [j] = model->offsetRHS + j ;
/* Position Vector for timeSteps */
model->offset_timeSteps = ckt->total_n_timeSteps ;
model->n_timeSteps = model->n_instances;
ckt->total_n_timeSteps += model->n_timeSteps ;
/* Position Vector assignment for timeSteps */
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
for (j = 0 ; j < model->n_instances; j++)
model->PositionVector_timeSteps [j] = model->offset_timeSteps + j ;
}
/* loop through all the capacitor models */

View File

@ -73,6 +73,12 @@ GENmodel *inModel
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
/* DOUBLE */
model->INDparamCPU.INDinitCondArray = (double *) malloc (size * sizeof(double)) ;
status = cudaMalloc ((void **)&(model->INDparamGPU.d_INDinitCondArray), size * sizeof(double)) ;

View File

@ -0,0 +1,89 @@
/**********
Copyright 2014 - NGSPICE Software
Author: 2014 Francesco Lannutti
**********/
#include "ngspice/config.h"
#include "CUSPICE/cucktterr.cuh"
#include "inddefs.h"
extern "C"
__global__ void cuINDtrunc_kernel (INDparamGPUstruct, int, double **, double *, double,
int, int, double, double, double, double, double *, int *) ;
extern "C"
int
cuINDtrunc
(
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
)
{
(void)timeStep ;
INDmodel *model = (INDmodel *)inModel ;
int thread_x, thread_y, block_x ;
cudaError_t status ;
/* loop through all the inductor models */
for ( ; model != NULL ; model = INDnextModel(model))
{
/* Determining how many blocks should exist in the kernel */
thread_x = 1 ;
thread_y = 256 ;
if (model->n_instances % thread_y != 0)
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
else
block_x = model->n_instances / thread_y ;
dim3 thread (thread_x, thread_y) ;
/* Kernel launch */
status = cudaGetLastError () ; // clear error status
cuINDtrunc_kernel <<< block_x, thread >>> (model->INDparamGPU, model->n_instances,
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
cudaDeviceSynchronize () ;
status = cudaGetLastError () ; // check for launch error
if (status != cudaSuccess)
{
fprintf (stderr, "Kernel launch failure in the Trunc Inductor Model\n\n") ;
return (E_NOMEM) ;
}
}
return (OK) ;
}
extern "C"
__global__
void
cuINDtrunc_kernel
(
INDparamGPUstruct INDentry, int n_instances, double **CKTstates,
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
double *CKTtimeSteps, int *PositionVector_timeSteps
)
{
int instance_ID ;
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
if (instance_ID < n_instances)
{
if (threadIdx.x == 0)
{
cuCKTterr (INDentry.d_INDstateArray [instance_ID], CKTstates,
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID]])) ;
}
}
return ;
}

View File

@ -57,7 +57,8 @@ libind_la_SOURCES += \
CUSPICE/cuindfree.c \
CUSPICE/cuindload.cu \
CUSPICE/cuindsetup.c \
CUSPICE/cuindtemp.c
CUSPICE/cuindtemp.c \
CUSPICE/cuindtrunc.cu
libind_la_SOURCES += \
CUSPICE/muttopology.c \
@ -66,7 +67,7 @@ libind_la_SOURCES += \
CUSPICE/cumutsetup.c \
CUSPICE/cumuttemp.c
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
endif
MAINTAINERCLEANFILES = Makefile.in

View File

@ -166,6 +166,11 @@ struct sINDmodel { /* model structure for an inductor */
int *PositionVectorRHS;
int *d_PositionVectorRHS;
int offset_timeSteps;
int n_timeSteps;
int *PositionVector_timeSteps;
int *d_PositionVector_timeSteps;
int n_instances;
#endif

View File

@ -45,7 +45,11 @@ SPICEdev INDinfo = {
.DEVunsetup = INDunsetup,
.DEVpzSetup = INDsetup,
.DEVtemperature = INDtemp,
#ifdef USE_CUSPICE
.DEVtrunc = cuINDtrunc,
#else
.DEVtrunc = INDtrunc,
#endif
.DEVfindBranch = NULL,
.DEVacLoad = INDacLoad,
.DEVaccept = NULL,

View File

@ -186,6 +186,19 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
for (j = 0 ; j < model->n_instances; j++)
model->PositionVectorRHS [j] = model->offsetRHS + j ;
/* Position Vector for timeSteps */
model->offset_timeSteps = ckt->total_n_timeSteps ;
model->n_timeSteps = model->n_instances;
ckt->total_n_timeSteps += model->n_timeSteps ;
/* Position Vector assignment for timeSteps */
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
for (j = 0 ; j < model->n_instances; j++)
model->PositionVector_timeSteps [j] = model->offset_timeSteps + j ;
}
/* loop through all the inductor models */