Moved Truncation Error Calculation into GPU for CUSPICE
This commit is contained in:
parent
e668ce5791
commit
dcb62b8259
|
|
@ -33,29 +33,33 @@ int cuCKTrhsOldUpdateDtoH (CKTcircuit *) ;
|
|||
int cuCKTsetup (CKTcircuit *) ;
|
||||
int cuCKTsystemDtoH (CKTcircuit *) ;
|
||||
int cuCKTstatesFlush (CKTcircuit *) ;
|
||||
int cuCKTstatesUpdateDtoH (CKTcircuit *) ;
|
||||
int cuCKTstate0UpdateHtoD (CKTcircuit *) ;
|
||||
int cuCKTstate0UpdateDtoH (CKTcircuit *) ;
|
||||
int cuCKTstate01copy (CKTcircuit *) ;
|
||||
int cuCKTstatesCircularBuffer (CKTcircuit *) ;
|
||||
int cuCKTstate123copy (CKTcircuit *) ;
|
||||
int cuCKTdeltaOldUpdateHtoD (CKTcircuit *) ;
|
||||
int cuCKTtrunc (CKTcircuit *, double, double *) ;
|
||||
|
||||
int cuBSIM4v7destroy (GENmodel *) ;
|
||||
int cuBSIM4v7getic (GENmodel *) ;
|
||||
int cuBSIM4v7load (GENmodel *, CKTcircuit *) ;
|
||||
int cuBSIM4v7setup (GENmodel *) ;
|
||||
int cuBSIM4v7temp (GENmodel *) ;
|
||||
int cuBSIM4v7trunc (GENmodel *, CKTcircuit *, double *) ;
|
||||
|
||||
int cuCAPdestroy (GENmodel *) ;
|
||||
int cuCAPgetic (GENmodel *) ;
|
||||
int cuCAPload (GENmodel *, CKTcircuit *) ;
|
||||
int cuCAPsetup (GENmodel *) ;
|
||||
int cuCAPtemp (GENmodel *) ;
|
||||
int cuCAPtrunc (GENmodel *, CKTcircuit *, double *) ;
|
||||
|
||||
int cuINDdestroy (GENmodel *) ;
|
||||
int cuINDload (GENmodel *, CKTcircuit *) ;
|
||||
int cuINDsetup (GENmodel *) ;
|
||||
int cuINDtemp (GENmodel *) ;
|
||||
int cuINDtrunc (GENmodel *, CKTcircuit *, double *) ;
|
||||
|
||||
int cuISRCdestroy (GENmodel *) ;
|
||||
int cuISRCload (GENmodel *, CKTcircuit *) ;
|
||||
|
|
|
|||
|
|
@ -306,6 +306,7 @@ struct CKTcircuit {
|
|||
|
||||
#ifdef USE_CUSPICE
|
||||
double *(d_CKTstates[8]);
|
||||
double **dD_CKTstates;
|
||||
#define d_CKTstate0 d_CKTstates[0]
|
||||
#define d_CKTstate1 d_CKTstates[1]
|
||||
#define d_CKTstate2 d_CKTstates[2]
|
||||
|
|
@ -315,6 +316,8 @@ struct CKTcircuit {
|
|||
#define d_CKTstate6 d_CKTstates[6]
|
||||
#define d_CKTstate7 d_CKTstates[7]
|
||||
|
||||
double *d_CKTdeltaOld;
|
||||
|
||||
double *d_CKTrhsOld;
|
||||
int *d_CKTnoncon;
|
||||
int d_MatrixSize;
|
||||
|
|
@ -341,6 +344,11 @@ struct CKTcircuit {
|
|||
int *d_CKTtopologyMatrixCSRpRHS;
|
||||
int *d_CKTtopologyMatrixCSRjRHS;
|
||||
double *d_CKTtopologyMatrixCSRxRHS;
|
||||
|
||||
int total_n_timeSteps;
|
||||
double *CKTtimeSteps;
|
||||
double *d_CKTtimeSteps;
|
||||
double *d_CKTtimeStepsOut;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ CKTcircuit *ckt
|
|||
)
|
||||
{
|
||||
int i ;
|
||||
long unsigned int m, mRHS, n, nz, TopologyNNZ, TopologyNNZRHS, size1, size2 ;
|
||||
long unsigned int m, mRHS, n, nz, TopologyNNZ, TopologyNNZRHS, size1, size2, size3 ;
|
||||
cudaError_t status ;
|
||||
|
||||
n = (long unsigned int)ckt->CKTmatrix->CKTkluN ;
|
||||
|
|
@ -74,6 +74,7 @@ CKTcircuit *ckt
|
|||
|
||||
size1 = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
||||
size2 = (long unsigned int)ckt->CKTnumStates ;
|
||||
size3 = (long unsigned int)ckt->total_n_timeSteps ;
|
||||
|
||||
/* Topology Matrix Handling */
|
||||
status = cudaMalloc ((void **)&(ckt->CKTmatrix->d_CKTrhs), (n + 1) * sizeof(double)) ;
|
||||
|
|
@ -141,5 +142,22 @@ CKTcircuit *ckt
|
|||
CUDAMALLOCCHECK (ckt->d_CKTstates[i], size2, double, status)
|
||||
}
|
||||
|
||||
|
||||
/* Truncation Error */
|
||||
status = cudaMalloc ((void **)&(ckt->dD_CKTstates), 8 * sizeof(double *)) ;
|
||||
CUDAMALLOCCHECK (ckt->dD_CKTstates, 8, double *, status)
|
||||
|
||||
status = cudaMemcpy (ckt->dD_CKTstates, ckt->d_CKTstates, 8 * sizeof(double *), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (ckt->dD_CKTstates, 8, double *, status)
|
||||
|
||||
status = cudaMalloc ((void **)&(ckt->d_CKTdeltaOld), 7 * sizeof(double)) ;
|
||||
CUDAMALLOCCHECK (ckt->d_CKTdeltaOld, 7, double, status)
|
||||
|
||||
// ckt->CKTtimeSteps = (double *) malloc (size3 * sizeof(double)) ;
|
||||
status = cudaMalloc ((void **)&(ckt->d_CKTtimeSteps), size3 * sizeof(double)) ;
|
||||
CUDAMALLOCCHECK (ckt->d_CKTtimeSteps, size3, double, status)
|
||||
status = cudaMalloc ((void **)&(ckt->d_CKTtimeStepsOut), size3 * sizeof(double)) ;
|
||||
CUDAMALLOCCHECK (ckt->d_CKTtimeStepsOut, size3, double, status)
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,30 +39,6 @@
|
|||
return (E_NOMEM) ; \
|
||||
}
|
||||
|
||||
int
|
||||
cuCKTstatesUpdateDtoH
|
||||
(
|
||||
CKTcircuit *ckt
|
||||
)
|
||||
{
|
||||
int i ;
|
||||
long unsigned int size ;
|
||||
cudaError_t status ;
|
||||
|
||||
size = (long unsigned int)ckt->CKTnumStates ;
|
||||
|
||||
for (i = 0 ; i < 8 ; i++)
|
||||
{
|
||||
if (ckt->CKTstates[i] != NULL)
|
||||
{
|
||||
status = cudaMemcpy (ckt->CKTstates[i], ckt->d_CKTstates[i], size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
||||
CUDAMEMCPYCHECK (ckt->CKTstates[i], size, double, status)
|
||||
}
|
||||
}
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
||||
int
|
||||
cuCKTstatesFlush
|
||||
(
|
||||
|
|
@ -162,3 +138,17 @@ CKTcircuit *ckt
|
|||
|
||||
return (OK) ;
|
||||
}
|
||||
|
||||
int
|
||||
cuCKTdeltaOldUpdateHtoD
|
||||
(
|
||||
CKTcircuit *ckt
|
||||
)
|
||||
{
|
||||
cudaError_t status ;
|
||||
|
||||
status = cudaMemcpy (ckt->d_CKTdeltaOld, ckt->CKTdeltaOld, 7 * sizeof(double), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (ckt->d_CKTdeltaOld, 7, double, status)
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
/**********
|
||||
Copyright 1990 Regents of the University of California. All rights reserved.
|
||||
Author: 1985 Thomas L. Quarles
|
||||
**********/
|
||||
|
||||
extern "C"
|
||||
__device__
|
||||
static
|
||||
int
|
||||
cuCKTterr
|
||||
(
|
||||
int qcap, double **CKTstates, double *d_CKTdeltaOld,
|
||||
double d_CKTdelta, int d_CKTorder, int d_CKTintegrateMethod,
|
||||
double d_CKTabsTol, double d_CKTrelTol, double d_CKTchgTol, double d_CKTtrTol,
|
||||
//Return Value
|
||||
double *timeStep
|
||||
)
|
||||
{
|
||||
|
||||
#define ccap (qcap+1)
|
||||
|
||||
/* known integration methods */
|
||||
#define TRAPEZOIDAL 1
|
||||
#define GEAR 2
|
||||
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
double volttol, chargetol, tol, del ;
|
||||
double diff [8] ;
|
||||
double deltmp [8] ;
|
||||
double factor = 0 ;
|
||||
int i, j ;
|
||||
double gearCoeff [] = {
|
||||
.5,
|
||||
.2222222222,
|
||||
.1363636364,
|
||||
.096,
|
||||
.07299270073,
|
||||
.05830903790
|
||||
} ;
|
||||
double trapCoeff [] = {
|
||||
.5,
|
||||
.08333333333
|
||||
} ;
|
||||
|
||||
volttol = d_CKTabsTol + d_CKTrelTol * MAX (fabs (CKTstates [0] [ccap]), fabs (CKTstates [1] [ccap])) ;
|
||||
|
||||
chargetol = MAX (fabs (CKTstates [0] [qcap]), fabs (CKTstates [1] [qcap])) ;
|
||||
chargetol = d_CKTrelTol * MAX (chargetol, d_CKTchgTol) / d_CKTdelta ;
|
||||
|
||||
tol = MAX (volttol, chargetol) ;
|
||||
|
||||
/* now divided differences */
|
||||
for (i = d_CKTorder + 1 ; i >= 0 ; i--)
|
||||
{
|
||||
diff [i] = CKTstates [i] [qcap] ;
|
||||
}
|
||||
for (i = 0 ; i <= d_CKTorder ; i++)
|
||||
{
|
||||
deltmp [i] = d_CKTdeltaOld [i] ;
|
||||
}
|
||||
j = d_CKTorder ;
|
||||
for (;;)
|
||||
{
|
||||
for (i = 0 ; i <= j ; i++)
|
||||
{
|
||||
diff [i] = (diff [i] - diff [i + 1]) / deltmp [i] ;
|
||||
}
|
||||
|
||||
if (--j < 0)
|
||||
break ;
|
||||
|
||||
for (i = 0 ; i <= j ; i++)
|
||||
{
|
||||
deltmp [i] = deltmp [i + 1] + d_CKTdeltaOld [i] ;
|
||||
}
|
||||
}
|
||||
switch (d_CKTintegrateMethod)
|
||||
{
|
||||
case GEAR:
|
||||
factor = gearCoeff [d_CKTorder - 1] ;
|
||||
break ;
|
||||
|
||||
case TRAPEZOIDAL:
|
||||
factor = trapCoeff [d_CKTorder - 1] ;
|
||||
break ;
|
||||
}
|
||||
del = d_CKTtrTol * tol / MAX (d_CKTabsTol, factor * fabs (diff [0])) ;
|
||||
if (d_CKTorder == 2)
|
||||
{
|
||||
del = sqrt (del) ;
|
||||
} else if (d_CKTorder > 2) {
|
||||
del = exp (log (del) / d_CKTorder) ;
|
||||
}
|
||||
|
||||
*timeStep = del ;
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
/**********
|
||||
Copyright 2014 - NGSPICE Software
|
||||
Author: 2014 Francesco Lannutti
|
||||
**********/
|
||||
|
||||
#include "ngspice/config.h"
|
||||
#include "ngspice/cktdefs.h"
|
||||
#include "cuda_runtime_api.h"
|
||||
#include "ngspice/macros.h"
|
||||
|
||||
/* cudaMemcpy MACRO to check it for errors --> CUDAMEMCPYCHECK(name of pointer, dimension, type, status) */
|
||||
#define CUDAMEMCPYCHECK(a, b, c, d) \
|
||||
if (d != cudaSuccess) \
|
||||
{ \
|
||||
fprintf (stderr, "cuCKTtrunc routine...\n") ; \
|
||||
fprintf (stderr, "Error: cudaMemcpy failed on %s size of %d bytes\n", #a, (int)(b * sizeof(c))) ; \
|
||||
fprintf (stderr, "Error: %s = %d, %s\n", #d, d, cudaGetErrorString (d)) ; \
|
||||
return (E_NOMEM) ; \
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__ void cuCKTtrunc_kernel
|
||||
(
|
||||
double *, double *, int
|
||||
) ;
|
||||
|
||||
extern "C"
|
||||
int
|
||||
cuCKTtrunc
|
||||
(
|
||||
CKTcircuit *ckt, double timetemp, double *timeStep
|
||||
)
|
||||
{
|
||||
long unsigned int size ;
|
||||
double timetempGPU ;
|
||||
int thread_x, thread_y, block_x ;
|
||||
|
||||
cudaError_t status ;
|
||||
|
||||
/* Determining how many blocks should exist in the kernel */
|
||||
thread_x = 1 ;
|
||||
thread_y = 256 ;
|
||||
if (ckt->total_n_timeSteps % thread_y != 0)
|
||||
block_x = (int)((ckt->total_n_timeSteps + thread_y - 1) / thread_y) ;
|
||||
else
|
||||
block_x = ckt->total_n_timeSteps / thread_y ;
|
||||
|
||||
dim3 thread (thread_x, thread_y) ;
|
||||
|
||||
/* Kernel launch */
|
||||
status = cudaGetLastError () ; // clear error status
|
||||
|
||||
cuCKTtrunc_kernel <<< block_x, thread, thread_y * sizeof(double) >>> (ckt->d_CKTtimeSteps, ckt->d_CKTtimeStepsOut, ckt->total_n_timeSteps) ;
|
||||
|
||||
cudaDeviceSynchronize () ;
|
||||
|
||||
cuCKTtrunc_kernel <<< 1, thread, thread_y * sizeof(double) >>> (ckt->d_CKTtimeStepsOut, ckt->d_CKTtimeSteps, block_x) ;
|
||||
|
||||
cudaDeviceSynchronize () ;
|
||||
|
||||
status = cudaGetLastError () ; // check for launch error
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
fprintf (stderr, "Kernel launch failure in cuCKTtrunc\n\n") ;
|
||||
return (E_NOMEM) ;
|
||||
}
|
||||
|
||||
/* Copy back the reduction result */
|
||||
size = (long unsigned int)(1) ;
|
||||
status = cudaMemcpy (&timetempGPU, ckt->d_CKTtimeSteps, size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
||||
CUDAMEMCPYCHECK (&timetempGPU, size, double, status)
|
||||
|
||||
/* Final Comparison */
|
||||
if (timetempGPU < timetemp)
|
||||
{
|
||||
timetemp = timetempGPU ;
|
||||
}
|
||||
if (2 * *timeStep < timetemp)
|
||||
{
|
||||
*timeStep = 2 * *timeStep ;
|
||||
} else {
|
||||
*timeStep = timetemp ;
|
||||
}
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__
|
||||
void
|
||||
cuCKTtrunc_kernel
|
||||
(
|
||||
double *g_idata, double *g_odata, int n
|
||||
)
|
||||
{
|
||||
extern __shared__ double sdata [] ;
|
||||
unsigned int i, tid ;
|
||||
|
||||
tid = threadIdx.y ;
|
||||
// i = blockIdx.x * (blockDim.y * 2) + tid ;
|
||||
i = blockIdx.x * blockDim.y + tid ;
|
||||
if (i < n)
|
||||
{
|
||||
// sdata [tid] = MIN (g_idata [i], g_idata [i + blockDim.y]) ;
|
||||
sdata [tid] = g_idata [i] ;
|
||||
}
|
||||
__syncthreads () ;
|
||||
|
||||
if ((tid < 128) && (i + 128 < n))
|
||||
{
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 128]) ;
|
||||
}
|
||||
__syncthreads () ;
|
||||
|
||||
if ((tid < 64) && (i + 64 < n))
|
||||
{
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 64]) ;
|
||||
}
|
||||
__syncthreads () ;
|
||||
|
||||
if ((tid < 32) && (i + 32 < n))
|
||||
{
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 32]) ;
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 16]) ;
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 8]) ;
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 4]) ;
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 2]) ;
|
||||
sdata [tid] = MIN (sdata [tid], sdata [tid + 1]) ;
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
g_odata [blockIdx.x] = sdata [0] ;
|
||||
}
|
||||
}
|
||||
|
|
@ -111,6 +111,8 @@ AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(top_srcdir)/src/include -I$(top_srcdir)/src/spi
|
|||
AM_CFLAGS = $(STATIC)
|
||||
|
||||
if USE_CUSPICE_WANTED
|
||||
.cu.lo:
|
||||
$(AM_V_GEN)$(top_srcdir)/src/libtool_wrapper_for_cuda.tcl $@ $(AM_CFLAGS) $(NVCC) $(CUDA_CFLAGS) $(AM_CPPFLAGS) -c $<
|
||||
|
||||
libckt_la_SOURCES += \
|
||||
CUSPICE/cucktflush.c \
|
||||
|
|
@ -118,7 +120,8 @@ libckt_la_SOURCES += \
|
|||
CUSPICE/cucktrhsoldupdate.c \
|
||||
CUSPICE/cucktsetup.c \
|
||||
CUSPICE/cucktstatesupdate.c \
|
||||
CUSPICE/cucktsystem.c
|
||||
CUSPICE/cucktsystem.c \
|
||||
CUSPICE/cuckttrunc.cu
|
||||
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
|
||||
AM_LDFLAGS = $(CUDA_LIBS) -lcusparse
|
||||
|
|
|
|||
|
|
@ -110,6 +110,8 @@ CKTsetup(CKTcircuit *ckt)
|
|||
|
||||
ckt->total_n_valuesRHS = 0 ;
|
||||
ckt->total_n_PtrRHS = 0 ;
|
||||
|
||||
ckt->total_n_timeSteps = 0 ;
|
||||
#endif
|
||||
|
||||
int i;
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ Author: 1985 Thomas L. Quarles
|
|||
#include "ngspice/devdefs.h"
|
||||
#include "ngspice/sperror.h"
|
||||
|
||||
#ifdef USE_CUSPICE
|
||||
#include "ngspice/CUSPICE/CUSPICE.h"
|
||||
#endif
|
||||
|
||||
int
|
||||
CKTtrunc (CKTcircuit *ckt, double *timeStep)
|
||||
|
|
@ -58,7 +61,16 @@ CKTtrunc (CKTcircuit *ckt, double *timeStep)
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_CUSPICE
|
||||
int status ;
|
||||
|
||||
status = cuCKTtrunc (ckt, HUGE, timeStep) ;
|
||||
if (status != 0)
|
||||
return (E_NOMEM) ;
|
||||
#else
|
||||
*timeStep = MIN (2 * *timeStep, timetemp) ;
|
||||
#endif
|
||||
|
||||
ckt->CKTstat->STATtranTruncTime += SPfrontEnd->IFseconds () - startTime ;
|
||||
return (OK) ;
|
||||
|
|
|
|||
|
|
@ -884,7 +884,7 @@ resume:
|
|||
}
|
||||
|
||||
#ifdef USE_CUSPICE
|
||||
status = cuCKTstatesUpdateDtoH (ckt) ;
|
||||
status = cuCKTdeltaOldUpdateHtoD (ckt) ;
|
||||
if (status != 0)
|
||||
return (E_NOMEM) ;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -73,6 +73,12 @@ GENmodel *inModel
|
|||
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
|
||||
|
||||
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
|
||||
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
/* DOUBLE */
|
||||
model->BSIM4v7paramCPU.BSIM4v7gbsRWArray = (double *) malloc (size * sizeof(double)) ;
|
||||
status = cudaMalloc ((void **)&(model->BSIM4v7paramGPU.d_BSIM4v7gbsRWArray), size * sizeof(double)) ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,137 @@
|
|||
/**********
|
||||
Copyright 2014 - NGSPICE Software
|
||||
Author: 2014 Francesco Lannutti
|
||||
**********/
|
||||
|
||||
#include "ngspice/config.h"
|
||||
#include "CUSPICE/cucktterr.cuh"
|
||||
#include "bsim4v7def.h"
|
||||
|
||||
extern "C"
|
||||
__global__ void cuBSIM4v7trunc_kernel (BSIM4v7paramGPUstruct, int, double **, double *, double,
|
||||
int, int, double, double, double, double, double *, int *) ;
|
||||
|
||||
extern "C"
|
||||
int
|
||||
cuBSIM4v7trunc
|
||||
(
|
||||
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
|
||||
)
|
||||
{
|
||||
(void)timeStep ;
|
||||
|
||||
BSIM4v7model *model = (BSIM4v7model *)inModel ;
|
||||
int thread_x, thread_y, block_x ;
|
||||
|
||||
cudaError_t status ;
|
||||
|
||||
/* loop through all the BSIM4v7 models */
|
||||
for ( ; model != NULL ; model = BSIM4v7nextModel(model))
|
||||
{
|
||||
/* Determining how many blocks should exist in the kernel */
|
||||
thread_x = 1 ;
|
||||
thread_y = 256 ;
|
||||
if (model->n_instances % thread_y != 0)
|
||||
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
|
||||
else
|
||||
block_x = model->n_instances / thread_y ;
|
||||
|
||||
dim3 thread (thread_x, thread_y) ;
|
||||
|
||||
/* Kernel launch */
|
||||
status = cudaGetLastError () ; // clear error status
|
||||
|
||||
cuBSIM4v7trunc_kernel <<< block_x, thread >>> (model->BSIM4v7paramGPU, model->n_instances,
|
||||
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
|
||||
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
|
||||
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
|
||||
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
|
||||
|
||||
cudaDeviceSynchronize () ;
|
||||
|
||||
status = cudaGetLastError () ; // check for launch error
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
fprintf (stderr, "Kernel launch failure in the Trunc BSIM4v7 Model\n\n") ;
|
||||
return (E_NOMEM) ;
|
||||
}
|
||||
}
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__
|
||||
void
|
||||
cuBSIM4v7trunc_kernel
|
||||
(
|
||||
BSIM4v7paramGPUstruct BSIM4v7entry, int n_instances, double **CKTstates,
|
||||
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
|
||||
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
|
||||
double *CKTtimeSteps, int *PositionVector_timeSteps
|
||||
)
|
||||
{
|
||||
int instance_ID, i ;
|
||||
|
||||
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
|
||||
if (instance_ID < n_instances)
|
||||
{
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
i = 0 ;
|
||||
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 11, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 13, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 15, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
|
||||
if (BSIM4v7entry.d_BSIM4v7trnqsModArray [instance_ID])
|
||||
{
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 25, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
if (BSIM4v7entry.d_BSIM4v7rbodyModArray [instance_ID])
|
||||
{
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 19, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 21, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
if (BSIM4v7entry.d_BSIM4v7rgateModArray [instance_ID] == 3)
|
||||
{
|
||||
cuCKTterr (BSIM4v7entry.d_BSIM4v7statesArray [instance_ID] + 17, CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID] + i])) ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ;
|
||||
}
|
||||
|
|
@ -47,9 +47,10 @@ libbsim4v7_la_SOURCES += \
|
|||
CUSPICE/cubsim4v7getic.c \
|
||||
CUSPICE/cubsim4v7load.cu \
|
||||
CUSPICE/cubsim4v7setup.c \
|
||||
CUSPICE/cubsim4v7temp.c
|
||||
CUSPICE/cubsim4v7temp.c \
|
||||
CUSPICE/cubsim4v7trunc.cu
|
||||
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
|
||||
endif
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
|
|||
|
|
@ -2587,7 +2587,7 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
}
|
||||
|
||||
#ifdef USE_CUSPICE
|
||||
int i, j, jRHS, l, lRHS, status ;
|
||||
int i, j, jRHS, l, lRHS, lTimeSteps, status ;
|
||||
|
||||
/* Counting the instances */
|
||||
for (model = (BSIM4v7model *)inModel ; model != NULL ; model = BSIM4v7nextModel(model))
|
||||
|
|
@ -2612,15 +2612,22 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
/* Position Vector Allocation for the RHS */
|
||||
model->PositionVectorRHS = TMALLOC (int, model->n_instances) ;
|
||||
|
||||
/* Position Vector Allocation for timeSteps */
|
||||
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
|
||||
|
||||
|
||||
model->offset = ckt->total_n_values ;
|
||||
model->offsetRHS = ckt->total_n_valuesRHS ;
|
||||
|
||||
model->offset_timeSteps = ckt->total_n_timeSteps ;
|
||||
|
||||
|
||||
i = 0 ;
|
||||
j = 0 ;
|
||||
jRHS = 0 ;
|
||||
l = 0 ;
|
||||
lRHS = 0 ;
|
||||
lTimeSteps = 0 ;
|
||||
|
||||
/* loop through all the instances of the model */
|
||||
for (here = BSIM4v7instances(model); here != NULL ; here = BSIM4v7nextInstance(here))
|
||||
|
|
@ -2631,6 +2638,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
/* Position Vector Assignment for the RHS */
|
||||
model->PositionVectorRHS [i] = model->offsetRHS + lRHS ;
|
||||
|
||||
/* Position Vector Assignment for timeSteps */
|
||||
model->PositionVector_timeSteps [i] = model->offset_timeSteps + lTimeSteps ;
|
||||
|
||||
|
||||
/* For the Matrix */
|
||||
if (here->BSIM4v7rgateMod == 1)
|
||||
|
|
@ -2779,6 +2789,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
|
||||
/* Different Values for the CKTloadOutput */
|
||||
l += 14 ;
|
||||
|
||||
/* Different TimeSteps */
|
||||
lTimeSteps += 1 ;
|
||||
} else {
|
||||
/* m * (gcggb - ggtg + gIgtotg) */
|
||||
if ((here->BSIM4v7gNodePrime != 0) && (here->BSIM4v7gNodePrime != 0))
|
||||
|
|
@ -2911,6 +2924,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
/* Different Values for the CKTloadOutput */
|
||||
l += 16 ;
|
||||
|
||||
/* Different TimeSteps */
|
||||
lTimeSteps += 3 ;
|
||||
|
||||
|
||||
if (here->BSIM4v7rbodyMod)
|
||||
{
|
||||
|
|
@ -2988,6 +3004,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
|
||||
/* Different Values for the CKTloadOutput */
|
||||
l += 12 ;
|
||||
|
||||
/* Different TimeSteps */
|
||||
lTimeSteps += 2 ;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -3027,6 +3046,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
|
||||
/* Different Values for the CKTloadOutput */
|
||||
l += 8 ;
|
||||
|
||||
/* Different TimeSteps */
|
||||
lTimeSteps += 1 ;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -3137,6 +3159,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
|||
|
||||
model->n_PtrRHS = jRHS ;
|
||||
ckt->total_n_PtrRHS += model->n_PtrRHS ;
|
||||
|
||||
model->n_timeSteps = lTimeSteps ;
|
||||
ckt->total_n_timeSteps += model->n_timeSteps ;
|
||||
}
|
||||
|
||||
/* loop through all the BSIM4v7 models */
|
||||
|
|
|
|||
|
|
@ -3347,6 +3347,11 @@ typedef struct sBSIM4v7model
|
|||
int *PositionVectorRHS;
|
||||
int *d_PositionVectorRHS;
|
||||
|
||||
int offset_timeSteps;
|
||||
int n_timeSteps;
|
||||
int *PositionVector_timeSteps;
|
||||
int *d_PositionVector_timeSteps;
|
||||
|
||||
int n_instances;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,11 @@ SPICEdev BSIM4v7info = {
|
|||
.DEVunsetup = BSIM4v7unsetup,
|
||||
.DEVpzSetup = BSIM4v7setup,
|
||||
.DEVtemperature = BSIM4v7temp,
|
||||
#ifdef USE_CUSPICE
|
||||
.DEVtrunc = cuBSIM4v7trunc,
|
||||
#else
|
||||
.DEVtrunc = BSIM4v7trunc,
|
||||
#endif
|
||||
.DEVfindBranch = NULL,
|
||||
.DEVacLoad = BSIM4v7acLoad,
|
||||
.DEVaccept = NULL,
|
||||
|
|
|
|||
|
|
@ -73,6 +73,12 @@ GENmodel *inModel
|
|||
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
|
||||
|
||||
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
|
||||
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
/* DOUBLE */
|
||||
model->CAPparamCPU.CAPinitCondArray = (double *) malloc (size * sizeof(double)) ;
|
||||
status = cudaMalloc ((void **)&(model->CAPparamGPU.d_CAPinitCondArray), size * sizeof(double)) ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,89 @@
|
|||
/**********
|
||||
Copyright 2014 - NGSPICE Software
|
||||
Author: 2014 Francesco Lannutti
|
||||
**********/
|
||||
|
||||
#include "ngspice/config.h"
|
||||
#include "CUSPICE/cucktterr.cuh"
|
||||
#include "capdefs.h"
|
||||
|
||||
extern "C"
|
||||
__global__ void cuCAPtrunc_kernel (CAPparamGPUstruct, int, double **, double *, double,
|
||||
int, int, double, double, double, double, double *, int *) ;
|
||||
|
||||
extern "C"
|
||||
int
|
||||
cuCAPtrunc
|
||||
(
|
||||
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
|
||||
)
|
||||
{
|
||||
(void)timeStep ;
|
||||
|
||||
CAPmodel *model = (CAPmodel *)inModel ;
|
||||
int thread_x, thread_y, block_x ;
|
||||
|
||||
cudaError_t status ;
|
||||
|
||||
/* loop through all the capacitor models */
|
||||
for ( ; model != NULL ; model = CAPnextModel(model))
|
||||
{
|
||||
/* Determining how many blocks should exist in the kernel */
|
||||
thread_x = 1 ;
|
||||
thread_y = 256 ;
|
||||
if (model->n_instances % thread_y != 0)
|
||||
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
|
||||
else
|
||||
block_x = model->n_instances / thread_y ;
|
||||
|
||||
dim3 thread (thread_x, thread_y) ;
|
||||
|
||||
/* Kernel launch */
|
||||
status = cudaGetLastError () ; // clear error status
|
||||
|
||||
cuCAPtrunc_kernel <<< block_x, thread >>> (model->CAPparamGPU, model->n_instances,
|
||||
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
|
||||
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
|
||||
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
|
||||
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
|
||||
|
||||
cudaDeviceSynchronize () ;
|
||||
|
||||
status = cudaGetLastError () ; // check for launch error
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
fprintf (stderr, "Kernel launch failure in the Trunc Capacitor Model\n\n") ;
|
||||
return (E_NOMEM) ;
|
||||
}
|
||||
}
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__
|
||||
void
|
||||
cuCAPtrunc_kernel
|
||||
(
|
||||
CAPparamGPUstruct CAPentry, int n_instances, double **CKTstates,
|
||||
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
|
||||
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
|
||||
double *CKTtimeSteps, int *PositionVector_timeSteps
|
||||
)
|
||||
{
|
||||
int instance_ID ;
|
||||
|
||||
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
|
||||
if (instance_ID < n_instances)
|
||||
{
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
cuCKTterr (CAPentry.d_CAPstateArray [instance_ID], CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID]])) ;
|
||||
}
|
||||
}
|
||||
|
||||
return ;
|
||||
}
|
||||
|
|
@ -48,9 +48,10 @@ libcap_la_SOURCES += \
|
|||
CUSPICE/cucapgetic.c \
|
||||
CUSPICE/cucapload.cu \
|
||||
CUSPICE/cucapsetup.c \
|
||||
CUSPICE/cucaptemp.c
|
||||
CUSPICE/cucaptemp.c \
|
||||
CUSPICE/cucaptrunc.cu
|
||||
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
|
||||
endif
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
|
|||
|
|
@ -165,6 +165,11 @@ typedef struct sCAPmodel { /* model structure for a capacitor */
|
|||
int *PositionVectorRHS ;
|
||||
int *d_PositionVectorRHS ;
|
||||
|
||||
int offset_timeSteps ;
|
||||
int n_timeSteps ;
|
||||
int *PositionVector_timeSteps ;
|
||||
int *d_PositionVector_timeSteps ;
|
||||
|
||||
int n_instances ;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,11 @@ SPICEdev CAPinfo = {
|
|||
.DEVunsetup = NULL,
|
||||
.DEVpzSetup = CAPsetup,
|
||||
.DEVtemperature = CAPtemp,
|
||||
#ifdef USE_CUSPICE
|
||||
.DEVtrunc = cuCAPtrunc,
|
||||
#else
|
||||
.DEVtrunc = CAPtrunc,
|
||||
#endif
|
||||
.DEVfindBranch = NULL,
|
||||
.DEVacLoad = CAPacLoad,
|
||||
.DEVaccept = NULL,
|
||||
|
|
|
|||
|
|
@ -195,6 +195,18 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
|||
|
||||
for (j = 0 ; j < model->n_instances; j++)
|
||||
model->PositionVectorRHS [j] = model->offsetRHS + j ;
|
||||
|
||||
|
||||
/* Position Vector for timeSteps */
|
||||
model->offset_timeSteps = ckt->total_n_timeSteps ;
|
||||
model->n_timeSteps = model->n_instances;
|
||||
ckt->total_n_timeSteps += model->n_timeSteps ;
|
||||
|
||||
/* Position Vector assignment for timeSteps */
|
||||
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
|
||||
|
||||
for (j = 0 ; j < model->n_instances; j++)
|
||||
model->PositionVector_timeSteps [j] = model->offset_timeSteps + j ;
|
||||
}
|
||||
|
||||
/* loop through all the capacitor models */
|
||||
|
|
|
|||
|
|
@ -73,6 +73,12 @@ GENmodel *inModel
|
|||
status = cudaMemcpy (model->d_PositionVectorRHS, model->PositionVectorRHS, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVectorRHS, size, int, status)
|
||||
|
||||
status = cudaMalloc ((void **)&(model->d_PositionVector_timeSteps), size * sizeof(int)) ;
|
||||
CUDAMALLOCCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
status = cudaMemcpy (model->d_PositionVector_timeSteps, model->PositionVector_timeSteps, size * sizeof(int), cudaMemcpyHostToDevice) ;
|
||||
CUDAMEMCPYCHECK (model->d_PositionVector_timeSteps, size, int, status)
|
||||
|
||||
/* DOUBLE */
|
||||
model->INDparamCPU.INDinitCondArray = (double *) malloc (size * sizeof(double)) ;
|
||||
status = cudaMalloc ((void **)&(model->INDparamGPU.d_INDinitCondArray), size * sizeof(double)) ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,89 @@
|
|||
/**********
|
||||
Copyright 2014 - NGSPICE Software
|
||||
Author: 2014 Francesco Lannutti
|
||||
**********/
|
||||
|
||||
#include "ngspice/config.h"
|
||||
#include "CUSPICE/cucktterr.cuh"
|
||||
#include "inddefs.h"
|
||||
|
||||
extern "C"
|
||||
__global__ void cuINDtrunc_kernel (INDparamGPUstruct, int, double **, double *, double,
|
||||
int, int, double, double, double, double, double *, int *) ;
|
||||
|
||||
extern "C"
|
||||
int
|
||||
cuINDtrunc
|
||||
(
|
||||
GENmodel *inModel, CKTcircuit *ckt, double *timeStep
|
||||
)
|
||||
{
|
||||
(void)timeStep ;
|
||||
|
||||
INDmodel *model = (INDmodel *)inModel ;
|
||||
int thread_x, thread_y, block_x ;
|
||||
|
||||
cudaError_t status ;
|
||||
|
||||
/* loop through all the inductor models */
|
||||
for ( ; model != NULL ; model = INDnextModel(model))
|
||||
{
|
||||
/* Determining how many blocks should exist in the kernel */
|
||||
thread_x = 1 ;
|
||||
thread_y = 256 ;
|
||||
if (model->n_instances % thread_y != 0)
|
||||
block_x = (int)((model->n_instances + thread_y - 1) / thread_y) ;
|
||||
else
|
||||
block_x = model->n_instances / thread_y ;
|
||||
|
||||
dim3 thread (thread_x, thread_y) ;
|
||||
|
||||
/* Kernel launch */
|
||||
status = cudaGetLastError () ; // clear error status
|
||||
|
||||
cuINDtrunc_kernel <<< block_x, thread >>> (model->INDparamGPU, model->n_instances,
|
||||
ckt->dD_CKTstates, ckt->d_CKTdeltaOld,
|
||||
ckt->CKTdelta, ckt->CKTorder, ckt->CKTintegrateMethod,
|
||||
ckt->CKTabstol, ckt->CKTreltol, ckt->CKTchgtol, ckt->CKTtrtol,
|
||||
ckt->d_CKTtimeSteps, model->d_PositionVector_timeSteps) ;
|
||||
|
||||
cudaDeviceSynchronize () ;
|
||||
|
||||
status = cudaGetLastError () ; // check for launch error
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
fprintf (stderr, "Kernel launch failure in the Trunc Inductor Model\n\n") ;
|
||||
return (E_NOMEM) ;
|
||||
}
|
||||
}
|
||||
|
||||
return (OK) ;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__
|
||||
void
|
||||
cuINDtrunc_kernel
|
||||
(
|
||||
INDparamGPUstruct INDentry, int n_instances, double **CKTstates,
|
||||
double *CKTdeltaOld, double CKTdelta, int CKTorder, int CKTintegrateMethod,
|
||||
double CKTabsTol, double CKTrelTol, double CKTchgTol, double CKTtrTol,
|
||||
double *CKTtimeSteps, int *PositionVector_timeSteps
|
||||
)
|
||||
{
|
||||
int instance_ID ;
|
||||
|
||||
instance_ID = threadIdx.y + blockDim.y * blockIdx.x ;
|
||||
if (instance_ID < n_instances)
|
||||
{
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
cuCKTterr (INDentry.d_INDstateArray [instance_ID], CKTstates,
|
||||
CKTdeltaOld, CKTdelta, CKTorder, CKTintegrateMethod,
|
||||
CKTabsTol, CKTrelTol, CKTchgTol, CKTtrTol,
|
||||
&(CKTtimeSteps [PositionVector_timeSteps [instance_ID]])) ;
|
||||
}
|
||||
}
|
||||
|
||||
return ;
|
||||
}
|
||||
|
|
@ -57,7 +57,8 @@ libind_la_SOURCES += \
|
|||
CUSPICE/cuindfree.c \
|
||||
CUSPICE/cuindload.cu \
|
||||
CUSPICE/cuindsetup.c \
|
||||
CUSPICE/cuindtemp.c
|
||||
CUSPICE/cuindtemp.c \
|
||||
CUSPICE/cuindtrunc.cu
|
||||
|
||||
libind_la_SOURCES += \
|
||||
CUSPICE/muttopology.c \
|
||||
|
|
@ -66,7 +67,7 @@ libind_la_SOURCES += \
|
|||
CUSPICE/cumutsetup.c \
|
||||
CUSPICE/cumuttemp.c
|
||||
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(CUDA_CPPFLAGS) -I$(top_srcdir)/src/spicelib/analysis
|
||||
endif
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
|
|||
|
|
@ -166,6 +166,11 @@ struct sINDmodel { /* model structure for an inductor */
|
|||
int *PositionVectorRHS;
|
||||
int *d_PositionVectorRHS;
|
||||
|
||||
int offset_timeSteps;
|
||||
int n_timeSteps;
|
||||
int *PositionVector_timeSteps;
|
||||
int *d_PositionVector_timeSteps;
|
||||
|
||||
int n_instances;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,11 @@ SPICEdev INDinfo = {
|
|||
.DEVunsetup = INDunsetup,
|
||||
.DEVpzSetup = INDsetup,
|
||||
.DEVtemperature = INDtemp,
|
||||
#ifdef USE_CUSPICE
|
||||
.DEVtrunc = cuINDtrunc,
|
||||
#else
|
||||
.DEVtrunc = INDtrunc,
|
||||
#endif
|
||||
.DEVfindBranch = NULL,
|
||||
.DEVacLoad = INDacLoad,
|
||||
.DEVaccept = NULL,
|
||||
|
|
|
|||
|
|
@ -186,6 +186,19 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
|||
|
||||
for (j = 0 ; j < model->n_instances; j++)
|
||||
model->PositionVectorRHS [j] = model->offsetRHS + j ;
|
||||
|
||||
|
||||
/* Position Vector for timeSteps */
|
||||
model->offset_timeSteps = ckt->total_n_timeSteps ;
|
||||
model->n_timeSteps = model->n_instances;
|
||||
ckt->total_n_timeSteps += model->n_timeSteps ;
|
||||
|
||||
/* Position Vector assignment for timeSteps */
|
||||
model->PositionVector_timeSteps = TMALLOC (int, model->n_instances) ;
|
||||
|
||||
for (j = 0 ; j < model->n_instances; j++)
|
||||
model->PositionVector_timeSteps [j] = model->offset_timeSteps + j ;
|
||||
|
||||
}
|
||||
|
||||
/* loop through all the inductor models */
|
||||
|
|
|
|||
Loading…
Reference in New Issue