Added the support for CUDA and non-CUDA models within the same netlist in CUSPICE
This commit is contained in:
parent
a4a3da9dfd
commit
b5763eb0d8
|
|
@ -44,6 +44,10 @@ struct GENmodel { /* model structure for a resistor */
|
||||||
GENinstance *GENinstances; /* pointer to list of instances that have this
|
GENinstance *GENinstances; /* pointer to list of instances that have this
|
||||||
* model */
|
* model */
|
||||||
IFuid GENmodName; /* pointer to character string naming this model */
|
IFuid GENmodName; /* pointer to character string naming this model */
|
||||||
|
|
||||||
|
#ifdef USE_CUSPICE
|
||||||
|
unsigned int has_cuda:1 ; /* flag to indicate whether the model supports CUDA */
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ cuCKTflush
|
||||||
CKTcircuit *ckt
|
CKTcircuit *ckt
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
long unsigned int m, mRHS ;
|
long unsigned int m, mRHS ;
|
||||||
|
|
||||||
m = (long unsigned int)(ckt->total_n_values + 1) ; // + 1 because of CKTdiagGmin
|
m = (long unsigned int)(ckt->total_n_values + 1) ; // + 1 because of CKTdiagGmin
|
||||||
|
|
@ -45,6 +46,15 @@ CKTcircuit *ckt
|
||||||
|
|
||||||
/* Clean-up the CKTloadOutputRHS */
|
/* Clean-up the CKTloadOutputRHS */
|
||||||
cudaMemset (ckt->d_CKTloadOutputRHS, 0, mRHS * sizeof(double)) ;
|
cudaMemset (ckt->d_CKTloadOutputRHS, 0, mRHS * sizeof(double)) ;
|
||||||
|
} else {
|
||||||
|
int i, size ;
|
||||||
|
|
||||||
|
size = SMPmatSize (ckt->CKTmatrix) ;
|
||||||
|
for (i = 0 ; i <= size ; i++)
|
||||||
|
*(ckt->CKTrhs + i) = 0 ;
|
||||||
|
|
||||||
|
SMPclear (ckt->CKTmatrix) ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,10 @@ CKTcircuit *ckt
|
||||||
{
|
{
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
status = cudaMemcpy (ckt->d_CKTnoncon, &(ckt->CKTnoncon), sizeof(int), cudaMemcpyHostToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTnoncon, &(ckt->CKTnoncon), sizeof(int), cudaMemcpyHostToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTnoncon, 1, int, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTnoncon, 1, int, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -61,8 +63,10 @@ CKTcircuit *ckt
|
||||||
{
|
{
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
status = cudaMemcpy (&(ckt->CKTnoncon), ckt->d_CKTnoncon, sizeof(int), cudaMemcpyDeviceToHost) ;
|
status = cudaMemcpy (&(ckt->CKTnoncon), ckt->d_CKTnoncon, sizeof(int), cudaMemcpyDeviceToHost) ;
|
||||||
CUDAMEMCPYCHECK (&(ckt->CKTnoncon), 1, int, status)
|
CUDAMEMCPYCHECK (&(ckt->CKTnoncon), 1, int, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,10 @@ CKTcircuit *ckt
|
||||||
{
|
{
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
||||||
cudaMemset (ckt->d_CKTrhsOld, 0, size * sizeof(double)) ;
|
cudaMemset (ckt->d_CKTrhsOld, 0, size * sizeof(double)) ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -62,9 +64,11 @@ CKTcircuit *ckt
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
||||||
status = cudaMemcpy (ckt->d_CKTrhsOld, ckt->CKTrhsOld, size * sizeof(double), cudaMemcpyHostToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTrhsOld, ckt->CKTrhsOld, size * sizeof(double), cudaMemcpyHostToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTrhsOld, size, double, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTrhsOld, size, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -78,9 +82,11 @@ CKTcircuit *ckt
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
size = (long unsigned int)(ckt->d_MatrixSize + 1) ;
|
||||||
status = cudaMemcpy (ckt->CKTrhsOld, ckt->d_CKTrhsOld, size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
status = cudaMemcpy (ckt->CKTrhsOld, ckt->d_CKTrhsOld, size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
||||||
CUDAMEMCPYCHECK (ckt->CKTrhsOld, size, double, status)
|
CUDAMEMCPYCHECK (ckt->CKTrhsOld, size, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,7 @@ CKTcircuit *ckt
|
||||||
size2 = (long unsigned int)ckt->CKTnumStates ;
|
size2 = (long unsigned int)ckt->CKTnumStates ;
|
||||||
size3 = (long unsigned int)ckt->total_n_timeSteps ;
|
size3 = (long unsigned int)ckt->total_n_timeSteps ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
/* Topology Matrix Handling */
|
/* Topology Matrix Handling */
|
||||||
status = cudaMalloc ((void **)&(ckt->CKTmatrix->d_CKTrhs), (n + 1) * sizeof(double)) ;
|
status = cudaMalloc ((void **)&(ckt->CKTmatrix->d_CKTrhs), (n + 1) * sizeof(double)) ;
|
||||||
CUDAMALLOCCHECK (ckt->CKTmatrix->d_CKTrhs, (n + 1), double, status)
|
CUDAMALLOCCHECK (ckt->CKTmatrix->d_CKTrhs, (n + 1), double, status)
|
||||||
|
|
@ -158,6 +159,7 @@ CKTcircuit *ckt
|
||||||
CUDAMALLOCCHECK (ckt->d_CKTtimeSteps, size3, double, status)
|
CUDAMALLOCCHECK (ckt->d_CKTtimeSteps, size3, double, status)
|
||||||
status = cudaMalloc ((void **)&(ckt->d_CKTtimeStepsOut), size3 * sizeof(double)) ;
|
status = cudaMalloc ((void **)&(ckt->d_CKTtimeStepsOut), size3 * sizeof(double)) ;
|
||||||
CUDAMALLOCCHECK (ckt->d_CKTtimeStepsOut, size3, double, status)
|
CUDAMALLOCCHECK (ckt->d_CKTtimeStepsOut, size3, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@
|
||||||
#include "ngspice/sperror.h"
|
#include "ngspice/sperror.h"
|
||||||
#include "cuda_runtime_api.h"
|
#include "cuda_runtime_api.h"
|
||||||
#include "ngspice/CUSPICE/CUSPICE.h"
|
#include "ngspice/CUSPICE/CUSPICE.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
/* cudaMemcpy MACRO to check it for errors --> CUDAMEMCPYCHECK(name of pointer, dimension, type, status) */
|
/* cudaMemcpy MACRO to check it for errors --> CUDAMEMCPYCHECK(name of pointer, dimension, type, status) */
|
||||||
#define CUDAMEMCPYCHECK(a, b, c, d) \
|
#define CUDAMEMCPYCHECK(a, b, c, d) \
|
||||||
|
|
@ -47,8 +48,10 @@ CKTcircuit *ckt
|
||||||
{
|
{
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)ckt->CKTnumStates ;
|
size = (long unsigned int)ckt->CKTnumStates ;
|
||||||
cudaMemset (ckt->d_CKTstate0, 0, size * sizeof(double)) ;
|
cudaMemset (ckt->d_CKTstate0, 0, size * sizeof(double)) ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -62,9 +65,11 @@ CKTcircuit *ckt
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)ckt->CKTnumStates ;
|
size = (long unsigned int)ckt->CKTnumStates ;
|
||||||
status = cudaMemcpy (ckt->d_CKTstate0, ckt->CKTstate0, size * sizeof(double), cudaMemcpyHostToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTstate0, ckt->CKTstate0, size * sizeof(double), cudaMemcpyHostToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTstate0, size, double, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTstate0, size, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -78,9 +83,11 @@ CKTcircuit *ckt
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
size = (long unsigned int)ckt->CKTnumStates ;
|
size = (long unsigned int)ckt->CKTnumStates ;
|
||||||
status = cudaMemcpy (ckt->CKTstate0, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
status = cudaMemcpy (ckt->CKTstate0, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToHost) ;
|
||||||
CUDAMEMCPYCHECK (ckt->CKTstate0, size, double, status)
|
CUDAMEMCPYCHECK (ckt->CKTstate0, size, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -91,12 +98,16 @@ cuCKTstate01copy
|
||||||
CKTcircuit *ckt
|
CKTcircuit *ckt
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
size = (long unsigned int)ckt->CKTnumStates ;
|
size = (long unsigned int)ckt->CKTnumStates ;
|
||||||
status = cudaMemcpy (ckt->d_CKTstate1, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTstate1, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTstate1, size, double, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTstate1, size, double, status)
|
||||||
|
} else {
|
||||||
|
memcpy (ckt->CKTstate1, ckt->CKTstate0, (size_t) ckt->CKTnumStates * sizeof(double)) ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -110,11 +121,19 @@ CKTcircuit *ckt
|
||||||
int i ;
|
int i ;
|
||||||
double *temp ;
|
double *temp ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
temp = ckt->d_CKTstates [ckt->CKTmaxOrder + 1] ;
|
temp = ckt->d_CKTstates [ckt->CKTmaxOrder + 1] ;
|
||||||
for (i = ckt->CKTmaxOrder ; i >= 0 ; i--)
|
for (i = ckt->CKTmaxOrder ; i >= 0 ; i--)
|
||||||
ckt->d_CKTstates [i + 1] = ckt->d_CKTstates [i] ;
|
ckt->d_CKTstates [i + 1] = ckt->d_CKTstates [i] ;
|
||||||
|
|
||||||
ckt->d_CKTstates [0] = temp ;
|
ckt->d_CKTstates [0] = temp ;
|
||||||
|
} else {
|
||||||
|
temp = ckt->CKTstates [ckt->CKTmaxOrder + 1] ;
|
||||||
|
for (i = ckt->CKTmaxOrder ; i >= 0 ; i--) {
|
||||||
|
ckt->CKTstates [i + 1] = ckt->CKTstates [i] ;
|
||||||
|
}
|
||||||
|
ckt->CKTstates [0] = temp ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -125,6 +144,7 @@ cuCKTstate123copy
|
||||||
CKTcircuit *ckt
|
CKTcircuit *ckt
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
|
@ -135,6 +155,10 @@ CKTcircuit *ckt
|
||||||
|
|
||||||
status = cudaMemcpy (ckt->d_CKTstate3, ckt->d_CKTstate1, size * sizeof(double), cudaMemcpyDeviceToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTstate3, ckt->d_CKTstate1, size * sizeof(double), cudaMemcpyDeviceToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTstate3, size, double, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTstate3, size, double, status)
|
||||||
|
} else {
|
||||||
|
memcpy (ckt->CKTstate2, ckt->CKTstate1, (size_t) ckt->CKTnumStates * sizeof(double)) ;
|
||||||
|
memcpy (ckt->CKTstate3, ckt->CKTstate1, (size_t) ckt->CKTnumStates * sizeof(double)) ;
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
@ -147,8 +171,10 @@ CKTcircuit *ckt
|
||||||
{
|
{
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
status = cudaMemcpy (ckt->d_CKTdeltaOld, ckt->CKTdeltaOld, 7 * sizeof(double), cudaMemcpyHostToDevice) ;
|
status = cudaMemcpy (ckt->d_CKTdeltaOld, ckt->CKTdeltaOld, 7 * sizeof(double), cudaMemcpyHostToDevice) ;
|
||||||
CUDAMEMCPYCHECK (ckt->d_CKTdeltaOld, 7, double, status)
|
CUDAMEMCPYCHECK (ckt->d_CKTdeltaOld, 7, double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ CKTcircuit *ckt
|
||||||
long unsigned int nz, n ;
|
long unsigned int nz, n ;
|
||||||
cudaError_t status ;
|
cudaError_t status ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
nz = (long unsigned int)ckt->CKTmatrix->CKTklunz ;
|
nz = (long unsigned int)ckt->CKTmatrix->CKTklunz ;
|
||||||
n = (long unsigned int)ckt->CKTmatrix->CKTkluN ;
|
n = (long unsigned int)ckt->CKTmatrix->CKTkluN ;
|
||||||
|
|
||||||
|
|
@ -58,6 +59,7 @@ CKTcircuit *ckt
|
||||||
/* Copy back the RHS */
|
/* Copy back the RHS */
|
||||||
status = cudaMemcpy (ckt->CKTrhs, ckt->CKTmatrix->d_CKTrhs, (n + 1) * sizeof(double), cudaMemcpyDeviceToHost) ;
|
status = cudaMemcpy (ckt->CKTrhs, ckt->CKTmatrix->d_CKTrhs, (n + 1) * sizeof(double), cudaMemcpyDeviceToHost) ;
|
||||||
CUDAMEMCPYCHECK (ckt->CKTrhs, (n + 1), double, status)
|
CUDAMEMCPYCHECK (ckt->CKTrhs, (n + 1), double, status)
|
||||||
|
}
|
||||||
|
|
||||||
return (OK) ;
|
return (OK) ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ cuCKTtrunc
|
||||||
CKTcircuit *ckt, double timetemp, double *timeStep
|
CKTcircuit *ckt, double timetemp, double *timeStep
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
long unsigned int size ;
|
long unsigned int size ;
|
||||||
double timetempGPU ;
|
double timetempGPU ;
|
||||||
int thread_x, thread_y, block_x ;
|
int thread_x, thread_y, block_x ;
|
||||||
|
|
@ -88,6 +89,9 @@ CKTcircuit *ckt, double timetemp, double *timeStep
|
||||||
} else {
|
} else {
|
||||||
*timeStep = timetemp ;
|
*timeStep = timetemp ;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
*timeStep = MIN (2 * *timeStep, timetemp) ;
|
||||||
|
}
|
||||||
|
|
||||||
return 0 ;
|
return 0 ;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,8 +103,15 @@ CKTload(CKTcircuit *ckt)
|
||||||
return (E_NOMEM) ;
|
return (E_NOMEM) ;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Load Sparse Matrix and RHS of all the CUDA supported models */
|
||||||
for (i = 0; i < DEVmaxnum; i++) {
|
for (i = 0; i < DEVmaxnum; i++) {
|
||||||
|
|
||||||
|
#ifdef USE_CUSPICE
|
||||||
|
if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i] && ckt->CKThead[i]->has_cuda) {
|
||||||
|
#else
|
||||||
if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i]) {
|
if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i]) {
|
||||||
|
#endif
|
||||||
|
|
||||||
error = DEVices[i]->DEVload (ckt->CKThead[i], ckt);
|
error = DEVices[i]->DEVload (ckt->CKThead[i], ckt);
|
||||||
|
|
||||||
#ifdef USE_CUSPICE
|
#ifdef USE_CUSPICE
|
||||||
|
|
@ -127,6 +134,13 @@ CKTload(CKTcircuit *ckt)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_CUSPICE
|
#ifdef USE_CUSPICE
|
||||||
|
int TopologyNNZ, TopologyNNZRHS ;
|
||||||
|
|
||||||
|
TopologyNNZ = ckt->total_n_Ptr + ckt->CKTdiagElements ; // + ckt->CKTdiagElements because of CKTdiagGmin
|
||||||
|
// without the zeroes along the diagonal
|
||||||
|
TopologyNNZRHS = ckt->total_n_PtrRHS ;
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
/* Copy the CKTdiagGmin value to the GPU */
|
/* Copy the CKTdiagGmin value to the GPU */
|
||||||
// The real Gmin is needed only when the matrix will reside entirely on the GPU
|
// The real Gmin is needed only when the matrix will reside entirely on the GPU
|
||||||
// Right now, only some models support CUDA, so the matrix is only partially created on the GPU
|
// Right now, only some models support CUDA, so the matrix is only partially created on the GPU
|
||||||
|
|
@ -171,6 +185,26 @@ CKTload(CKTcircuit *ckt)
|
||||||
status = cuCKTsystemDtoH (ckt) ;
|
status = cuCKTsystemDtoH (ckt) ;
|
||||||
if (status != 0)
|
if (status != 0)
|
||||||
return (E_NOMEM) ;
|
return (E_NOMEM) ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Load Sparse Matrix and RHS of all the CUDA unsupported models */
|
||||||
|
for (i = 0; i < DEVmaxnum; i++) {
|
||||||
|
if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i] && !ckt->CKThead[i]->has_cuda) {
|
||||||
|
error = DEVices[i]->DEVload (ckt->CKThead[i], ckt);
|
||||||
|
|
||||||
|
if (ckt->CKTnoncon)
|
||||||
|
ckt->CKTtroubleNode = 0;
|
||||||
|
#ifdef STEPDEBUG
|
||||||
|
if (noncon != ckt->CKTnoncon) {
|
||||||
|
printf("device type %s nonconvergence\n",
|
||||||
|
DEVices[i]->DEVpublic.name);
|
||||||
|
noncon = ckt->CKTnoncon;
|
||||||
|
}
|
||||||
|
#endif /* STEPDEBUG */
|
||||||
|
if (error) return(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef XSPICE
|
#ifdef XSPICE
|
||||||
|
|
|
||||||
|
|
@ -246,6 +246,8 @@ CKTsetup(CKTcircuit *ckt)
|
||||||
ckt->CKTtopologyMatrixCOOxRHS = TMALLOC (double, TopologyNNZRHS) ;
|
ckt->CKTtopologyMatrixCOOxRHS = TMALLOC (double, TopologyNNZRHS) ;
|
||||||
|
|
||||||
|
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
|
|
||||||
/* Topology Matrix Pre-Allocation in CSR format */
|
/* Topology Matrix Pre-Allocation in CSR format */
|
||||||
ckt->CKTtopologyMatrixCSRp = TMALLOC (int, nz + 1) ;
|
ckt->CKTtopologyMatrixCSRp = TMALLOC (int, nz + 1) ;
|
||||||
|
|
||||||
|
|
@ -325,6 +327,7 @@ CKTsetup(CKTcircuit *ckt)
|
||||||
ret = Compress (ckt->CKTtopologyMatrixCOOiRHS, ckt->CKTtopologyMatrixCSRpRHS, n + 1, TopologyNNZRHS) ;
|
ret = Compress (ckt->CKTtopologyMatrixCOOiRHS, ckt->CKTtopologyMatrixCSRpRHS, n + 1, TopologyNNZRHS) ;
|
||||||
|
|
||||||
/* Multiply the Topology Matrix by the M Vector to build the Final CSC Matrix - after the CKTload Call */
|
/* Multiply the Topology Matrix by the M Vector to build the Final CSC Matrix - after the CKTload Call */
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -337,6 +340,7 @@ CKTsetup(CKTcircuit *ckt)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_CUSPICE
|
#ifdef USE_CUSPICE
|
||||||
|
if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) {
|
||||||
ckt->d_MatrixSize = SMPmatSize (ckt->CKTmatrix) ;
|
ckt->d_MatrixSize = SMPmatSize (ckt->CKTmatrix) ;
|
||||||
status = cuCKTsetup (ckt) ;
|
status = cuCKTsetup (ckt) ;
|
||||||
if (status != 0)
|
if (status != 0)
|
||||||
|
|
@ -361,6 +365,7 @@ CKTsetup(CKTcircuit *ckt)
|
||||||
/* CUSPARSE Matrix Properties Definition */
|
/* CUSPARSE Matrix Properties Definition */
|
||||||
cusparseSetMatType ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_MATRIX_TYPE_GENERAL) ;
|
cusparseSetMatType ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_MATRIX_TYPE_GENERAL) ;
|
||||||
cusparseSetMatIndexBase ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_INDEX_BASE_ZERO) ;
|
cusparseSetMatIndexBase ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_INDEX_BASE_ZERO) ;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef WANT_SENSE2
|
#ifdef WANT_SENSE2
|
||||||
|
|
|
||||||
|
|
@ -2601,6 +2601,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i ;
|
model->n_instances = i ;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the BSIM4v7 models */
|
/* loop through all the BSIM4v7 models */
|
||||||
|
|
|
||||||
|
|
@ -136,6 +136,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i ;
|
model->n_instances = i ;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the capacitor models */
|
/* loop through all the capacitor models */
|
||||||
|
|
|
||||||
|
|
@ -123,6 +123,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i ;
|
model->n_instances = i ;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the inductor models */
|
/* loop through all the inductor models */
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,9 @@ MUTsetup(SMPmatrix *matrix, GENmodel *inModel, CKTcircuit *ckt, int *states)
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i;
|
model->n_instances = i;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the mutual inductor models */
|
/* loop through all the mutual inductor models */
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i ;
|
model->n_instances = i ;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the resistor models */
|
/* loop through all the resistor models */
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\
|
||||||
|
|
||||||
/* How many instances we have */
|
/* How many instances we have */
|
||||||
model->n_instances = i ;
|
model->n_instances = i ;
|
||||||
|
|
||||||
|
/* This model supports CUDA */
|
||||||
|
model->gen.has_cuda = 1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop through all the voltage source models */
|
/* loop through all the voltage source models */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue