From b5763eb0d863dffd7ef93d9203223a20f7c833a9 Mon Sep 17 00:00:00 2001 From: Francesco Lannutti Date: Tue, 19 Sep 2017 21:19:23 +0200 Subject: [PATCH] Added the support for CUDA and non-CUDA models within the same netlist in CUSPICE --- src/include/ngspice/gendefs.h | 4 +++ src/spicelib/analysis/CUSPICE/cucktflush.c | 10 ++++++ .../analysis/CUSPICE/cucktnonconupdate.c | 4 +++ .../analysis/CUSPICE/cucktrhsoldupdate.c | 6 ++++ src/spicelib/analysis/CUSPICE/cucktsetup.c | 2 ++ .../analysis/CUSPICE/cucktstatesupdate.c | 26 ++++++++++++++ src/spicelib/analysis/CUSPICE/cucktsystem.c | 2 ++ src/spicelib/analysis/CUSPICE/cuckttrunc.cu | 4 +++ src/spicelib/analysis/cktload.c | 34 +++++++++++++++++++ src/spicelib/analysis/cktsetup.c | 5 +++ src/spicelib/devices/bsim4v7/b4v7set.c | 3 ++ src/spicelib/devices/cap/capsetup.c | 3 ++ src/spicelib/devices/ind/indsetup.c | 3 ++ src/spicelib/devices/ind/mutsetup.c | 3 ++ src/spicelib/devices/res/ressetup.c | 3 ++ src/spicelib/devices/vsrc/vsrcset.c | 3 ++ 16 files changed, 115 insertions(+) diff --git a/src/include/ngspice/gendefs.h b/src/include/ngspice/gendefs.h index 27c89d2e3..ac3a25d6d 100644 --- a/src/include/ngspice/gendefs.h +++ b/src/include/ngspice/gendefs.h @@ -44,6 +44,10 @@ struct GENmodel { /* model structure for a resistor */ GENinstance *GENinstances; /* pointer to list of instances that have this * model */ IFuid GENmodName; /* pointer to character string naming this model */ + +#ifdef USE_CUSPICE + unsigned int has_cuda:1 ; /* flag to indicate if the model supports CUDA */ +#endif }; diff --git a/src/spicelib/analysis/CUSPICE/cucktflush.c b/src/spicelib/analysis/CUSPICE/cucktflush.c index 7a5c21470..b72b06f3b 100644 --- a/src/spicelib/analysis/CUSPICE/cucktflush.c +++ b/src/spicelib/analysis/CUSPICE/cucktflush.c @@ -35,6 +35,7 @@ cuCKTflush CKTcircuit *ckt ) { + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { long unsigned int m, mRHS ; m = (long unsigned int)(ckt->total_n_values + 1) ; // + 1 because of
CKTdiagGmin @@ -45,6 +46,15 @@ CKTcircuit *ckt /* Clean-up the CKTloadOutputRHS */ cudaMemset (ckt->d_CKTloadOutputRHS, 0, mRHS * sizeof(double)) ; + } else { + int i, size ; + + size = SMPmatSize (ckt->CKTmatrix) ; + for (i = 0 ; i <= size ; i++) + *(ckt->CKTrhs + i) = 0 ; + + SMPclear (ckt->CKTmatrix) ; + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cucktnonconupdate.c b/src/spicelib/analysis/CUSPICE/cucktnonconupdate.c index 7f7658fa2..4bdddc9a9 100644 --- a/src/spicelib/analysis/CUSPICE/cucktnonconupdate.c +++ b/src/spicelib/analysis/CUSPICE/cucktnonconupdate.c @@ -47,8 +47,10 @@ CKTcircuit *ckt { cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { status = cudaMemcpy (ckt->d_CKTnoncon, &(ckt->CKTnoncon), sizeof(int), cudaMemcpyHostToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTnoncon, 1, int, status) + } return (OK) ; } @@ -61,8 +63,10 @@ CKTcircuit *ckt { cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { status = cudaMemcpy (&(ckt->CKTnoncon), ckt->d_CKTnoncon, sizeof(int), cudaMemcpyDeviceToHost) ; CUDAMEMCPYCHECK (&(ckt->CKTnoncon), 1, int, status) + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cucktrhsoldupdate.c b/src/spicelib/analysis/CUSPICE/cucktrhsoldupdate.c index bdd7801c7..78feb1f98 100644 --- a/src/spicelib/analysis/CUSPICE/cucktrhsoldupdate.c +++ b/src/spicelib/analysis/CUSPICE/cucktrhsoldupdate.c @@ -47,8 +47,10 @@ CKTcircuit *ckt { long unsigned int size ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)(ckt->d_MatrixSize + 1) ; cudaMemset (ckt->d_CKTrhsOld, 0, size * sizeof(double)) ; + } return (OK) ; } @@ -62,9 +64,11 @@ CKTcircuit *ckt long unsigned int size ; cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)(ckt->d_MatrixSize + 1) ; status = cudaMemcpy (ckt->d_CKTrhsOld, ckt->CKTrhsOld, size * sizeof(double), cudaMemcpyHostToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTrhsOld, size, 
double, status) + } return (OK) ; } @@ -78,9 +82,11 @@ CKTcircuit *ckt long unsigned int size ; cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)(ckt->d_MatrixSize + 1) ; status = cudaMemcpy (ckt->CKTrhsOld, ckt->d_CKTrhsOld, size * sizeof(double), cudaMemcpyDeviceToHost) ; CUDAMEMCPYCHECK (ckt->CKTrhsOld, size, double, status) + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cucktsetup.c b/src/spicelib/analysis/CUSPICE/cucktsetup.c index 75d997465..320d5bb47 100644 --- a/src/spicelib/analysis/CUSPICE/cucktsetup.c +++ b/src/spicelib/analysis/CUSPICE/cucktsetup.c @@ -76,6 +76,7 @@ CKTcircuit *ckt size2 = (long unsigned int)ckt->CKTnumStates ; size3 = (long unsigned int)ckt->total_n_timeSteps ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { /* Topology Matrix Handling */ status = cudaMalloc ((void **)&(ckt->CKTmatrix->d_CKTrhs), (n + 1) * sizeof(double)) ; CUDAMALLOCCHECK (ckt->CKTmatrix->d_CKTrhs, (n + 1), double, status) @@ -158,6 +159,7 @@ CKTcircuit *ckt CUDAMALLOCCHECK (ckt->d_CKTtimeSteps, size3, double, status) status = cudaMalloc ((void **)&(ckt->d_CKTtimeStepsOut), size3 * sizeof(double)) ; CUDAMALLOCCHECK (ckt->d_CKTtimeStepsOut, size3, double, status) + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cucktstatesupdate.c b/src/spicelib/analysis/CUSPICE/cucktstatesupdate.c index 2fb453807..d2d26e0cd 100644 --- a/src/spicelib/analysis/CUSPICE/cucktstatesupdate.c +++ b/src/spicelib/analysis/CUSPICE/cucktstatesupdate.c @@ -28,6 +28,7 @@ #include "ngspice/sperror.h" #include "cuda_runtime_api.h" #include "ngspice/CUSPICE/CUSPICE.h" +#include <string.h> /* cudaMemcpy MACRO to check it for errors --> CUDAMEMCPYCHECK(name of pointer, dimension, type, status) */ #define CUDAMEMCPYCHECK(a, b, c, d) \ @@ -47,8 +48,10 @@ CKTcircuit *ckt { long unsigned int size ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)ckt->CKTnumStates ; cudaMemset
(ckt->d_CKTstate0, 0, size * sizeof(double)) ; + } return (OK) ; } @@ -62,9 +65,11 @@ CKTcircuit *ckt long unsigned int size ; cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)ckt->CKTnumStates ; status = cudaMemcpy (ckt->d_CKTstate0, ckt->CKTstate0, size * sizeof(double), cudaMemcpyHostToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTstate0, size, double, status) + } return (OK) ; } @@ -78,9 +83,11 @@ CKTcircuit *ckt long unsigned int size ; cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { size = (long unsigned int)ckt->CKTnumStates ; status = cudaMemcpy (ckt->CKTstate0, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToHost) ; CUDAMEMCPYCHECK (ckt->CKTstate0, size, double, status) + } return (OK) ; } @@ -91,12 +98,16 @@ cuCKTstate01copy CKTcircuit *ckt ) { + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { long unsigned int size ; cudaError_t status ; size = (long unsigned int)ckt->CKTnumStates ; status = cudaMemcpy (ckt->d_CKTstate1, ckt->d_CKTstate0, size * sizeof(double), cudaMemcpyDeviceToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTstate1, size, double, status) + } else { + memcpy (ckt->CKTstate1, ckt->CKTstate0, (size_t) ckt->CKTnumStates * sizeof(double)) ; + } return (OK) ; } @@ -110,11 +121,19 @@ CKTcircuit *ckt int i ; double *temp ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { temp = ckt->d_CKTstates [ckt->CKTmaxOrder + 1] ; for (i = ckt->CKTmaxOrder ; i >= 0 ; i--) ckt->d_CKTstates [i + 1] = ckt->d_CKTstates [i] ; ckt->d_CKTstates [0] = temp ; + } else { + temp = ckt->CKTstates [ckt->CKTmaxOrder + 1] ; + for (i = ckt->CKTmaxOrder ; i >= 0 ; i--) { + ckt->CKTstates [i + 1] = ckt->CKTstates [i] ; + } + ckt->CKTstates [0] = temp ; + } return (OK) ; } @@ -125,6 +144,7 @@ cuCKTstate123copy CKTcircuit *ckt ) { + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { long unsigned int size ; cudaError_t status ; @@ -135,6 +155,10 @@ CKTcircuit *ckt status = 
cudaMemcpy (ckt->d_CKTstate3, ckt->d_CKTstate1, size * sizeof(double), cudaMemcpyDeviceToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTstate3, size, double, status) + } else { + memcpy (ckt->CKTstate2, ckt->CKTstate1, (size_t) ckt->CKTnumStates * sizeof(double)) ; + memcpy (ckt->CKTstate3, ckt->CKTstate1, (size_t) ckt->CKTnumStates * sizeof(double)) ; + } return (OK) ; } @@ -147,8 +171,10 @@ CKTcircuit *ckt { cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { status = cudaMemcpy (ckt->d_CKTdeltaOld, ckt->CKTdeltaOld, 7 * sizeof(double), cudaMemcpyHostToDevice) ; CUDAMEMCPYCHECK (ckt->d_CKTdeltaOld, 7, double, status) + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cucktsystem.c b/src/spicelib/analysis/CUSPICE/cucktsystem.c index e512d4a4b..b186bab28 100644 --- a/src/spicelib/analysis/CUSPICE/cucktsystem.c +++ b/src/spicelib/analysis/CUSPICE/cucktsystem.c @@ -48,6 +48,7 @@ CKTcircuit *ckt long unsigned int nz, n ; cudaError_t status ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { nz = (long unsigned int)ckt->CKTmatrix->CKTklunz ; n = (long unsigned int)ckt->CKTmatrix->CKTkluN ; @@ -58,6 +59,7 @@ CKTcircuit *ckt /* Copy back the RHS */ status = cudaMemcpy (ckt->CKTrhs, ckt->CKTmatrix->d_CKTrhs, (n + 1) * sizeof(double), cudaMemcpyDeviceToHost) ; CUDAMEMCPYCHECK (ckt->CKTrhs, (n + 1), double, status) + } return (OK) ; } diff --git a/src/spicelib/analysis/CUSPICE/cuckttrunc.cu b/src/spicelib/analysis/CUSPICE/cuckttrunc.cu index f12acda2c..c3d159968 100644 --- a/src/spicelib/analysis/CUSPICE/cuckttrunc.cu +++ b/src/spicelib/analysis/CUSPICE/cuckttrunc.cu @@ -31,6 +31,7 @@ cuCKTtrunc CKTcircuit *ckt, double timetemp, double *timeStep ) { + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { long unsigned int size ; double timetempGPU ; int thread_x, thread_y, block_x ; @@ -88,6 +89,9 @@ CKTcircuit *ckt, double timetemp, double *timeStep } else { *timeStep = timetemp ; } + } else { + *timeStep = MIN (2 * *timeStep, timetemp) 
; + } return 0 ; } diff --git a/src/spicelib/analysis/cktload.c b/src/spicelib/analysis/cktload.c index 7ffbe4016..be3659642 100644 --- a/src/spicelib/analysis/cktload.c +++ b/src/spicelib/analysis/cktload.c @@ -103,8 +103,15 @@ CKTload(CKTcircuit *ckt) return (E_NOMEM) ; #endif + /* Load Sparse Matrix and RHS of all the CUDA supported models */ for (i = 0; i < DEVmaxnum; i++) { + +#ifdef USE_CUSPICE + if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i] && ckt->CKThead[i]->has_cuda) { +#else if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i]) { +#endif + error = DEVices[i]->DEVload (ckt->CKThead[i], ckt); #ifdef USE_CUSPICE @@ -127,6 +134,13 @@ CKTload(CKTcircuit *ckt) } #ifdef USE_CUSPICE + int TopologyNNZ, TopologyNNZRHS ; + + TopologyNNZ = ckt->total_n_Ptr + ckt->CKTdiagElements ; // + ckt->CKTdiagElements because of CKTdiagGmin + // without the zeroes along the diagonal + TopologyNNZRHS = ckt->total_n_PtrRHS ; + + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { /* Copy the CKTdiagGmin value to the GPU */ // The real Gmin is needed only when the matrix will reside entirely on the GPU // Right now, only some models support CUDA, so the matrix is only partially created on the GPU @@ -171,6 +185,26 @@ CKTload(CKTcircuit *ckt) status = cuCKTsystemDtoH (ckt) ; if (status != 0) return (E_NOMEM) ; + } + + /* Load Sparse Matrix and RHS of all the CUDA unsupported models */ + for (i = 0; i < DEVmaxnum; i++) { + if (DEVices[i] && DEVices[i]->DEVload && ckt->CKThead[i] && !ckt->CKThead[i]->has_cuda) { + error = DEVices[i]->DEVload (ckt->CKThead[i], ckt); + + if (ckt->CKTnoncon) + ckt->CKTtroubleNode = 0; +#ifdef STEPDEBUG + if (noncon != ckt->CKTnoncon) { + printf("device type %s nonconvergence\n", + DEVices[i]->DEVpublic.name); + noncon = ckt->CKTnoncon; + } +#endif /* STEPDEBUG */ + if (error) return(error); + } + } + #endif #ifdef XSPICE diff --git a/src/spicelib/analysis/cktsetup.c b/src/spicelib/analysis/cktsetup.c index 4d3d7366f..379faea1a 100644 
--- a/src/spicelib/analysis/cktsetup.c +++ b/src/spicelib/analysis/cktsetup.c @@ -246,6 +246,8 @@ CKTsetup(CKTcircuit *ckt) ckt->CKTtopologyMatrixCOOxRHS = TMALLOC (double, TopologyNNZRHS) ; + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { + /* Topology Matrix Pre-Allocation in CSR format */ ckt->CKTtopologyMatrixCSRp = TMALLOC (int, nz + 1) ; @@ -325,6 +327,7 @@ CKTsetup(CKTcircuit *ckt) ret = Compress (ckt->CKTtopologyMatrixCOOiRHS, ckt->CKTtopologyMatrixCSRpRHS, n + 1, TopologyNNZRHS) ; /* Multiply the Topology Matrix by the M Vector to build the Final CSC Matrix - after the CKTload Call */ + } #endif } else { @@ -337,6 +340,7 @@ CKTsetup(CKTcircuit *ckt) } #ifdef USE_CUSPICE + if (ckt->total_n_Ptr > 0 && ckt->total_n_PtrRHS > 0) { ckt->d_MatrixSize = SMPmatSize (ckt->CKTmatrix) ; status = cuCKTsetup (ckt) ; if (status != 0) @@ -361,6 +365,7 @@ CKTsetup(CKTcircuit *ckt) /* CUSPARSE Matrix Properties Definition */ cusparseSetMatType ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_MATRIX_TYPE_GENERAL) ; cusparseSetMatIndexBase ((cusparseMatDescr_t)(ckt->CKTmatrix->CKTcsrmvDescr), CUSPARSE_INDEX_BASE_ZERO) ; + } #endif #ifdef WANT_SENSE2 diff --git a/src/spicelib/devices/bsim4v7/b4v7set.c b/src/spicelib/devices/bsim4v7/b4v7set.c index 91bfe2c9d..a462a1e16 100644 --- a/src/spicelib/devices/bsim4v7/b4v7set.c +++ b/src/spicelib/devices/bsim4v7/b4v7set.c @@ -2601,6 +2601,9 @@ do { if((here->ptr = SMPmakeElt(matrix,here->first,here->second))==(double *)NUL /* How much instances we have */ model->n_instances = i ; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the BSIM4v7 models */ diff --git a/src/spicelib/devices/cap/capsetup.c b/src/spicelib/devices/cap/capsetup.c index 671e8e4d0..d6d91ef9b 100644 --- a/src/spicelib/devices/cap/capsetup.c +++ b/src/spicelib/devices/cap/capsetup.c @@ -136,6 +136,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\ /* How much instances we have 
*/ model->n_instances = i ; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the capacitor models */ diff --git a/src/spicelib/devices/ind/indsetup.c b/src/spicelib/devices/ind/indsetup.c index 9f73b4ea7..889cb468d 100644 --- a/src/spicelib/devices/ind/indsetup.c +++ b/src/spicelib/devices/ind/indsetup.c @@ -123,6 +123,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\ /* How much instances we have */ model->n_instances = i ; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the inductor models */ diff --git a/src/spicelib/devices/ind/mutsetup.c b/src/spicelib/devices/ind/mutsetup.c index 0abf21076..d66162ddf 100644 --- a/src/spicelib/devices/ind/mutsetup.c +++ b/src/spicelib/devices/ind/mutsetup.c @@ -79,6 +79,9 @@ MUTsetup(SMPmatrix *matrix, GENmodel *inModel, CKTcircuit *ckt, int *states) /* How much instances we have */ model->n_instances = i; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the mutual inductor models */ diff --git a/src/spicelib/devices/res/ressetup.c b/src/spicelib/devices/res/ressetup.c index ad5accfcb..d1fa24273 100644 --- a/src/spicelib/devices/res/ressetup.c +++ b/src/spicelib/devices/res/ressetup.c @@ -94,6 +94,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\ /* How much instances we have */ model->n_instances = i ; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the resistor models */ diff --git a/src/spicelib/devices/vsrc/vsrcset.c b/src/spicelib/devices/vsrc/vsrcset.c index f83225c6e..835d6bd75 100644 --- a/src/spicelib/devices/vsrc/vsrcset.c +++ b/src/spicelib/devices/vsrc/vsrcset.c @@ -80,6 +80,9 @@ do { if((here->ptr = SMPmakeElt(matrix, here->first, here->second)) == NULL){\ /* How much instances we have */ model->n_instances = i ; + + /* This model supports CUDA */ + model->gen.has_cuda = 1 ; } /* loop through all the 
voltage source models */