transient noise simulation

2010-11-27 16:36:03 +00:00 · 2010-11-27 16:36:03 +00:00 · 5e1ed023c6
parent eaadf5d2bc
commit 5e1ed023c6
40 changed files with 15572 additions and 9055 deletions
--- a/configure.ac
+++ b/configure.ac
@ -1073,10 +1073,12 @@ AC_CONFIG_FILES([Makefile
                 src/frontend/help/Makefile
                 src/frontend/parser/Makefile
                 src/frontend/plotting/Makefile
+				 src/frontend/trannoise/Makefile
                 src/frontend/wdisp/Makefile
                 src/include/Makefile
                 src/maths/Makefile
                 src/maths/cmaths/Makefile
+                 src/maths/fft/Makefile				 
                 src/maths/misc/Makefile
                 src/maths/ni/Makefile
                 src/maths/deriv/Makefile
--- a/examples/transient-noise/noi-ring51-demo.cir
+++ b/examples/transient-noise/noi-ring51-demo.cir
@ -0,0 +1,59 @@
+* 51 stage Ring-Osc. BSIM3, transient noise
+* will need 45 min on an i7 860 with 4 threads
+
+* closes the loop between inverters xinv1 and xinv5
+vin in out dc 0.5 pulse 0.5 0 0.1n 5n 1 1 1
+
+vdd dd 0 dc 0 pulse 0 2.2 0 1n 1 1 1
+
+vss ss 0 dc 0
+ve  sub  0 dc 0
+
+vpe well 0 2.2
+
+* noisy inverters
+xiinv2 dd ss sub well out25 out50 inv253
+xiinv1 dd ss sub well in out25 inv253
+
+*very noisy inverter
+xiinv5 dd ss sub well out50 out inv1_2
+*output amplifier
+xiinv11 dd ss sub well out25 bufout inv1
+cout  bufout ss 0.2pF
+
+.option itl1=500 gmin=1e-15 itl4=10  noacct
+
+* .dc vdd 0 2 0.01
+.tran 0.01n 500n
+
+.save in bufout v(t1)
+
+.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.nmos
+.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.pmos
+
+.include noilib-demo.h
+
+.control
+unset ngdebug
+* first run
+save bufout  $ needed for restricting memory usage
+rusage
+tran 8p 10000n
+rusage
+plot bufout xlimit 90n 95n
+linearize
+fft bufout
+* next run
+reset
+save bufout
+alter @v.xiinv5.vn1[trnoise] = [ 0 0 0 0 ] $ no noise
+tran 8p 10000n
+rusage
+plot bufout xlimit 90n 95n
+linearize
+fft bufout
+plot mag(bufout) mag(sp2.bufout) xlimit 0 2G ylimit 1e-11 0.1 ylog
+.endc
+
+
+.end
--- a/examples/transient-noise/noi-sc-tr.cir
+++ b/examples/transient-noise/noi-sc-tr.cir
@ -0,0 +1,53 @@
+* simple sample & hold, transient noise
+
+* switch control
+* PULSE(V1 V2 TD TR TF PW PER)
+vgate1 ga1 0 dc 0 pulse (0 1 0 10n 10n 90n 200n)
+
+Switch1 1 2 ga1 0 smodel1
+
+* noisy input
+* rms value white,  time step, exponent < 2, rms value 1/f
+vin 1 0 dc 0 trnoise 0.1m 0.2n 1 0.1m
+*vin 1 0 dc 0 trnoise 0.1m 0.2n 0 0.1m
+
+* output
+c2 2 0 10p
+
+* second S&H
+vgate2 ga2 0 dc 0 pulse (0 1 140n 10n 10n 30n 200n)
+*Buffer EXXXXXXX N+ N- NC+ NC- VALUE
+e1 4 0 2 0 1
+Switch2 4 3 ga2 0 smodel2
+c3 3 0 10p
+
+.option itl1=500 gmin=1e-15 itl4=10  acct
+
+.model smodel1 sw vt=0.5 ron=100
+.model smodel2 sw vt=0.5 ron=100
+
+.tran 0.4n 100u
+
+
+.control
+unset ngdebug
+set filetype=ascii
+rusage
+run
+rusage all
+write noi_test.out v(1)
+plot  v(2) v(3) xlimit 4u 5u
+plot  v(ga1) v(ga2) xlimit 4u 5u
+linearize
+*rms v(1)
+fft v(3)
+plot mag(v(3)) loglog xlimit 1e4 1e8 ylimit 1e-10 1e-4
+setplot tran1
+linearize
+psd 101 v(3)
+plot mag(v(3)) xlimit 0 3e7 ylimit 0 10u
+
+.endc
+
+
+.end
--- a/examples/transient-noise/noilib-demo.h
+++ b/examples/transient-noise/noilib-demo.h
@ -0,0 +1,56 @@
+
+* standard inverter made noisy
+*.subckt inv1 dd ss sub well in out
+*vn1 out outi dc 0 noise 0.1 0.3n 1.0 0.1
+*mn1  outi in  ss  sub  n1  w=2u  l=0.25u  AS=3p AD=3p PS=4u PD=4u
+*mp1  outi in  dd  well  p1  w=4u l=0.25u  AS=7p AD=7p PS=6u PD=6u
+*.ends inv1
+
+* standard inverter
+.subckt inv1 dd ss sub well in out
+mn1  out in  ss  sub  n1  w=2u  l=0.25u  AS=3p AD=3p PS=4u PD=4u
+mp1  out in  dd  well  p1  w=4u l=0.25u  AS=7p AD=7p PS=6u PD=6u
+.ends inv1
+
+* very noisy inverter (noise on vdd and well)
+.subckt inv1_1 dd ss sub well in out
+vn1 dd idd dc 0 trnoise 0.05 0.05n 1 0.05
+vn2 well iwell dc 0 trnoise 0.05 0.05n 1 0.05
+mn1  out in  ss  sub  n1  w=2u  l=0.25u  AS=3p AD=3p PS=4u PD=4u
+mp1  out in  idd  iwell  p1  w=4u l=0.25u  AS=7p AD=7p PS=6u PD=6u
+*Cout out 0 0.1p
+.ends inv1_1
+
+
+* another very noisy inverter
+.subckt inv1_2 dd ss sub well in out
+vn1 out outi dc 0 trnoise 0.05 8p 1.0 0.001
+mn1  outi in  ss  sub  n1  w=2u  l=0.25u  AS=3p AD=3p PS=4u PD=4u
+mp1  outi in  dd  well  p1  w=4u l=0.25u  AS=7p AD=7p PS=6u PD=6u
+*Cout out 0 0.1p
+.ends inv1_2
+
+* another very noisy inverter with current sources parallel to transistor
+.subckt inv13 dd ss sub well in outi
+in1 ss outi dc 0 noise 200u 0.05n 1.0 50u
+mn1  outi in  ss  sub  n1  w=2u  l=0.25u  AS=3p AD=3p PS=4u PD=4u
+in2 dd outi dc 0 noise 200u 0.05n 1.0 50u
+mp1  outi in  dd  well  p1  w=4u l=0.25u  AS=7p AD=7p PS=6u PD=6u
+*Cout out 0 0.1p
+.ends inv13
+
+.subckt inv53 dd ss sub well in out
+xinv1 dd ss sub well in 1 inv1
+xinv2 dd ss sub well 1  2 inv1
+xinv3 dd ss sub well 2  3 inv1
+xinv4 dd ss sub well 3  4 inv1
+xinv5 dd ss sub well 4 out inv1
+.ends inv53
+
+.subckt inv253 dd ss sub well in out
+xinv1 dd ss sub well in 1 inv53
+xinv2 dd ss sub well 1  2 inv53
+xinv3 dd ss sub well 2  3 inv53
+xinv4 dd ss sub well 3  4 inv53
+xinv5 dd ss sub well 4 out inv53
+.ends inv253
--- a/examples/transient-noise/shot_ng.cir
+++ b/examples/transient-noise/shot_ng.cir
@ -0,0 +1,27 @@
+* Shot noise test with B source, diode
+* voltage on device (diode, forward)
+Vdev out 0 DC 0 PULSE(0.4 0.45 10u)
+* diode, forward direction, to be modeled with noise
+D1 mess 0 DMOD
+.model DMOD D IS=1e-14 N=1 
+X1 0 mess out ishot
+* device between 1 and 2
+* new output terminals of device including noise: 1 and 3
+.subckt ishot 1 2 3
+* white noise source with rms 1V
+VNG 0 11 DC 0 TRNOISE(1 1n 0 0)
+*measure the current i(v1)
+V1 2 3 DC 0
+* calculate the shot noise
+* sqrt(2*current*q*bandwidth)
+BI 1 3 I=sqrt(2*abs(i(v1))*1.6e-19*1e7)*v(11)
+.ends ishot
+* 20000 sample points
+.tran 1n 20u
+.control
+run
+plot (-1)*i(vdev)
+meas tran vdev_rms avg i(vdev) from=0u to=9.9u
+meas tran vdev_rms avg i(vdev) from=10.1u to=20u
+.endc
+.end
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -156,6 +156,7 @@ endif
 ngspice_LDADD += \
 	frontend/parser/libparser.la \
 	frontend/numparam/libnumparam.la \
+	frontend/trannoise/libtrannoise.la \
 	spicelib/parser/libinp.la

 if CIDER_WANTED
@ -170,6 +171,7 @@ ngspice_LDADD += \
 	maths/deriv/libderiv.la \
 	maths/cmaths/libcmaths.la \
 	maths/misc/libmathmisc.la \
+	maths/fft/libmathfft.la \	
 	maths/poly/libpoly.la \
 	maths/ni/libni.la \
 	maths/sparse/libsparse.la \
@ -208,8 +210,10 @@ ngnutmeg_LDADD += \
 	frontend/plotting/libplotting.la \
 	frontend/parser/libparser.la \
 	frontend/numparam/libnumparam.la \
+	frontend/trannoise/libtrannoise.la \	
 	maths/cmaths/libcmaths.la \
 	maths/misc/libmathmisc.la \
+	maths/fft/libmathfft.la \		
 	maths/poly/libpoly.la \
 	misc/libmisc.la \
 	spicelib/parser/libinp.la
@ -384,6 +388,7 @@ libspice_la_LIBADD += \
 	maths/deriv/libderiv.la \
 	maths/cmaths/libcmaths.la \
 	maths/misc/libmathmisc.la \
+	maths/fft/libmathfft.la \	
 	maths/poly/libpoly.la \
 	maths/ni/libni.la \
 	maths/sparse/libsparse.la \
--- a/src/frontend/Makefile.am
+++ b/src/frontend/Makefile.am
@ -1,8 +1,8 @@
 ## Process this file with automake to produce Makefile.in
 ## $Id$

-SUBDIRS = plotting help parser wdisp numparam
-DIST_SUBDIRS = plotting help parser wdisp numparam
+SUBDIRS = plotting help parser wdisp numparam trannoise
+DIST_SUBDIRS = plotting help parser wdisp numparam trannoise
 EXTRA_DIST = testcommands.c parse-bison.y
 ## For Windows with Visual Studio
 EXTRA_DIST += parse-bison.c parse-bison.h
--- a/src/frontend/com_fft.c
+++ b/src/frontend/com_fft.c
@ -16,10 +16,10 @@ Author:   2008 Holger Vogt
 #include "variable.h"
 #include "parse.h"
 #include "../misc/misc_time.h"
+#include "../maths/fft/fftext.h"

 static void fftext(double*, double*, long int, long int, int);

-
 void
 com_fft(wordlist *wl)
 {
@ -250,6 +250,302 @@ com_fft(wordlist *wl)
    tfree(win);
 }

+/*
+ * com_psd: "psd <navg> vector ..." -- spectral density of real transient
+ * vectors of the current plot.
+ *
+ * wl   first word: number of neighbouring frequency bins averaged by the
+ *      rectangular smoothing filter (a value that does not parse or is
+ *      smaller than 1 falls back to 1; values larger than the output
+ *      length are limited to the output length);
+ *      remaining words: the vectors to transform.
+ *
+ * Each accepted vector is multiplied with the window named by the
+ * "specwindow" variable (default "blackman"), zero padded to the next
+ * power of two and transformed with rffts().  The squared magnitudes are
+ * averaged over the smoothing width; the square root of that average,
+ * divided by 0.3*size, is stored in the real part of a new complex vector.
+ * The results go into a newly allocated plot ("spectrum"), which becomes
+ * the current plot.  Vectors of wrong length, complex vectors and time
+ * vectors are skipped with a message.
+ */
+void
+com_psd(wordlist *wl)
+{
+    ngcomplex_t **fdvec;
+    double  **tdvec;
+    double  *freq, *win, *time, *ave;
+    double  span, noipower, scaling, sum;
+    double  scale, sigma;
+    float   *reald;
+    int     ngood, mm, size, order;
+    unsigned long fpts, i, j, tlen, jj, smooth, hsmooth;
+    char    *s;
+    struct dvec  *f, *vlist, *lv, *vec;
+    struct pnode *names, *first_name;
+
+    if (!plot_cur || !plot_cur->pl_scale) {
+        fprintf(cp_err, "Error: no vectors loaded.\n");
+        return;
+    }
+    if (!isreal(plot_cur->pl_scale) ||
+        ((plot_cur->pl_scale)->v_type != SV_TIME)) {
+        fprintf(cp_err, "Error: psd needs real time scale\n");
+        return;
+    }
+    /* time[tlen-1] and the time span need at least two points */
+    if ((plot_cur->pl_scale)->v_length < 2) {
+        fprintf(cp_err, "Error: psd needs at least two time points\n");
+        return;
+    }
+
+    tlen = (plot_cur->pl_scale)->v_length;
+    time = (plot_cur->pl_scale)->v_realdata;
+    span = time[tlen-1] - time[0];
+
+    /* get filter length from parameter input */
+    s = wl->wl_word;
+    if (!(ave = ft_numparse(&s, FALSE)) || (*ave < 1.0)) {
+        fprintf(cp_out, "Number of averaged data points:  %d\n", 1);
+        smooth = 1;
+    } else {
+        smooth = (int)(*ave);
+    }
+    wl = wl->wl_next;
+
+    /* size of input vector is a power of two, not smaller than the
+       spice vector */
+    size = 1;
+    mm = 0;
+    while ((unsigned long)size < tlen) {
+        size <<= 1;
+        mm++;
+    }
+
+    /* output vector has length of size/2 */
+    fpts = size>>1;
+
+    /* the smoothing width has to fit into the output vector, otherwise
+       the unsigned index arithmetic of the smoothing loops wraps around */
+    if (smooth > fpts)
+        smooth = fpts;
+
+    /* window function */
+    win = TMALLOC(double, tlen);
+    {
+        char   window[BSIZE_SP];
+        double maxt = time[tlen-1];
+
+        if (!cp_getvar("specwindow", CP_STRING, window))
+            strcpy(window,"blackman");
+        if (eq(window, "none")) {
+            for (i = 0; i < tlen; i++)
+                win[i] = 1;
+        } else if (eq(window, "rectangular")) {
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span)
+                    win[i] = 0;
+                else
+                    win[i] = 1;
+            }
+        } else if (eq(window, "hanning") || eq(window, "cosine")) {
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span)
+                    win[i] = 0;
+                else
+                    win[i] = 1 - cos(2*M_PI*(time[i]-maxt)/span);
+            }
+        } else if (eq(window, "hamming")) {
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span)
+                    win[i] = 0;
+                else
+                    win[i] = 1 - 0.92/1.08*cos(2*M_PI*(time[i]-maxt)/span);
+            }
+        } else if (eq(window, "triangle") || eq(window, "bartlet")) {
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span)
+                    win[i] = 0;
+                else
+                    win[i] = 2 - fabs(2+4*(time[i]-maxt)/span);
+            }
+        } else if (eq(window, "blackman")) {
+            /* only order 2 is implemented, "specwindoworder" is not used */
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span) {
+                    win[i] = 0;
+                } else {
+                    win[i]  = 1;
+                    win[i] -= 0.50/0.42*cos(2*M_PI*(time[i]-maxt)/span);
+                    win[i] += 0.08/0.42*cos(4*M_PI*(time[i]-maxt)/span);
+                }
+            }
+        } else if (eq(window, "gaussian")) {
+            if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
+            if (order < 2) order = 2;
+            sigma = 1.0/order;
+            scale = 0.83/sigma;
+            for (i = 0; i < tlen; i++) {
+                if (maxt-time[i] > span)
+                    win[i] = 0;
+                else
+                    win[i] = scale*exp(-0.5*pow((time[i]-maxt/2)/(sigma*maxt/2),2));
+            }
+        } else {
+            fprintf(cp_err, "Warning: unknown window type %s\n", window);
+            tfree(win);
+            return;
+        }
+    }
+
+    /* collect the vectors which can be transformed, linked by v_link2 */
+    names = ft_getpnames(wl, TRUE);
+    first_name = names;
+    vlist = lv = NULL;
+    ngood = 0;
+    while (names) {
+        vec = ft_evaluate(names);
+        names = names->pn_next;
+        while (vec) {
+            if ((unsigned long)vec->v_length != tlen) {
+                fprintf(cp_err, "Error: lengths of %s vectors don't match: %d, %lu\n",
+                        vec->v_name, vec->v_length, tlen);
+                vec = vec->v_link2;
+                continue;
+            }
+            if (!isreal(vec)) {
+                fprintf(cp_err, "Error: %s isn't real!\n",
+                        vec->v_name);
+                vec = vec->v_link2;
+                continue;
+            }
+            if (vec->v_type == SV_TIME) {
+                vec = vec->v_link2;
+                continue;
+            }
+            if (!vlist)
+                vlist = vec;
+            else
+                lv->v_link2 = vec;
+            lv = vec;
+            vec = vec->v_link2;
+            ngood++;
+        }
+    }
+    free_pnode(first_name);
+    if (!ngood) {
+        /* nothing to transform: release the window before leaving */
+        tfree(win);
+        return;
+    }
+
+    plot_cur = plot_alloc("spectrum");
+    plot_cur->pl_next = plot_list;
+    plot_list = plot_cur;
+    plot_cur->pl_title = copy((plot_cur->pl_next)->pl_title);
+    plot_cur->pl_name = copy("PSD");
+    plot_cur->pl_date = copy(datestring( ));
+
+    /* frequency scale of the new plot */
+    freq = TMALLOC(double, fpts);
+    f = alloc(struct dvec);
+    ZERO(f, struct dvec);
+    f->v_name = copy("frequency");
+    f->v_type = SV_FREQUENCY;
+    f->v_flags = (VF_REAL | VF_PERMANENT | VF_PRINT);
+    f->v_length = fpts;
+    f->v_realdata = freq;
+    vec_new(f);
+
+    for (i = 0; i<fpts; i++) freq[i] = i*1./span*tlen/size;
+
+    /* one complex output vector per accepted input vector; the data
+       arrays fdvec[i] are handed over to the new vectors */
+    tdvec = TMALLOC(double*, ngood);
+    fdvec = TMALLOC(ngcomplex_t*, ngood);
+    for (i = 0, vec = vlist; i<(unsigned long)ngood; i++) {
+        tdvec[i] = vec->v_realdata; /* real input data */
+        fdvec[i] = TMALLOC(ngcomplex_t, fpts); /* complex output data */
+        f = alloc(struct dvec);
+        ZERO(f, struct dvec);
+        f->v_name = vec_basename(vec);
+        f->v_type = SV_NOTYPE; /* not vec->v_type */
+        f->v_flags = (VF_COMPLEX | VF_PERMANENT);
+        f->v_length = fpts;
+        f->v_compdata = fdvec[i];
+        vec_new(f);
+        vec = vec->v_link2;
+    }
+
+    printf("PSD: Time span: %g s, input length: %d, zero padding: %lu\n", span, size, (unsigned long)size - tlen);
+    printf("PSD: Freq. resolution: %g Hz, output length: %lu\n", 1.0/span*tlen/size, fpts);
+
+    reald = TMALLOC(float, size);
+    scaling = size*0.3;
+    hsmooth = smooth>>1;
+
+    /* Green's FFT; the tables depend on mm only, set them up once */
+    fftInit(mm);
+
+    for (i = 0; i<(unsigned long)ngood; i++) {
+        /* windowed input data, zero padded up to size */
+        for (j = 0; j < tlen; j++)
+            reald[j] = tdvec[i][j]*win[j];
+        for (j = tlen; j < (unsigned long)size; j++)
+            reald[j] = 0.;
+
+        rffts(reald, mm, 1);
+
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+        noipower = fdvec[i][0].cx_real = (double)reald[0]*(double)reald[0];
+        fdvec[i][fpts-1].cx_real = (double)reald[1]*(double)reald[1];
+        noipower += fdvec[i][fpts-1].cx_real;
+        for (j=1; j<(fpts - 1); j++){
+            jj = j<<1;
+            fdvec[i][j].cx_real = ((double)reald[jj]*(double)reald[jj] + (double)reald[jj + 1]*(double)reald[jj + 1]);
+            fdvec[i][j].cx_imag = 0;
+            noipower += fdvec[i][j].cx_real;
+        }
+        printf("Total noise power up to Nyquist frequency %5.3e Hz:\n%e V^2 (or A^2), \nnoise voltage or current %e V (or A)\n", 
+           freq[fpts-1],noipower/span*tlen/size/scaling, sqrt(noipower/span*tlen/size/scaling));
+
+        /* smoothing with rectangular window of width "smooth",
+           plotting V/sqrt(Hz) or I/sqrt(Hz);
+           reald is reused as scratch buffer for the smoothed values */
+
+        /* lower edge: the window is cut off at bin 0 */
+        for (j=0; j<hsmooth; j++){
+            sum = 0.;
+            for (jj = 0; jj < hsmooth + j; jj++)
+                sum += fdvec[i][jj].cx_real;
+            sum /= (double)(hsmooth + j);
+            reald[j] = sqrt(sum)/scaling;
+        }
+        /* full window */
+        for (j=hsmooth; j<fpts-hsmooth; j++){
+            sum = 0.;
+            for (jj = 0; jj < smooth; jj++)
+                sum += fdvec[i][j-hsmooth+jj].cx_real;
+            sum /= (double)smooth;
+            reald[j] = sqrt(sum)/scaling;
+        }
+        /* upper edge: the window is cut off at the last bin; never read
+           beyond fdvec[i][fpts-1], divide by the number of bins summed */
+        for (j=fpts-hsmooth; j<fpts; j++){
+            sum = 0.;
+            for (jj = j-hsmooth; jj < fpts; jj++)
+                sum += fdvec[i][jj].cx_real;
+            sum /= (double)(fpts - j + hsmooth);
+            reald[j] = sqrt(sum)/scaling;
+        }
+        for (j=0; j<fpts; j++)
+            fdvec[i][j].cx_real = reald[j];
+    }
+
+    fftFree();
+
+    tfree(reald);
+    tfree(win);
+    tfree(tdvec);
+    tfree(fdvec);
+}
+
+
+

 static void fftext(double* x, double* y, long int n, long int nn, int dir)
 {
--- a/src/frontend/com_fft.h
+++ b/src/frontend/com_fft.h
@ -7,5 +7,6 @@
 #define FFT_H_INCLUDED

 void com_fft(wordlist *wl);
+void com_psd(wordlist *wl);

 #endif
--- a/src/frontend/commands.c
+++ b/src/frontend/commands.c
@ -315,7 +315,11 @@ struct comm spcp_coms[] = {
    { "fft", com_fft, FALSE, TRUE,
      { 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
      NULL,
-      "vector ... : Create a frequency domain plot with FFT." } ,      
+      "vector ... : Create a frequency domain plot with FFT." } ,    
+    { "psd", com_psd, FALSE, TRUE,
+      { 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
+      NULL,
+      "vector ... : Create a power spetral density plot with FFT." } ,  
    { "fourier", com_fourier, FALSE, TRUE,
      { 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
      NULL,
@ -713,7 +717,11 @@ struct comm nutcp_coms[] = {
    { "fft", com_fft, FALSE, TRUE,
      { 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
      NULL,
-      "vector ... : Create a frequency domain plot with FFT." } ,      
+      "vector ... : Create a frequency domain plot with FFT." } , 
+    { "psd", com_psd, FALSE, TRUE,
+      { 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
+      NULL,
+      "vector ... : Create a power spetral density plot with FFT." } , 
    { "fourier", com_fourier, FALSE, TRUE,
      { 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
      NULL,
--- a/src/frontend/trannoise/1-f-code.c
+++ b/src/frontend/trannoise/1-f-code.c
@ -0,0 +1,62 @@
+/* Copyright: Holger Vogt, 2008
+ Generates 1/f noise values according to:
+ "Discrete simulation of colored noise and stochastic 
+ processes and 1/fa power law noise generation"
+ Kasdin, N.J.;
+ Proceedings of the IEEE
+ Volume 83,  Issue 5,  May 1995 Page(s):802 - 827
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>			// var. argumente
+#include "1-f-code.h"
+#include "ngspice.h"
+
+#include "fftext.h"
+#include "wallace.h"
+
+
+/* Fill X[] with a 1/f^alpha noise sequence (single precision).
+
+   n_pts  number of values to generate; the message printed at the end
+          treats it as 2 to the power n_exp -- TODO confirm that callers
+          guarantee this
+   n_exp  exponent handed to fftInit(), rffts() and riffts()
+   X      output array, receives the result of the inverse transform
+   Q_d    factor applied to every white noise sample
+   alpha  exponent of the noise; alpha/2 enters the recursion for hk
+
+   The filter coefficients h[k] = h[k-1] * (alpha/2 + k-1) / k and a
+   sequence of Q_d * GaussWa samples are transformed, multiplied with
+   rspectprod() and transformed back into X.
+   The fft tables set up by fftInit() are not freed here. */
+void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
+float alpha)
+{
+   int i;
+   float *hfa, *wfa;
+   float ha;
+
+   ha = alpha/2.0f ;
+   hfa = TMALLOC(float,n_pts);
+   wfa = TMALLOC(float,n_pts);
+   hfa[0] = 1.0f;
+   wfa[0] = Q_d * (float)GaussWa;
+   for (i=1 ; i < n_pts; i++) {
+      /* generate the coefficients hk */
+      hfa[i] = hfa[i-1] * (ha + (float)(i-1)) / ( (float)(i) );
+      /* fill the sequence wk with white noise */
+      wfa[i] = Q_d * (float)GaussWa;
+   }
+
+   /* perform the discrete Fourier transform */
+   fftInit(n_exp);
+   rffts(hfa, n_exp, 1);
+   rffts(wfa, n_exp, 1) ;
+
+   /* multiply the two complex vectors */
+   rspectprod(hfa, wfa, X, n_pts);
+   /* inverse transform */
+   riffts(X, n_exp, 1);
+
+   /* buffers come from TMALLOC, so release them with tfree, not free */
+   tfree(hfa);
+   tfree(wfa);
+   /* fft tables will be freed in vsrcaccept.c and isrcaccept.c 
+   fftFree(); */
+   fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
+}
+
--- a/src/frontend/trannoise/1-f-code_d.c
+++ b/src/frontend/trannoise/1-f-code_d.c
@ -0,0 +1,61 @@
+/* Copyright: Holger Vogt, 2008
+ Discrete simulation of colored noise and stochastic 
+ processes and 1/fa power law noise generation
+ Kasdin, N.J.;
+ Proceedings of the IEEE
+ Volume 83,  Issue 5,  May 1995 Page(s):802 - 827
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>			// var. argumente
+#include "1-f-code.h"
+#include "ngspice.h"
+
+#include "fftext.h"
+#include "wallace.h"
+
+
+/* Fill X[] with a 1/f^alpha noise sequence (double precision).
+
+   n_pts  number of values to generate; the message printed at the end
+          treats it as 2 to the power n_exp -- TODO confirm that callers
+          guarantee this
+   n_exp  exponent handed to fftInit(), rffts() and riffts()
+   X      output array, receives the result of the inverse transform
+   Q_d    factor applied to every white noise sample
+   alpha  exponent of the noise; alpha/2 enters the recursion for hk
+
+   The filter coefficients h[k] = h[k-1] * (alpha/2 + k-1) / k and a
+   sequence of Q_d * GaussWa samples are transformed, multiplied with
+   rspectprod() and transformed back into X.
+   The fft tables set up by fftInit() are not freed here. */
+void f_alpha(int n_pts, int n_exp, double X[], double Q_d,
+double alpha)
+{
+   int i;   /* signed like n_pts, so a negative n_pts ends the loop at once */
+   double *hfa, *wfa;
+   double ha;
+
+   ha = alpha/2.0 ;
+   hfa = TMALLOC(double,n_pts);
+   wfa = TMALLOC(double,n_pts);
+   hfa[0] = 1.0;
+   wfa[0] = Q_d * GaussWa;
+   for (i=1 ; i < n_pts; i++) {
+      /* generate the coefficients hk */
+      hfa[i] = hfa[i-1] * (ha + (double)(i-1)) / ( (double)(i) );
+      /* fill the sequence wk with white noise */
+      wfa[i] = Q_d * GaussWa;
+   }
+
+   /* perform the discrete Fourier transform */
+   fftInit(n_exp);
+   rffts(hfa, n_exp, 1);
+   rffts(wfa, n_exp, 1) ;
+
+   /* multiply the two complex vectors */
+   rspectprod(hfa, wfa, X, n_pts);
+   /* inverse transform */
+   riffts(X, n_exp, 1);
+
+   /* buffers come from TMALLOC, so release them with tfree, not free */
+   tfree(hfa);
+   tfree(wfa);
+   /* fft tables will be freed in vsrcaccept.c and isrcaccept.c 
+   fftFree(); */
+   fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
+}
+
--- a/src/frontend/trannoise/FastNorm3.c
+++ b/src/frontend/trannoise/FastNorm3.c
@ -0,0 +1,846 @@
+/*	This is file FastNorm3.c   */
+/*	SUPERCEDES FastNorm.c, FastNorm2.c.  Use with FastNorm3.h  */
+/*		24  June  2003   */
+
+/*	A package containing a very fast generator of pseudo-random
+	Unit NORMAL variates, and some fairly high-quality UNIFORM
+	generators. It also contains a straightforward implementation of
+	a ChiSquared and Gamma generator copied from Ahrens and Dieter.
+	*/
+
+/*	Version 3 with double transformations and controllable extension
+	to repeat the double transformations for higher quality at lower
+	speed.
+		Dated 17 May 2003.
+		Copyright Christopher Stewart Wallace.
+	*/
+/*
+%A C. S. Wallace
+%T Fast Pseudo-Random Generators for Normal and Exponential Variates.
+%J ACM Trans. Math. Software
+%V 22
+%N 1
+%P 119-127
+%M MAR
+%D 1996
+%O TR 94/197, May 1994, Dept. Computer Science, Monash University
+%K CSW, CSWallace, Monash, pseudo random number generator, algorithm,
+   jrnl, TOMS, numbers, normal, probability, distribution, PRNG, RNG, Gaussian,
+   distribution, jrnl, ACM, TOMS, TR 94 197, TR197, c1996, c199x, c19xx
+*/
+/*	Use of this package requires the file "FastNorm3.h" which must be
+	#include-ed in any C files using this package.
+
+	The main purpose of this package is to provide a very fast source
+of pseudo-random variates from the Unit Normal N(0,1) distribution, having
+the density function
+
+	f(x) = (1/sqrt(2*PI)) * exp (-0.5 * x^2)
+
+Variates are obtained not by calling a function, but by use of a macro
+"FastNorm" defined in FastNorm3.h. In a C program, this macro may appear
+anywhere a (double) expression could appear, e.g in statements like
+	z += FastNorm;
+	if (FastNorm < 1.1) .....
+	q = fabs (FastNorm);   etc.
+
+The revision history, and a reference to the method description, is given
+later in this file under the heading "Revision history Fastnorm".
+
+	Major sections of this file, such as the revision history and the
+major subroutines, are all headed by a line containing a row of minus signs (-)
+and the name of the section or subroutine.
+
+	The generators included are:
+a Uniform source of integers, unsigned integers and doubles.
+Chi-sq(N)  (based on Ahrens and Dieter)
+Gamma(N)	(= 0.5 * Chi-sq(2N))
+Normal	(a very fast routine)
+	*/
+
+/*	----------------- inclusions and some definitions ------------  */
+#include <math.h>
+#ifndef NOSPICE
+#include "ngspice.h"
+#endif
+#include "FastNorm3.h"
+
+
+/*	--------------- (Uniform) c7rand, irandm, urandm ----------  */
+/*
+c	A random number generator called as a function by
+c	c7rand (iseed)	or	irandm (iseed)  or urandm (iseed)
+
+c	The parameter should be a pointer to a 2-element Sw vector.
+c	The first call gives a double uniform in 0 .. 1.
+c	The second gives an Sw integer uniform in 0 .. 2**31-1
+c	The third gives an Sw integer with 32 bits, so unif in
+c	-2**31 .. 2**31-1 if used in 32-bit signed arithmetic.
+c	All update iseed[] in exactly the same way.
+c	iseed[] must be a 2-element Sw vector.
+c	The initial value of iseed[1] may be any 32-bit integer.
+c	The initial value of iseed[0] may be any 32-bit integer except -1.
+c
+c	The period of the random sequence is 2**32 * (2**32-1)
+c	Its quality is quite good. It is based on the mixed multiplicative
+c	congruential (Lehmer) generator
+		x[n+1] = (69069 * x[n] + odd constant) MOD 2^32
+c	but avoids most of the well-known defects of this type of generator
+c	by, in effect, generating x[n+k] from x[n] as defined by the
+c	sequence above, where k is chosen randomly in 1 ... 128 with the
+c	help of a subsidiary Tausworthe-type generator.
+c		For the positive integer generator irandm, the less
+c	significant digits are more random than is usual for a Lehmer
+c	generator. The last n<31 digits do not repeat with a period of 2^n.
+c	This is also true of the unsigned integer generator urandm, but less
+c	so.
+
+c	This is an implementation in C of the algorithm described in
+c	Technical Report "A Long-Period Pseudo-Random Generator"
+c	TR89/123, Computer Science, Monash University,
+c		Clayton, Vic 3168 AUSTRALIA
+c			by
+c
+c		C.S.Wallace	csw@cs.monash.edu.au
+
+c	The table mt[0:127] is defined by mt[i] = 69069 ** (128-i)
+	*/
+
+/*	Pattern XORed into the cyclically shifted word 'it' whenever its
+	bit 31 is clear after the shift (see c7rand, irandm, urandm)  */
+#define MASK ((Sw) 0x12DD4922)
+/*	or in decimal, 316492066	*/
+#define SCALE ((double) 1.0 / (1024.0 * 1024.0 * 1024.0 * 2.0))
+/*	i.e. 2 to power -31; c7rand multiplies its integer result by it  */
+
+/*	Multiplier table for the Lehmer step of c7rand, irandm and urandm,
+	indexed there by the low 7 bits of the shifted word (it & 127).
+	The last entry, mt[127], is the plain multiplier 69069.  */
+static Sw mt [128] =	{
+	902906369,
+	2030498053,
+	-473499623,
+	1640834941,
+	723406961,
+	1993558325,
+	-257162999,
+	-1627724755,
+	913952737,
+	278845029,
+	1327502073,
+	-1261253155,
+	981676113,
+	-1785280363,
+	1700077033,
+	366908557,
+	-1514479167,
+	-682799163,
+	141955545,
+	-830150595,
+	317871153,
+	1542036469,
+	-946413879,
+	-1950779155,
+	985397153,
+	626515237,
+	530871481,
+	783087261,
+	-1512358895,
+	1031357269,
+	-2007710807,
+	-1652747955,
+	-1867214463,
+	928251525,
+	1243003801,
+	-2132510467,
+	1874683889,
+	-717013323,
+	218254473,
+	-1628774995,
+	-2064896159,
+	69678053,
+	281568889,
+	-2104168611,
+	-165128239,
+	1536495125,
+	-39650967,
+	546594317,
+	-725987007,
+	1392966981,
+	1044706649,
+	687331773,
+	-2051306575,
+	1544302965,
+	-758494647,
+	-1243934099,
+	-75073759,
+	293132965,
+	-1935153095,
+	118929437,
+	807830417,
+	-1416222507,
+	-1550074071,
+	-84903219,
+	1355292929,
+	-380482555,
+	-1818444007,
+	-204797315,
+	170442609,
+	-1636797387,
+	868931593,
+	-623503571,
+	1711722209,
+	381210981,
+	-161547783,
+	-272740131,
+	-1450066095,
+	2116588437,
+	1100682473,
+	358442893,
+	-1529216831,
+	2116152005,
+	-776333095,
+	1265240893,
+	-482278607,
+	1067190005,
+	333444553,
+	86502381,
+	753481377,
+	39000101,
+	1779014585,
+	219658653,
+	-920253679,
+	2029538901,
+	1207761577,
+	-1515772851,
+	-236195711,
+	442620293,
+	423166617,
+	-1763648515,
+	-398436623,
+	-1749358155,
+	-538598519,
+	-652439379,
+	430550625,
+	-1481396507,
+	2093206905,
+	-1934691747,
+	-962631983,
+	1454463253,
+	-1877118871,
+	-291917555,
+	-1711673279,
+	201201733,
+	-474645415,
+	-96764739,
+	-1587365199,
+	1945705589,
+	1303896393,
+	1744831853,
+	381957665,
+	2135332261,
+	-55996615,
+	-1190135011,
+	1790562961,
+	-1493191723,
+	475559465,
+	69069
+		};
+
+/*	Uniform double variate: advances the two-word state is[0], is[1]
+	and returns the new Lehmer word, folded to a non-negative value,
+	multiplied by SCALE.  */
+double c7rand (Sw *is)
+{
+	Sw shreg = is [0];
+	Sw lehmer = is [1];
+
+	/* rotate the shift register word right by 7 places */
+	shreg = ((shreg >> 7) & 0x01FFFFFF) + ((shreg & 0x7F) << 25);
+	if (!(shreg & 0x80000000))
+		shreg ^= MASK;
+
+	/* Lehmer step, multiplier picked by the low 7 bits of shreg */
+	lehmer = (lehmer * mt[shreg & 127] + shreg) & 0xFFFFFFFF;
+
+	/* store the state before the result is folded */
+	is [0] = shreg;
+	is [1] = lehmer;
+
+	if (lehmer & 0x80000000)
+		lehmer ^= 0xFFFFFFFF;
+	return (SCALE * lehmer);
+}
+
+
+
+/*	Non-negative integer variate: same state update as c7rand, but the
+	folded Lehmer word itself is returned.  */
+Sw irandm (Sw *is)
+{
+	Sw shreg = is [0];
+	Sw lehmer = is [1];
+
+	/* rotate the shift register word right by 7 places */
+	shreg = ((shreg >> 7) & 0x01FFFFFF) + ((shreg & 0x7F) << 25);
+	if (!(shreg & 0x80000000))
+		shreg ^= MASK;
+
+	/* Lehmer step, multiplier picked by the low 7 bits of shreg */
+	lehmer = (lehmer * mt[shreg & 127] + shreg) & 0xFFFFFFFF;
+
+	/* store the state before the result is folded */
+	is [0] = shreg;
+	is [1] = lehmer;
+
+	if (lehmer & 0x80000000)
+		lehmer ^= 0xFFFFFFFF;
+	return (lehmer);
+}
+
+
+/*	Unsigned integer variate: same state update as c7rand and irandm,
+	the new Lehmer word is returned without folding.  */
+unsigned int urandm (Sw *is)
+{
+	Sw shreg = is [0];
+	Sw lehmer = is [1];
+
+	/* rotate the shift register word right by 7 places */
+	shreg = ((shreg >> 7) & 0x01FFFFFF) + ((shreg & 0x7F) << 25);
+	if (!(shreg & 0x80000000))
+		shreg ^= MASK;
+
+	/* Lehmer step, multiplier picked by the low 7 bits of shreg */
+	lehmer = (lehmer * mt[shreg & 127] + shreg) & 0xFFFFFFFF;
+
+	is [0] = shreg;
+	is [1] = lehmer;
+	return (lehmer);
+}
+
+
+/*	---------------  (Chi-squared) adchi -----------------------  */
+/*	Simple implementation of Ahrens and Dieter method for a chi-sq
+random variate of order a >> 1.  Uses c7rand, maths library  */
+/*	13 July 1998  */
+/*	Slightly faster if 'a' is the same as on previous call  */
+/*	This routine is no longer used in the fastnorm code, but is included
+because it may be useful  */
+
+
+/*	Parameters cached between calls and shared by adchi() and rgamma();
+	both keep gm = gorder - 1, aold = 2 * gorder, rt2gm = sqrt (aold - 1)  */
+static double gorder, gm, rt2gm, aold;
+
+/*	Chi-squared variate of order a, by rejection sampling.
+	'is' is the seed vector handed on to c7rand.
+	The cached parameters are recomputed only when a differs from the
+	value of the previous call.  */
+double adchi (double a, int *is)
+{
+	double x, y, z, sq;
+
+	if (a != aold)	{
+		aold = a;
+		gorder = 0.5 * a;
+		gm = gorder - 1.0;
+		rt2gm = sqrt (aold - 1.0);
+		}
+
+	for (;;)	{
+		/* candidate point; retried unless 0.25 <= x*x + z*z <= 1 */
+		x = 2.0 * c7rand(is) - 1.0;
+		z = c7rand(is);
+		sq = x*x + z*z;
+		if ((sq > 1.0) || (sq < 0.25)) continue;
+
+		y = x / z;
+		x = rt2gm * y + gm;
+		if (x < 0.0) continue;
+
+		/* acceptance test against a further uniform variate */
+		z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
+		if (c7rand(is) > z) continue;
+
+		return (2.0 * x);
+		}
+}
+
+/*	-------------------- (Gamma) rgamma (g, is) -----------	*/
+
+/*	Gamma variate of order g, by the same rejection scheme as adchi
+	but returning x instead of 2 * x.
+	'is' is the seed vector handed on to c7rand.
+	The cached parameters are recomputed only when g differs from the
+	cached order.  */
+double rgamma (double g, int *is)
+{
+	double x, y, z, sq;
+
+	if (g != gorder)	{
+		gorder = g;
+		gm = gorder - 1.0;
+		aold = 2.0 * gorder;
+		rt2gm = sqrt (aold - 1.0);
+		}
+
+	for (;;)	{
+		/* candidate point; retried unless 0.25 <= x*x + z*z <= 1 */
+		x = 2.0 * c7rand(is) - 1.0;
+		z = c7rand(is);
+		sq = x*x + z*z;
+		if ((sq > 1.0) || (sq < 0.25)) continue;
+
+		y = x / z;
+		x = rt2gm * y + gm;
+		if (x < 0.0) continue;
+
+		/* acceptance test against a further uniform variate */
+		z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
+		if (c7rand(is) > z) continue;
+
+		return (x);
+		}
+}
+
+
+/*	------------------  Revision history Fastnorm  -------------  */
+/*	Items in this revision history appear in chronological order,
+so the most recent revision appears last.
+	Revision items are separated by a line of '+' characters.
+
+	++++++++++++++++++++++++++++++++++++++++++++++++++++++
+	This is a revised version of the algorithm described in
+
+	ACM Transactions on Mathematical Software, Vol 22, No 1
+		March 1996, pp 119-127.
+
+	A fast generator of pseudo-random variates from the unit Normal
+distribution. It keeps a pool of about 1000 variates, and generates new
+ones by picking 4 from the pool, rotating the 4-vector with these as its
+components, and replacing the old variates with the components of the
+rotated vector.
+
+	The program should initialize the generator by calling initnorm(seed)
+where seed is an integer seed value of type Sw. Different seed values will give
+different sequences of Normals. Seed may be any 32-bit integer.
+	BUT SEE REVISION of 17 May 2003 for initnorm() parameters.
+	The revised initnorm requires two integer parameters, iseed and
+		quoll, the latter specifying a tradeoff between speed and
+		quality.
+	Then, wherever the program needs a new Normal variate, it should
+use the macro FastNorm, e.g. in statements like:
+	x = FastNorm;  (Sets x to a random Normal value)
+or
+	x += a + FastNorm * b;  (Adds a normal with mean a, SD b, to x)
+	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+	Changed basic formula, which was:
+		t = (p+q+r+s)*0.5; p = p-t; q = t-q; r = t-r; s = t-s;
+	This gives sum of new p+q+r+s = 2p(old) which may not be a great
+choice. The new version is:
+		t = (p+q+r+s)*0.5; p = p-t; q = q-t; r = t-r; s = t-s;
+	which gives new p+q+r+s = p+q-r-s (old) which may be better.
+	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+	Revision 14 November 1998
+	The older version "FastNorm" which was available via ftp was found
+to have a defect which could affect some applications.
+
+	Dr Christine Rueb, (Max Planck Institut fur Informatik,
+		Im Stadtwald W 66123 Saarbrucken, F.G.R.,
+			(rueb@mpi-sb.mpg.de)
+
+found that if a large number N of consecutive variates were summed to give
+a variate S with nominally N(0,N) distribution, the variance of S was in some
+cases too small. The effect was noticed with N=400, and was particularly strong
+for N=1023 if the first several (about 128) variates from FastNorm were
+discarded. Dr. Rueb traced the effect to an unexpected tendency of FastNorm
+to concentrate values with an anomalous correlation into the first 128
+elements of the variate pool.
+	With the help of her analysis, the algorithm has been revised in a
+way which appears to overcome the problem, at the cost of about a 19%
+reduction in speed (which still leaves the method very fast.)
+
+	IT  MUST  BE  RECOGNISED  THAT  THIS  ALGORITHM  IS  NOVEL
+AND  WHILE  IT  PASSES  A  NUMBER  OF  STANDARD  TESTS  FOR  DISTRIBUTIONAL
+FORM,  LACK  OF  SERIAL  CORRELATION  ETC.,  IT  MAY  STILL  HAVE  DEFECTS.
+
+RECALL  THE  NUMBER  OF  YEARS  WHICH  IT  TOOK  FOR  THE  LIMITATIONS  OF
+THE  LEHMER  GENERATOR  FOR  UNIFORM  VARIATES  TO  BECOME  APPARENT !!!
+
+UNTIL  MORE  EXPERIENCE  IS  GAINED  WITH  THIS  TYPE  OF  GENERATOR,  IT
+WOULD  BE  WISE  IN  ANY  CRITICAL  APPLICATION  TO  COMPARE  RESULTS
+OBTAINED  USING  IT  WITH  RESULTS  OBTAINED  USING  A  "STANDARD"  FORM
+OF  GENERATOR  OF  NORMAL  VARIATES  COUPLED  WITH  A  WELL-DOCUMENTED
+GENERATOR  OF  UNIFORM  VARIATES.
+	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+	Revision 1 April 2003.
+	Trying a scanning process proposed by R.P.Brent. It needs 2 pool
+vectors, as it cannot update in-situ, but may be more robust.
+	It is a bit slower on a 133 Mhz PC but just as fast on a newer PC
+(moggie) at about 16 ns per call in the 'speed.c' test.
+	The extreme-value defect is the same on old and new versions.
+If one finds a value 'A' such that a batch of B genuine Normal variates has
+probability 0.2 of containing a variate with absolute value greater than A,
+then the probability that both of two consecutive batches of B will contain
+such a value should be 0.2 times 0.2, or 0.04. Instead, both versions give
+the extreme value prob. as 0.200 (over a million batches) but give the
+consecutive-pair prob as 0.050 for batch size B = 1024.
+	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+	Revision 17 May 2003.
+	The fundamental defect of the method, namely an inadequate 'mixing'
+of squared value ('energy') between one generation of the pool and the next,
+cannot readily be removed. In going from one pool to the next, the energy
+in an old variate is shared among just 4 variates in the new pool. Hence it
+takes many generations before the energy of some original variate can be
+distributed across the whole pool. The number of generations needed cannot
+be less than the log to base 4 of the pool size, or 5 for a pool size of
+1024. In fact, the pseudo-random indexing of the pool means that rather
+more generations are needed on average.
+	The defect is readily revealed by the following test. One picks a
+"batch size" comparable to the pool size, say 500 or 1000. One then
+computes a value A such that a batch will with probability 0.2 contain one
+or more variates with absolute value exceeding A.
+One then draws batches from FastNorm,
+and tests each batch to see if it contains such an extreme value.
+Over many batches, one counts the frequency of such 'extreme' batches,
+and finds (with FastNorm2) that it is indeed about 0.2. However, when one counts
+the frequency with which successive batches are both extreme, one finds it to
+be higher than the proper value (0.2)^2 = 0.04. For batch sizes round the pool
+size, it can be as high as 0.05. That is, although the frequency of extreme
+values is about right, their occurrence in the stream is correlated over a
+scale of the order of the pool size.
+	The same correlation effect is seen in the average 4th moment of
+successive batches.
+	Since this inter-generational correlation cannot be avoided,
+this revision seeks to reduce it by performing at least two simple
+rotations of the pool at each generation. Obviously, some speed is lost,
+but the correlations are reduced.
+	To allow the user to trade off speed and quality, the initialization
+function initnorm() now provides a QUALITY parameter 'quoll' which controls
+how many double-rotations are done for each generation.
+	See the comments in initnorm() for more detail.
+	++++++++++  End of revision notes  +++++++++  */
+
+
+
+/*	-----------------  Some test results  ------------------------  */
+/*
+General form:
+	Some simple tests were conducted by transforming FastNorm variates
+in several ways to yield a variable nominally uniformly distributed in 0 ... 1.
+Uniformity of the derived variate was then tested by a ChiSquared test on a
+100-cell histogram with cell counts around 10000. These tests are crude, but
+showed no untoward results on the present version.
+	Transformations included:
+	y = 0.5 * (1.0 + erf (n1 / sqrt(2)))
+
+	y = 0.5 * (n1 / sqrt (n1^2 + n2^2 + n3^2) + 1)
+
+	y = exp (-0.5 * (n1^2 + n2^2))
+
+	y = (n1^2 + n2^2) / (n1^2 + n2^2 + n3^2 + n4^2)
+
+		where n1, n2 etc are successive Normal variates.
+It may be noted that some of these are sensitive to serial correlation if
+present.
+
+Fourth moment of batches:
+	Extensive tests for correlation among the fourth moments of successive
+batches of variates were made, with batch sizes comparable to or (worst case)
+equal to the size of the variate pool (4096 in this revision).
+	With 'quality' 1, significant correlation appears after 10^6 batches
+of worst-case size.
+	With quality 2, no significant correlation is evident after 10^7
+batches. A just-significant correlation appears after 3.6*10^7 batches.
+As this requires some 1.4*10^11 deviates to be drawn, it may be irrelevant
+for many applications. The observed correlation coefficient was 0.0008.
+	With quality 3, results are OK after 10^8 batches, or more than
+4*10^11 variates.
+	No tests have been done with quality 4 as yet.
+
+Speed:
+	Speed tests were done on a PC running RedHat Linux, using "-O"
+compiler optimization. The test loop was
+	for (i = 0; i < 500000000; i++)  {
+		a += FastNorm;  a -= FastNorm;
+		}
+	Thus the test makes 10^9 uses of FastNorm. The time taken, (which
+includes time for a call in 'initnorm' and the loop overhead) depends on
+the 'quality' set by initnorm.
+	Quality 1:	21.5 sec
+	Quality 2:	32.1 sec
+	Quality 3:	42.5 sec
+	Quality 4:	53.1 sec
+
+By way of comparison, the same 10^9 call loop was timed with the Unix library
+"random()" routine substituted for FastNorm, and the variable 'a' defined as
+integer rather than double.  Also, since most use of a Uniform generator such
+as "random()" requires that the returned integer be scaled into a floating-
+point number in 0 ... 1, the timing was repeated with
+	"a += random" ('a' integer) replaced by "a += Scale*random()" where
+'a' is double and Scale = 2^(-31). The times obtained were:
+	Random (integer): 44.1 sec
+	Random (double) : 47.7 sec
+
+	It can be seen that FastNorm (even at quality 3) is faster than a
+commonly-used Uniform generator. To some extent, this result may vary on
+different computers and compilers. Since FastNorm (at least for qualities
+above 1) no doubt does more arithmetic per variate than "random()", much of
+its speed advantage must come from its replacement of a function call per
+variate by a macro which makes only one function call every 4095 variates.
+Computers with lower 'call' overheads than the PC used here might show
+different results.
+	Incidentally, the Uniform generator 'c7rand()' included in this
+package, which returns a double uniform in 0 ... 1, and is of fairly high
+quality, gives a time in the same test of 36.8 sec, a little faster than
+'random()'.
+	*/
+
+
+/*	-----------------  globals  -------------------------  */
+/*	A pool must have a length which is a multiple of 4.
+ *	During regeneration of a new pool, the pool is treated as 4
+ *	consecutive vectors, each of length VL.
+	*/
+
+#define VE 10		/* log2 of VL */
+#define VL (1 << VE)	/* length of each of the 4 sub-vectors of a pool */
+#define VM (VL-1)	/* bit mask selecting an offset within a sub-vector */
+#define WL (4*VL)	/* total pool length */
+#define WM (WL-1)	/* bit mask selecting an index within the pool */
+
+Sw gaussfaze;	/* countdown index used by the FastNorm macro; initnorm() and fastnorm() reset it to WL-1 */
+Sf *gausssave;	/* pool the FastNorm macro reads from; initnorm() points it at wk1 */
+Sf GScale;	/* scale factor applied to every pool value handed out */
+/*	GScale,fastnorm,gaussfaze, -save must be visible to callers*/
+static Sf chic1, chic2;   /* Constants used in getting ChiSq_WL; computed in initnorm() */
+Sw gslew;  /*  Counts generations; fastnorm() renormalizes when its low 16 bits are zero  */
+static Sw qual;    /*  Sets number of double transforms per generation.  */
+static Sw c7g [2];	/*  seed values for c7rand and irandm  */
+
+Sf wk1 [WL], wk2 [WL];   /*  Pools of variates: wk1 is served to callers, wk2 is scratch space for regen().  */
+
+
+/*	------------------  regen  ----------------------    */
+/*	Takes variates from wk1[], transforms to wk2[], then back to wk1[].
+	Each pass combines four values p, q, r, s through t = (p+q+r+s)/2
+	and stores four signed differences from t.
+	Pass 1 reads wk1[] sequentially and writes one value into each
+	quarter of wk2[], each quarter starting at its own random offset.
+	Pass 2 reads wk2[] at index (j XOR m), m being a random mask, and
+	writes sequentially into the four quarters of wk1[].
+	Random numbers come from irandm(c7g).
+	*/
+void regen ()
+{
+	Sw i, j, k, m;
+	Sf p, q, r, s, t;
+	Sw topv[6], ord[4], *top;
+	Sf *ppt[4], *ptn;
+
+/*      Choose 4 random start offsets, one for each quarter of the
+	wk2[] vector.  I want them all different.  */
+
+	top = topv + 1;	/* so that top[-1] .. top[4] map onto topv[0] .. topv[5] */
+/*	Set limiting values in top[-1], top[4]  */
+	top[-1] = VL;  top[4] = 0;
+reran1:
+	m = irandm (c7g);   /* positive 32-bit random */
+/*	Extract two VE-sized randoms from m, which has 31 useable digits */
+	m  = m >> (31 - 2*VE);
+	top[0] = m & VM;  m = m >> VE;  top[1] = m & VM;
+	m = irandm (c7g);   /* positive 32-bit random */
+/*	Extract two VE-sized randoms from m, which has 31 useable digits */
+	m  = m >> (31 - 2*VE);
+	top[2] = m & VM;  m = m >> VE;  top[3] = m & VM;
+	for (i = 0; i < 4; i++) ord[i] = i;
+/*	Sort in decreasing size; ord[] is permuted alongside so that
+	ord[i] names the quarter that owns offset top[i]   */
+	for (i = 2; i >= 0; i--)  {
+		for (j = 0; j <= i; j++)	{
+			if (top[j] < top[j+1])	{
+				k = top[j];  top[j] = top[j+1];
+				top[j+1] = k;
+				k = ord[j];  ord[j] = ord[j+1];
+				ord[j+1] = k;
+				}
+			}
+		}
+/*	Ensure all different; otherwise draw a fresh set of offsets  */
+	for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
+
+/*	Set pt pointers to their start values for the first chunk.  */
+	for (i = 0; i < 4; i++)	{
+		j = ord[i];
+		ppt[j] = wk2 + j * VL + top[i];
+		}
+
+/*	Set ptn to point into wk1  */
+	ptn = wk1;
+
+/*	Now ready to do five chunks. The length of chunk i is
+		top[i-1] - top[i]  (I hope)
+	At the end of chunk i, pointer ord[i] should have reached the end
+	of its part, and need to be wrapped down to the start of its part.
+		*/
+	i = 0;
+
+chunk:
+	j = top[i] - top[i-1];   /* Minus the chunk length */
+	for (;  j < 0;  j++)	{
+		p = *ptn++;  s = *ptn++;  q = *ptn++;  r = *ptn++;	/* note the read order p, s, q, r */
+		t = (p + q + r + s) * 0.5;
+		*ppt[0]++ = t - p;
+		*ppt[1]++ = t - q;
+		*ppt[2]++ = r - t;
+		*ppt[3]++ = s - t;
+		}
+/*	This should end the chunk.  See if all done  */
+	if (i == 4) goto passdone;
+
+/*	The pointer for part ord[i] should have passed its end  */
+	j = ord[i];
+#ifdef dddd
+printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-wk2);
+#endif
+	ppt[j] -= VL;	/* wrap back to the start of its quarter */
+	i++;
+	goto chunk;
+
+passdone:
+/*	wk1[] values have been transformed and placed in wk2[]
+	Transform from wk2 to wk1 with a simple shuffle:
+	m is a random mask in 0 .. WM and wk2[] is read at index j XOR m  */
+	m = (irandm (c7g) >> (29 - VE)) & WM;
+	j = 0;
+	for (i = 0; i < 4; i++) ppt[i] = wk1 + i * VL;
+	for (i = 0; i < VL; i++)  {
+		p = wk2[j^m];  j++;
+		s = wk2[j^m];  j++;
+		q = wk2[j^m];  j++;
+		r = wk2[j^m];  j++;
+		t = (p + q + r + s) * 0.5;
+		*ppt[0]++ = t - p;	/* signs differ from the first pass */
+		*ppt[1]++ = q - t;
+		*ppt[2]++ = t - r;
+		*ppt[3]++ = s - t;
+		}
+
+/*	We have a new lot of variates in wk1  */
+	return;
+}
+
+
+/*	-------------------  renormalize  ---------------------------   */
+/*	Rescales wk1[] so sum of squares = WL  */
+/*	Returns the original sum-of-squares  */
+Sf renormalize (void)
+{
+	Sf *cell;
+	Sf *const stop = wk1 + WL;
+	Sf sumsq, factor;
+
+/*	Accumulate the sum of squares over the whole pool, front to back  */
+	sumsq = 0.0;
+	for (cell = wk1; cell < stop; cell++)
+		sumsq += *cell * *cell;
+
+/*	Rescale every entry so that the new sum of squares equals WL  */
+	factor = sqrt (WL / sumsq);
+	for (cell = wk1; cell < stop; cell++)
+		*cell *= factor;
+
+/*	Callers receive the sum of squares as it was before rescaling  */
+	return (sumsq);
+}
+
+
+/*	------------------------  BoxMuller  ----------------------   */
+/*	Fills block gvec of length ll with proper normals  */
+/*	boxmuller: fills gvec[0 .. ll-1] with conventional Normal variates.
+	gvec	destination block, at least ll entries long
+	ll	number of entries to fill; nothing is written when ll <= 0
+	Variates are produced in pairs from a point (tx, ty) accepted in the
+	annulus 0.25 <= tx^2 + ty^2 <= 1, with the squared radius of the
+	pair taken from a third, independent uniform.  When ll is odd the
+	second member of the last pair is discarded instead of being stored
+	past the end of the block.  For even ll > 0 the values produced and
+	the uniforms consumed are the same as before this guard was added.  */
+void boxmuller (Sf *gvec, Sw ll)
+{
+	Sw i;
+	Sf tx, ty, tr, tz;
+
+	i = 0;
+	while (i < ll)	{
+		tx = 2.0 * c7rand(c7g) - 1.0;  /* Uniform in -1..1 */
+		ty = 2.0 * c7rand(c7g) - 1.0;  /* Uniform in -1..1 */
+		tr = tx * tx + ty * ty;
+		if ((tr > 1.0) || (tr < 0.25)) continue;
+		tz = -2.0 * log (c7rand(c7g));	/* Sum of squares */
+		tz = sqrt ( tz / tr );
+		gvec [i++] = tx * tz;
+/*		Only store the second variate if there is room for it  */
+		if (i < ll) gvec [i++] = ty * tz;
+		}
+	return;
+}
+
+
+/*	-------------------------  initnorm  ----------------------  */
+/*	To initialize, given a seed integer and a quality level.
+	The seed can be any integer. The quality level quoll should be
+	between 1 and 4. Quoll = 1 gives high speed, but leaves some
+	correlation between the 4th moments of successive batches of values.
+	Higher values of quoll give lower speed but less correlation.
+
+	If called with quoll = 0, initnorm performs a check that the
+	most crucial routine (regen) is performing correctly. In this
+	case, the value of 'iseed' is ignored. Initnorm will report the
+	results of the test, which compares pool values with check17 and
+	check98, which are defined below.
+	When a check call is made, a proper call on initnorm must then
+	be made before using the FastNorm macro. A check call does not
+	properly initialize the routines even if it succeeds.
+		*/
+static Sf check17 = 0.1255789;
+static Sf check98 = -0.7113293;
+
+void initnorm (Sw seed, Sw quoll)
+{
+	Sw k;
+	int bad17, bad98;
+
+/*	Constants for the Chi-squared correction of the pool scale.
+	We need a random Z such that (WL * Z*Z) has approximately a
+	Chi-squared-WL density.  For large K, (A + B*n)**2 with n unit
+	Normal, A**2 = K * sqrt (1 - 1/K) and A**2 + B**2 = K is
+	approximately Chi-sq-K.  Dividing by sqrt(WL) gives
+		Z = chic1 + chic2 * n
+	with	chic1 = sqrt ( sqrt (1 - 1/WL)),
+		chic2 = sqrt (1 - chic1*chic1).  */
+	chic1 = sqrt ( sqrt (1.0 - 1.0 / WL));
+	chic2 = sqrt (1.0 - chic1 * chic1);
+
+/*	Generation counter: the pool starts out normalized, so no
+	renormalization is needed for a while  */
+	gslew = 1;
+/*	Serve all of wk1 except its last entry at WL-1  */
+	gaussfaze = WL-1;
+	gausssave = wk1;
+
+	if (quoll == 0)	{
+/*		Installation check: start from a simple pattern, run regen
+		60 times from fixed seeds and compare two pool entries with
+		the stored reference values.  This leaves the generator
+		unusable until a proper initnorm call is made.  */
+		for (k = 0; k < WL; k++) wk1[k] = wk2[k] = 0.0;
+		wk1[0] = sqrt ((double) WL);
+		c7g[0] = 1234567;  c7g[1] = 9876543;
+		for (k = 0; k < 60; k++) regen();
+
+		bad17 = fabs (wk1[17] - check17) > 0.00001;
+		bad98 = fabs (wk1[98] - check98) > 0.00001;
+		if (!bad17 && !bad98)	{
+			printf ("\nInitnorm check OK\n");
+			}
+		else	{
+			printf ("\nInitnorm check failed.\n");
+			printf ("Expected %8.5f got %10.7f\n", check17, wk1[17]);
+			printf ("Expected %8.5f got %10.7f\n", check98, wk1[98]);
+			}
+		return;
+		}
+
+/*	Normal initialization.  Note that qual is stored before the range
+	check, so it is updated even when the call is rejected.  */
+	qual = quoll;
+	if ((quoll < 0) || (quoll > 4))  {
+		printf ("From initnorm(): quoll parameter %d out of\
+ range 1 to 4\n", quoll);
+		return;
+		}
+	c7g[0] = seed;  c7g[1] = -3337792;
+
+/*	Fill wk1[] with good normals, rescale so sum-of-squares = WL, and
+	keep in GScale the factor that restores the original ChiSq_WL
+	sum-of-squares  */
+	boxmuller (wk1,  WL);
+	GScale = sqrt (renormalize () / WL);
+	return;
+}
+
+
+/*	----------------------  fastnorm  --------------------------  */
+/*	If gslew shows time is ripe, renormalizes the pool
+		fastnorm() returns the value GScale*gausssave[0].
+		*/
+
+/*	fastnorm: called by the FastNorm macro when the current pool is used
+	up.  Builds the next pool and returns GScale * gausssave[0].
+	The definition now carries a (void) prototype, matching the
+	declaration in FastNorm3.h, and no longer keeps the unused result of
+	renormalize() in a local variable.  */
+Sf fastnorm (void)
+{
+	Sw pass;
+
+/*	Renormalize the pool whenever the low 16 bits of the generation
+	counter are zero; the returned sum of squares is not needed here  */
+	if (! (gslew & 0xFFFF))  {
+		(void) renormalize ();
+		}
+
+/*	The last entry of gausssave, at WL-1, will not have been used.
+	Use it to get an approx. to sqrt (ChiSq_WL / WL).
+	See initnorm() code for details  */
+	GScale = chic1 + chic2 * GScale * gausssave [WL-1];
+
+/*	qual double-transforms per generation (set by initnorm)  */
+	for (pass = 0; pass < qual; pass++) regen ();
+	gslew++;
+
+/*	Restart the countdown used by the FastNorm macro  */
+	gaussfaze = WL - 1;
+
+	return (GScale * gausssave [0]);
+}
+
+
+/*	---------------------  (test) main  -------------------------  */
+#ifdef Main
+#include "FastNorm3.h"
+/*	Stand-alone smoke test, built only when Main is defined.
+	Runs the installation check, initializes with seed 77 at quality 2,
+	prints 200 variates and prints the pool sum-of-squares before and
+	after.  Returns 0 on completion; the previous exit (1) reported
+	failure to the shell even when everything had worked.  */
+int main(void)
+{
+	Sf x;  Sw i;
+
+	initnorm (0, 0);
+	initnorm (77, 2);
+	printf ("SoS %20.6f\n", renormalize());
+	for (i = 0; i < 200; i++) {
+		x = FastNorm;
+		printf("%d\t%f\n", i, x);
+	}
+	printf ("SoS %20.6f\n", renormalize());
+	return 0;
+}
+#endif
--- a/src/frontend/trannoise/Makefile.am
+++ b/src/frontend/trannoise/Makefile.am
@ -0,0 +1,10 @@
+noinst_LTLIBRARIES = libtrannoise.la
+
+libtrannoise_la_SOURCES = \
+	FastNorm3.c	\
+	1-f-code.c	\
+	wallace.c	
+
+AM_CPPFLAGS = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/frontend
+
+MAINTAINERCLEANFILES = Makefile.in
--- a/src/frontend/trannoise/wallace.c
+++ b/src/frontend/trannoise/wallace.c
@ -0,0 +1,532 @@
+/* Wallace generator for normally distributed random variates
+   Copyright: Holger Vogt, 2008
+   
+*/   
+
+//#define FASTNORM_ORIG
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+#include <process.h>
+#define getpid _getpid
+#else
+#include <unistd.h>
+#endif
+#include <math.h>
+#include "wallace.h"
+#include "FastNorm3.h"
+
+#ifdef HasMain
+#include <sys/timeb.h>
+#else
+#ifndef NOSPICE
+#include "ngspice.h"
+#endif
+#endif
+
+#define POOLSIZE 4096 /* number of variates in each pool */
+#define LPOOLSIZE 12 /* log2 of POOLSIZE */
+#define NOTRANS 3 /* number of (dual) transformations */
+
+#define VE 10 /* VE .. WM: same values as in FastNorm3.c; used by the FASTNORM_ORIG variant of NewWa */
+#define VL (1 << VE)
+#define VM (VL-1)
+#define WL (4*VL)
+#define WM (WL-1)
+
+double *outgauss; /* output vector for user access */
+unsigned int variate_used; /* actual index of variate called by user */
+double ScaleGauss; /* chi^2 scale factor; initw() and NewWa() update it */
+
+static double *pool1; /* pool exposed through outgauss; allocated in initw() */
+static double *pool2; /* scratch pool for the transformations; allocated in initw() */
+static unsigned int *addrif, *addrib; /* random address tables filled in initw(); read only by NewWa_not() */
+static unsigned n = POOLSIZE; /* pool length used at run time */
+static double chi1, chi2; /* chi^2 correction values */
+static unsigned int newpools; /* number of pools generated; triggers renormalization when its low 16 bits are zero */
+
+extern double drand(void); /* NOTE(review): PolarGauss needs this to be uniform in -1..1 - confirm */
+extern unsigned int CombLCGTausInt(void);
+extern void TausSeed(void);
+extern unsigned int CombLCGTausInt2(void);
+
+
+/* PolarGauss: produces two independent unit-normal variates by the
+   Marsaglia polar method and stores them in *py1 and *py2.
+
+   A point (x1, x2) is drawn until it lies strictly inside the unit circle
+   and away from the origin.  Its squared radius w is then uniform on (0, 1)
+   and is used as the uniform deviate that sets the radial part
+   sqrt(-2 ln w).
+
+   The previous code also rejected w < 0.25.  Because w itself supplies the
+   radial deviate, that cut limited sqrt(-2 ln w) to at most
+   sqrt(2 ln 4) ~ 1.67 and so removed the tails of the distribution.  Only
+   w == 0 (where log(w)/w is undefined) and w >= 1 are rejected now.
+
+   NOTE(review): assumes drand() returns values uniform in -1 .. 1; its
+   definition is not in this file - confirm.  */
+void PolarGauss(double* py1, double* py2)
+{
+	double x1, x2, w;
+
+	do {
+		x1 = drand();
+		x2 = drand();
+		w = x1 * x1 + x2 * x2;
+	} while ((w >= 1.0) || (w == 0.0));
+
+	w = sqrt((-2.0 * log(w)) / w);
+
+	*py1 = x1 * w;
+	*py2 = x2 * w;
+}
+
+
+
+
+
+/* initw: sets up the Wallace generator.
+   - seeds the uniform generators (srand with the process id, then TausSeed),
+   - allocates both pools and the two address tables,
+   - fills pool1 with PolarGauss variates and scales it so that its sum of
+     squares equals n,
+   - computes the chi^2 correction constants and the first ScaleGauss,
+   - fills the address tables with LPOOLSIZE-bit random values.
+   On return outgauss points at pool1 and variate_used is n - 2, so the
+   entries pool1[n-2] and pool1[n-1] are not handed out from this first pool.
+   Unused locals and the disabled LCG/normalization code were removed; the
+   sequence of calls to the random generators is unchanged.  */
+void initw(void)
+{
+   unsigned i;
+   double totsqr, nomsqr;
+
+   /* initialize the uniform generator */
+   srand(getpid());
+   TausSeed();
+
+   ScaleGauss = 1.;
+   newpools = 1;
+
+   /* set up the two pools and the address tables */
+   pool1 = TMALLOC(double, n);
+   pool2 = TMALLOC(double, n);
+   addrif = TMALLOC(unsigned int, (n + NOTRANS));
+   addrib = TMALLOC(unsigned int, (n + NOTRANS));
+
+   /* fill the first pool with normally distributed values, two at a time */
+   for (i = 0; i < n>>1; i++) {
+      PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]);
+   }
+
+   /* normalize pool content: sum of squares becomes n */
+   totsqr = 0.0;
+   for (i = 0; i < n; i++)
+      totsqr += pool1[i] * pool1[i];
+   nomsqr = sqrt(n / totsqr);
+   for (i = 0; i < n; i++)
+      pool1[i] *= nomsqr;
+
+   /* calculate chi^2 values */
+   chi1 = sqrt ( sqrt (1.0 - 1.0/n));
+   chi2 = sqrt ( 1.0 - chi1*chi1);
+
+   /* first scaling, based on unused pool1[n-2] */
+   ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2];
+   /* access to first pool */
+   outgauss = pool1;
+   /* set data counter, we return n-2 values here */
+   variate_used = n - 2;
+
+   /* random read addresses: top LPOOLSIZE bits of each 32-bit draw */
+   for (i = 0; i < (n + NOTRANS); i++) {
+      addrif[i] = CombLCGTausInt() >> (32 - LPOOLSIZE);
+   }
+   /* random write addresses, drawn the same way */
+   for (i = 0; i < (n + NOTRANS); i++) {
+      addrib[i] = CombLCGTausInt() >> (32 - LPOOLSIZE);
+   }
+}
+
+/* original FastNorm3.c code */
+#ifdef FASTNORM_ORIG
+/* NewWa (FASTNORM_ORIG variant): regenerates pool1 with the same two-pass
+   scheme as regen() in FastNorm3.c, using pool2 as scratch space.
+   Returns ScaleGauss * pool1[0] computed from the outgoing pool with the old
+   scale, then installs a new ScaleGauss derived from the outgoing
+   pool1[n-1].
+
+   The working values and pointers are double, matching the double pools.
+   The earlier float pointers were assigned from double arrays and would have
+   read and written the pools with the wrong element size.  The return type
+   is double, as in the default NewWa below.  */
+double NewWa (void)
+{
+   int i, j, k;
+   unsigned int m;
+   double p, q, r, s, t;
+   int topv[6], ord[4], *top;
+   double *ppt[4], *ptn;
+
+   double nulval, endval;
+   double totsqr, nomsqr;
+   nulval = ScaleGauss * pool1[0];
+   endval = pool1[n-1];
+
+/*      Choose 4 random start offsets, one for each quarter of pool2.
+	I want them all different.  */
+
+	top = topv + 1;
+/*	Set limiting values in top[-1], top[4]  */
+	top[-1] = VL;  top[4] = 0;
+reran1:
+	m = CombLCGTausInt();   /* 32-bit random */
+/*	Extract two VE-sized randoms from m  */
+	m  = m >> (31 - 2*VE);
+	top[0] = m & VM;  m = m >> VE;  top[1] = m & VM;
+	m = CombLCGTausInt();   /* 32-bit random */
+/*	Extract two VE-sized randoms from m  */
+	m  = m >> (31 - 2*VE);
+	top[2] = m & VM;  m = m >> VE;  top[3] = m & VM;
+	for (i = 0; i < 4; i++) ord[i] = i;
+/*	Sort in decreasing size, permuting ord[] alongside   */
+	for (i = 2; i >= 0; i--)  {
+		for (j = 0; j <= i; j++)	{
+			if (top[j] < top[j+1])	{
+				k = top[j];  top[j] = top[j+1];
+				top[j+1] = k;
+				k = ord[j];  ord[j] = ord[j+1];
+				ord[j+1] = k;
+				}
+			}
+		}
+/*	Ensure all different  */
+	for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
+
+/*	Set pt pointers to their start values for the first chunk.  */
+	for (i = 0; i < 4; i++)	{
+		j = ord[i];
+		ppt[j] = pool2 + j * VL + top[i];
+		}
+
+/*	Set ptn to point into pool1  */
+	ptn = pool1;
+
+/*	Now ready to do five chunks. The length of chunk i is
+		top[i-1] - top[i]
+	At the end of chunk i, pointer ord[i] should have reached the end
+	of its part, and need to be wrapped down to the start of its part.
+		*/
+	i = 0;
+
+chunk:
+	j = top[i] - top[i-1];   /* Minus the chunk length */
+	for (;  j < 0;  j++)	{
+		p = *ptn++;  s = *ptn++;  q = *ptn++;  r = *ptn++;
+		t = (p + q + r + s) * 0.5;
+		*ppt[0]++ = t - p;
+		*ppt[1]++ = t - q;
+		*ppt[2]++ = r - t;
+		*ppt[3]++ = s - t;
+		}
+/*	This should end the chunk.  See if all done  */
+	if (i == 4) goto passdone;
+
+/*	The pointer for part ord[i] should have passed its end  */
+	j = ord[i];
+#ifdef dddd
+printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2);
+#endif
+	ppt[j] -= VL;
+	i++;
+	goto chunk;
+
+passdone:
+/*	pool1[] values have been transformed and placed in pool2[]
+	Transform from pool2 to pool1 with a simple shuffle  */
+	m = (CombLCGTausInt2() >> (29 - VE)) & WM;
+	j = 0;
+	for (i = 0; i < 4; i++) ppt[i] = pool1 + i * VL;
+	for (i = 0; i < VL; i++)  {
+		p = pool2[j^m];  j++;
+		s = pool2[j^m];  j++;
+		q = pool2[j^m];  j++;
+		r = pool2[j^m];  j++;
+		t = (p + q + r + s) * 0.5;
+		*ppt[0]++ = t - p;
+		*ppt[1]++ = q - t;
+		*ppt[2]++ = t - r;
+		*ppt[3]++ = s - t;
+		}
+
+   /* renormalize again if number of pools beyond limit */
+   if (!(newpools & 0xFFFF)) {
+      totsqr = 0.0;
+      for (i = 0; (unsigned int) i < n; i++)
+         totsqr += pool1[i] * pool1[i];
+      nomsqr = sqrt(n / totsqr);
+      for (i = 0; (unsigned int) i < n; i++)
+         pool1[i] *= nomsqr;
+   }
+
+   outgauss = pool1;
+   /* reset data counter */
+   variate_used = n - 1;
+
+   /* count the number of pools made */
+   newpools++;
+
+   /* new scale factor using chi^2 correction,
+   using pool1[n-1] from last pool */
+   ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
+
+   return nulval; /* use old scale */
+
+}
+
+#else
+
+/* Simplified code according to an algorithm published by C. S. Wallace:
+   "Fast Pseudorandom Generators for Normal and Exponential Variates",
+   ACM Transactions on Mathematical Software, Vol. 22, No. 1, March 1996, pp. 119-127.
+   Transform pool1 to pool2 and back to pool1 NOTRANS times
+   by orthogonal 4 x 4 Hadamard-Matrix.
+   Mixing of values is very important: Any value in the pool should contribute to
+   every value in the new pools, at least after several passes (number of passes
+   is set by NOTRANS to 2 or 3).
+   In each pass 4 values are read at a stride of POOLSIZE/4 and the 4 results
+   are stored in consecutive positions.
+   During backward transformation the read index is additionally XORed with a
+   random mask.  The mask is kept in its own variable for the whole pass: it
+   used to share a variable with the write index and was overwritten after
+   the first group of four, which lost the shuffle and meant the reads no
+   longer visited every pool entry exactly once.
+
+   Returns ScaleGauss * pool1[0], taken from the outgoing pool with the old
+   scale.  Side effects: pool1 is regenerated, outgauss points at it,
+   variate_used is reset to n - 1, newpools is incremented and ScaleGauss is
+   updated from the outgoing pool1[n-1].
+*/
+
+double NewWa(void)
+{
+   double nulval, endval;
+   double bl1, bl2, bl3, bl4; /* the four values to be transformed */
+   double bsum;
+   double totsqr, nomsqr;
+   unsigned int i, j, dst, shuffle, m, mm, mmm;
+
+   nulval = ScaleGauss * pool1[0];
+   endval = pool1[n-1];
+
+   /* read strides: one, two and three quarters of the pool */
+   m = n >> 2;
+   mm = m << 1;
+   mmm = mm + m;
+
+   /* generate new pool by transformation
+      Transformation is repeated NOTRANS times */
+   for (i = 0; i < NOTRANS; i++) {
+      /* forward transformation: pool1 -> pool2 */
+      for (j = 0; j < m; j++) {
+         bl1 = pool1[j];
+         bl2 = pool1[j+m];
+         bl3 = pool1[j+mm];
+         bl4 = pool1[j+mmm];
+         /* Hadamard-Matrix */
+         bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
+         dst = j << 2;
+         pool2[dst]   = bl1 - bsum;
+         pool2[dst+1] = bl2 - bsum;
+         pool2[dst+2] = bsum - bl3;
+         pool2[dst+3] = bsum - bl4;
+      }
+      /* backward transformation: pool2 -> pool1, read through a random
+         XOR mask in 0 .. n-1 that stays fixed for the whole pass */
+      shuffle = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1);
+      for (j = 0; j < m; j++) {
+         bl1 = pool2[j ^ shuffle];
+         bl2 = pool2[(j+m) ^ shuffle];
+         bl3 = pool2[(j+mm) ^ shuffle];
+         bl4 = pool2[(j+mmm) ^ shuffle];
+         /* Hadamard-Matrix */
+         bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
+         dst = j << 2;
+         pool1[dst]   = bl1 - bsum;
+         pool1[dst+1] = bl2 - bsum;
+         pool1[dst+2] = bsum - bl3;
+         pool1[dst+3] = bsum - bl4;
+      }
+   }
+
+   /* renormalize again if number of pools beyond limit */
+   if (!(newpools & 0xFFFF)) {
+      totsqr = 0.0;
+      for (i = 0; i < n; i++)
+         totsqr += pool1[i] * pool1[i];
+      nomsqr = sqrt(n / totsqr);
+      for (i = 0; i < n; i++)
+         pool1[i] *= nomsqr;
+   }
+
+   outgauss = pool1;
+   /* reset data counter */
+   variate_used = n - 1;
+
+   /* count the number of pools made */
+   newpools++;
+
+   /* new scale factor using chi^2 correction,
+   using pool1[n-1] from previous pool */
+   ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
+
+   return nulval; /* use old scale */
+}
+
+#endif
+
+
+#ifdef FASTNORMTEST
+float NewWa_not(void) /* Experimental variant, compiled only when FASTNORMTEST
+   is defined.  The forward pass reads pool1 through the addrif[] table and
+   writes pool2 through the addrib[] table; the backward pass works on
+   consecutive groups of four.  Returns ScaleGauss * pool1[0] of the outgoing
+   pool.  Working values are float although the pools are double.
+   NOTE(review): addrif[]/addrib[] hold independent random values (see
+   initw), so they are presumably not permutations of the pool indices -
+   confirm before using this variant.  */
+{
+   float nulval, endval;
+   float bl1, bl2, bl3, bl4; /* the four values to be transformed */
+   float bsum;
+   float totsqr, nomsqr;   
+   unsigned int i, j, jj;
+   nulval = ScaleGauss * pool1[0];
+   endval = pool1[n-1];
+    
+//   printf("New pool after next value\n");
+   
+   /* generate new pool by transformation 
+      Transformation is repeated NOTRANS times */
+   for (i=0; i < NOTRANS; i++) {
+      
+      /* forward transformation */
+      for (j=0; j < n; j += 4) {
+         jj = j + i; /* table offset shifts by one entry per pass; jj+3 stays below n + NOTRANS */
+         bl1 = pool1[addrif[jj]];
+         bl2 = pool1[addrif[jj+1]];
+         bl3 = pool1[addrif[jj+2]];
+         bl4 = pool1[addrif[jj+3]];
+/*         s = (s*coa + cob) & (n - 1);
+         bl1 = pool1[s];
+         s = (s*coa + cob) & (n - 1);
+         bl2 = pool1[s + 1];         
+         s = (s*coa + cob) & (n - 1);
+         bl3 = pool1[s + 2];         
+         s = (s*coa + cob) & (n - 1);
+         bl4 = pool1[s + 3];   */       
+/*         jj = j + i;
+         bl1 = pool1[addrif[jj]];
+         bl2 = pool1[addrif[jj+1]];
+         bl3 = pool1[addrif[jj+2]];
+         bl4 = pool1[addrif[jj+3]]; */
+/*         bl1 = pool1[j];
+         bl2 = pool1[j+1];
+         bl3 = pool1[j+2];
+         bl4 = pool1[j+3]; */         
+         /* Hadamard-Matrix */
+         bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
+/*         pool2[j] = bl1 - bsum;
+         pool2[j+1] = bl2 - bsum;
+         pool2[j+2] = bsum - bl3;
+         pool2[j+3] = bsum - bl4; */
+         pool2[addrib[jj]] = bl1 - bsum;
+         pool2[addrib[jj+1]] = bl2 - bsum;
+         pool2[addrib[jj+2]] = bsum - bl3;
+         pool2[addrib[jj+3]] = bsum - bl4;       
+      }
+      /* backward transformation */
+      for (j=0; j < n; j += 4) {
+         bl1 = pool2[j];
+         bl2 = pool2[j+1];
+         bl3 = pool2[j+2];
+         bl4 = pool2[j+3];
+/*         bl1 = pool2[addrib[j]];
+         bl2 = pool2[addrib[j+1]];
+         bl3 = pool2[addrib[j+2]];
+         bl4 = pool2[addrib[j+3]]; */
+         /* Hadamard-Matrix */
+         bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
+         pool1[j] = bl1 - bsum;
+         pool1[j+1] = bl2 - bsum;
+         pool1[j+2] = bsum - bl3;
+         pool1[j+3] = bsum - bl4;
+      }      
+   }
+
+                  
+   /* renormalize again if number of pools beyond limit */
+   if (!(newpools & 0xFFFF)) {
+      totsqr = 0.0;
+      for (i = 0; i < n; i++)
+         totsqr += pool1[i] * pool1[i];
+      nomsqr = sqrt(n / totsqr);
+      for (i = 0; i < n; i++)
+         pool1[i] *= nomsqr;      
+   }
+
+   outgauss = pool1;   
+   /* reset data counter */
+   variate_used = n - 1;
+   
+   /* count the number of pools made */
+   newpools++;
+   
+   /* new scale factor using chi^2 correction,
+   using pool1[n-1] from last pool */
+   ScaleGauss = chi1 + chi2 * ScaleGauss * endval; 
+
+//   printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
+      
+   return nulval; /* use old scale */
+//   return pool1[0]; /* use new scale */
+}
+#endif
+
+/*	---------------------  (test) main  -------------------------  */
+/* gcc -Wall -g  -DHasMain -I../../include  wallace.c CombTaus.o -o watest.exe */
+#ifdef HasMain
+#include "wallace.h"
+
+struct timeb timenow;	
+struct timeb timebegin;	
+int sec, msec;
+   
+/* timediff: stores the elapsed time between *begin and *now as whole
+   seconds in *sec and the remaining milliseconds in *msec.  When the
+   millisecond difference is negative, one second is borrowed so that *msec
+   ends up non-negative.  */
+void timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec)
+{
+    *msec = now->millitm - begin->millitm;
+    *sec = now->time - begin->time;
+
+    /* nothing to borrow when the millisecond part is already non-negative */
+    if (*msec >= 0)
+        return;
+
+    *msec += 1000;
+    *sec -= 1;
+}
+
+
+/* Timing test, built only when HasMain is defined.  Draws 100000000 variates
+   first through the GaussWa macro and then through the FastNorm macro, and
+   prints the elapsed time of each loop.  Returns 0 on completion; the former
+   return value 1 reported failure to the shell after a successful run.  */
+int main(void)
+{
+	float x;
+	unsigned int i;
+	long int count;
+
+	/* Wallace generator from this file */
+	initw();
+	ftime(&timebegin);
+	count = 100000000;
+	for (i = 0; i < count; i++) {
+		x = GaussWa;
+	}
+	ftime(&timenow);
+	timediff(&timenow, &timebegin, &sec, &msec);
+	printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0); 	
+
+	/* FastNorm3: installation check, then quality 3 with seed 77 */
+	initnorm(0, 0);
+	initnorm(77, 3);
+	ftime(&timebegin);
+	count = 100000000;
+	for (i = 0; i < count; i++) {
+		x = FastNorm;
+	}
+	ftime(&timenow);
+	timediff(&timenow, &timebegin, &sec, &msec);
+	printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0); 	
+
+	(void) x;	/* the variates are only generated for timing */
+	return 0;
+}
+#endif
--- a/src/include/1-f-code.h
+++ b/src/include/1-f-code.h
@ -0,0 +1,7 @@
+#ifndef ONE_F_CODE_H
+#define ONE_F_CODE_H
+
+/* Prototypes only; the include guard makes repeated inclusion harmless. */
+
+void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
+float alpha);
+
+void rvfft(float X[], unsigned long int n);
+
+#endif /* ONE_F_CODE_H */
--- a/src/include/FastNorm3.h
+++ b/src/include/FastNorm3.h
@ -0,0 +1,32 @@
+/*	Last revised 28-1-1999	*/
+/*	This is the header file FastNorm3.h to be included in code files
+	using FastNorm3.c	*/
+/*	                      I M P O R T A N T   ! ! ! ! !
+
+	The definition below should be altered to ensure that integer
+arithmetic is done on 32-bit words. It may need to be changed from int to
+long on some platforms.  The 32-bit requirement arises from the use of
+a Uniform pseudo-random generator in part of the code, which assumes 32-bit
+twos-complement arithmetic.  In dire need, replace this generator with
+another more suitable for the platform. The rest of the code assumes only
+that signed integers up to a bit less than 2^31 can be handled.
+	*/
+
+#ifndef FASTNORM3_H
+#define FASTNORM3_H
+
+#define Sw int		/* MUST define Sw as a 32-bit integer or longer */
+#define Sf double
+
+/* State read by the FastNorm macro below; defined outside this header */
+extern int gaussfaze;
+extern int gaussmask;
+extern double *gausssave;
+extern double GScale;
+
+/* Pre-decrements gaussfaze; while the result is non-zero the macro yields
+   GScale*gausssave[gaussfaze], otherwise it calls fastnorm().
+   Note that every use of the macro modifies gaussfaze. */
+#define FastNorm ((--gaussfaze)?GScale*gausssave[gaussfaze]:fastnorm())
+
+void initnorm(Sw seed, Sw quoll);
+Sf fastnorm (void);
+Sf c7rand(Sw*);
+Sw irandm(Sw*);
+unsigned Sw urandm(Sw*);
+double adchi (double a, int *is);
+double rgamma (double g, int *is);
+Sf renormalize(void);
+
+#endif /* FASTNORM3_H */
--- a/src/include/bool.h
+++ b/src/include/bool.h
@ -1,7 +1,9 @@
 #ifndef _BOOL_H
 #define _BOOL_H

-typedef unsigned char bool;
+//typedef unsigned char bool;
+typedef int bool;
+
 typedef int BOOL ;

 #define BOOLEAN int
--- a/src/include/fftext.h
+++ b/src/include/fftext.h
@ -0,0 +1,108 @@
+/*******************************************************************
+	This file extends the fftlib with calls to maintain the cosine and bit reversed tables
+	for you (including mallocs and free's).  Call the init routine for each fft size you will
+	be using.  Then you can call the fft routines below which will make the fftlib library 
+	call with the appropriate tables passed.  When you are done with all fft's you can call 
+	fftFree to release the storage for the tables.  Note that you can call fftInit repeatedly
+	with the same size; the extra calls will be ignored. So, you could make a macro to call
+	fftInit every time you call ffts. For example, you could have something like:
+	#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
+*******************************************************************/
+
+int fftInit(long M);
+// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
+/* INPUTS */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* OUTPUTS */
+/* private cosine and bit reversed tables	*/
+
+void fftFree(void);
+// release storage for all private cosine and bit reversed tables
+
+void ffts(float *data, long M, long Rows);
+/* Compute in-place complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void iffts(float *data, long M, long Rows);
+/* Compute in-place inverse complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void rffts(float *data, long M, long Rows);
+/* Compute in-place real fft on the rows of the input array	*/
+/* The result is the complex spectra of the positive frequencies */
+/* except the location for the first complex number contains the real */
+/* values for DC and Nyquist */
+/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
+/* INPUTS */
+/* *ioptr = real input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+
+void riffts(float *data, long M, long Rows);
+/* Compute in-place real ifft on the rows of the input array	*/
+/* data order as from rffts */
+/* INPUTS */
+/* *ioptr = input data array in the following order	*/
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = real output data array	*/
+
+void rspectprod(float *data1, float *data2, float *outdata, long N);
+// When multiplying a pair of spectra from rfft care must be taken to multiply the
+// two real values separately from the complex ones. This routine does it correctly.
+// the result can be stored in-place over one of the inputs
+/* INPUTS */
+/* *data1 = input data array	first spectra */
+/* *data2 = input data array	second spectra */
+/* N = fft input size for both data1 and data2 */
+/* OUTPUTS */
+/* *outdata = output data array spectra */
+
+
+/* The following is FYI
+
+
+Note that most of the fft routines require full matrices, ie Rsiz==Ncols
+This is how I like to define a real matrix:
+struct matrix {		// real matrix
+	float *d; 		// pointer to data
+	long Nrows;		// number of rows in the matrix
+	long Ncols;		// number of columns in the matrix (can be less than Rsiz)
+	long Rsiz;		// number of floats from one row to the next
+};
+typedef struct matrix matrix;
+
+
+
+ CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
+ arrays that start exactly on a cache line start.
+ First we CACHEFILLMALLOC a void * (use this void * when free'ing),
+ then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
+ example:
+ aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
+ a = (float *) CEILCACHELINE(ainit);
+ ... main body of code ...
+ free(aInit);
+
+ To disable this alignment, set CACHELINESIZE to 1
+#define CACHELINESIZE 32				// Bytes per cache line
+#define CACHELINEFILL (CACHELINESIZE-1)
+#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
+#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)
+*/
+
--- a/src/include/ngspice.h
+++ b/src/include/ngspice.h
@ -174,15 +174,10 @@
 #define inline _inline
 #endif

-/*
-#ifndef HAVE_RANDOM
-#define srandom(a) srand(a)
-#define random rand
-#define RR_MAX RAND_MAX
-#else
-#define RR_MAX LONG_MAX
-#endif
-*/
+
+/* Fast random number generator */
+//#define FastRand
+#define WaGauss
 #define RR_MAX RAND_MAX

 #ifdef HAVE_INDEX
--- a/src/include/wallace.h
+++ b/src/include/wallace.h
@ -0,0 +1,22 @@
+/* Wallace generator for normally distributed random variates
+   Copyright Holger Vogt, 2008
+   
+   Calling sequence:
+   initw(void); initialize using srand(seed)
+   double x = GaussWa;  returns normally distributed random variate
+   
+*/   
+
+#ifndef WALLACE_H
+#define WALLACE_H
+
+extern double *outgauss; /*output vector for user access */
+extern unsigned int variate_used; /* actual index of variate called by user */
+extern double ScaleGauss; /* scale factor, including chi square correction */
+
+double NewWa(void); /* generate new pool, return outgauss[0] */
+
+/* Pre-decrements variate_used; while the result is non-zero the macro yields
+   outgauss[variate_used]*ScaleGauss, otherwise it calls NewWa().
+   Note that every use of the macro modifies variate_used. */
+#define GaussWa ((--variate_used)?(outgauss[variate_used]*ScaleGauss):NewWa())
+
+void initw(void); /* initialization of Wallace generator */
+
+void PolarGauss(double* py1, double* py2);
+
+#endif /* WALLACE_H */
--- a/src/main.c
+++ b/src/main.c
@ -208,6 +208,8 @@ extern int OUTbeginDomain(void *,IFuid,int,IFvalue *);
 extern int OUTendDomain(void *), OUTstopnow(void), OUTerror(int,char *,IFuid *); 
 extern int OUTattributes(void *,IFuid,int,IFvalue *);

+extern void initw(void);
+
 IFfrontEnd nutmeginfo = {
    IFnewUid,
    IFdelUid,
@ -757,8 +759,9 @@ xmain(int argc, char **argv)
 main(int argc, char **argv)
 #endif /* HAS_WINDOWS */
 {
-    int c;
-    int   err;
+    int c, err;
+    unsigned int rseed;
+    time_t acttime;
    bool  gotone = FALSE;
    char* copystring;
    bool  addctrlsect = TRUE; /* PN: for autorun */
@ -1106,6 +1109,24 @@ bot:
    err = 0;

 #ifdef SIMULATOR
+#ifdef FastRand
+// initialization and seed for FastNorm Gaussian random generator
+    initnorm (0, 0);
+    rseed = 66;
+    // when cp_getvar() does not deliver "rndseed", seed from the current time
+    if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
+        acttime = time(NULL);
+        rseed = (unsigned int)acttime;
+    }
+    initnorm (rseed, 2);
+    // rseed is unsigned int, so it has to be printed with %u (was %ld)
+    fprintf (cp_out, "SoS %f, seed value: %u\n", renormalize(), rseed);
+#elif defined (WaGauss)
+// seed rand() and initialize the Wallace Gaussian random generator
+    // when cp_getvar() does not deliver "rndseed", seed from the current time
+    if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
+        acttime = time(NULL);
+        rseed = (unsigned int)acttime;
+    }
+    srand(rseed);
+    initw();
+#endif
    if (!ft_servermode && !ft_nutmeg) {
    /* Concatenate all non-option arguments into a temporary file
       and load that file into the spice core.
--- a/src/maths/Makefile.am
+++ b/src/maths/Makefile.am
@ -1,6 +1,6 @@
 ## Process this file with automake

-SUBDIRS = cmaths ni sparse poly deriv misc
-DIST_SUBDIRS = cmaths ni sparse poly deriv misc
+SUBDIRS = cmaths ni sparse poly deriv misc fft
+DIST_SUBDIRS = cmaths ni sparse poly deriv misc fft

 MAINTAINERCLEANFILES = Makefile.in
--- a/src/maths/fft/Makefile.am
+++ b/src/maths/fft/Makefile.am
@ -0,0 +1,13 @@
+## Process this file with automake to produce Makefile.in
+
+noinst_LTLIBRARIES = libmathfft.la
+
+libmathfft_la_SOURCES =		\
+		fftext.c	\
+		fftlib.c	\
+		matlib.c	
+
+
+
+AM_CPPFLAGS = -I$(top_srcdir)/src/include
+MAINTAINERCLEANFILES = Makefile.in
--- a/src/maths/fft/NOTE
+++ b/src/maths/fft/NOTE
@ -0,0 +1,37 @@
+Subject:     FFT for RISC 2.0
+To:          macgifts@sumex-aim.stanford.edu
+Enclosure:   FFTs-for-RISC-2.sit
+
+Enclosed is a stuffit archive of version 2.0 of my 'C' source code fft library.
+
+   Very-Fast Fourier Transform routines.  Routines are provided for real and complex
+forward and inverse 1d and 2d fourier transforms and 3d complex forward and inverse ffts.
+I coded these to optimize execution speed on Risc processors like the PowerPC.
+All fft sizes must still be a power of two.
+Test programs based on the Numerical Recipes in C routines are provided.
+Also included are some simple applications with source code which time the FFTs.
+See the enclosed read me file for more information.
+
+Revision version 2.0:
+		Rewrote code to rely more on compiler optimization (and be less ugly.)
+		Removed restrictions on too small or too large ffts.
+		Provided a library extension that manages memory for cosine and bit
+reversed counter tables.
+		Added 2d and 3d complex and 2d real ffts.
+		Sped up routines for data too large to fit in primary cache.
+		Changed most testing from Matlab to Numerical Recipes based (because it's cheaper.)
+		Changed call parameters (watch out.)
+Revision version 1.21:
+       line 126 of rfftTest.c corrected.
+Revisions version 1.2:
+       I now store the Nyquist point of the real transform where the 0 for the DC term's
+imaginary part used to be.  !! WATCH OUT FOR THIS IF YOU USE rfft !!
+       Added the real inverse Fourier transform.
+
+Revisions version 1.1:
+       Re-arranged to put fft routines in a shared library and changed source file name to fftlib.c.
+       Removed some ugly optimizations that are no longer needed for CodeWarrior.
+
+This code is public domain, do anything you want to with it.
+
+[Moderators- This file should replace ffts-for-risc-121-c.hqx and can be included on any CD]
--- a/src/maths/fft/Read
+++ b/src/maths/fft/Read
@ -0,0 +1,70 @@
+This directory contains a public domain FFT library which was optimized
+for speed on RISC processors such as the PowerPC.  All ffts
+use single precision floats, for double precision just use a
+global search and replace to change float to double in all 
+source files.
+CodeWarrior Pro 1.0 project files are also supplied.
+
+** Warning **   Perform rigorous testing to
+your own standards before using this code.
+
+ (John Green) green_jt@vsdec.npt.nuwc.navy.mil
+
+files:
+	fftTiming
+Application to time complex ffts
+
+	rfftTiming
+Application to time real ffts
+
+// Directory: fft libraries
+
+files:
+
+	fftext.c
+Library of in-place fast fourier transforms. Contains forward 
+and inverse complex and real transforms.  The real fft's expect the
+frequency domain data to have the real part of the fsamp/2 bin (which
+has a 0 imaginary part) to be stored in the location for the imaginary
+part of the DC bin (the DC bin of real data is also strictly real.)
+You must first call an initialization routine fftInit  before calling 
+the fft computation routines ffts, iffts, rffts and riffts.
+The init routines malloc the memory to store the cosine and
+bit reversed counter tables as well as initializing their values.
+
+	fftlib.c
+Lower level library of in-place fast fourier transforms. Same as fftext.c but you
+need to manage the mallocs for the cosine and bit reversed tables yourself.
+
+
+	fft2d.c
+Library of 2d and 3d complex and 2d real in-place fast fourier transforms.
+The init routine fft2dInit must be called before using the 2d routines and
+fft3dInit must be called before using the 3d routines.  These init routines
+will also call the appropriate 1d init routines in fftext.c
+
+	matlib.c
+Matrix transpose routines used by fft2d.c and complex vector multiply
+for forming the product of two spectra.
+
+	dxpose.c
+Double precision matrix transpose for quick single precision complex transposing
+
+// Directory: timing code
+This directory contains the source to fftTiming and rfftTiming
+
+// Directory: Numerical Recipes testing
+This directory contains files used to test the various fft routines using
+the Numerical Recipes in C routines as a baseline.  These routines can be purchased
+in PeeCee (after expanding you can move them to a Mac) format from:
+http://cfata2.harvard.edu/numerical-recipes/
+Unfortunately Numerical Recipes defines its forward and inverse fft's backwards.
+For complex fft's I just use their inverse fft as a forward one, but for real ffts
+their forward fft followed by my inverse fft reverses the data.  They also have ugly matrix
+and tensor data types and start their indices with one, Fortran style, but these are
+minor annoyances.
+
+// Directory: Matlab testing
+This directory contains files to test fast 1d and 2d convolution with Matlab used to
+verify the results.  An example of using Matlab to test the fft library routines is
+also given for the 2d real fft.
--- a/src/maths/fft/fftext.c
+++ b/src/maths/fft/fftext.c
@ -0,0 +1,156 @@
+/*******************************************************************
+	This file extends the fftlib with calls to maintain the cosine and bit reversed tables
+	for you (including mallocs and free's).  Call the init routine for each fft size you will
+	be using.  Then you can call the fft routines below which will make the fftlib library 
+	call with the appropriate tables passed.  When you are done with all fft's you can call 
+	fftFree to release the storage for the tables.  Note that you can call fftInit repeatedly
+	with the same size; the extra calls will be ignored. So, you could make a macro to call
+	fftInit every time you call ffts. For example, you could have something like:
+	#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
+*******************************************************************/
+#include <stdlib.h>
+#include "fftlib.h"
+#include "matlib.h"
+#include "fftext.h"
+
+// Table storage shared by the routines in this file.  A zero entry means
+// that no table has been allocated for that slot.
+// UtblArray holds the cosine tables, BRLowArray the bit reversed tables.
+static float *UtblArray[8*sizeof(long)] = {0};
+static short *BRLowArray[8*sizeof(long)/2] = {0};
+
+int fftInit(long M){
+// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
+/* INPUTS */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* OUTPUTS */
+/* private cosine and bit reversed tables	*/
+/* RETURNS */
+/* 0 = all tables for this M are present */
+/* 1 = M is outside the supported range, nothing was done */
+/* 2 = a malloc failed */
+/* Tables that already exist are left alone, so repeated calls with the same M */
+/* do no further work.  Each table is checked on its own: a call made after a */
+/* malloc failure allocates whatever is still missing, instead of returning 0 */
+/* with a table pointer still zero. */
+
+int theError = 1;
+/*** I did NOT test cases with M>27 ***/
+if ((M >= 0) && (M < 8*sizeof(long))){
+	theError = 0;
+	if (UtblArray[M] == 0){	// have we not inited this size fft yet?
+		// init cos table
+		UtblArray[M] = (float *) malloc( (POW2(M)/4+1)*sizeof(float) );
+		if (UtblArray[M] == 0)
+			theError = 2;
+		else{
+			fftCosInit(M, UtblArray[M]);
+		}
+	}
+	if ((M > 1) && (BRLowArray[M/2] == 0)){	// init bit reversed table for cmplx fft
+		BRLowArray[M/2] = (short *) malloc( POW2(M/2-1)*sizeof(short) );
+		if (BRLowArray[M/2] == 0)
+			theError = 2;
+		else{
+			fftBRInit(M, BRLowArray[M/2]);
+		}
+	}
+	if ((M > 2) && (BRLowArray[(M-1)/2] == 0)){	// init bit reversed table for real fft
+		BRLowArray[(M-1)/2] = (short *) malloc( POW2((M-1)/2-1)*sizeof(short) );
+		if (BRLowArray[(M-1)/2] == 0)
+			theError = 2;
+		else{
+			fftBRInit(M-1, BRLowArray[(M-1)/2]);
+		}
+	}
+}
+return theError;
+}
+
+void fftFree(void){
+// Free every private cosine and bit reversed table and zero its slot,
+// so that fftInit sees the slot as not yet allocated.
+long slot;
+
+slot = 8*sizeof(long)/2;
+while (slot-- > 0){
+	free(BRLowArray[slot]);		// free() of a null pointer does nothing
+	BRLowArray[slot] = 0;
+}
+
+slot = 8*sizeof(long);
+while (slot-- > 0){
+	free(UtblArray[slot]);
+	UtblArray[slot] = 0;
+}
+}
+
+/*************************************************
+ The following calls are easier than calling fftlib directly.
+ Just make sure fftInit has been called for each M first.
+**************************************************/
+
+void ffts(float *data, long M, long Rows){
+/* In-place complex fft on the rows of data; forwards to ffts1 with the	*/
+/* private tables stored for this M	*/
+/* INPUTS */
+/* *data = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in data array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *data = output data array (overwrites the input)	*/
+	float *cosTbl = UtblArray[M];		/* cosine table */
+	short *brTbl = BRLowArray[M/2];		/* bit reversed table */
+
+	ffts1(data, M, Rows, cosTbl, brTbl);
+}
+
+void iffts(float *data, long M, long Rows){
+/* In-place inverse complex fft on the rows of data; forwards to iffts1	*/
+/* with the private tables stored for this M	*/
+/* INPUTS */
+/* *data = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in data array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *data = output data array (overwrites the input)	*/
+	float *cosTbl = UtblArray[M];		/* cosine table */
+	short *brTbl = BRLowArray[M/2];		/* bit reversed table */
+
+	iffts1(data, M, Rows, cosTbl, brTbl);
+}
+
+void rffts(float *data, long M, long Rows){
+/* In-place real fft on the rows of data; forwards to rffts1 with the	*/
+/* private tables stored for this M	*/
+/* The result is the complex spectra of the positive frequencies, */
+/* except that the slot of the first complex number holds the two real */
+/* values for DC and Nyquist */
+/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
+/* INPUTS */
+/* *data = real input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in data array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *data = output data array in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+	float *cosTbl = UtblArray[M];			/* cosine table */
+	short *brTbl = BRLowArray[(M-1)/2];		/* bit reversed table for the real fft */
+
+	rffts1(data, M, Rows, cosTbl, brTbl);
+}
+
+void riffts(float *data, long M, long Rows){
+/* In-place real ifft on the rows of data; forwards to riffts1 with the	*/
+/* private tables stored for this M	*/
+/* data order as from rffts */
+/* INPUTS */
+/* *data = input data array in the following order	*/
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in data array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *data = real output data array	*/
+	float *cosTbl = UtblArray[M];			/* cosine table */
+	short *brTbl = BRLowArray[(M-1)/2];		/* bit reversed table for the real fft */
+
+	riffts1(data, M, Rows, cosTbl, brTbl);
+}
+
+void rspectprod(float *data1, float *data2, float *outdata, long N){
+// Product of two spectra stored in the rffts layout.  The first two floats
+// are real values (zero freq and Nyquist) and are multiplied as plain reals;
+// the remaining values are handed to cvprod as N/2-1 complex numbers.
+// The result can be stored in-place over one of the inputs.
+/* INPUTS */
+/* *data1 = input data array	first spectra */
+/* *data2 = input data array	second spectra */
+/* N = fft input size for both data1 and data2 */
+/* OUTPUTS */
+/* *outdata = output data array spectra */
+
+// the zero freq value is multiplied for every N
+outdata[0] = data1[0] * data2[0];
+if (N <= 1)
+	return;		// nothing beyond the single value
+
+outdata[1] = data1[1] * data2[1];	// Nyquist freq values
+cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1);	// other positive freq values
+}
--- a/src/maths/fft/fftext.h
+++ b/src/maths/fft/fftext.h
@ -0,0 +1,106 @@
+/*******************************************************************
+	This file extends the fftlib with calls to maintain the cosine and bit reversed tables
+	for you (including mallocs and free's).  Call the init routine for each fft size you will
+	be using.  Then you can call the fft routines below which will make the fftlib library 
+	call with the appropriate tables passed.  When you are done with all fft's you can call 
+	fftFree to release the storage for the tables.  Note that you can call fftInit repeatedly
+	with the same size; the extra calls will be ignored. So, you could make a macro to call
+	fftInit every time you call ffts. For example, you could have something like:
+	#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
+*******************************************************************/
+
+int fftInit(long M);
+// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
+/* INPUTS */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* OUTPUTS */
+/* private cosine and bit reversed tables	*/
+
+void fftFree(void);
+// release storage for all private cosine and bit reversed tables
+
+void ffts(float *data, long M, long Rows);
+/* Compute in-place complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void iffts(float *data, long M, long Rows);
+/* Compute in-place inverse complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void rffts(float *data, long M, long Rows);
+/* Compute in-place real fft on the rows of the input array	*/
+/* The result is the complex spectra of the positive frequencies */
+/* except the location for the first complex number contains the real */
+/* values for DC and Nyquist */
+/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
+/* INPUTS */
+/* *ioptr = real input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = output data array	in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+
+void riffts(float *data, long M, long Rows);
+/* Compute in-place real ifft on the rows of the input array	*/
+/* data order as from rffts */
+/* INPUTS */
+/* *ioptr = input data array in the following order	*/
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)	*/
+/* OUTPUTS */
+/* *ioptr = real output data array	*/
+
+void rspectprod(float *data1, float *data2, float *outdata, long N);
+// When multiplying a pair of spectra from rfft care must be taken to multiply the
+// two real values separately from the complex ones. This routine does it correctly.
+// the result can be stored in-place over one of the inputs
+/* INPUTS */
+/* *data1 = input data array	first spectra */
+/* *data2 = input data array	second spectra */
+/* N = fft input size for both data1 and data2 */
+/* OUTPUTS */
+/* *outdata = output data array spectra */
+
+
+// The following is FYI
+
+
+//Note that most of the fft routines require full matrices, ie Rsiz==Ncols
+//This is how I like to define a real matrix:
+//struct matrix {		// real matrix
+//	float *d; 		// pointer to data
+//	long Nrows;		// number of rows in the matrix
+//	long Ncols;		// number of columns in the matrix (can be less than Rsiz)
+//	long Rsiz;		// number of floats from one row to the next
+//};
+//typedef struct matrix matrix;
+
+
+
+// CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
+// arrays that start exactly on a cache line start.
+// First we CACHEFILLMALLOC a void * (use this void * when free'ing),
+// then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
+// example:
+// aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
+// a = (float *) CEILCACHELINE(ainit);
+// ... main body of code ...
+// free(aInit);
+//
+// To disable this alignment, set CACHELINESIZE to 1
+//#define CACHELINESIZE 32				// Bytes per cache line
+//#define CACHELINEFILL (CACHELINESIZE-1)
+//#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
+//#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)
--- a/src/maths/fft/fftlib.c
+++ b/src/maths/fft/fftlib.c
--- a/src/maths/fft/fftlib.h
+++ b/src/maths/fft/fftlib.h
@ -0,0 +1,76 @@
+#define MYRECIPLN2	1.442695040888963407359924681001892137426	// 1.0/log(2)
+
+/* some useful conversions between a number and its power of 2 */
+#define LOG2(a)	(MYRECIPLN2*log(a))	// floating point logarithm base 2; expands to a call of log(), so <math.h> must be included by the user
+#define POW2(m) ((unsigned long) 1 << (m))	// integer power of 2; m must be smaller than the bit width of unsigned long (original note: for m<32)
+
+/*******************************************************************
+lower level fft stuff called by routines in fftext.c and fft2d.c
+*******************************************************************/
+
+void fftCosInit(long M, float *Utbl);
+/* Compute Utbl, the cosine table for ffts	*/
+/* of size (pow(2,M)/4 +1)	*/
+/* INPUTS */
+/* M = log2 of fft size	*/
+/* OUTPUTS */
+/* *Utbl = cosine table	*/
+
+void fftBRInit(long M, short *BRLow);
+/* Compute BRLow, the bit reversed table for ffts	*/
+/* of size pow(2,M/2 -1)	*/
+/* INPUTS */
+/* M = log2 of fft size	*/
+/* OUTPUTS */
+/* *BRLow = bit reversed counter table	*/
+
+void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
+/* Compute in-place complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	(ex M=10 for 1024 point fft) */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array)	*/
+/* *Utbl = cosine table	*/
+/* *BRLow = bit reversed counter table	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
+/* Compute in-place inverse complex fft on the rows of the input array	*/
+/* INPUTS */
+/* *ioptr = input data array	*/
+/* M = log2 of fft size	*/
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array)	*/
+/* *Utbl = cosine table	*/
+/* *BRLow = bit reversed counter table	*/
+/* OUTPUTS */
+/* *ioptr = output data array	*/
+
+void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
+/* Compute in-place real fft on the rows of the input array	*/
+/* The result is the complex spectra of the positive frequencies */
+/* except the location for the first complex number contains the real */
+/* values for DC and Nyquist */
+/* INPUTS */
+/* *ioptr = real input data array	*/
+/* M = log2 of fft size	*/
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array)	*/
+/* *Utbl = cosine table	*/
+/* *BRLow = bit reversed counter table	*/
+/* OUTPUTS */
+/* *ioptr = output data array	in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+
+
+void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
+/* Compute in-place real ifft on the rows of the input array	*/
+/* data order as from rffts1 */
+/* INPUTS */
+/* *ioptr = input data array in the following order	*/
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* M = log2 of fft size	*/
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array)	*/
+/* *Utbl = cosine table	*/
+/* *BRLow = bit reversed counter table	*/
+/* OUTPUTS */
+/* *ioptr = real output data array	*/
--- a/src/maths/fft/matlib.c
+++ b/src/maths/fft/matlib.c
@ -0,0 +1,297 @@
+/* a few routines from a vector/matrix library */
+#include "matlib.h"
+
+void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
+/* Out-of-place transpose of a matrix of floats.	*/
+/* The input rows are consumed in bands of eight: for each column of a	*/
+/* band all eight values are read before any of them is written. Rows	*/
+/* left over after the last full band are copied one value at a time.	*/
+/* INPUTS */
+/* *indata = input matrix, element (r,c) is read from indata[r*iRsiz + c]	*/
+/* iRsiz = distance in floats between the starts of consecutive input rows	*/
+/* oRsiz = distance in floats between the starts of consecutive output rows	*/
+/* Nrows = number of rows in the input matrix	*/
+/* Ncols = number of columns in the input matrix	*/
+/* OUTPUTS */
+/* *outdata = transposed matrix, outdata[c*oRsiz + r] = indata[r*iRsiz + c]	*/
+
+const long	nBands = Nrows/8;	/* complete bands of eight input rows */
+const long	nLeft = Nrows%8;	/* rows remaining after the last full band */
+float	*bandIn = indata;	/* first input row of the current band */
+float	*bandOut = outdata;	/* output position matching that row */
+float	hold[8];		/* one input column of the current band */
+long	band;			/* band counter */
+long	col;			/* column counter */
+long	k;			/* row counter inside a band */
+
+for (band = 0; band < nBands; band++){
+	for (col = 0; col < Ncols; col++){
+		/* fetch the whole column of the band, then store it as a row piece */
+		for (k = 0; k < 8; k++)
+			hold[k] = bandIn[col + k*iRsiz];
+		for (k = 0; k < 8; k++)
+			bandOut[col*oRsiz + k] = hold[k];
+	}
+	bandIn += 8*iRsiz;
+	bandOut += 8;
+}
+/* leftover rows (fewer than eight); nothing happens when nLeft is 0 */
+for (col = 0; col < Ncols; col++){
+	for (k = 0; k < nLeft; k++)
+		bandOut[col*oRsiz + k] = bandIn[col + k*iRsiz];
+}
+}
+
+void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
+/* Out-of-place transpose of a matrix of complex floats.	*/
+/* Every element is a (real, imaginary) pair of adjacent floats. The	*/
+/* input rows are consumed in bands of four: for each column of a band	*/
+/* all four complex values are read before any of them is written. Rows	*/
+/* left over after the last full band are copied one value at a time.	*/
+/* INPUTS */
+/* *indata = input matrix, element (r,c) starts at indata[2*(r*iRsiz + c)]	*/
+/* iRsiz = distance in complex elements between consecutive input rows	*/
+/* oRsiz = distance in complex elements between consecutive output rows	*/
+/* Nrows = number of rows in the input matrix	*/
+/* Ncols = number of columns in the input matrix	*/
+/* OUTPUTS */
+/* *outdata = transposed matrix, element (c,r) starts at outdata[2*(c*oRsiz + r)]	*/
+
+const long	nBands = Nrows/4;	/* complete bands of four input rows */
+const long	nLeft = Nrows%4;	/* rows remaining after the last full band */
+float	*bandIn = indata;	/* first input row of the current band */
+float	*bandOut = outdata;	/* output position matching that row */
+float	hold[8];		/* four complex values: one column of the band */
+float	re;			/* real part in flight */
+float	im;			/* imaginary part in flight */
+long	band;			/* band counter */
+long	col;			/* column counter */
+long	k;			/* row / float counter inside a band */
+
+for (band = 0; band < nBands; band++){
+	for (col = 0; col < Ncols; col++){
+		/* fetch the whole column of the band, then store it as a row piece */
+		for (k = 0; k < 4; k++){
+			hold[2*k] = bandIn[2*col + 2*k*iRsiz];
+			hold[2*k + 1] = bandIn[2*col + 2*k*iRsiz + 1];
+		}
+		for (k = 0; k < 8; k++)
+			bandOut[2*col*oRsiz + k] = hold[k];
+	}
+	bandIn += 8*iRsiz;	/* four rows of 2*iRsiz floats each */
+	bandOut += 8;		/* four complex values */
+}
+/* leftover rows (fewer than four); nothing happens when nLeft is 0 */
+for (col = 0; col < Ncols; col++){
+	for (k = 0; k < nLeft; k++){
+		re = bandIn[2*col + 2*k*iRsiz];
+		im = bandIn[2*col + 2*k*iRsiz + 1];
+		bandOut[2*col*oRsiz + 2*k] = re;
+		bandOut[2*col*oRsiz + 2*k + 1] = im;
+	}
+}
+}
+
+void cvprod(float *a, float *b, float *out, long N){
+/* Element-wise product of two complex vectors; may be used in-place.	*/
+/* Complex numbers are stored as adjacent (real, imaginary) float pairs.	*/
+/* Groups of four products are pipelined: the operands of the next group	*/
+/* are read before the results of the previous group are written. The	*/
+/* last N%4 products are handled one at a time.	*/
+/* INPUTS */
+/* N vector length */
+/* *a complex vector length N complex numbers */
+/* *b complex vector length N complex numbers */
+/* OUTPUTS */
+/* *out complex vector length N, out[k] = a[k] * b[k] */
+
+float	av[8];		/* operands from a for the current group of four */
+float	bv[8];		/* operands from b for the current group of four */
+float	prod[8];	/* finished products waiting to be stored */
+float	ar;		/* real part of an a operand */
+float	ai;		/* imaginary part of an a operand */
+float	br;		/* real part of a b operand */
+float	bi;		/* imaginary part of a b operand */
+float	re;		/* real part of a product */
+float	im;		/* imaginary part of a product */
+long	groups;		/* groups of four still to be computed */
+long	k;		/* index inside a group / tail counter */
+
+if (N >= 4){
+	groups = N/4;
+	for (k = 0; k < 8; k++){
+		av[k] = a[k];
+		bv[k] = b[k];
+	}
+	for (;;){
+		/* multiply the four pairs currently held in av/bv */
+		for (k = 0; k < 8; k += 2){
+			re = av[k] * bv[k];
+			im = av[k] * bv[k + 1];
+			re -= av[k + 1] * bv[k + 1];
+			im = av[k + 1] * bv[k] + im;
+			prod[k] = re;
+			prod[k + 1] = im;
+		}
+		a += 8;
+		b += 8;
+		if (--groups == 0)
+			break;
+		/* read the next group before the previous results are stored */
+		for (k = 0; k < 8; k++){
+			av[k] = a[k];
+			bv[k] = b[k];
+		}
+		for (k = 0; k < 8; k++)
+			out[k] = prod[k];
+		out += 8;
+	}
+	/* flush the results of the final group */
+	for (k = 0; k < 8; k++)
+		out[k] = prod[k];
+	out += 8;
+}
+/* remaining N%4 products, one at a time */
+for (k = 0; k < N%4; k++){
+	ar = a[2*k];
+	br = b[2*k];
+	ai = a[2*k + 1];
+	bi = b[2*k + 1];
+	re = ar * br;
+	im = ar * bi;
+	re -= ai * bi;
+	im = ai * br + im;
+	out[2*k] = re;
+	out[2*k + 1] = im;
+}
+}
--- a/src/maths/fft/matlib.h
+++ b/src/maths/fft/matlib.h
@ -0,0 +1,33 @@
+/* a few routines from a vector/matrix library */
+
+void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
+/* not in-place matrix transpose	*/
+/* INPUTS */
+/* *indata = input data array	*/
+/* iRsiz = offset (in floats) between consecutive rows of input data array	*/
+/* oRsiz = offset (in floats) between consecutive rows of output data array	*/
+/* Nrows = number of rows in input data array	*/
+/* Ncols = number of columns in input data array	*/
+/* OUTPUTS */
+/* *outdata = output data array	*/
+
+void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
+/* not in-place complex matrix transpose	*/
+/* INPUTS */
+/* *indata = input data array	*/
+/* iRsiz = offset (in complex elements) between consecutive rows of input data array	*/
+/* oRsiz = offset (in complex elements) between consecutive rows of output data array	*/
+/* Nrows = number of rows in input data array	*/
+/* Ncols = number of columns in input data array	*/
+/* OUTPUTS */
+/* *outdata = output data array	*/
+
+void cvprod(float *a, float *b, float *out, long N);
+/* complex vector product, can be in-place */
+/* product of complex vector *a times complex vector *b */
+/* INPUTS */
+/* N vector length */
+/* *a complex vector length N complex numbers */
+/* *b complex vector length N complex numbers */
+/* OUTPUTS */
+/* *out complex vector length N */
--- a/src/maths/misc/randnumb.c
+++ b/src/maths/misc/randnumb.c
@ -216,3 +216,27 @@ double gauss(void)
    return glgset;
  }
 }
+
+/* Polar form of the Box-Muller generator for Gaussian distributed
+   random variates.
+   Each attempt draws two values from CombLCGTaus() and maps them to a
+   point (x1, x2) in the square [-1, 1] x [-1, 1] (this assumes that
+   CombLCGTaus() returns uniform variates in [0, 1] -- confirm against
+   its definition). The point is redrawn until it lies strictly inside
+   the unit circle and is not the origin: w == 0 must be rejected as
+   well, because log(0) and the division by w would otherwise turn
+   both results into NaN.
+   Delivers two values per call:
+   *py1, *py2 = the two generated variates
+*/
+
+void rgauss(double* py1, double* py2)
+{
+    double x1, x2, w;
+
+    do {
+        x1 = 2.0 * CombLCGTaus() - 1.0;
+        x2 = 2.0 * CombLCGTaus() - 1.0;
+        w = x1 * x1 + x2 * x2;
+    } while ( w >= 1.0 || w == 0.0 );
+
+    /* common scale factor of the polar method */
+    w = sqrt( (-2.0 * log( w ) ) / w );
+
+    *py1 = x1 * w;
+    *py2 = x2 * w;
+}
+
+
--- a/src/spicelib/devices/vsrc/vsrc.c
+++ b/src/spicelib/devices/vsrc/vsrc.c
@ -21,8 +21,9 @@ IFparm VSRCpTable[] = { /* parameters */
 IOP ("pwl",     VSRC_PWL,       IF_REALVEC,"Piecewise linear description"),
 IOP ("sffm",    VSRC_SFFM,      IF_REALVEC,"Single freq. FM descripton"),
 IOP ("am",      VSRC_AM,        IF_REALVEC,"Amplitude modulation descripton"),
+ IOP ("trnoise", VSRC_TRNOISE,   IF_REALVEC,"Transient noise descripton"), 
 
-OPU ("pos_node",VSRC_POS_NODE,  IF_INTEGER,"Positive node of source"),
+ OPU ("pos_node",VSRC_POS_NODE,  IF_INTEGER,"Positive node of source"),
 OPU ("neg_node",VSRC_NEG_NODE,  IF_INTEGER,"Negative node of source"),
 OPU ("function",VSRC_FCN_TYPE,  IF_INTEGER,"Function of the source"),
 OPU ("order",   VSRC_FCN_ORDER, IF_INTEGER,"Order of the source function"),
--- a/src/spicelib/devices/vsrc/vsrcacct.c
+++ b/src/spicelib/devices/vsrc/vsrcacct.c
@ -11,6 +11,10 @@ Author: 1985 Thomas L. Quarles
 #include "suffix.h"
 #include "missing_math.h"

+extern int fftInit(long M);
+extern void fftFree(void);
+extern void rffts(float *data, long M, long Rows);
+
 #define SAMETIME(a,b)    (fabs((a)-(b))<= TIMETOL * PW)
 #define TIMETOL    1e-7
 		
@ -74,6 +78,7 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
                    /* offset time by delay */
                    time = ckt->CKTtime - TD;
                    tshift = TD;
+
 #ifdef XSPICE					
 		         /* normalize phase to 0 - 360° */ 
 		         /* normalize phase to cycles */
@ -180,6 +185,52 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
                    }
                    break;
                }
+
+/**** transient noise routines: 
+VNoi2 2 0  DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
+                        rms value, time step, 0 0
+VNoi1 1 0  DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
+                        0,  time step, exponent < 2, rms value
+*/
+                case TRNOISE: {
+                  double NA, NT, TS, time, basetime = 0.;
+
+#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
+#define NTIMETOL 1e-7
+
+                  NA = here->VSRCcoeffs[0]; // input is rms value
+                  NT = here->VSRCcoeffs[1]; // time step
+                  if (NT == 0.) // no further breakpoint if value not given
+                      break;
+//                  TS = NT > ckt->CKTstep ? NT : ckt->CKTstep;
+                  TS = NT;
+                  time = ckt->CKTtime;
+
+                  if(time >= TS) {
+                        /* repeating signal - figure out where we are
+                           in period */
+                     basetime = TS * floor(time*1.000000000001/TS);
+//                     basetime = TS * floor(time/TS);
+//                     basetime = TS * here->VSRCncount;
+                     time -= basetime;
+                  }
+                  if(ckt->CKTbreak &&  NSAMETIME(time,0)) {
+                     /* set next breakpoint */
+//                     error = CKTsetBreak(ckt, TS * ((double)here->VSRCncount + 1.));
+                     error = CKTsetBreak(ckt, basetime + TS);
+                     if(error) return(error);
+                  } 
+ /*                 else if (ckt->CKTbreak &&  NSAMETIME(time,TS)) {
+                     // set next breakpoint
+                     error = CKTsetBreak(ckt, basetime + TS + TS);
+                     if(error) return(error);
+                  } */
+                  if (ckt->CKTtime == 0.) {
+//                   printf("VSRC: free fft tables\n");
+                     fftFree();
+                  }
+               }
+               break;    				
                }
            }
 bkptset: ;
--- a/src/spicelib/devices/vsrc/vsrcask.c
+++ b/src/spicelib/devices/vsrc/vsrcask.c
@ -46,6 +46,7 @@ VSRCask(CKTcircuit *ckt, GENinstance *inst, int which, IFvalue *value, IFvalue *
        case VSRC_PWL:
        case VSRC_SFFM:
 	    case VSRC_AM:
+	    case VSRC_TRNOISE:		
        case VSRC_FCN_COEFFS:
            temp = value->v.numValue = here->VSRCfunctionOrder;
            v = value->v.vec.rVec = TMALLOC(double, here->VSRCfunctionOrder);
--- a/src/spicelib/devices/vsrc/vsrcdefs.h
+++ b/src/spicelib/devices/vsrc/vsrcdefs.h
@ -48,7 +48,16 @@ typedef struct sVSRCinstance {
    double VSRCdF2mag; /* distortion f2 magnitude */
    double VSRCdF1phase; /* distortion f1 phase */
    double VSRCdF2phase; /* distortion f2 phase */
-
+    
+    /*transient noise*/
+    double VSRCprevTime; /*last time a new random value was issued*/
+    double VSRCprevVal;  /*last value issued at prevTime*/
+    double VSRCnewVal;   /*new value issued at prevTime*/    
+    double VSRCsecRand;  /*second random value not yet used*/
+    float *VSRConeof;    /*pointer to array of 1 over f noise values */
+    long int VSRCncount; /* counter to retrieve noise values */
+    /*end of noise*/
+	
    double VSRCr;           /* pwl repeat */
    double VSRCrdelay;     /* pwl delay period */
    double *VSRCposIbrptr;  /* pointer to sparse matrix element at 
@ -93,6 +102,7 @@ typedef struct sVSRCmodel {
 #define SFFM 4
 #define PWL 5
 #define AM 6
+#define TRNOISE 7
 #endif /*PULSE*/

 /* device parameters */
@ -121,6 +131,7 @@ typedef struct sVSRCmodel {
 #define VSRC_AM 22
 #define VSRC_R 23
 #define VSRC_TD 24
+#define VSRC_TRNOISE 25

 /* model parameters */

--- a/src/spicelib/devices/vsrc/vsrcload.c
+++ b/src/spicelib/devices/vsrc/vsrcload.c
@ -11,6 +11,15 @@ $Id$
 #include "trandefs.h"
 #include "sperror.h"
 #include "suffix.h"
+#undef WaGauss
+#ifdef FastRand
+#include "FastNorm3.h"
+#elif defined (WaGauss)
+#include "wallace.h"
+#else
+extern void rgauss(double* py1, double* py2);
+#endif
+#include "1-f-code.h"

 #ifdef XSPICE_EXP
 /* gtri - begin - wbk - modify for supply ramping option */
@ -27,7 +36,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
    VSRCmodel *model = (VSRCmodel *)inModel;
    VSRCinstance *here;
    double time;
-    double value;
+    double value = 0.0;

    /*  loop through all the voltage source models */
    for( ; model != NULL; model = model->VSRCnextModel ) {
@ -35,7 +44,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
        /* loop through all the instances of the model */
        for (here = model->VSRCinstances; here != NULL ;
                here=here->VSRCnextInstance) {
-	    if (here->VSRCowner != ARCHme) continue;
+            if (here->VSRCowner != ARCHme) continue;
            
            *(here->VSRCposIbrptr) += 1.0 ;
            *(here->VSRCnegIbrptr) -= 1.0 ;
@ -63,29 +72,29 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                }
                
                case PULSE: {
-		    double	V1, V2, TD, TR, TF, PW, PER;
-                    double	basetime = 0;
+		            double V1, V2, TD, TR, TF, PW, PER;
+                    double basetime = 0;
 #ifdef XSPICE
                    double PHASE;
                    double phase;
                    double deltat;
 #endif
-		    V1 = here->VSRCcoeffs[0];
-		    V2 = here->VSRCcoeffs[1];
-		    TD = here->VSRCfunctionOrder > 2
-			? here->VSRCcoeffs[2] : 0.0;
-		    TR = here->VSRCfunctionOrder > 3
-			&& here->VSRCcoeffs[3] != 0.0
-			? here->VSRCcoeffs[3] : ckt->CKTstep;
-		    TF = here->VSRCfunctionOrder > 4
-			&& here->VSRCcoeffs[4] != 0.0
-			? here->VSRCcoeffs[4] : ckt->CKTstep;
-		    PW = here->VSRCfunctionOrder > 5
-			&& here->VSRCcoeffs[5] != 0.0
-			? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
-		    PER = here->VSRCfunctionOrder > 6
-			&& here->VSRCcoeffs[6] != 0.0
-			? here->VSRCcoeffs[6] : ckt->CKTfinalTime;
+                    V1 = here->VSRCcoeffs[0];
+                    V2 = here->VSRCcoeffs[1];
+                    TD = here->VSRCfunctionOrder > 2
+                       ? here->VSRCcoeffs[2] : 0.0;
+                    TR = here->VSRCfunctionOrder > 3
+                       && here->VSRCcoeffs[3] != 0.0
+                       ? here->VSRCcoeffs[3] : ckt->CKTstep;
+                    TF = here->VSRCfunctionOrder > 4
+                       && here->VSRCcoeffs[4] != 0.0
+                       ? here->VSRCcoeffs[4] : ckt->CKTstep;
+                    PW = here->VSRCfunctionOrder > 5
+                       && here->VSRCcoeffs[5] != 0.0
+                       ? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
+                    PER = here->VSRCfunctionOrder > 6
+                       && here->VSRCcoeffs[6] != 0.0
+                       ? here->VSRCcoeffs[6] : ckt->CKTfinalTime;

                    /* shift time by delay time TD */                   
                    time -=  TD;
@ -126,25 +135,25 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)

                case SINE: {
 		
-		    double VO, VA, FREQ, TD, THETA;
-/* gtri - begin - wbk - add PHASE parameter */
+                    double VO, VA, FREQ, TD, THETA;
+                    /* gtri - begin - wbk - add PHASE parameter */
 #ifdef XSPICE
                    double PHASE;
-		    double phase;
+                    double phase;

                    PHASE = here->VSRCfunctionOrder > 5
-		           ? here->VSRCcoeffs[5] : 0.0;
+                       ? here->VSRCcoeffs[5] : 0.0;
 		       		
-	     	    /* compute phase in radians */ 
+	     	        /* compute phase in radians */ 
                    phase = PHASE * M_PI / 180.0;
 #endif
                    VO = here->VSRCcoeffs[0];
-	            VA = here->VSRCcoeffs[1];
+                    VA = here->VSRCcoeffs[1];
                    FREQ =  here->VSRCfunctionOrder > 2 
-	                 && here->VSRCcoeffs[2] != 0.0
-		          ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
-	            TD = here->VSRCfunctionOrder > 3
-	                ? here->VSRCcoeffs[3] : 0.0;
+	                   && here->VSRCcoeffs[2] != 0.0
+		               ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
+	                TD = here->VSRCfunctionOrder > 3
+	                   ? here->VSRCcoeffs[3] : 0.0;
                    THETA = here->VSRCfunctionOrder > 4
 	                   ? here->VSRCcoeffs[4] : 0.0;
 			   
@ -155,12 +164,12 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                    } else {

                        value = VO + VA * sin(FREQ*time * 2.0 * M_PI + phase) * 
-                                exp(-time*THETA);
+                           exp(-time*THETA);
 #else						    
                        value = VO;
                    } else {                        
                       value = VO + VA * sin(FREQ * time * 2.0 * M_PI) * 
-                                exp(-(time*THETA));
+                           exp(-(time*THETA));
 #endif
 /* gtri - end - wbk - add PHASE parameter */				
                    }
@ -168,24 +177,23 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                break;

                case EXP: {
-		    double V1, V2, TD1, TD2, TAU1, TAU2;
+                    double V1, V2, TD1, TD2, TAU1, TAU2;
                    
                    V1  = here->VSRCcoeffs[0];
-		    V2  = here->VSRCcoeffs[1];
-		    TD1 = here->VSRCfunctionOrder > 2 
-		        && here->VSRCcoeffs[2] != 0.0
-			 ? here->VSRCcoeffs[2] : ckt->CKTstep;
-		    TAU1 = here->VSRCfunctionOrder > 3 
-		         && here->VSRCcoeffs[3] != 0.0
-			  ? here->VSRCcoeffs[3] : ckt->CKTstep;
+                    V2  = here->VSRCcoeffs[1];
+                    TD1 = here->VSRCfunctionOrder > 2 
+                       && here->VSRCcoeffs[2] != 0.0
+                       ? here->VSRCcoeffs[2] : ckt->CKTstep;
+                    TAU1 = here->VSRCfunctionOrder > 3 
+                       && here->VSRCcoeffs[3] != 0.0
+                       ? here->VSRCcoeffs[3] : ckt->CKTstep;
                    TD2  = here->VSRCfunctionOrder > 4 
-		         && here->VSRCcoeffs[4] != 0.0
-			  ? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
+                       && here->VSRCcoeffs[4] != 0.0
+                       ? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
                    TAU2 = here->VSRCfunctionOrder > 5 
-		         && here->VSRCcoeffs[5]
-			  ? here->VSRCcoeffs[5] : ckt->CKTstep;
-                    
-		    
+                       && here->VSRCcoeffs[5]
+                       ? here->VSRCcoeffs[5] : ckt->CKTstep;
+ 		    
                    if(time <= TD1)  {
                        value = V1;
                    } else if (time <= TD2) {
@ -199,7 +207,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)

                case SFFM:{
 		
-		    double VO, VA, FC, MDI, FS;
+                    double VO, VA, FC, MDI, FS;
 /* gtri - begin - wbk - add PHASE parameters */
 #ifdef XSPICE

@ -208,25 +216,24 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                    double phases;
 		    
                    PHASEC = here->VSRCfunctionOrder > 5
-		            ? here->VSRCcoeffs[5] : 0.0;
+		               ? here->VSRCcoeffs[5] : 0.0;
                    PHASES = here->VSRCfunctionOrder > 6
-		            ? here->VSRCcoeffs[6] : 0.0;
+		               ? here->VSRCcoeffs[6] : 0.0;
 			
                    /* compute phases in radians */
                    phasec = PHASEC * M_PI / 180.0;
                    phases = PHASES * M_PI / 180.0;    
-
 #endif				    
-                   VO = here->VSRCcoeffs[0];
-                   VA = here->VSRCcoeffs[1];
-                   FC = here->VSRCfunctionOrder > 2 
-		      && here->VSRCcoeffs[2]
-		       ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
-                   MDI = here->VSRCfunctionOrder > 3
-		        ? here->VSRCcoeffs[3] : 0.0;
-                   FS  = here->VSRCfunctionOrder > 4 
-		       && here->VSRCcoeffs[4]
-		        ? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
+                    VO = here->VSRCcoeffs[0];
+                    VA = here->VSRCcoeffs[1];
+                    FC = here->VSRCfunctionOrder > 2 
+                       && here->VSRCcoeffs[2]
+                       ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
+                    MDI = here->VSRCfunctionOrder > 3
+                       ? here->VSRCcoeffs[3] : 0.0;
+                    FS  = here->VSRCfunctionOrder > 4 
+                       && here->VSRCcoeffs[4]
+                       ? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
 #ifdef XSPICE
                    /* compute waveform value */
                    value = VO + VA * 
@ -242,10 +249,9 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                break;
                case AM:{
 		
-		double VA, FC, MF, VO, TD;
+                    double VA, FC, MF, VO, TD;
 /* gtri - begin - wbk - add PHASE parameters */
 #ifdef XSPICE
-
                    double PHASEC, PHASES;
                    double phasec;
                    double phases;
@ -260,49 +266,40 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                    phases = PHASES * M_PI / 180.0;    

 #endif			
-		
-		   VA = here->VSRCcoeffs[0];
-                   VO = here->VSRCcoeffs[1];
-                   MF = here->VSRCfunctionOrder > 2 
-		      && here->VSRCcoeffs[2]
-		       ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
-                   FC = here->VSRCfunctionOrder > 3
-		        ? here->VSRCcoeffs[3] : 0.0;
-                   TD  = here->VSRCfunctionOrder > 4 
-		       && here->VSRCcoeffs[4]
-		        ? here->VSRCcoeffs[4] : 0.0;
+                    VA = here->VSRCcoeffs[0];
+                    VO = here->VSRCcoeffs[1];
+                    MF = here->VSRCfunctionOrder > 2 
+                       && here->VSRCcoeffs[2]
+                       ? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
+                    FC = here->VSRCfunctionOrder > 3
+                       ? here->VSRCcoeffs[3] : 0.0;
+                    TD  = here->VSRCfunctionOrder > 4 
+                       && here->VSRCcoeffs[4]
+                       ? here->VSRCcoeffs[4] : 0.0;

                    time -= TD;
                    if (time <= 0) {
                        value = 0;
                    } else {
 #ifdef XSPICE
-                    /* compute waveform value */
-		    value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
-		        sin(2 * M_PI * FC * time + phases);
+                        /* compute waveform value */
+                        value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
+                           sin(2 * M_PI * FC * time + phases);
                    
 #else /* XSPICE */		    
-		        value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
-		        sin(2 * M_PI * FC * time);
+                        value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
+                           sin(2 * M_PI * FC * time);
 #endif			
-		    }
+                    }
 		    
 /* gtri - end - wbk - add PHASE parameters */
-		}
-		break;
+                }
+                break;
                case PWL: {
                    int i = 0, num_repeat = 0, ii = 0;
                    double foo, repeat_time = 0, end_time, breakpt_time, itime;

                    time -= here->VSRCrdelay;
-//                    if(time > PER) {
-                        /* repeating signal - figure out where we are */
-                        /* in period */
-//                        basetime = PER * floor(time/PER);
-//                        time -= basetime;
-//                    }
-
-

                    if(time < *(here->VSRCcoeffs)) {
                        foo = *(here->VSRCcoeffs + 1) ;
@ -310,35 +307,243 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
                        goto loadDone;
                    }

-		    do {
-		      for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
-                  itime = *(here->VSRCcoeffs+2*i);
-                  if (  AlmostEqualUlps(itime+repeat_time, time, 3 )) {
-//                  if ( fabs( (*(here->VSRCcoeffs+2*i)+repeat_time) - time ) < 1e-20 ) {
-                      foo   = *(here->VSRCcoeffs+2*i+1);
-                      value = foo;
-                      goto loadDone;
-                   } else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time) && (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
-                       foo   = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
-								(*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
+                    do {
+                        for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
+                            itime = *(here->VSRCcoeffs+2*i);
+                            if (  AlmostEqualUlps(itime+repeat_time, time, 3 )) {
+                                foo   = *(here->VSRCcoeffs+2*i+1);
+                                value = foo;
+                                goto loadDone;
+                            } else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time) 
+							   && (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
+                                foo   = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
+								   (*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
 							       (*(here->VSRCcoeffs+2*i+3)    - *(here->VSRCcoeffs+2*i+1)));
-                       value = foo;
-                       goto loadDone;
-                   }
-		      }
-		      foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
-		      value = foo;
+                                value = foo;
+                                goto loadDone;
+                            }
+                        }
+                        foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
+                        value = foo;

-		      if ( !here->VSRCrGiven ) goto loadDone;
+                        if ( !here->VSRCrGiven ) goto loadDone;
 		      
-		      end_time     = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
-		      breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
-		      repeat_time  = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
-		      ii            = here->VSRCrBreakpt/2;
-		    } while ( here->VSRCrGiven );
+                        end_time = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
+                        breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
+                        repeat_time  = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
+                        ii            = here->VSRCrBreakpt/2;
+                    } while ( here->VSRCrGiven );
                    break;
                }
-                }
+
+/**** transient noise routines:
+VNoi2 2 0  DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
+                        rms value, time step, 0 0
+VNoi1 1 0  DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
+                        0,  time step, exponent < 2, rms value
+*/
+                case TRNOISE: {
+                /* Generate voltage point every TS with amplitude NA * ra,
+                   where ra is drawn from a random number generator with
+                   gaussian distribution with mean 0 and standard deviation 1 
+				*/
+
+//#define PRVAL
+//                    typedef int bool;
+   
+                    double newval=0.0, lastval=0.0, lasttime=0.0;
+                    double NA, NT, TS;                    
+                    double V1, V2, basetime = 0.;
+                    double scalef, ra1, ra2;
+                    float NALPHA, NAMP;
+   
+                    long int nosteps, newsteps = 1, newexp = 0;
+             
+                    bool aof = FALSE;   
+                       
+                    NA = here->VSRCcoeffs[0]; // input is rms value
+                    NT = here->VSRCcoeffs[1]; // time step
+
+                    scalef = NA;
+//                    scalef = NA*1.32;
+
+                    NALPHA = here->VSRCfunctionOrder > 2
+                       ? (float)here->VSRCcoeffs[2] : 0.0f;
+                    NAMP = here->VSRCfunctionOrder > 3
+                       && here->VSRCcoeffs[3] != 0.0
+                       && here->VSRCcoeffs[2] != 0.0       
+                       ? (float)here->VSRCcoeffs[3] : 0.0f;
+
+                    if ((NT == 0.) || ((NA == 0.) && (NAMP == 0.))) {
+                        value =  here->VSRCdcValue;
+                        goto noiDone;
+                    }
+                    else
+                        TS = NT; /* time step for noise */
+        
+                    if ((NALPHA > 0.0) && (NAMP > 0.0)) aof = TRUE;
+   
+                    lasttime = here->VSRCprevTime;
+                    lastval = here->VSRCprevVal;
+                    newval = here->VSRCnewVal;
+                    /* set all data: DC, white, 1of */
+                    if (time <= 0 /*ckt->CKTstep*/) {
+                        /* data are already set */
+                        if ((here->VSRCprevVal != 0) || (here->VSRCnewVal != 0)) {
+                            value = here->VSRCprevVal;
+                            goto noiDone;
+                        }
+                        lasttime = 0.0;
+                        here->VSRCsecRand = 2.; /* > 1, invalid number out of the random number range */
+                        /* get two random samples */
+#ifdef FastRand
+                        // use FastNorm3
+                        here->VSRCprevVal = scalef * GaussWa;
+                        here->VSRCnewVal = scalef * GaussWa; 
+#elif defined (WaGauss)
+                        // use WallaceHV
+                        here->VSRCprevVal = scalef * GaussWa;
+                        here->VSRCnewVal = scalef * GaussWa;
+#else
+                        // make use of two random variables per call to rgauss()
+                        rgauss(&ra1, &ra2);
+                        here->VSRCprevVal = scalef * ra1;
+                        // choose to set start value to 0
+                        here->VSRCprevVal = 0;
+                        here->VSRCnewVal = scalef * ra2;
+#endif
+                        /* generate 1 over f noise at time 0 */
+                        if (aof) {
+                            if (here->VSRCncount==0) {
+                                // add 10 steps for start up sequence
+                                nosteps = (long)((ckt->CKTfinalTime)/TS) + 10;
+                                // generate number of steps as power of 2
+                                while(newsteps < nosteps) {
+                                    newsteps <<= 1;
+                                    newexp++;
+                                }
+                                here->VSRConeof = TMALLOC(float, newsteps); //(float *)tmalloc(sizeof(float) * newsteps);
+#ifdef PRVAL
+                                printf("ALPHA: %f, GAIN: %e\n", NALPHA, NAMP);
+#endif
+                                f_alpha(newsteps, newexp, here->VSRConeof, NAMP, NALPHA);
+#ifdef PRVAL
+                                printf("Noi1: %e, Noi2: %e\n", here->VSRConeof[10], here->VSRConeof[100]);                            
+#endif
+                                here->VSRCprevVal += here->VSRConeof[here->VSRCncount];                         
+                                here->VSRCncount++;
+                                here->VSRCnewVal += here->VSRConeof[here->VSRCncount];
+                                here->VSRCncount++;
+                                value = newval;
+                                // add DC
+                                here->VSRCprevVal += here->VSRCdcValue;
+                                here->VSRCnewVal += here->VSRCdcValue;
+                                value = here->VSRCprevVal;
+#ifdef PRVAL
+                                printf("start1, time: %e, outp: %e, rnd: %e\n", time, newval, testval);
+#endif
+                            } else { // here->VSRCncount > 0
+                                // add DC
+                                here->VSRCprevVal += here->VSRCdcValue;
+                                here->VSRCnewVal += here->VSRCdcValue;
+                                value = here->VSRCprevVal;
+#ifdef PRVAL
+						        printf("start2, time: %e, outp: %e, rnd: %e\n", time, here->VSRCprevVal, testval);
+#endif                        
+                            }         
+#ifdef PRVAL
+                            printf("time 0 value: %e for %s\n", here->VSRCprevVal, here->VSRCname);
+#endif   
+                            goto loadDone;                         
+                        }  //aof
+                        // add DC
+                        here->VSRCprevVal += here->VSRCdcValue;
+                        here->VSRCnewVal += here->VSRCdcValue;
+                        value = here->VSRCprevVal;
+                        here->VSRCprevTime = 0.;
+                        goto loadDone;
+                    }  // time <= 0
+
+                    V1 = here->VSRCprevVal;
+                    V2 = here->VSRCnewVal;
+                    if (here->VSRCprevTime == ckt->CKTtime) {
+                        value = here->VSRCprevVal;
+                        goto noiDone;
+                    }
+
+                    if (time > 0 && time < TS) {
+                        value = V1 + (V2 - V1) * (time) / TS;
+                    }
+                    else if (time >= TS) {
+                     /* repeating signal - figure out where we are in period */
+                     /* numerical correction to avoid basetime less than 
+                     next step, e.g. 4.99999999999999995 delivers a floor
+                     of 4 instead of 5 */
+                        basetime = TS * floor(time*1.000000000001/TS);
+                        time -= basetime;
+
+#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
+#define NTIMETOL 1e-7
+
+                        if NSAMETIME(time,0.) {
+
+                        /* get new random number */
+#ifdef FastRand
+                            // use FastNorm3
+                            newval = scalef * FastNorm;
+#elif defined (WaGauss)
+                            // use WallaceHV
+                            newval = scalef * GaussWa;
+#else
+                            // make use of two random variables per call to rgauss()
+                            if (here->VSRCsecRand == 2.0) {  
+                                rgauss(&ra1, &ra2);
+                                newval = scalef * ra1;
+                                here->VSRCsecRand = scalef * ra2;
+                            }
+                            else {
+                                newval = here->VSRCsecRand;
+                                here->VSRCsecRand = 2.0;
+                            }
+#endif
+                            V1 = here->VSRCprevVal = here->VSRCnewVal;
+                            V2 = newval; // scale factor t.b.d.
+                            if(here->VSRCdcGiven) V2 += here->VSRCdcValue;
+                            if (aof) {                        
+                                V2 += here->VSRConeof[here->VSRCncount];
+#ifdef PRVAL
+                                printf("aof: %d\n", here->VSRCncount);
+#endif
+                            }
+                            here->VSRCncount++;
+                            value = V1;
+                            here->VSRCnewVal = V2;
+                        } else if (time > 0 && time < TS) {
+                            value = V1 + (V2 - V1) * (time) / TS;
+#ifdef PRVAL
+                            printf("if1, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime, 
+						        V1 + (V2 - V1) * (time) / TS, V2);
+#endif
+                        } else { /* time > TS, should never be reached */
+                            value = V1 + (V2 - V1) * (time-TS) / TS;
+#ifdef PRVAL
+                            printf("if2, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime, 
+						        V1 + (V2 - V1) * (time-TS) / TS, V2);
+#endif                  
+                        }
+                        here->VSRCprevTime = ckt->CKTtime;
+                    }
+noiDone:                   
+                    if (time >=ckt->CKTfinalTime) {
+                        /* free the 1of memory */
+                        if (here->VSRConeof) tfree(here->VSRConeof);
+                        /* reset the 1of counter */
+                        here->VSRCncount = 0;
+                    }
+                    goto loadDone;     
+                } // case
+                break; 				
+                } // switch
            }
 loadDone:
 /* gtri - begin - wbk - modify for supply ramping option */
@ -346,11 +551,12 @@ loadDone:
            value *= ckt->CKTsrcFact;
            value *= cm_analog_ramp_factor();
 #else
-if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
-          *(ckt->CKTrhs + (here->VSRCbranch)) += value;
+            if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
+            /* load the new voltage value into the matrix */
+			*(ckt->CKTrhs + (here->VSRCbranch)) += value;
 #endif
 /* gtri - end - wbk - modify to process srcFact, etc. for all sources */
-        }
-    }
+        } // for loop instances
+    } // for loop models
    return(OK);
 }
--- a/src/spicelib/devices/vsrc/vsrcpar.c
+++ b/src/spicelib/devices/vsrc/vsrcpar.c
@ -169,6 +169,13 @@ VSRCparam(int param, IFvalue *value, GENinstance *inst, IFvalue *select)
 		    return(E_BADPARM);
 	    }
 	    break;
+        case VSRC_TRNOISE:
+            here->VSRCfunctionType = TRNOISE;
+            here->VSRCfuncTGiven = TRUE;
+            here->VSRCcoeffs = value->v.vec.rVec;
+            here->VSRCfunctionOrder = value->v.numValue;
+            here->VSRCcoeffsGiven = TRUE;
+            break;		
        default:
            return(E_BADPARM);
    }
--- a/visualc/vngspice.vcproj
+++ b/visualc/vngspice.vcproj