transient noise simulation

This commit is contained in:
h_vogt 2010-11-27 16:36:03 +00:00
parent eaadf5d2bc
commit 5e1ed023c6
40 changed files with 15572 additions and 9055 deletions

View File

@ -1073,10 +1073,12 @@ AC_CONFIG_FILES([Makefile
src/frontend/help/Makefile
src/frontend/parser/Makefile
src/frontend/plotting/Makefile
src/frontend/trannoise/Makefile
src/frontend/wdisp/Makefile
src/include/Makefile
src/maths/Makefile
src/maths/cmaths/Makefile
src/maths/fft/Makefile
src/maths/misc/Makefile
src/maths/ni/Makefile
src/maths/deriv/Makefile

View File

@ -0,0 +1,59 @@
* 51 stage Ring-Osc. BSIM3, transient noise
* will need 45 min on a i7 860 with 4 threads
* closes the loop between inverters xinv1 and xinv5
vin in out dc 0.5 pulse 0.5 0 0.1n 5n 1 1 1
vdd dd 0 dc 0 pulse 0 2.2 0 1n 1 1 1
vss ss 0 dc 0
ve sub 0 dc 0
vpe well 0 2.2
* noisy inverters
xiinv2 dd ss sub well out25 out50 inv253
xiinv1 dd ss sub well in out25 inv253
*very noisy inverter
xiinv5 dd ss sub well out50 out inv1_2
*output amplifier
xiinv11 dd ss sub well out25 bufout inv1
cout bufout ss 0.2pF
.option itl1=500 gmin=1e-15 itl4=10 noacct
* .dc vdd 0 2 0.01
.tran 0.01n 500n
.save in bufout v(t1)
.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.nmos
.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.pmos
.include noilib-demo.h
.control
unset ngdebug
* first run
save bufout $ needed for restricting memory usage
rusage
tran 8p 10000n
rusage
plot bufout xlimit 90n 95n
linearize
fft bufout
* next run
reset
save bufout
alter @v.xiinv5.vn1[trnoise] = [ 0 0 0 0 ] $ no noise
tran 8p 10000n
rusage
plot bufout xlimit 90n 95n
linearize
fft bufout
plot mag(bufout) mag(sp2.bufout) xlimit 0 2G ylimit 1e-11 0.1 ylog
.endc
.end

View File

@ -0,0 +1,53 @@
* simple sample & hold, transient noise
* switch control
* PULSE(V1 V2 TD TR TF PW PER)
vgate1 ga1 0 dc 0 pulse (0 1 0 10n 10n 90n 200n)
Switch1 1 2 ga1 0 smodel1
* noisy input
* rms value white, time step, exponent < 2, rms value 1/f
vin 1 0 dc 0 trnoise 0.1m 0.2n 1 0.1m
*vin 1 0 dc 0 trnoise 0.1m 0.2n 0 0.1m
* output
c2 2 0 10p
* second S&H
vgate2 ga2 0 dc 0 pulse (0 1 140n 10n 10n 30n 200n)
*Buffer EXXXXXXX N+ N- NC+ NC- VALUE
e1 4 0 2 0 1
Switch2 4 3 ga2 0 smodel2
c3 3 0 10p
.option itl1=500 gmin=1e-15 itl4=10 acct
.model smodel1 sw vt=0.5 ron=100
.model smodel2 sw vt=0.5 ron=100
.tran 0.4n 100u
.control
unset ngdebug
set filetype=ascii
rusage
run
rusage all
write noi_test.out v(1)
plot v(2) v(3) xlimit 4u 5u
plot v(ga1) v(ga2) xlimit 4u 5u
linearize
*rms v(1)
fft v(3)
plot mag(v(3)) loglog xlimit 1e4 1e8 ylimit 1e-10 1e-4
setplot tran1
linearize
psd 101 v(3)
plot mag(v(3)) xlimit 0 3e7 ylimit 0 10u
.endc
.end

View File

@ -0,0 +1,56 @@
* standard inverter made noisy
*.subckt inv1 dd ss sub well in out
*vn1 out outi dc 0 noise 0.1 0.3n 1.0 0.1
*mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
*mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
*.ends inv1
* standard inverter
.subckt inv1 dd ss sub well in out
mn1 out in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
mp1 out in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
.ends inv1
* very noisy inverter (noise on vdd and well)
.subckt inv1_1 dd ss sub well in out
vn1 dd idd dc 0 trnoise 0.05 0.05n 1 0.05
vn2 well iwell dc 0 trnoise 0.05 0.05n 1 0.05
mn1 out in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
mp1 out in idd iwell p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
*Cout out 0 0.1p
.ends inv1_1
* another very noisy inverter
.subckt inv1_2 dd ss sub well in out
vn1 out outi dc 0 trnoise 0.05 8p 1.0 0.001
mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
*Cout out 0 0.1p
.ends inv1_2
* another very noisy inverter with current souces parallel to transistor
.subckt inv13 dd ss sub well in outi
in1 ss outi dc 0 noise 200u 0.05n 1.0 50u
mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
in2 dd outi dc 0 noise 200u 0.05n 1.0 50u
mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
*Cout out 0 0.1p
.ends inv13
.subckt inv53 dd ss sub well in out
xinv1 dd ss sub well in 1 inv1
xinv2 dd ss sub well 1 2 inv1
xinv3 dd ss sub well 2 3 inv1
xinv4 dd ss sub well 3 4 inv1
xinv5 dd ss sub well 4 out inv1
.ends inv53
.subckt inv253 dd ss sub well in out
xinv1 dd ss sub well in 1 inv53
xinv2 dd ss sub well 1 2 inv53
xinv3 dd ss sub well 2 3 inv53
xinv4 dd ss sub well 3 4 inv53
xinv5 dd ss sub well 4 out inv53
.ends inv253

View File

@ -0,0 +1,27 @@
* Shot noise test with B source, diode
* voltage on device (diode, forward)
Vdev out 0 DC 0 PULSE(0.4 0.45 10u)
* diode, forward direction, to be modeled with noise
D1 mess 0 DMOD
.model DMOD D IS=1e-14 N=1
X1 0 mess out ishot
* device between 1 and 2
* new output terminals of device including noise: 1 and 3
.subckt ishot 1 2 3
* white noise source with rms 1V
VNG 0 11 DC 0 TRNOISE(1 1n 0 0)
*measure the current i(v1)
V1 2 3 DC 0
* calculate the shot noise
* sqrt(2*current*q*bandwidth)
BI 1 3 I=sqrt(2*abs(i(v1))*1.6e-19*1e7)*v(11)
.ends ishot
* 20000 sample points
.tran 1n 20u
.control
run
plot (-1)*i(vdev)
meas tran vdev_rms avg i(vdev) from=0u to=9.9u
meas tran vdev_rms avg i(vdev) from=10.1u to=20u
.endc
.end

View File

@ -156,6 +156,7 @@ endif
ngspice_LDADD += \
frontend/parser/libparser.la \
frontend/numparam/libnumparam.la \
frontend/trannoise/libtrannoise.la \
spicelib/parser/libinp.la
if CIDER_WANTED
@ -170,6 +171,7 @@ ngspice_LDADD += \
maths/deriv/libderiv.la \
maths/cmaths/libcmaths.la \
maths/misc/libmathmisc.la \
maths/fft/libmathfft.la \
maths/poly/libpoly.la \
maths/ni/libni.la \
maths/sparse/libsparse.la \
@ -208,8 +210,10 @@ ngnutmeg_LDADD += \
frontend/plotting/libplotting.la \
frontend/parser/libparser.la \
frontend/numparam/libnumparam.la \
frontend/trannoise/libtrannoise.la \
maths/cmaths/libcmaths.la \
maths/misc/libmathmisc.la \
maths/fft/libmathfft.la \
maths/poly/libpoly.la \
misc/libmisc.la \
spicelib/parser/libinp.la
@ -384,6 +388,7 @@ libspice_la_LIBADD += \
maths/deriv/libderiv.la \
maths/cmaths/libcmaths.la \
maths/misc/libmathmisc.la \
maths/fft/libmathfft.la \
maths/poly/libpoly.la \
maths/ni/libni.la \
maths/sparse/libsparse.la \

View File

@ -1,8 +1,8 @@
## Process this file with automake to produce Makefile.in
## $Id$
SUBDIRS = plotting help parser wdisp numparam
DIST_SUBDIRS = plotting help parser wdisp numparam
SUBDIRS = plotting help parser wdisp numparam trannoise
DIST_SUBDIRS = plotting help parser wdisp numparam trannoise
EXTRA_DIST = testcommands.c parse-bison.y
## For Windows with Visual Studio
EXTRA_DIST += parse-bison.c parse-bison.h

View File

@ -16,10 +16,10 @@ Author: 2008 Holger Vogt
#include "variable.h"
#include "parse.h"
#include "../misc/misc_time.h"
#include "../maths/fft/fftext.h"
static void fftext(double*, double*, long int, long int, int);
void
com_fft(wordlist *wl)
{
@ -250,6 +250,302 @@ com_fft(wordlist *wl)
tfree(win);
}
void
com_psd(wordlist *wl)
{
ngcomplex_t **fdvec;
double **tdvec;
double *freq, *win, *time, *ave;
double delta_t, span, noipower;
int ngood, mm;
unsigned long fpts, i, j, tlen, jj, smooth, hsmooth;
char *s;
struct dvec *f, *vlist, *lv, *vec;
struct pnode *names, *first_name;
float *reald, *imagd;
int size, sign, isreal;
double scaling, sum;
int order;
double scale, sigma;
if (!plot_cur || !plot_cur->pl_scale) {
fprintf(cp_err, "Error: no vectors loaded.\n");
return;
}
if (!isreal(plot_cur->pl_scale) ||
((plot_cur->pl_scale)->v_type != SV_TIME)) {
fprintf(cp_err, "Error: fft needs real time scale\n");
return;
}
tlen = (plot_cur->pl_scale)->v_length;
time = (plot_cur->pl_scale)->v_realdata;
span = time[tlen-1] - time[0];
delta_t = span/(tlen - 1);
// get filter length from parameter input
s = wl->wl_word;
if (!(ave = ft_numparse(&s, FALSE)) || (*ave < 1.0)) {
fprintf(cp_out, "Number of averaged data points: %d\n", 1);
smooth = 1;
}
else smooth = (int)(*ave);
wl = wl->wl_next;
// size of input vector is power of two and larger than spice vector
size = 1;
mm = 0;
while (size < tlen) {
size <<= 1;
mm++;
}
// output vector has length of size/2
fpts = size>>1;
// window function
win = TMALLOC(double, tlen);
{
char window[BSIZE_SP];
double maxt = time[tlen-1];
if (!cp_getvar("specwindow", CP_STRING, window))
strcpy(window,"blackman");
if (eq(window, "none"))
for(i=0; i<tlen; i++) {
win[i] = 1;
}
else if (eq(window, "rectangular"))
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = 1;
}
}
else if (eq(window, "hanning") || eq(window, "cosine"))
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = 1 - cos(2*M_PI*(time[i]-maxt)/span);
}
}
else if (eq(window, "hamming"))
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = 1 - 0.92/1.08*cos(2*M_PI*(time[i]-maxt)/span);
}
}
else if (eq(window, "triangle") || eq(window, "bartlet"))
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = 2 - fabs(2+4*(time[i]-maxt)/span);
}
}
else if (eq(window, "blackman")) {
int order;
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
if (order < 2) order = 2; /* only order 2 supported here */
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = 1;
win[i] -= 0.50/0.42*cos(2*M_PI*(time[i]-maxt)/span);
win[i] += 0.08/0.42*cos(4*M_PI*(time[i]-maxt)/span);
}
}
} else if (eq(window, "gaussian")) {
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
if (order < 2) order = 2;
sigma=1.0/order;
scale=0.83/sigma;
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = scale*exp(-0.5*pow((time[i]-maxt/2)/(sigma*maxt/2),2));
}
}
/* int order;
double scale;
extern double erfc(double);
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
if (order < 2) order = 2;
scale = pow(2*M_PI/order,0.5)*(0.5-erfc(pow(order,0.5)));
for(i=0; i<tlen; i++) {
if (maxt-time[i] > span) {
win[i] = 0;
} else {
win[i] = exp(-0.5*order*(1-2*(maxt-time[i])/span)
*(1-2*(maxt-time[i])/span))/scale;
}
}
*/
} else {
fprintf(cp_err, "Warning: unknown window type %s\n", window);
tfree(win);
return;
}
}
names = ft_getpnames(wl, TRUE);
first_name = names;
vlist = NULL;
ngood = 0;
while (names) {
vec = ft_evaluate(names);
names = names->pn_next;
while (vec) {
if (vec->v_length != tlen) {
fprintf(cp_err, "Error: lengths of %s vectors don't match: %d, %d\n",
vec->v_name, vec->v_length, tlen);
vec = vec->v_link2;
continue;
}
if (!isreal(vec)) {
fprintf(cp_err, "Error: %s isn't real!\n",
vec->v_name);
vec = vec->v_link2;
continue;
}
if (vec->v_type == SV_TIME) {
vec = vec->v_link2;
continue;
}
if (!vlist)
vlist = vec;
else
lv->v_link2 = vec;
lv = vec;
vec = vec->v_link2;
ngood++;
}
}
free_pnode(first_name);
if (!ngood) {
return;
}
plot_cur = plot_alloc("spectrum");
plot_cur->pl_next = plot_list;
plot_list = plot_cur;
plot_cur->pl_title = copy((plot_cur->pl_next)->pl_title);
plot_cur->pl_name = copy("PSD");
plot_cur->pl_date = copy(datestring( ));
freq = (double *) tmalloc(fpts * sizeof(double));
f = alloc(struct dvec);
ZERO(f, struct dvec);
f->v_name = copy("frequency");
f->v_type = SV_FREQUENCY;
f->v_flags = (VF_REAL | VF_PERMANENT | VF_PRINT);
f->v_length = fpts;
f->v_realdata = freq;
vec_new(f);
for (i = 0; i<fpts; i++) freq[i] = i*1./span*tlen/size;
tdvec = TMALLOC(double*, ngood);
fdvec = TMALLOC(ngcomplex_t*, ngood);
for (i = 0, vec = vlist; i<ngood; i++) {
tdvec[i] = vec->v_realdata; /* real input data */
fdvec[i] = TMALLOC(ngcomplex_t, fpts); /* complex output data */
f = alloc(struct dvec);
ZERO(f, struct dvec);
f->v_name = vec_basename(vec);
f->v_type = SV_NOTYPE; //vec->v_type;
f->v_flags = (VF_COMPLEX | VF_PERMANENT);
f->v_length = fpts;
f->v_compdata = fdvec[i];
vec_new(f);
vec = vec->v_link2;
}
printf("PSD: Time span: %g s, input length: %d, zero padding: %d\n", span, size, size-tlen);
printf("PSD: Freq. resolution: %g Hz, output length: %d\n", 1.0/span*tlen/size, fpts);
sign = 1;
isreal = 1;
reald = TMALLOC(float, size);
imagd = TMALLOC(float, size);
// scale = 0.66;
for (i = 0; i<ngood; i++) {
for (j = 0; j < tlen; j++){
reald[j] = tdvec[i][j]*win[j];
imagd[j] = 0.;
}
for (j = tlen; j < size; j++){
reald[j] = 0.;
imagd[j] = 0.;
}
// Green's FFT
fftInit(mm);
rffts(reald, mm, 1);
fftFree();
scaling = size*0.3;
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
noipower = fdvec[i][0].cx_real = (double)reald[0]*(double)reald[0];
fdvec[i][fpts-1].cx_real = (double)reald[1]*(double)reald[1];
noipower += fdvec[i][fpts-1].cx_real;
for (j=1; j<(fpts - 1); j++){
jj = j<<1;
fdvec[i][j].cx_real = ((double)reald[jj]*(double)reald[jj] + (double)reald[jj + 1]*(double)reald[jj + 1]);
fdvec[i][j].cx_imag = 0;
noipower += fdvec[i][j].cx_real;
}
printf("Total noise power up to Nyquist frequency %5.3e Hz:\n%e V^2 (or A^2), \nnoise voltage or current %e V (or A)\n",
freq[fpts-1],noipower/span*tlen/size/scaling, sqrt(noipower/span*tlen/size/scaling));
/* for (j=0; j<fpts ; j++)
fdvec[i][j].cx_real = sqrt(fdvec[i][j].cx_real)/scaling;
*/
/* smoothing with rectangular window of width "smooth",
plotting V/sqrt(Hz) or I/sqrt(Hz) */
hsmooth = smooth>>1;
for (j=0; j<hsmooth; j++){
sum = 0.;
for (jj = 0; jj < hsmooth + j; jj++)
sum += fdvec[i][jj].cx_real;
sum /= (double)(hsmooth + j);
reald[j] = sqrt(sum)/scaling;
}
for (j=hsmooth; j<fpts-hsmooth; j++){
sum = 0.;
for (jj = 0; jj < smooth; jj++)
sum += fdvec[i][j-hsmooth+jj].cx_real;
sum /= (double)smooth;
reald[j] = sqrt(sum)/scaling;
}
for (j=fpts-hsmooth; j<fpts; j++){
sum = 0.;
for (jj = 0; jj < smooth; jj++)
sum += fdvec[i][j-hsmooth+jj].cx_real;
sum /= (double)(fpts - j + hsmooth - 1);
reald[j] = sqrt(sum)/scaling;
}
for (j=0; j<fpts; j++)
fdvec[i][j].cx_real = reald[j];
}
free(reald);
free(imagd);
tfree(tdvec);
tfree(fdvec);
}
static void fftext(double* x, double* y, long int n, long int nn, int dir)
{

View File

@ -7,5 +7,6 @@
#define FFT_H_INCLUDED
void com_fft(wordlist *wl);
void com_psd(wordlist *wl);
#endif

View File

@ -315,7 +315,11 @@ struct comm spcp_coms[] = {
{ "fft", com_fft, FALSE, TRUE,
{ 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
NULL,
"vector ... : Create a frequency domain plot with FFT." } ,
"vector ... : Create a frequency domain plot with FFT." } ,
{ "psd", com_psd, FALSE, TRUE,
{ 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
NULL,
"vector ... : Create a power spetral density plot with FFT." } ,
{ "fourier", com_fourier, FALSE, TRUE,
{ 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
NULL,
@ -713,7 +717,11 @@ struct comm nutcp_coms[] = {
{ "fft", com_fft, FALSE, TRUE,
{ 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
NULL,
"vector ... : Create a frequency domain plot with FFT." } ,
"vector ... : Create a frequency domain plot with FFT." } ,
{ "psd", com_psd, FALSE, TRUE,
{ 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
NULL,
"vector ... : Create a power spetral density plot with FFT." } ,
{ "fourier", com_fourier, FALSE, TRUE,
{ 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
NULL,

View File

@ -0,0 +1,62 @@
/* Copyright: Holger Vogt, 2008
Generates 1/f noise values according to:
"Discrete simulation of colored noise and stochastic
processes and 1/fa power law noise generation"
Kasdin, N.J.;
Proceedings of the IEEE
Volume 83, Issue 5, May 1995 Page(s):802 - 827
*/
#include <math.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h> // var. argumente
#include "1-f-code.h"
#include "ngspice.h"
#include "fftext.h"
#include "wallace.h"
void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
float alpha)
{
int i;
float *hfa, *wfa;
float ha;
ha = alpha/2.0f ;
// Q_d = sqrt(Q_d); /* find the deviation of the noise */
hfa = TMALLOC(float,n_pts);
wfa = TMALLOC(float,n_pts);
hfa[0] = 1.0f;
wfa[0] = Q_d * (float)GaussWa;
/* generate the coefficients hk */
for (i=1 ; i < n_pts; i++) {
/* generate the coefficients hk */
hfa[i] = hfa[i-1] * (ha + (float)(i-1)) / ( (float)(i) );
/* fill the sequence wk with white noise */
wfa[i] = Q_d * (float)GaussWa;
}
// for (i=0 ; i < n_pts; i++)
// printf("rnd %e, hk %e\n", wfa[i], hfa[i]);
/* perform the discrete Fourier transform */
fftInit(n_exp);
rffts(hfa, n_exp, 1);
rffts(wfa, n_exp, 1) ;
/* multiply the two complex vectors */
rspectprod(hfa, wfa, X, n_pts);
/* inverse transform */
riffts(X, n_exp, 1);
free(hfa) ;
free(wfa);
/* fft tables will be freed in vsrcaccept.c and isrcaccept.c
fftFree(); */
fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
}

View File

@ -0,0 +1,61 @@
/* Copyright: Holger Vogt, 2008
Discrete simulation of colored noise and stochastic
processes and 1/fa power law noise generation
Kasdin, N.J.;
Proceedings of the IEEE
Volume 83, Issue 5, May 1995 Page(s):802 - 827
*/
#include <math.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h> // var. argumente
#include "1-f-code.h"
#include "ngspice.h"
#include "fftext.h"
#include "wallace.h"
void f_alpha(int n_pts, int n_exp, double X[], double Q_d,
double alpha)
{
unsigned int i;
double *hfa, *wfa;
double ha;
ha = alpha/2.0f ;
// Q_d = sqrt(Q_d); /* find the deviation of the noise */
hfa = TMALLOC(double,n_pts);
wfa = TMALLOC(double,n_pts);
hfa[0] = 1.0f;
wfa[0] = Q_d * GaussWa;
/* generate the coefficients hk */
for (i=1 ; i < n_pts; i++) {
/* generate the coefficients hk */
hfa[i] = hfa[i-1] * (ha + (double)(i-1)) / ( (double)(i) );
/* fill the sequence wk with white noise */
wfa[i] = Q_d * GaussWa;
}
// for (i=0 ; i < n_pts; i++)
// printf("rnd %e, hk %e\n", wfa[i], hfa[i]);
/* perform the discrete Fourier transform */
fftInit(n_exp);
rffts(hfa, n_exp, 1);
rffts(wfa, n_exp, 1) ;
/* multiply the two complex vectors */
rspectprod(hfa, wfa, X, n_pts);
/* inverse transform */
riffts(X, n_exp, 1);
free(hfa) ;
free(wfa);
/* fft tables will be freed in vsrcaccept.c and isrcaccept.c
fftFree(); */
fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
}

View File

@ -0,0 +1,846 @@
/* This is file FastNorm3.c */
/* SUPERCEDES FastNorm.c, FastNorm2.c. Use with FastNorm3.h */
/* 24 June 2003 */
/* A package containing a very fast generator of pseudo-random
Unit NORMAL variates, and some fairly high-quality UNIFORM
generators. It also contains a straightforward implementation of
a ChiSquared and Gamma generator copied from Ahrens and Dieter.
*/
/* Version 3 with double transformations and controllable extension
to repeat the double transformations for higher quality at lower
speed.
Dated 17 May 20003.
Copyright Christopher Stewart Wallace.
*/
/*
%A C. S. Wallace
%T Fast Pseudo-Random Generators for Normal and Exponential Variates.
%J ACM Trans. Math. Software
%V 22
%N 1
%P 119-127
%M MAR
%D 1996
%O TR 94/197, May 1994, Dept. Computer Science, Monash University
%K CSW, CSWallace, Monash, pseudo random number generator, algorithm,
jrnl, TOMS, numbers, normal, probability, distribution, PRNG, RNG, Gaussian,
distribution, jrnl, ACM, TOMS, TR 94 197, TR197, c1996, c199x, c19xx
*/
/* Use of this package requires the file "FastNorm3.h" which must be
#include-ed in any C files using this package.
The main purpose of this package is to provide a very fast source
of pseudo-random variates from the Unit Normal N(0,1) distribution, having
the density function
f(x) = (1/sqrt(2*PI)) * exp (-0.5 * x^2)
Variates are obtained not by calling a function, but by use of a macro
"FastNorm" defined in FastNorm3.h. In a C program, this macro may appear
anywhere a (double) expression could appear, e.g in statements like
z += FastNorm;
if (FastNorm < 1.1) .....
q = fabs (FastNorm); etc.
The revision history, and a reference to the method description, is given
later in this file under the heading "Revision history Fastnorm".
Major sections of this file, such as the revision history and the
major subroutines, are all headed by a line containing a row of minus signs (-)
and the name of the section or subroutine.
The generators included are:
a Uniform source of integers, unsigned integers and doubles.
Chi-sq(N) (based on Ahrens and Dieter)
Gamma(N) (= 0.5 * Chi-sq(2N))
Normal (a very fast routine)
*/
/* ----------------- inclusions and some definitions ------------ */
#include <math.h>
#ifndef NOSPICE
#include "ngspice.h"
#endif
#include "FastNorm3.h"
/* --------------- (Uniform) c7rand, irandm, urandm ---------- */
/*
c A random number generator called as a function by
c c7rand (iseed) or irandm (iseed) or urandm (iseed)
c The parameter should be a pointer to a 2-element Sw vector.
c The first call gives a double uniform in 0 .. 1.
c The second gives an Sw integer uniform in 0 .. 2**31-1
c The third gives an Sw integer with 32 bits, so unif in
c -2**31 .. 2**31-1 if used in 32-bit signed arithmetic.
c All update iseed[] in exactly the same way.
c iseed[] must be a 2-element Sw vector.
c The initial value of iseed[1] may be any 32-bit integer.
c The initial value of iseed[0] may be any 32-bit integer except -1.
c
c The period of the random sequence is 2**32 * (2**32-1)
c Its quality is quite good. It is based on the mixed multiplicative
c congruential (Lehmer) generator
x[n+1] = (69069 * x[n] + odd constant) MOD 2^32
c but avoids most of the well-known defects of this type of generator
c by, in effect, generating x[n+k] from x[n] as defined by the
c sequence above, where k is chosen randomly in 1 ... 128 with the
c help of a subsidiary Tauseworth-type generator.
c For the positve integer generator irandm, the less
c significant digits are more random than is usual for a Lehmer
c generator. The last n<31 digits do not repeat with a period of 2^n.
c This is also true of the unsigned integer generator urandm, but less
c so.
c This is an implementation in C of the algorithm described in
c Technical Report "A Long-Period Pseudo-Random Generator"
c TR89/123, Computer Science, Monash University,
c Clayton, Vic 3168 AUSTRALIA
c by
c
c C.S.Wallace csw@cs.monash.edu.au
c The table mt[0:127] is defined by mt[i] = 69069 ** (128-i)
*/
#define MASK ((Sw) 0x12DD4922)
/* or in decimal, 316492066 */
#define SCALE ((double) 1.0 / (1024.0 * 1024.0 * 1024.0 * 2.0))
/* i.e. 2 to power -31 */
static Sw mt [128] = {
902906369,
2030498053,
-473499623,
1640834941,
723406961,
1993558325,
-257162999,
-1627724755,
913952737,
278845029,
1327502073,
-1261253155,
981676113,
-1785280363,
1700077033,
366908557,
-1514479167,
-682799163,
141955545,
-830150595,
317871153,
1542036469,
-946413879,
-1950779155,
985397153,
626515237,
530871481,
783087261,
-1512358895,
1031357269,
-2007710807,
-1652747955,
-1867214463,
928251525,
1243003801,
-2132510467,
1874683889,
-717013323,
218254473,
-1628774995,
-2064896159,
69678053,
281568889,
-2104168611,
-165128239,
1536495125,
-39650967,
546594317,
-725987007,
1392966981,
1044706649,
687331773,
-2051306575,
1544302965,
-758494647,
-1243934099,
-75073759,
293132965,
-1935153095,
118929437,
807830417,
-1416222507,
-1550074071,
-84903219,
1355292929,
-380482555,
-1818444007,
-204797315,
170442609,
-1636797387,
868931593,
-623503571,
1711722209,
381210981,
-161547783,
-272740131,
-1450066095,
2116588437,
1100682473,
358442893,
-1529216831,
2116152005,
-776333095,
1265240893,
-482278607,
1067190005,
333444553,
86502381,
753481377,
39000101,
1779014585,
219658653,
-920253679,
2029538901,
1207761577,
-1515772851,
-236195711,
442620293,
423166617,
-1763648515,
-398436623,
-1749358155,
-538598519,
-652439379,
430550625,
-1481396507,
2093206905,
-1934691747,
-962631983,
1454463253,
-1877118871,
-291917555,
-1711673279,
201201733,
-474645415,
-96764739,
-1587365199,
1945705589,
1303896393,
1744831853,
381957665,
2135332261,
-55996615,
-1190135011,
1790562961,
-1493191723,
475559465,
69069
};
double c7rand (Sw *is)
{
Sw it, leh;
it = is [0];
leh = is [1];
/* Do a 7-place right cyclic shift of it */
it = ((it >> 7) & 0x01FFFFFF) + ((it & 0x7F) << 25);
if (!(it & 0x80000000)) it = it ^ MASK;
leh = (leh * mt[it & 127] + it) & 0xFFFFFFFF;
is [0] = it; is [1] = leh;
if (leh & 0x80000000) leh = leh ^ 0xFFFFFFFF;
return (SCALE * leh);
}
Sw irandm (Sw *is)
{
Sw it, leh;
it = is [0];
leh = is [1];
/* Do a 7-place right cyclic shift of it */
it = ((it >> 7) & 0x01FFFFFF) + ((it & 0x7F) << 25);
if (!(it & 0x80000000)) it = it ^ MASK;
leh = (leh * mt[it & 127] + it) & 0xFFFFFFFF;
is [0] = it; is [1] = leh;
if (leh & 0x80000000) leh = leh ^ 0xFFFFFFFF;
return (leh);
}
unsigned int urandm (Sw *is)
{
Sw it, leh;
it = is [0];
leh = is [1];
/* Do a 7-place right cyclic shift of it */
it = ((it >> 7) & 0x01FFFFFF) + ((it & 0x7F) << 25);
if (!(it & 0x80000000)) it = it ^ MASK;
leh = (leh * mt[it & 127] + it) & 0xFFFFFFFF;
is [0] = it; is [1] = leh;
return (leh);
}
/* --------------- (Chi-squared) adchi ----------------------- */
/* Simple implementation of Ahrens and Dieter method for a chi-sq
random variate of order a >> 1. Uses c7rand, maths library */
/* 13 July 1998 */
/* Slightly faster if 'a' is the same as on previous call */
/* This routine is no longer used in the fastnorm code, but is included
because it may be useful */
static double gorder, gm, rt2gm, aold;
double adchi (double a, int *is)
{
double x, y, z, sq;
if (a != aold) {
aold = a; gorder = 0.5 * a;
gm = gorder - 1.0;
rt2gm = sqrt (aold - 1.0);
}
polar:
x = 2.0 * c7rand(is) - 1.0; z = c7rand(is);
sq = x*x + z*z;
if ((sq > 1.0) || (sq < 0.25)) goto polar;
y = x / z;
x = rt2gm * y + gm;
if (x < 0.0) goto polar;
z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
if (c7rand(is) > z) goto polar;
return (2.0 * x);
}
/* -------------------- (Gamma) rgamma (g, is) ----------- */
double rgamma (double g, int *is)
{
double x, y, z, sq;
if (g != gorder) {
gorder = g;
gm = gorder - 1.0; aold = 2.0 * gorder;
rt2gm = sqrt (aold - 1.0);
}
polar:
x = 2.0 * c7rand(is) - 1.0; z = c7rand(is);
sq = x*x + z*z;
if ((sq > 1.0) || (sq < 0.25)) goto polar;
y = x / z;
x = rt2gm * y + gm;
if (x < 0.0) goto polar;
z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
if (c7rand(is) > z) goto polar;
return (x);
}
/* ------------------ Revision history Fastnorm ------------- */
/* Items in this revision history appear in chronological order,
so the most recent revsion appears last.
Revision items are separated by a line of '+' characters.
++++++++++++++++++++++++++++++++++++++++++++++++++++++
This is a revised version of the algorithm decribed in
ACM Transactions on Mathematical Software, Vol 22, No 1
March 1996, pp 119-127.
A fast generator of pseudo-random variates from the unit Normal
distribution. It keeps a pool of about 1000 variates, and generates new
ones by picking 4 from the pool, rotating the 4-vector with these as its
components, and replacing the old variates with the components of the
rotated vector.
The program should initialize the generator by calling initnorm(seed)
with seed a Sw integer seed value. Different seed values will give
different sequences of Normals. Seed may be any 32-bit integer.
BUT SEE REVISION of 17 May 2003 for initnorm() parameters.
The revised initnorm requires two integer parameters, iseed and
quoll, the latter specifying a tradeoff between speed and
quality.
Then, wherever the program needs a new Normal variate, it should
use the macro FastNorm, e.g. in statements like:
x = FastNorm; (Sets x to a random Normal value)
or
x += a + FastNorm * b; (Adds a normal with mean a, SD b, to x)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Changed basic formula, which was:
t = (p+q+r+s)*0.5; p = p-t; q = t-q; r = t-r; s = t-s;
This gives sum of new p+q+r+s = 2p(old) which may not be a great
choice. The new version is:
t = (p+q+r+s)*0.5; p = p-t; q = q-t; r = t-r; s = t-s;
which gives new p+q+r+s = p+q-r-s (old) which may be better.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Revision 14 November 1998
The older version "FastNorm" which was available via ftp was found
to have a defect which could affect some applications.
Dr Christine Rueb, (Max Planck Institut fur Infomatik,
Im Stadtwald W 66123 Saabrucken, F.G.R.,
(rueb@mpi-sb.mpg.de)
found that if a large number N of consecutive variates were summed to give
a variate S with nominally N(0,N) distribution, the variance of S was in some
cases too small. The effect was noticed with N=400, and was particularly strong
for N=1023 if the first several (about 128) variates from FastNorm were
discarded. Dr. Rueb traced the effect to an unexpected tendency of FastNorm
to concentrate values with an anomolous correlation into the first 128
elements of the variate pool.
With the help of her analysis, the algorithm has been revised in a
way which appears to overcome the problem, at the cost of about a 19%
reduction in speed (which still leaves the method very fast.)
IT MUST BE RECOGNISED THAT THIS ALGORITHM IS NOVEL
AND WHILE IT PASSES A NUMBER OF STANDARD TESTS FOR DISTRIBUTIONAL
FORM, LACK OF SERIAL CORRELATION ETC., IT MAY STILL HAVE DEFECTS.
RECALL THE NUMBER OF YEARS WHICH IT TOOK FOR THE LIMITATIONS OF
THE LEHMER GENERATOR FOR UNIFORM VARIATES TO BECOME APPARENT !!!
UNTIL MORE EXPERIENCE IS GAINED WITH THIS TYPE OF GENERATOR, IT
WOULD BE WISE IN ANY CRITICAL APPLICATION TO COMPARE RESULTS
OBTAINED USING IT WITH RESULTS OBTAINED USING A "STANDARD" FORM
OF GENERATOR OF NORMAL VARIATES COUPLED WITH A WELL-DOCUMENTED
GENERATOR OF UNIFORM VARIATES.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Revision 1 April 2003.
Trying a scanning process proposed by R.P.Brent. It needs 2 pool
vectors, as it cannot update in-situ, but may be more robust.
It is a bit slower on a 133 Mhz PC but just as fast on a newer PC
(moggie) at about 16 ns per call in the 'speed.c' test.
The extreme-value defect is the same on old and new versions.
If one finds a value 'A' such that a batch of B genuine Normal variates has
probability 0.2 of containing a variate with abolute value greater than A,
then the probability that both of two consecive batches of B will contain
such a value should be 0.2 times 0.2, or 0.04. Instead, both versions give
the extreme value prob. as 0.200 (over a million batches) but give the
consective-pair prob as 0.050 for batch size B = 1024.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Revision 17 May 2003.
The fundamental defect of the method, namely an inadequate 'mixing'
of squared value ('energy') between one generation of the pool and the next,
cannot readily be removed. In going from one pool to the next, the energy
in an old variate is shared among just 4 variates in the new pool. Hence it
takes many generations before the energy of some original variate can be
distributed across the whole pool. The number of generations needed cannot
be less than the log to base 4 of the pool size, or 5 for a pool size of
1024. In fact, the pseudo-random indexing of the pool means that rather
more generations are needed on average.
The defect is readily revealed by the following test. One picks a
"batch size" comparable to the pool size, say 500 or 1000. One then
computes a value A such that a batch will with probability 0.2 contain one
or more variates with absolute value exceeding A.
One then draws batches from FastNorm,
and tests each batch to see if it contains such an extreme value.
Over many batches, one counts the frequency of such 'extreme' batches,
and finds (with FastNorm2) that it is indeed about 0.2. However, when one counts
the frequency with which succesive batches are both extreme, one finds it to
be higher than the proper value (0.2)^2 = 0.04. For batch sizes round the pool
size, it can be as high as 0.05. That is, although the frequncy of extreme
values is about right, their occurrence in the stream is correlated over a
scale of the order of the pool size.
The same correlation effect is seen in the average 4th moment of
successive batches.
Since this inter-generational correlation cannot be avoided, the
this revision seeks to reduce it by performing at least two simple
rotations of the pool at each generation. Obviously, some speed is lost,
but the correlations are reduced.
To allow the user to trade off speed and quality, the initialization
function initnorm() now provides a QUALITY parameter 'quoll' which controls
how many double-rotations are done for each generation.
See the comments in initnorm() for more detail.
++++++++++ End of revision notes +++++++++ */
/* ----------------- Some test results ------------------------ */
/*
General form:
Some simple tests were conducted by transforming FastNorm variates
in several ways to yield a variable nominally uniformly distributed in 0 ... 1.
Uniformity of the derived variate was then tested by a ChiSquared test on a
100-cell histogram with cell counts around 10000. These tests are crude, but
showed no untoward results on the present version.
Transformations included:
y = 0.5 * (1.0 + erf (n1 / sqrt(2))
y = 0.5 * (n1 / (n1^2 + n2^2 + n3^2) - 1)
y = exp (-0.5 * (n1^2 + n2^2))
y = (n1^2 + n2^2) / (n1^2 + n2^2 + n3^2 + n4^2)
where n1, n2 etc are successive Normal variates.
It may be noted that some of these are sensitive to serial correlation if
present.
Fourth moment of batches:
Extensive tests for correlation among the fourth moments of successive
batches of variates were made, with batch sizes comparabe to or (worst case)
equal to the size of the variate pool (4096 in this revision).
With 'quality' 1, significant correlation appears after 10^6 batches
of worst-case size.
With quality 2, no significant correlation is evident after 10^7
batches. A just-significant correlation appears after 3.6*10^7 batches.
As this requires some 1.4*10^11 deviates to be drawn, it may be irrelevent
for many applications. The observed correlation coefficent was 0.0008.
With quality 3, results are OK after 10^8 batches, or more than
4*10^11 variates.
No tests have been done with quality 4 as yet.
Speed:
Speed tests were done on a PC running RedHat Linux, using "-O"
compiler optimization. The test loop was
for (i = 0; i < 500000000; i++) {
a += FastNorm; a -= FastNorm;
}
Thus the test makes 10^9 uses of FastNorm. The time taken, (which
includes time for a call in 'initnorm' and the loop overhead) depends on
the 'quality' set by initnorm.
Quality 1: 21.5 sec
Quality 2: 32.1 sec
Quality 3: 42.5 sec
Quality 4: 53.1 sec
By way of comparison, the same 10^9 call loop was timed with the Unix library
"random()" routine substituted for FastNorm, and the variable 'a' defined as
integer rather than double. Also, since most use of a Uniform generator such
as "random()" requires that the returned integer be scaled into a floating-
point number in 0 ... 1, the timing was repeated with
"a += random" ('a' integer) replaced by "a += Scale*random()" where
'a' is double and Scale = 2^(-31). The times obtained were:
Random (integer): 44.1 sec
Random (double) : 47.7 sec
It can be seen that FastNorm (even at quality 3) is faster than a
commonly-used Uniform generator. To some extent, this result may vary on
different computers and compilers. Since FastNorm (at least for qualities
above 1) no doubt does more arithmetic per variate than "random()", much of
its speed advantage must come from its replacement of a function call per
variate by a macro which makes only one function call every 4095 variates.
Computers with lower 'call' overheads than the PC used here might show
differnt results.
Incidently, the Uniform generator 'c7rand()' included in this
package, which returns a double uniform in 0 ... 1, and is of fairly high
quality, gives a time in the same test of 36.8 sec, a little faster than
'random()'.
*/
/* ----------------- globals ------------------------- */
/* A pool must have a length which is a multiple of 4.
* During regeneration of a new pool, the pool is treated as 4
* consecutive vectors, each of length VL.
*/
#define VE 10
#define VL (1 << VE)
#define VM (VL-1)
#define WL (4*VL)
#define WM (WL-1)
Sw gaussfaze;
Sf *gausssave;
Sf GScale;
/* GScale,fastnorm,gaussfaze, -save must be visible to callers*/
static Sf chic1, chic2; /* Constants used in getting ChiSq_WL */
Sw gslew; /* Counts generations */
static Sw qual; /* Sets number of double transforms per generation. */
static Sw c7g [2]; /* seed values for c7rand */
Sf wk1 [WL], wk2 [WL]; /* Pools of variates. */
/* ------------------ regen ---------------------- */
/* Takes variates from wk1[], transforms to wk[2], then back to wk1[].
*/
void regen ()
{
Sw i, j, k, m;
Sf p, q, r, s, t;
Sw topv[6], ord[4], *top;
Sf *ppt[4], *ptn;
/* Choose 4 random start points in the wk1[] vector
I want them all different. */
top = topv + 1;
/* Set limiting values in top[-1], top[4] */
top[-1] = VL; top[4] = 0;
reran1:
m = irandm (c7g); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
m = irandm (c7g); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
for (i = 0; i < 4; i++) ord[i] = i;
/* Sort in decreasing size */
for (i = 2; i >= 0; i--) {
for (j = 0; j <= i; j++) {
if (top[j] < top[j+1]) {
k = top[j]; top[j] = top[j+1];
top[j+1] = k;
k = ord[j]; ord[j] = ord[j+1];
ord[j+1] = k;
}
}
}
/* Ensure all different */
for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
/* Set pt pointers to their start values for the first chunk. */
for (i = 0; i < 4; i++) {
j = ord[i];
ppt[j] = wk2 + j * VL + top[i];
}
/* Set ptn to point into wk1 */
ptn = wk1;
/* Now ready to do five chunks. The length of chunk i is
top[i-1] - top[i] (I hope)
At the end of chunk i, pointer ord[i] should have reached the end
of its part, and need to be wrapped down to the start of its part.
*/
i = 0;
chunk:
j = top[i] - top[i-1]; /* Minus the chunk length */
for (; j < 0; j++) {
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = t - q;
*ppt[2]++ = r - t;
*ppt[3]++ = s - t;
}
/* This should end the chunk. See if all done */
if (i == 4) goto passdone;
/* The pointer for part ord[i] should have passed its end */
j = ord[i];
#ifdef dddd
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-wk2);
#endif
ppt[j] -= VL;
i++;
goto chunk;
passdone:
/* wk1[] values have been transformed and placed in wk2[]
Transform from wk2 to wk1 with a simple shuffle */
m = (irandm (c7g) >> (29 - VE)) & WM;
j = 0;
for (i = 0; i < 4; i++) ppt[i] = wk1 + i * VL;
for (i = 0; i < VL; i++) {
p = wk2[j^m]; j++;
s = wk2[j^m]; j++;
q = wk2[j^m]; j++;
r = wk2[j^m]; j++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = q - t;
*ppt[2]++ = t - r;
*ppt[3]++ = s - t;
}
/* We have a new lot of variates in wk1 */
return;
}
/* ------------------- renormalize --------------------------- */
/* Rescales wk1[] so sum of squares = WL */
/* Returns the original sum-of-squares */
Sf renormalize (void)
{
Sf ts, vv;
Sw i;
ts = 0.0;
for (i = 0; i < WL; i++) {
ts += wk1[i] * wk1[i];
}
vv = sqrt (WL / ts);
for (i = 0; i < WL; i++) wk1[i] *= vv;
return (ts);
}
/* ------------------------ BoxMuller ---------------------- */
/* Fills block gvec of length ll with proper normals */
void boxmuller (Sf *gvec, Sw ll)
{
Sw i;
Sf tx, ty, tr, tz;
/* Here, replace the whole pool with conventional Normal variates */
i = 0;
nextpair:
tx = 2.0 * c7rand(c7g) - 1.0; /* Uniform in -1..1 */
ty = 2.0 * c7rand(c7g) - 1.0; /* Uniform in -1..1 */
tr = tx * tx + ty * ty;
if ((tr > 1.0) || (tr < 0.25)) goto nextpair;
tz = -2.0 * log (c7rand(c7g)); /* Sum of squares */
tz = sqrt ( tz / tr );
gvec [i++] = tx * tz; gvec [i++] = ty * tz;
if (i < ll) goto nextpair;
/* Horrid, but good enough */
return;
}
/* ------------------------- initnorm ---------------------- */
/* To initialize, given a seed integer and a quality level.
The seed can be any integer. The quality level quoll should be
between 1 and 4. Quoll = 1 gives high speed, but leaves some
correlation between the 4th moments of successive batches of values.
Higher values of quoll give lower speed but less correlation.
If called with quoll = 0, initnorm performs a check that the
most crucial routine (regen) is performing correctly. In this
case, the value of 'iseed' is ignored. Initnorm will report the
results of the test, which compares pool values with check17 and
check98, which are defined below.
When a check call is made, a proper call on initnorm must then
be made before using the FastNorm macro. A check call does not
properly initialize the routines even if it succeeds.
*/
static Sf check17 = 0.1255789;
static Sf check98 = -0.7113293;
void initnorm (Sw seed, Sw quoll)
{
Sw i;
/* At one stage, we need to generate a random variable Z such that
(WL * Z*Z) has a Chi-squared-WL density. Now, a var with
an approximate Chi-sq-K distn can be got as
(A + B*n)**2 where n has unit Normal distn,
A**2 = K * sqrt (1 - 1/K), A**2 + B**2 = K. (For large K)
So we form Z as (1/sqrt(WL)) * (A + B*n)
or chic1 + chic2 * n where
chic1 = A / sqrt(WL), chic2 = B / sqrt(WL).
Hence
chic1 = sqrt (A*A / WL) = sqrt ( sqrt (1 - 1/WL)),
chic2 = sqrt (1 - chic1*chic1)
*/
chic1 = sqrt ( sqrt (1.0 - 1.0 / WL));
chic2 = sqrt (1.0 - chic1 * chic1);
/* Set regen counter "gslew" which will affect renormalizations.
Since pools are OK already, we wont't need to renorm for a
while */
gslew = 1;
/* Finally, set "gaussfaze" to return all of wk1
* except the last entry at WL-1 */
gaussfaze = WL-1;
gausssave = wk1;
/* If quoll = 0, do a check on installation */
if (quoll == 0) goto docheck;
qual = quoll;
/* Check sensible values for quoll, say 1 to 4 */
if ((quoll < 0) || (quoll > 4)) {
printf ("From initnorm(): quoll parameter %d out of\
range 1 to 4\n", quoll);
return;
}
c7g[0] = seed; c7g[1] = -3337792;
/* Fill wk1[] with good normals */
boxmuller (wk1, WL);
/* Scale so sum-of-squares = WL */
GScale = sqrt (renormalize () / WL);
/* We have set
GScale to restore the original ChiSq_WL sum-of-squares */
return;
docheck:
/* Set a simple pattern in wk1[] and test results of regen */
for (i = 0; i < WL; i++) wk1[i] = wk2[i] = 0.0;
wk1[0] = sqrt ((double) WL);
c7g[0] = 1234567; c7g[1] = 9876543;
for (i = 0; i < 60; i++) regen();
/* Check a couple of values */
if ((fabs (wk1[17] - check17) > 0.00001) ||
(fabs (wk1[98] - check98) > 0.00001)) {
printf ("\nInitnorm check failed.\n");
printf ("Expected %8.5f got %10.7f\n", check17, wk1[17]);
printf ("Expected %8.5f got %10.7f\n", check98, wk1[98]);
}
else printf ("\nInitnorm check OK\n");
return;
}
/* ---------------------- fastnorm -------------------------- */
/* If gslew shows time is ripe, renormalizes the pool
fastnorm() returns the value GScale*gausssave[0].
*/
Sf fastnorm ()
{
Sf sos;
Sw n1;
if (! (gslew & 0xFFFF)) {
sos = renormalize ();
}
/* The last entry of gausssave, at WL-1, will not have been used.
Use it to get an approx. to sqrt (ChiSq_WL / WL).
See initnorm() code for details */
GScale = chic1 + chic2 * GScale * gausssave [WL-1];
for (n1 = 0; n1 < qual; n1++) regen ();
gslew++;
gaussfaze = WL - 1;
return (GScale * gausssave [0]);
}
/* --------------------- (test) main ------------------------- */
#ifdef Main
#include "FastNorm3.h"
int main()
{
Sf x; Sw i;
initnorm (0, 0);
initnorm (77, 2);
printf ("SoS %20.6f\n", renormalize());
// for (i = 0; i < 2000000; i++) x = FastNorm;
for (i = 0; i < 200; i++) {
x = FastNorm;
printf("%d\t%f\n", i, x);
}
printf ("SoS %20.6f\n", renormalize());
exit (1);
}
#endif

View File

@ -0,0 +1,10 @@
noinst_LTLIBRARIES = libtrannoise.la
libtrannoise_la_SOURCES = \
FastNorm3.c \
1-f-code.c \
wallace.c
AM_CPPFLAGS = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/frontend
MAINTAINERCLEANFILES = Makefile.in

View File

@ -0,0 +1,532 @@
/* Wallace generator for normally distributed random variates
Copyright: Holger Vogt, 2008
*/
//#define FASTNORM_ORIG
#include <stdio.h>
#include <stdlib.h>
#ifdef _MSC_VER
#include <process.h>
#define getpid _getpid
#else
#include <unistd.h>
#endif
#include <math.h>
#include "wallace.h"
#include "FastNorm3.h"
#ifdef HasMain
#include <sys/timeb.h>
#else
#ifndef NOSPICE
#include "ngspice.h"
#endif
#endif
#define POOLSIZE 4096
#define LPOOLSIZE 12
#define NOTRANS 3 /* number of (dual) transformations */
#define VE 10
#define VL (1 << VE)
#define VM (VL-1)
#define WL (4*VL)
#define WM (WL-1)
double *outgauss; /* output vector for user access */
unsigned int variate_used; /* actual index of variate called by user */
double ScaleGauss;
static double *pool1;
static double *pool2;
static unsigned int *addrif, *addrib;
static unsigned n = POOLSIZE;
static double chi1, chi2; /* chi^2 correction values */
static unsigned int newpools;
extern double drand(void);
extern unsigned int CombLCGTausInt(void);
extern void TausSeed(void);
extern unsigned int CombLCGTausInt2(void);
void PolarGauss(double* py1, double* py2)
{
double x1, x2, w;
do {
x1 = drand();
x2 = drand();
w = x1 * x1 + x2 * x2;
} while (( w > 1.0 ) || ( w < 0.25));
w = sqrt( (-2.0 * log( w ) ) / w );
*py1 = (double)(x1 * w);
*py2 = (double)(x2 * w);
}
void initw(void)
{
unsigned i;
double totsqr, nomsqr;
unsigned long int coa, cob, s;
/* initialize the uniform generator */
srand(getpid());
// srand(17);
TausSeed();
ScaleGauss = 1.;
newpools = 1;
/* set up the two pools */
pool1 = TMALLOC(double, n); //(double*)malloc(n * sizeof(double));
pool2 = TMALLOC(double, n); //(double*)malloc(n * sizeof(double));
addrif = TMALLOC(unsigned int, (n + NOTRANS)); //(unsigned int*)malloc((n + NOTRANS) * sizeof(unsigned int));
addrib = TMALLOC(unsigned int, (n + NOTRANS)); //(unsigned int*)malloc((n + NOTRANS) * sizeof(unsigned int));
/* fill the first pool with normally distributed values */
PolarGauss(&pool1[0], &pool1[1]);
for (i = 1; i < n>>1; i++) {
PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]);
}
/* normalize pool content */
/* totsqr = totsum = 0.0;
for (i = 0; i < n; i++) {
totsqr += pool1[i] * pool1[i];
totsum += pool1[i];
}
totsum = totsum/n;
for (i = 0; i < n; i++) {
totsqr += (pool1[i] - totsum) * (pool1[i] - totsum);
}
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] = (pool1[i] - totsum) * nomsqr;
*/
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
/* calculate ch^2 value */
chi1 = sqrt ( sqrt (1.0 - 1.0/n));
chi2 = sqrt ( 1.0 - chi1*chi1);
/* first scaling, based on unused pool1[n-2] */
ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2];
/* access to first pool */
outgauss = pool1;
/* set data counter, we return n-2 values here */
variate_used = n - 2;
/* generate random reading addresses using a LCG */
s = 0;
coa = 241;
cob = 59;
for (i=0; i < (n + NOTRANS); i++) {
// addrif[i] = s = (s * coa + cob) % ( n );
coa = CombLCGTausInt();
addrif[i] = coa >> (32 - LPOOLSIZE);
// printf ("Random add:\t%ld\n" , s);
}
s = 0;
coa = 193;
cob = 15;
for (i=0; i < (n + NOTRANS); i++) {
// addrib[i] = s = (s * coa + cob) % ( n );
coa = CombLCGTausInt();
addrib[i] = coa >> (32 - LPOOLSIZE);
// printf ("Random add:\t%ld\n" , addrib[i]);
}
// printf("norm for orig. Gauss: %e, chi^2 scale: %e\n", nomsqr, ScaleGauss);
// NewWa();
}
/* original FastNorm3.c code */
#ifdef FASTNORM_ORIG
float NewWa ()
{
int i, j, k, m;
float p, q, r, s, t;
int topv[6], ord[4], *top;
float *ppt[4], *ptn;
float nulval, endval;
float totsqr, nomsqr;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
/* Choose 4 random start points in the wk1[] vector
I want them all different. */
top = topv + 1;
/* Set limiting values in top[-1], top[4] */
top[-1] = VL; top[4] = 0;
reran1:
m = CombLCGTausInt(); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
m = CombLCGTausInt(); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
for (i = 0; i < 4; i++) ord[i] = i;
/* Sort in decreasing size */
for (i = 2; i >= 0; i--) {
for (j = 0; j <= i; j++) {
if (top[j] < top[j+1]) {
k = top[j]; top[j] = top[j+1];
top[j+1] = k;
k = ord[j]; ord[j] = ord[j+1];
ord[j+1] = k;
}
}
}
/* Ensure all different */
for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
/* Set pt pointers to their start values for the first chunk. */
for (i = 0; i < 4; i++) {
j = ord[i];
ppt[j] = pool2 + j * VL + top[i];
}
/* Set ptn to point into wk1 */
ptn = pool1;
/* Now ready to do five chunks. The length of chunk i is
top[i-1] - top[i] (I hope)
At the end of chunk i, pointer ord[i] should have reached the end
of its part, and need to be wrapped down to the start of its part.
*/
i = 0;
chunk:
j = top[i] - top[i-1]; /* Minus the chunk length */
for (; j < 0; j++) {
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = t - q;
*ppt[2]++ = r - t;
*ppt[3]++ = s - t;
}
/* This should end the chunk. See if all done */
if (i == 4) goto passdone;
/* The pointer for part ord[i] should have passed its end */
j = ord[i];
#ifdef dddd
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2);
#endif
ppt[j] -= VL;
i++;
goto chunk;
passdone:
/* wk1[] values have been transformed and placed in wk2[]
Transform from wk2 to wk1 with a simple shuffle */
m = (CombLCGTausInt2() >> (29 - VE)) & WM;
j = 0;
for (i = 0; i < 4; i++) ppt[i] = pool1 + i * VL;
for (i = 0; i < VL; i++) {
p = pool2[j^m]; j++;
s = pool2[j^m]; j++;
q = pool2[j^m]; j++;
r = pool2[j^m]; j++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = q - t;
*ppt[2]++ = t - r;
*ppt[3]++ = s - t;
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from last pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
}
#else
/* Simplified code according to an algorithm published by C. S. Wallace:
"Fast Pseudorandom Generators for Normal and Exponential Variates",
ACM Transactions on Mathmatical Software, Vol. 22, No. 1, March 1996, pp. 119-127.
Transform pool1 to pool2 and back to pool1 NOTRANS times
by orthogonal 4 x 4 Hadamard-Matrix.
Mixing of values is very important: Any value in the pool should contribute to
every value in the new pools, at least after several passes (number of passes
is set by NOTRANS to 2 or 3).
4 values are read in a continuous sequence from the total of POOLSIZE values.
Values are stored in steps modulo POOLSIZE/4.
During backward transformation the values are shuffled by a random number jj.
*/
double NewWa(void)
{
double nulval, endval;
double bl1, bl2, bl3, bl4; /* the four values to be transformed */
double bsum;
double totsqr, nomsqr;
unsigned int i, j, jj, m, mm, mmm;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
m = n >> 2;
// printf("New pool after next value\n");
/* generate new pool by transformation
Transformation is repeated NOTRANS times */
for (i=0; i < NOTRANS; i++) {
mm = m << 1;
mmm = mm + m;
/* forward transformation */
// for (j=0; j < n; j += 4) {
for (j=0; j < m; j++) {
bl1 = pool1[j];
bl2 = pool1[j+m];
bl3 = pool1[j+mm];
bl4 = pool1[j+mmm];
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
jj = j<<2;
pool2[jj] = bl1 - bsum;
pool2[jj+1] = bl2 - bsum;
pool2[jj+2] = bsum - bl3;
pool2[jj+3] = bsum - bl4;
}
/* backward transformation */
jj = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1);
for (j=0; j < m; j++) {
bl1 = pool2[j^jj];
bl2 = pool2[(j+m)^jj];
bl3 = pool2[(j+mm)^jj];
bl4 = pool2[(j+mmm)^jj];
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
jj = j<<2;
pool1[jj] = bl1 - bsum;
pool1[jj+1] = bl2 - bsum;
pool1[jj+2] = bsum - bl3;
pool1[jj+3] = bsum - bl4;
}
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from previous pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
// return pool1[0]; /* use new scale */
}
#endif
#ifdef FASTNORMTEST
float NewWa_not(void)
{
float nulval, endval;
float bl1, bl2, bl3, bl4; /* the four values to be transformed */
float bsum;
float totsqr, nomsqr;
unsigned int i, j, jj;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
// printf("New pool after next value\n");
/* generate new pool by transformation
Transformation is repeated NOTRANS times */
for (i=0; i < NOTRANS; i++) {
/* forward transformation */
for (j=0; j < n; j += 4) {
jj = j + i;
bl1 = pool1[addrif[jj]];
bl2 = pool1[addrif[jj+1]];
bl3 = pool1[addrif[jj+2]];
bl4 = pool1[addrif[jj+3]];
/* s = (s*coa + cob) & (n - 1);
bl1 = pool1[s];
s = (s*coa + cob) & (n - 1);
bl2 = pool1[s + 1];
s = (s*coa + cob) & (n - 1);
bl3 = pool1[s + 2];
s = (s*coa + cob) & (n - 1);
bl4 = pool1[s + 3]; */
/* jj = j + i;
bl1 = pool1[addrif[jj]];
bl2 = pool1[addrif[jj+1]];
bl3 = pool1[addrif[jj+2]];
bl4 = pool1[addrif[jj+3]]; */
/* bl1 = pool1[j];
bl2 = pool1[j+1];
bl3 = pool1[j+2];
bl4 = pool1[j+3]; */
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
/* pool2[j] = bl1 - bsum;
pool2[j+1] = bl2 - bsum;
pool2[j+2] = bsum - bl3;
pool2[j+3] = bsum - bl4; */
pool2[addrib[jj]] = bl1 - bsum;
pool2[addrib[jj+1]] = bl2 - bsum;
pool2[addrib[jj+2]] = bsum - bl3;
pool2[addrib[jj+3]] = bsum - bl4;
}
/* backward transformation */
for (j=0; j < n; j += 4) {
bl1 = pool2[j];
bl2 = pool2[j+1];
bl3 = pool2[j+2];
bl4 = pool2[j+3];
/* bl1 = pool2[addrib[j]];
bl2 = pool2[addrib[j+1]];
bl3 = pool2[addrib[j+2]];
bl4 = pool2[addrib[j+3]]; */
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
pool1[j] = bl1 - bsum;
pool1[j+1] = bl2 - bsum;
pool1[j+2] = bsum - bl3;
pool1[j+3] = bsum - bl4;
}
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from last pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
// return pool1[0]; /* use new scale */
}
#endif
/* --------------------- (test) main ------------------------- */
/* gcc -Wall -g -DHasMain -I../../include wallace.c CombTaus.o -o watest.exe */
#ifdef HasMain
#include "wallace.h"
struct timeb timenow;
struct timeb timebegin;
int sec, msec;
void timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec)
{
*msec = now->millitm - begin->millitm;
*sec = now->time - begin->time;
if (*msec < 0) {
*msec += 1000;
(*sec)--;
}
return;
}
int main()
{
float x;
unsigned int i;
long int count;
initw();
ftime(&timebegin);
count = 100000000;
for (i = 0; i < count; i++) {
x = GaussWa;
// printf("%d\t%f\n", i, x);
}
ftime(&timenow);
timediff(&timenow, &timebegin, &sec, &msec);
printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
initnorm(0, 0);
initnorm(77, 3);
ftime(&timebegin);
count = 100000000;
for (i = 0; i < count; i++) {
x = FastNorm;
// printf("%d\t%f\n", i, x);
}
ftime(&timenow);
timediff(&timenow, &timebegin, &sec, &msec);
printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
return (1);
}
#endif

7
src/include/1-f-code.h Normal file
View File

@ -0,0 +1,7 @@
void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
float alpha);
void rvfft(float X[], unsigned long int n);

32
src/include/FastNorm3.h Normal file
View File

@ -0,0 +1,32 @@
/* Last revised 28-1-1999 */
/* This is the header file FastNorm3.h to be included in code files
using FastNorm3.c */
/* I M P O R T A N T ! ! ! ! !
The definition below should be altered to ensure that integer
arithmetic is done on 32-bit words. It may need to be changed from int to
long on some platforms. The 32-bit requirement arises from the use of
a Uniform pseudo-random generator in part of the code, which assumes 32-bit
twos-complement arithmetic. In dire need, replace this generator with
another more suitable for the platform. The rest of the code assumes only
that signed integers up to a bit less than 2^31 can be handled.
*/
#define Sw int /* MUST define Sw as a 32-bit integer or longer */
#define Sf double
extern int gaussfaze;
extern int gaussmask;
extern double *gausssave;
extern double GScale;
#define FastNorm ((--gaussfaze)?GScale*gausssave[gaussfaze]:fastnorm())
void initnorm(Sw seed, Sw quoll);
Sf fastnorm (void);
Sf c7rand(Sw*);
Sw irandm(Sw*);
unsigned Sw urandm(Sw*);
double adchi (double a, int *is);
double rgamma (double g, int *is);
Sf renormalize(void);

View File

@ -1,7 +1,9 @@
#ifndef _BOOL_H
#define _BOOL_H
typedef unsigned char bool;
//typedef unsigned char bool;
typedef int bool;
typedef int BOOL ;
#define BOOLEAN int

108
src/include/fftext.h Normal file
View File

@ -0,0 +1,108 @@
/*******************************************************************
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
for you (including mallocs and free's). Call the init routine for each fft size you will
be using. Then you can call the fft routines below which will make the fftlib library
call with the appropriate tables passed. When you are done with all fft's you can call
fftfree to release the storage for the tables. Note that you can call fftinit repeatedly
with the same size, the extra calls will be ignored. So, you could make a macro to call
fftInit every time you call ffts. For example you could have someting like:
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
*******************************************************************/
int fftInit(long M);
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
/* INPUTS */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* OUTPUTS */
/* private cosine and bit reversed tables */
void fftFree();
// release storage for all private cosine and bit reversed tables
void ffts(float *data, long M, long Rows);
/* Compute in-place complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
void iffts(float *data, long M, long Rows);
/* Compute in-place inverse complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
void rffts(float *data, long M, long Rows);
/* Compute in-place real fft on the rows of the input array */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
/* INPUTS */
/* *ioptr = real input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
void riffts(float *data, long M, long Rows);
/* Compute in-place real ifft on the rows of the input array */
/* data order as from rffts */
/* INPUTS */
/* *ioptr = input data array in the following order */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = real output data array */
void rspectprod(float *data1, float *data2, float *outdata, long N);
// When multiplying a pair of spectra from rfft care must be taken to multiply the
// two real values seperately from the complex ones. This routine does it correctly.
// the result can be stored in-place over one of the inputs
/* INPUTS */
/* *data1 = input data array first spectra */
/* *data2 = input data array second spectra */
/* N = fft input size for both data1 and data2 */
/* OUTPUTS */
/* *outdata = output data array spectra */
/* The following is FYI
Note that most of the fft routines require full matrices, ie Rsiz==Ncols
This is how I like to define a real matrix:
struct matrix { // real matrix
float *d; // pointer to data
long Nrows; // number of rows in the matrix
long Ncols; // number of columns in the matrix (can be less than Rsiz)
long Rsiz; // number of floats from one row to the next
};
typedef struct matrix matrix;
CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
arrays that start exactly on a cache line start.
First we CACHEFILLMALLOC a void * (use this void * when free'ing),
then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
example:
aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
a = (float *) CEILCACHELINE(ainit);
... main body of code ...
free(aInit);
To disable this alignment, set CACHELINESIZE to 1
#define CACHELINESIZE 32 // Bytes per cache line
#define CACHELINEFILL (CACHELINESIZE-1)
#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)
*/

View File

@ -174,15 +174,10 @@
#define inline _inline
#endif
/*
#ifndef HAVE_RANDOM
#define srandom(a) srand(a)
#define random rand
#define RR_MAX RAND_MAX
#else
#define RR_MAX LONG_MAX
#endif
*/
/* Fast random number generator */
//#define FastRand
#define WaGauss
#define RR_MAX RAND_MAX
#ifdef HAVE_INDEX

22
src/include/wallace.h Normal file
View File

@ -0,0 +1,22 @@
/* Wallace generator for normally distributed random variates
Copyright Holger Vogt, 2008
Calling sequence:
initw(void); initialize using srand(seed)
double x = GaussWa; returns normally distributed random variate
*/
extern double *outgauss; /*output vector for user access */
extern unsigned int variate_used; /* actual index of variate called by user */
extern double ScaleGauss; /* scale factor, including chi square correction */
double NewWa(void); /* generate new pool, return outgauss[0] */
#define GaussWa ((--variate_used)?(outgauss[variate_used]*ScaleGauss):NewWa())
void initw(void); /* initialization of Wallace generator */
void PolarGauss(double* py1, double* py2);

View File

@ -208,6 +208,8 @@ extern int OUTbeginDomain(void *,IFuid,int,IFvalue *);
extern int OUTendDomain(void *), OUTstopnow(void), OUTerror(int,char *,IFuid *);
extern int OUTattributes(void *,IFuid,int,IFvalue *);
extern void initw(void);
IFfrontEnd nutmeginfo = {
IFnewUid,
IFdelUid,
@ -757,8 +759,9 @@ xmain(int argc, char **argv)
main(int argc, char **argv)
#endif /* HAS_WINDOWS */
{
int c;
int err;
int c, err;
unsigned int rseed;
time_t acttime;
bool gotone = FALSE;
char* copystring;
bool addctrlsect = TRUE; /* PN: for autorun */
@ -1106,6 +1109,24 @@ bot:
err = 0;
#ifdef SIMULATOR
#ifdef FastRand
// initialization and seed for FastNorm Gaussian random generator
initnorm (0, 0);
rseed = 66;
if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
acttime = time(NULL);
rseed = (int)acttime;
}
initnorm (rseed, 2);
fprintf (cp_out, "SoS %f, seed value: %ld\n", renormalize(), rseed);
#elif defined (WaGauss)
if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
acttime = time(NULL);
rseed = (int)acttime;
}
srand(rseed);
initw();
#endif
if (!ft_servermode && !ft_nutmeg) {
/* Concatenate all non-option arguments into a temporary file
and load that file into the spice core.

View File

@ -1,6 +1,6 @@
## Process this file with automake
SUBDIRS = cmaths ni sparse poly deriv misc
DIST_SUBDIRS = cmaths ni sparse poly deriv misc
SUBDIRS = cmaths ni sparse poly deriv misc fft
DIST_SUBDIRS = cmaths ni sparse poly deriv misc fft
MAINTAINERCLEANFILES = Makefile.in

13
src/maths/fft/Makefile.am Normal file
View File

@ -0,0 +1,13 @@
## Process this file with automake to produce Makefile.in
noinst_LTLIBRARIES = libmathfft.la
libmathfft_la_SOURCES = \
fftext.c \
fftlib.c \
matlib.c
AM_CPPFLAGS = -I$(top_srcdir)/src/include
MAINTAINERCLEANFILES = Makefile.in

37
src/maths/fft/NOTE Normal file
View File

@ -0,0 +1,37 @@
Subject: FFT for RISC 2.0
To: macgifts@sumex-aim.stanford.edu
Enclosure: FFTs-for-RISC-2.sit
Enclosed is a stuffit archive of version 2.0 of my 'C' source code fft library.
Very-Fast Fourier Transform routines. Routines are provided for real and complex
forward and inverse 1d and 2d fourier transforms and 3d complex forward and inverse ffts.
I coded these to optimize execution speed on Risc processors like the PowerPC.
All fft sizes must still be a power of two.
Test programs based on the Numerical Recipes in C routines are provided.
Also included are some simple applications with source code which time the FFTs.
See the enclosed read me file for more information.
Revision version 2.0:
Rewrote code to rely more on compiler optimization (and be less ugly.)
Removed restrictions on too small or too large ffts.
Provided a library extension that manages memory for cosine and bit
reversed counter tables.
Added 2d and 3d complex and 2d real ffts.
Speeded routines for data too large to fit in primary cache.
Changed most testing from Matlab to Numerical Recipes based (because its cheaper.)
Changed call parameters (watch out.)
Revision version 1.21:
line 126 of rfftTest.c corrected.
Revisions version 1.2:
I now store the Nyquest point of the real transform where the 0 for the DC term's
imaginary part used to be. !! WATCH OUT FOR THIS IF YOU USE rfft !!
Added the real inverse Fourier transform.
Revisions version 1.1:
Re-arranged to put fft routines in a shared library and changed source file name to fftlib.c.
Removed some ugly optimizations that are no longer needed for CodeWarrier.
This code is public domain, do anything you want to with it.
[Moderators- This file should replace ffts-for-risc-121-c.hqx and can be included on any CD]

70
src/maths/fft/Read Me Normal file
View File

@ -0,0 +1,70 @@
This directory contains a public domain FFT library which was optimized
for speed on RISC processors such as the PowerPC. All ffts
use single precision floats, for double precision just use a
global search and replace to change float to double in all
source files.
Codewarrier Pro 1.0 project files are also supplied.
** Warning ** Perform rigorous testing to
your own standards before using this code.
(John Green) green_jt@vsdec.npt.nuwc.navy.mil
files:
fftTiming
Application to time complex ffts
rfftTiming
Application to time real ffts
// Directory: fft libraries
files:
fftext.c
Library of in-place fast fourier transforms. Contains forward
and inverse complex and real transforms. The real fft's expect the
frequency domain data to have the real part of the fsamp/2 bin (which
has a 0 imaginary part) to be stored in the location for the imaginary
part of the DC bin (the DC bin of real data is also strictly real.)
You must first call an initialization routine fftInit before calling
the fft computation routines ffts, iffts, rffts and riffts.
The init routines malloc the memory to store the cosine and
bit reversed counter tables as well as initializing their values.
fftlib.c
Lower level library of in-place fast fourier transforms. Same as fftext.c but you
need to manage the mallocs for the cosine and bit reversed tables yourself.
fft2d.c
Library of 2d and 3d complex and 2d real in-place fast fourier transforms.
The init routine fft2dInit must be called before using the 2d routines and
fft3dInit must be called before using the 3d routines. These init routines
will also call the appropriate 1d init routines in fftext.c
matlib.c
Matrix transpose routines used by fft2d.c and complex vector multiply
for forming the product of two spectra.
dxpose.c
Double precision matrix transpose for quick single precision complex transposing
// Directory: timing code
This directory contains the source to fftTiming and rfftTiming
// Directory: Numerical Recipes testing
This directory contains files used to test the various fft routines using
the Numerical Recipes in C routines as a baseline. These routines can be purchased
in PeeCee (after expanding you can move them to a Mac) format from:
http://cfata2.harvard.edu/numerical-recipes/
Unfortunately Numerical Recipes defines its forward and inverse fft's backwards.
For complex fft's I just use their inverse fft as a forward one, but for real ffts
their forward fft followed by my inverse fft reverses the data. They also have ugly matrix
and tensor data types and start their indices with one, Fortran style, but these are
minor annoyances.
// Directory: Matlab testing
This directory contains files to test fast 1d and 2d convolution with Matlab used to
verify the results. An example of using Matlab to test the fft library routines is
also given for the 2d real fft.

156
src/maths/fft/fftext.c Normal file
View File

@ -0,0 +1,156 @@
/*******************************************************************
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
for you (including mallocs and free's). Call the init routine for each fft size you will
be using. Then you can call the fft routines below which will make the fftlib library
call with the appropriate tables passed. When you are done with all fft's you can call
fftfree to release the storage for the tables. Note that you can call fftinit repeatedly
with the same size, the extra calls will be ignored. So, you could make a macro to call
fftInit every time you call ffts. For example you could have someting like:
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
*******************************************************************/
#include <stdlib.h>
#include "fftlib.h"
#include "matlib.h"
#include "fftext.h"
// pointers to storage of Utbl's and BRLow's
static float *UtblArray[8*sizeof(long)] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
static short *BRLowArray[8*sizeof(long)/2] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
int fftInit(long M){
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
/* INPUTS */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* OUTPUTS */
/* private cosine and bit reversed tables */
int theError = 1;
/*** I did NOT test cases with M>27 ***/
if ((M >= 0) && (M < 8*sizeof(long))){
theError = 0;
if (UtblArray[M] == 0){ // have we not inited this size fft yet?
// init cos table
UtblArray[M] = (float *) malloc( (POW2(M)/4+1)*sizeof(float) );
if (UtblArray[M] == 0)
theError = 2;
else{
fftCosInit(M, UtblArray[M]);
}
if (M > 1){
if (BRLowArray[M/2] == 0){ // init bit reversed table for cmplx fft
BRLowArray[M/2] = (short *) malloc( POW2(M/2-1)*sizeof(short) );
if (BRLowArray[M/2] == 0)
theError = 2;
else{
fftBRInit(M, BRLowArray[M/2]);
}
}
}
if (M > 2){
if (BRLowArray[(M-1)/2] == 0){ // init bit reversed table for real fft
BRLowArray[(M-1)/2] = (short *) malloc( POW2((M-1)/2-1)*sizeof(short) );
if (BRLowArray[(M-1)/2] == 0)
theError = 2;
else{
fftBRInit(M-1, BRLowArray[(M-1)/2]);
}
}
}
}
};
return theError;
}
void fftFree(void){
// release storage for all private cosine and bit reversed tables
long i1;
for (i1=8*sizeof(long)/2-1; i1>=0; i1--){
if (BRLowArray[i1] != 0){
free(BRLowArray[i1]);
BRLowArray[i1] = 0;
};
};
for (i1=8*sizeof(long)-1; i1>=0; i1--){
if (UtblArray[i1] != 0){
free(UtblArray[i1]);
UtblArray[i1] = 0;
};
};
}
/*************************************************
The following calls are easier than calling to fftlib directly.
Just make sure fftlib has been called for each M first.
**************************************************/
void ffts(float *data, long M, long Rows){
/* Compute in-place complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
ffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
}
void iffts(float *data, long M, long Rows){
/* Compute in-place inverse complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
iffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
}
void rffts(float *data, long M, long Rows){
/* Compute in-place real fft on the rows of the input array */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
/* INPUTS */
/* *ioptr = real input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
rffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
}
void riffts(float *data, long M, long Rows){
/* Compute in-place real ifft on the rows of the input array */
/* data order as from rffts */
/* INPUTS */
/* *ioptr = input data array in the following order */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = real output data array */
riffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
}
void rspectprod(float *data1, float *data2, float *outdata, long N){
// When multiplying a pair of spectra from rfft care must be taken to multiply the
// two real values seperately from the complex ones. This routine does it correctly.
// the result can be stored in-place over one of the inputs
/* INPUTS */
/* *data1 = input data array first spectra */
/* *data2 = input data array second spectra */
/* N = fft input size for both data1 and data2 */
/* OUTPUTS */
/* *outdata = output data array spectra */
if(N>1){
outdata[0] = data1[0] * data2[0]; // multiply the zero freq values
outdata[1] = data1[1] * data2[1]; // multiply the nyquest freq values
cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1); // multiply the other positive freq values
}
else{
outdata[0] = data1[0] * data2[0];
}
}

106
src/maths/fft/fftext.h Normal file
View File

@ -0,0 +1,106 @@
/*******************************************************************
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
for you (including mallocs and free's). Call the init routine for each fft size you will
be using. Then you can call the fft routines below which will make the fftlib library
call with the appropriate tables passed. When you are done with all fft's you can call
fftfree to release the storage for the tables. Note that you can call fftinit repeatedly
with the same size, the extra calls will be ignored. So, you could make a macro to call
fftInit every time you call ffts. For example you could have someting like:
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
*******************************************************************/
int fftInit(long M);
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
/* INPUTS */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* OUTPUTS */
/* private cosine and bit reversed tables */
void fftFree(void);
// release storage for all private cosine and bit reversed tables
void ffts(float *data, long M, long Rows);
/* Compute in-place complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
void iffts(float *data, long M, long Rows);
/* Compute in-place inverse complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array */
void rffts(float *data, long M, long Rows);
/* Compute in-place real fft on the rows of the input array */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
/* INPUTS */
/* *ioptr = real input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = output data array in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
void riffts(float *data, long M, long Rows);
/* Compute in-place real ifft on the rows of the input array */
/* data order as from rffts */
/* INPUTS */
/* *ioptr = input data array in the following order */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
/* OUTPUTS */
/* *ioptr = real output data array */
void rspectprod(float *data1, float *data2, float *outdata, long N);
// When multiplying a pair of spectra from rfft care must be taken to multiply the
// two real values seperately from the complex ones. This routine does it correctly.
// the result can be stored in-place over one of the inputs
/* INPUTS */
/* *data1 = input data array first spectra */
/* *data2 = input data array second spectra */
/* N = fft input size for both data1 and data2 */
/* OUTPUTS */
/* *outdata = output data array spectra */
// The following is FYI
//Note that most of the fft routines require full matrices, ie Rsiz==Ncols
//This is how I like to define a real matrix:
//struct matrix { // real matrix
// float *d; // pointer to data
// long Nrows; // number of rows in the matrix
// long Ncols; // number of columns in the matrix (can be less than Rsiz)
// long Rsiz; // number of floats from one row to the next
//};
//typedef struct matrix matrix;
// CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
// arrays that start exactly on a cache line start.
// First we CACHEFILLMALLOC a void * (use this void * when free'ing),
// then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
// example:
// aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
// a = (float *) CEILCACHELINE(ainit);
// ... main body of code ...
// free(aInit);
//
// To disable this alignment, set CACHELINESIZE to 1
//#define CACHELINESIZE 32 // Bytes per cache line
//#define CACHELINEFILL (CACHELINESIZE-1)
//#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
//#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)

3175
src/maths/fft/fftlib.c Normal file

File diff suppressed because it is too large Load Diff

76
src/maths/fft/fftlib.h Normal file
View File

@ -0,0 +1,76 @@
#define MYRECIPLN2 1.442695040888963407359924681001892137426 // 1.0/log(2)
/* some useful conversions between a number and its power of 2 */
#define LOG2(a) (MYRECIPLN2*log(a)) // floating point logarithm base 2
#define POW2(m) ((unsigned long) 1 << (m)) // integer power of 2 for m<32
/*******************************************************************
lower level fft stuff called by routines in fftext.c and fft2d.c
*******************************************************************/
void fftCosInit(long M, float *Utbl);
/* Compute Utbl, the cosine table for ffts */
/* of size (pow(2,M)/4 +1) */
/* INPUTS */
/* M = log2 of fft size */
/* OUTPUTS */
/* *Utbl = cosine table */
void fftBRInit(long M, short *BRLow);
/* Compute BRLow, the bit reversed table for ffts */
/* of size pow(2,M/2 -1) */
/* INPUTS */
/* M = log2 of fft size */
/* OUTPUTS */
/* *BRLow = bit reversed counter table */
void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table */
/* OUTPUTS */
/* *ioptr = output data array */
void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place inverse complex fft on the rows of the input array */
/* INPUTS */
/* *ioptr = input data array */
/* M = log2 of fft size */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table */
/* OUTPUTS */
/* *ioptr = output data array */
void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place real fft on the rows of the input array */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* INPUTS */
/* *ioptr = real input data array */
/* M = log2 of fft size */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table */
/* OUTPUTS */
/* *ioptr = output data array in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place real ifft on the rows of the input array */
/* data order as from rffts1 */
/* INPUTS */
/* *ioptr = input data array in the following order */
/* M = log2 of fft size */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table */
/* OUTPUTS */
/* *ioptr = real output data array */

297
src/maths/fft/matlib.c Normal file
View File

@ -0,0 +1,297 @@
/* a few routines from a vector/matrix library */
#include "matlib.h"
void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
/* not in-place matrix transpose */
/* INPUTS */
/* *indata = input data array */
/* iRsiz = offset to between rows of input data array */
/* oRsiz = offset to between rows of output data array */
/* Nrows = number of rows in input data array */
/* Ncols = number of columns in input data array */
/* OUTPUTS */
/* *outdata = output data array */
float *irow; /* pointer to input row start */
float *ocol; /* pointer to output col start */
float *idata; /* pointer to input data */
float *odata; /* pointer to output data */
long RowCnt; /* row counter */
long ColCnt; /* col counter */
float T0; /* data storage */
float T1; /* data storage */
float T2; /* data storage */
float T3; /* data storage */
float T4; /* data storage */
float T5; /* data storage */
float T6; /* data storage */
float T7; /* data storage */
const long inRsizd1 = iRsiz;
const long inRsizd2 = 2*iRsiz;
const long inRsizd3 = inRsizd2+iRsiz;
const long inRsizd4 = 4*iRsiz;
const long inRsizd5 = inRsizd3+inRsizd2;
const long inRsizd6 = inRsizd4+inRsizd2;
const long inRsizd7 = inRsizd4+inRsizd3;
const long inRsizd8 = 8*iRsiz;
ocol = outdata;
irow = indata;
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){
idata = irow;
odata = ocol;
for (ColCnt=Ncols; ColCnt>0; ColCnt--){
T0 = *idata;
T1 = *(idata+inRsizd1);
T2 = *(idata+inRsizd2);
T3 = *(idata+inRsizd3);
T4 = *(idata+inRsizd4);
T5 = *(idata+inRsizd5);
T6 = *(idata+inRsizd6);
T7 = *(idata+inRsizd7);
*odata = T0;
*(odata+1) = T1;
*(odata+2) = T2;
*(odata+3) = T3;
*(odata+4) = T4;
*(odata+5) = T5;
*(odata+6) = T6;
*(odata+7) = T7;
idata++;
odata += oRsiz;
}
irow += inRsizd8;
ocol += 8;
}
if (Nrows%8 != 0){
for (ColCnt=Ncols; ColCnt>0; ColCnt--){
idata = irow++;
odata = ocol;
ocol += oRsiz;
for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){
T0 = *idata;
*odata++ = T0;
idata += iRsiz;
}
}
}
}
void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
/* not in-place complex float matrix transpose */
/* INPUTS */
/* *indata = input data array */
/* iRsiz = offset to between rows of input data array */
/* oRsiz = offset to between rows of output data array */
/* Nrows = number of rows in input data array */
/* Ncols = number of columns in input data array */
/* OUTPUTS */
/* *outdata = output data array */
float *irow; /* pointer to input row start */
float *ocol; /* pointer to output col start */
float *idata; /* pointer to input data */
float *odata; /* pointer to output data */
long RowCnt; /* row counter */
long ColCnt; /* col counter */
float T0r; /* data storage */
float T0i; /* data storage */
float T1r; /* data storage */
float T1i; /* data storage */
float T2r; /* data storage */
float T2i; /* data storage */
float T3r; /* data storage */
float T3i; /* data storage */
const long inRsizd1 = 2*iRsiz;
const long inRsizd1i = 2*iRsiz + 1;
const long inRsizd2 = 4*iRsiz;
const long inRsizd2i = 4*iRsiz + 1;
const long inRsizd3 = inRsizd2+inRsizd1;
const long inRsizd3i = inRsizd2+inRsizd1 + 1;
const long inRsizd4 = 8*iRsiz;
ocol = outdata;
irow = indata;
for (RowCnt=Nrows/4; RowCnt>0; RowCnt--){
idata = irow;
odata = ocol;
for (ColCnt=Ncols; ColCnt>0; ColCnt--){
T0r = *idata;
T0i = *(idata +1);
T1r = *(idata+inRsizd1);
T1i = *(idata+inRsizd1i);
T2r = *(idata+inRsizd2);
T2i = *(idata+inRsizd2i);
T3r = *(idata+inRsizd3);
T3i = *(idata+inRsizd3i);
*odata = T0r;
*(odata+1) = T0i;
*(odata+2) = T1r;
*(odata+3) = T1i;
*(odata+4) = T2r;
*(odata+5) = T2i;
*(odata+6) = T3r;
*(odata+7) = T3i;
idata+=2;
odata += 2*oRsiz;
}
irow += inRsizd4;
ocol += 8;
}
if (Nrows%4 != 0){
for (ColCnt=Ncols; ColCnt>0; ColCnt--){
idata = irow;
odata = ocol;
for (RowCnt=Nrows%4; RowCnt>0; RowCnt--){
T0r = *idata;
T0i = *(idata+1);
*odata = T0r;
*(odata+1) = T0i;
odata+=2;
idata += 2*iRsiz;
}
irow+=2;
ocol += 2*oRsiz;
}
}
}
void cvprod(float *a, float *b, float *out, long N){
/* complex vector product, can be in-place */
/* product of complex vector *a times complex vector *b */
/* INPUTS */
/* N vector length */
/* *a complex vector length N complex numbers */
/* *b complex vector length N complex numbers */
/* OUTPUTS */
/* *out complex vector length N */
long OutCnt; /* output counter */
float A0R; /* A storage */
float A0I; /* A storage */
float A1R; /* A storage */
float A1I; /* A storage */
float A2R; /* A storage */
float A2I; /* A storage */
float A3R; /* A storage */
float A3I; /* A storage */
float B0R; /* B storage */
float B0I; /* B storage */
float B1R; /* B storage */
float B1I; /* B storage */
float B2R; /* B storage */
float B2I; /* B storage */
float B3R; /* B storage */
float B3I; /* B storage */
float T0R; /* TMP storage */
float T0I; /* TMP storage */
float T1R; /* TMP storage */
float T1I; /* TMP storage */
float T2R; /* TMP storage */
float T2I; /* TMP storage */
float T3R; /* TMP storage */
float T3I; /* TMP storage */
if (N>=4){
A0R = *a;
B0R = *b;
A0I = *(a +1);
B0I = *(b +1);
A1R = *(a +2);
B1R = *(b +2);
A1I = *(a +3);
B1I = *(b +3);
A2R = *(a +4);
B2R = *(b +4);
A2I = *(a +5);
B2I = *(b +5);
A3R = *(a +6);
B3R = *(b +6);
A3I = *(a +7);
B3I = *(b +7);
T0R = A0R * B0R;
T0I = (A0R * B0I);
T1R = A1R * B1R;
T1I = (A1R * B1I);
T2R = A2R * B2R;
T2I = (A2R * B2I);
T3R = A3R * B3R;
T3I = (A3R * B3I);
T0R -= (A0I * B0I);
T0I = A0I * B0R + T0I;
T1R -= (A1I * B1I);
T1I = A1I * B1R + T1I;
T2R -= (A2I * B2I);
T2I = A2I * B2R + T2I;
T3R -= (A3I * B3I);
T3I = A3I * B3R + T3I;
for (OutCnt=N/4-1; OutCnt > 0; OutCnt--){
a += 8;
b += 8;
A0R = *a;
B0R = *b;
A0I = *(a +1);
B0I = *(b +1);
A1R = *(a +2);
B1R = *(b +2);
A1I = *(a +3);
B1I = *(b +3);
A2R = *(a +4);
B2R = *(b +4);
A2I = *(a +5);
B2I = *(b +5);
A3R = *(a +6);
B3R = *(b +6);
A3I = *(a +7);
B3I = *(b +7);
*out = T0R;
*(out +1) = T0I;
*(out +2) = T1R;
*(out +3) = T1I;
*(out +4) = T2R;
*(out +5) = T2I;
*(out +6) = T3R;
*(out +7) = T3I;
T0R = A0R * B0R;
T0I = (A0R * B0I);
T1R = A1R * B1R;
T1I = (A1R * B1I);
T2R = A2R * B2R;
T2I = (A2R * B2I);
T3R = A3R * B3R;
T3I = (A3R * B3I);
T0R -= (A0I * B0I);
T0I = A0I * B0R + T0I;
T1R -= (A1I * B1I);
T1I = A1I * B1R + T1I;
T2R -= (A2I * B2I);
T2I = A2I * B2R + T2I;
T3R -= (A3I * B3I);
T3I = A3I * B3R + T3I;
out += 8;
}
a += 8;
b += 8;
*out = T0R;
*(out +1) = T0I;
*(out +2) = T1R;
*(out +3) = T1I;
*(out +4) = T2R;
*(out +5) = T2I;
*(out +6) = T3R;
*(out +7) = T3I;
out += 8;
}
for (OutCnt=N%4; OutCnt > 0; OutCnt--){
A0R = *a++;
B0R = *b++;
A0I = *a++;
B0I = *b++;
T0R = A0R * B0R;
T0I = (A0R * B0I);
T0R -= (A0I * B0I);
T0I = A0I * B0R + T0I;
*out++ = T0R;
*out++ = T0I;
}
}

33
src/maths/fft/matlib.h Normal file
View File

@ -0,0 +1,33 @@
/* a few routines from a vector/matrix library */
void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
/* not in-place matrix transpose */
/* INPUTS */
/* *indata = input data array */
/* iRsiz = offset to between rows of input data array */
/* oRsiz = offset to between rows of output data array */
/* Nrows = number of rows in input data array */
/* Ncols = number of columns in input data array */
/* OUTPUTS */
/* *outdata = output data array */
void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
/* not in-place complex matrix transpose */
/* INPUTS */
/* *indata = input data array */
/* iRsiz = offset to between rows of input data array */
/* oRsiz = offset to between rows of output data array */
/* Nrows = number of rows in input data array */
/* Ncols = number of columns in input data array */
/* OUTPUTS */
/* *outdata = output data array */
void cvprod(float *a, float *b, float *out, long N);
/* complex vector product, can be in-place */
/* product of complex vector *a times complex vector *b */
/* INPUTS */
/* N vector length */
/* *a complex vector length N complex numbers */
/* *b complex vector length N complex numbers */
/* OUTPUTS */
/* *out complex vector length N */

View File

@ -216,3 +216,27 @@ double gauss(void)
return glgset;
}
}
/* Polar form of the Box-Muller generator for Gaussian distributed
random variates.
Generator will be fed with two uniformly distributed random variates.
Delivers two values per call
*/
void rgauss(double* py1, double* py2)
{
double x1, x2, w;
do {
x1 = 2.0 * CombLCGTaus() - 1.0;
x2 = 2.0 * CombLCGTaus() - 1.0;
w = x1 * x1 + x2 * x2;
} while ( w >= 1.0 );
w = sqrt( (-2.0 * log( w ) ) / w );
*py1 = x1 * w;
*py2 = x2 * w;
}

View File

@ -21,8 +21,9 @@ IFparm VSRCpTable[] = { /* parameters */
IOP ("pwl", VSRC_PWL, IF_REALVEC,"Piecewise linear description"),
IOP ("sffm", VSRC_SFFM, IF_REALVEC,"Single freq. FM descripton"),
IOP ("am", VSRC_AM, IF_REALVEC,"Amplitude modulation descripton"),
IOP ("trnoise", VSRC_TRNOISE, IF_REALVEC,"Transient noise descripton"),
OPU ("pos_node",VSRC_POS_NODE, IF_INTEGER,"Positive node of source"),
OPU ("pos_node",VSRC_POS_NODE, IF_INTEGER,"Positive node of source"),
OPU ("neg_node",VSRC_NEG_NODE, IF_INTEGER,"Negative node of source"),
OPU ("function",VSRC_FCN_TYPE, IF_INTEGER,"Function of the source"),
OPU ("order", VSRC_FCN_ORDER, IF_INTEGER,"Order of the source function"),

View File

@ -11,6 +11,10 @@ Author: 1985 Thomas L. Quarles
#include "suffix.h"
#include "missing_math.h"
extern int fftInit(long M);
extern void fftFree(void);
extern void rffts(float *data, long M, long Rows);
#define SAMETIME(a,b) (fabs((a)-(b))<= TIMETOL * PW)
#define TIMETOL 1e-7
@ -74,6 +78,7 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
/* offset time by delay */
time = ckt->CKTtime - TD;
tshift = TD;
#ifdef XSPICE
/* normalize phase to 0 - 360° */
/* normalize phase to cycles */
@ -180,6 +185,52 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
}
break;
}
/**** tansient noise routines:
VNoi2 2 0 DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
rms value, time step, 0 0
VNoi1 1 0 DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
0, time step, exponent < 2, rms value
*/
case TRNOISE: {
double NA, NT, TS, time, basetime = 0.;
#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
#define NTIMETOL 1e-7
NA = here->VSRCcoeffs[0]; // input is rms value
NT = here->VSRCcoeffs[1]; // time step
if (NT == 0.) // no further breakpoint if value not given
break;
// TS = NT > ckt->CKTstep ? NT : ckt->CKTstep;
TS = NT;
time = ckt->CKTtime;
if(time >= TS) {
/* repeating signal - figure out where we are
in period */
basetime = TS * floor(time*1.000000000001/TS);
// basetime = TS * floor(time/TS);
// basetime = TS * here->VSRCncount;
time -= basetime;
}
if(ckt->CKTbreak && NSAMETIME(time,0)) {
/* set next breakpoint */
// error = CKTsetBreak(ckt, TS * ((double)here->VSRCncount + 1.));
error = CKTsetBreak(ckt, basetime + TS);
if(error) return(error);
}
/* else if (ckt->CKTbreak && NSAMETIME(time,TS)) {
// set next breakpoint
error = CKTsetBreak(ckt, basetime + TS + TS);
if(error) return(error);
} */
if (ckt->CKTtime == 0.) {
// printf("VSRC: free fft tables\n");
fftFree();
}
}
break;
}
}
bkptset: ;

View File

@ -46,6 +46,7 @@ VSRCask(CKTcircuit *ckt, GENinstance *inst, int which, IFvalue *value, IFvalue *
case VSRC_PWL:
case VSRC_SFFM:
case VSRC_AM:
case VSRC_TRNOISE:
case VSRC_FCN_COEFFS:
temp = value->v.numValue = here->VSRCfunctionOrder;
v = value->v.vec.rVec = TMALLOC(double, here->VSRCfunctionOrder);

View File

@ -48,7 +48,16 @@ typedef struct sVSRCinstance {
double VSRCdF2mag; /* distortion f2 magnitude */
double VSRCdF1phase; /* distortion f1 phase */
double VSRCdF2phase; /* distortion f2 phase */
/*transient noise*/
double VSRCprevTime; /*last time a new random value was issued*/
double VSRCprevVal; /*last value issued at prevTime*/
double VSRCnewVal; /*new value issued at prevTime*/
double VSRCsecRand; /*second random value not yet used*/
float *VSRConeof; /*pointer to array of 1 over f noise values */
long int VSRCncount; /* counter to retrieve noise values */
/*end of noise*/
double VSRCr; /* pwl repeat */
double VSRCrdelay; /* pwl delay period */
double *VSRCposIbrptr; /* pointer to sparse matrix element at
@ -93,6 +102,7 @@ typedef struct sVSRCmodel {
#define SFFM 4
#define PWL 5
#define AM 6
#define TRNOISE 7
#endif /*PULSE*/
/* device parameters */
@ -121,6 +131,7 @@ typedef struct sVSRCmodel {
#define VSRC_AM 22
#define VSRC_R 23
#define VSRC_TD 24
#define VSRC_TRNOISE 25
/* model parameters */

View File

@ -11,6 +11,15 @@ $Id$
#include "trandefs.h"
#include "sperror.h"
#include "suffix.h"
#undef WaGauss
#ifdef FastRand
#include "FastNorm3.h"
#elif defined (WaGauss)
#include "wallace.h"
#else
extern void rgauss(double* py1, double* py2);
#endif
#include "1-f-code.h"
#ifdef XSPICE_EXP
/* gtri - begin - wbk - modify for supply ramping option */
@ -27,7 +36,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
VSRCmodel *model = (VSRCmodel *)inModel;
VSRCinstance *here;
double time;
double value;
double value = 0.0;
/* loop through all the voltage source models */
for( ; model != NULL; model = model->VSRCnextModel ) {
@ -35,7 +44,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
/* loop through all the instances of the model */
for (here = model->VSRCinstances; here != NULL ;
here=here->VSRCnextInstance) {
if (here->VSRCowner != ARCHme) continue;
if (here->VSRCowner != ARCHme) continue;
*(here->VSRCposIbrptr) += 1.0 ;
*(here->VSRCnegIbrptr) -= 1.0 ;
@ -63,29 +72,29 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
}
case PULSE: {
double V1, V2, TD, TR, TF, PW, PER;
double basetime = 0;
double V1, V2, TD, TR, TF, PW, PER;
double basetime = 0;
#ifdef XSPICE
double PHASE;
double phase;
double deltat;
#endif
V1 = here->VSRCcoeffs[0];
V2 = here->VSRCcoeffs[1];
TD = here->VSRCfunctionOrder > 2
? here->VSRCcoeffs[2] : 0.0;
TR = here->VSRCfunctionOrder > 3
&& here->VSRCcoeffs[3] != 0.0
? here->VSRCcoeffs[3] : ckt->CKTstep;
TF = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4] != 0.0
? here->VSRCcoeffs[4] : ckt->CKTstep;
PW = here->VSRCfunctionOrder > 5
&& here->VSRCcoeffs[5] != 0.0
? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
PER = here->VSRCfunctionOrder > 6
&& here->VSRCcoeffs[6] != 0.0
? here->VSRCcoeffs[6] : ckt->CKTfinalTime;
V1 = here->VSRCcoeffs[0];
V2 = here->VSRCcoeffs[1];
TD = here->VSRCfunctionOrder > 2
? here->VSRCcoeffs[2] : 0.0;
TR = here->VSRCfunctionOrder > 3
&& here->VSRCcoeffs[3] != 0.0
? here->VSRCcoeffs[3] : ckt->CKTstep;
TF = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4] != 0.0
? here->VSRCcoeffs[4] : ckt->CKTstep;
PW = here->VSRCfunctionOrder > 5
&& here->VSRCcoeffs[5] != 0.0
? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
PER = here->VSRCfunctionOrder > 6
&& here->VSRCcoeffs[6] != 0.0
? here->VSRCcoeffs[6] : ckt->CKTfinalTime;
/* shift time by delay time TD */
time -= TD;
@ -126,25 +135,25 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
case SINE: {
double VO, VA, FREQ, TD, THETA;
/* gtri - begin - wbk - add PHASE parameter */
double VO, VA, FREQ, TD, THETA;
/* gtri - begin - wbk - add PHASE parameter */
#ifdef XSPICE
double PHASE;
double phase;
double phase;
PHASE = here->VSRCfunctionOrder > 5
? here->VSRCcoeffs[5] : 0.0;
? here->VSRCcoeffs[5] : 0.0;
/* compute phase in radians */
/* compute phase in radians */
phase = PHASE * M_PI / 180.0;
#endif
VO = here->VSRCcoeffs[0];
VA = here->VSRCcoeffs[1];
VA = here->VSRCcoeffs[1];
FREQ = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2] != 0.0
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
TD = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
&& here->VSRCcoeffs[2] != 0.0
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
TD = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
THETA = here->VSRCfunctionOrder > 4
? here->VSRCcoeffs[4] : 0.0;
@ -155,12 +164,12 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
} else {
value = VO + VA * sin(FREQ*time * 2.0 * M_PI + phase) *
exp(-time*THETA);
exp(-time*THETA);
#else
value = VO;
} else {
value = VO + VA * sin(FREQ * time * 2.0 * M_PI) *
exp(-(time*THETA));
exp(-(time*THETA));
#endif
/* gtri - end - wbk - add PHASE parameter */
}
@ -168,24 +177,23 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
break;
case EXP: {
double V1, V2, TD1, TD2, TAU1, TAU2;
double V1, V2, TD1, TD2, TAU1, TAU2;
V1 = here->VSRCcoeffs[0];
V2 = here->VSRCcoeffs[1];
TD1 = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2] != 0.0
? here->VSRCcoeffs[2] : ckt->CKTstep;
TAU1 = here->VSRCfunctionOrder > 3
&& here->VSRCcoeffs[3] != 0.0
? here->VSRCcoeffs[3] : ckt->CKTstep;
V2 = here->VSRCcoeffs[1];
TD1 = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2] != 0.0
? here->VSRCcoeffs[2] : ckt->CKTstep;
TAU1 = here->VSRCfunctionOrder > 3
&& here->VSRCcoeffs[3] != 0.0
? here->VSRCcoeffs[3] : ckt->CKTstep;
TD2 = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4] != 0.0
? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
&& here->VSRCcoeffs[4] != 0.0
? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
TAU2 = here->VSRCfunctionOrder > 5
&& here->VSRCcoeffs[5]
? here->VSRCcoeffs[5] : ckt->CKTstep;
&& here->VSRCcoeffs[5]
? here->VSRCcoeffs[5] : ckt->CKTstep;
if(time <= TD1) {
value = V1;
} else if (time <= TD2) {
@ -199,7 +207,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
case SFFM:{
double VO, VA, FC, MDI, FS;
double VO, VA, FC, MDI, FS;
/* gtri - begin - wbk - add PHASE parameters */
#ifdef XSPICE
@ -208,25 +216,24 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
double phases;
PHASEC = here->VSRCfunctionOrder > 5
? here->VSRCcoeffs[5] : 0.0;
? here->VSRCcoeffs[5] : 0.0;
PHASES = here->VSRCfunctionOrder > 6
? here->VSRCcoeffs[6] : 0.0;
? here->VSRCcoeffs[6] : 0.0;
/* compute phases in radians */
phasec = PHASEC * M_PI / 180.0;
phases = PHASES * M_PI / 180.0;
#endif
VO = here->VSRCcoeffs[0];
VA = here->VSRCcoeffs[1];
FC = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2]
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
MDI = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
FS = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4]
? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
VO = here->VSRCcoeffs[0];
VA = here->VSRCcoeffs[1];
FC = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2]
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
MDI = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
FS = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4]
? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
#ifdef XSPICE
/* compute waveform value */
value = VO + VA *
@ -242,10 +249,9 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
break;
case AM:{
double VA, FC, MF, VO, TD;
double VA, FC, MF, VO, TD;
/* gtri - begin - wbk - add PHASE parameters */
#ifdef XSPICE
double PHASEC, PHASES;
double phasec;
double phases;
@ -260,49 +266,40 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
phases = PHASES * M_PI / 180.0;
#endif
VA = here->VSRCcoeffs[0];
VO = here->VSRCcoeffs[1];
MF = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2]
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
FC = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
TD = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4]
? here->VSRCcoeffs[4] : 0.0;
VA = here->VSRCcoeffs[0];
VO = here->VSRCcoeffs[1];
MF = here->VSRCfunctionOrder > 2
&& here->VSRCcoeffs[2]
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
FC = here->VSRCfunctionOrder > 3
? here->VSRCcoeffs[3] : 0.0;
TD = here->VSRCfunctionOrder > 4
&& here->VSRCcoeffs[4]
? here->VSRCcoeffs[4] : 0.0;
time -= TD;
if (time <= 0) {
value = 0;
} else {
#ifdef XSPICE
/* compute waveform value */
value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
sin(2 * M_PI * FC * time + phases);
/* compute waveform value */
value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
sin(2 * M_PI * FC * time + phases);
#else /* XSPICE */
value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
sin(2 * M_PI * FC * time);
value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
sin(2 * M_PI * FC * time);
#endif
}
}
/* gtri - end - wbk - add PHASE parameters */
}
break;
}
break;
case PWL: {
int i = 0, num_repeat = 0, ii = 0;
double foo, repeat_time = 0, end_time, breakpt_time, itime;
time -= here->VSRCrdelay;
// if(time > PER) {
/* repeating signal - figure out where we are */
/* in period */
// basetime = PER * floor(time/PER);
// time -= basetime;
// }
if(time < *(here->VSRCcoeffs)) {
foo = *(here->VSRCcoeffs + 1) ;
@ -310,35 +307,243 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
goto loadDone;
}
do {
for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
itime = *(here->VSRCcoeffs+2*i);
if ( AlmostEqualUlps(itime+repeat_time, time, 3 )) {
// if ( fabs( (*(here->VSRCcoeffs+2*i)+repeat_time) - time ) < 1e-20 ) {
foo = *(here->VSRCcoeffs+2*i+1);
value = foo;
goto loadDone;
} else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time) && (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
foo = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
(*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
do {
for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
itime = *(here->VSRCcoeffs+2*i);
if ( AlmostEqualUlps(itime+repeat_time, time, 3 )) {
foo = *(here->VSRCcoeffs+2*i+1);
value = foo;
goto loadDone;
} else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time)
&& (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
foo = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
(*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
(*(here->VSRCcoeffs+2*i+3) - *(here->VSRCcoeffs+2*i+1)));
value = foo;
goto loadDone;
}
}
foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
value = foo;
value = foo;
goto loadDone;
}
}
foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
value = foo;
if ( !here->VSRCrGiven ) goto loadDone;
if ( !here->VSRCrGiven ) goto loadDone;
end_time = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
repeat_time = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
ii = here->VSRCrBreakpt/2;
} while ( here->VSRCrGiven );
end_time = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
repeat_time = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
ii = here->VSRCrBreakpt/2;
} while ( here->VSRCrGiven );
break;
}
}
/**** tansient noise routines:
VNoi2 2 0 DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
rms value, time step, 0 0
VNoi1 1 0 DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
0, time step, exponent < 2, rms value
*/
case TRNOISE: {
/* Generate voltage point every TS with amplitude NA * ra,
where ra is drawn from a random number generator with
gaussian distribution with mean 0 and standard deviation 1
*/
//#define PRVAL
// typedef int bool;
double newval=0.0, lastval=0.0, lasttime=0.0;
double NA, NT, TS;
double V1, V2, basetime = 0.;
double scalef, ra1, ra2;
float NALPHA, NAMP;
long int nosteps, newsteps = 1, newexp = 0;
bool aof = FALSE;
NA = here->VSRCcoeffs[0]; // input is rms value
NT = here->VSRCcoeffs[1]; // time step
scalef = NA;
// scalef = NA*1.32;
NALPHA = here->VSRCfunctionOrder > 2
? (float)here->VSRCcoeffs[2] : 0.0f;
NAMP = here->VSRCfunctionOrder > 3
&& here->VSRCcoeffs[3] != 0.0
&& here->VSRCcoeffs[2] != 0.0
? (float)here->VSRCcoeffs[3] : 0.0f;
if ((NT == 0.) || ((NA == 0.) && (NAMP == 0.))) {
value = here->VSRCdcValue;
goto noiDone;
}
else
TS = NT; /* time step for noise */
if ((NALPHA > 0.0) && (NAMP > 0.0)) aof = TRUE;
lasttime = here->VSRCprevTime;
lastval = here->VSRCprevVal;
newval = here->VSRCnewVal;
/* set all data: DC, white, 1of */
if (time <= 0 /*ckt->CKTstep*/) {
/* data are already set */
if ((here->VSRCprevVal != 0) || (here->VSRCnewVal != 0)) {
value = here->VSRCprevVal;
goto noiDone;
}
lasttime = 0.0;
here->VSRCsecRand = 2.; /* > 1, invalid number out of the random number range */
/* get two random samples */
#ifdef FastRand
// use FastNorm3
here->VSRCprevVal = scalef * GaussWa;
here->VSRCnewVal = scalef * GaussWa;
#elif defined (WaGauss)
// use WallaceHV
here->VSRCprevVal = scalef * GaussWa;
here->VSRCnewVal = scalef * GaussWa;
#else
// make use of two random variables per call to rgauss()
rgauss(&ra1, &ra2);
here->VSRCprevVal = scalef * ra1;
// choose to set start value to 0
here->VSRCprevVal = 0;
here->VSRCnewVal = scalef * ra2;
#endif
/* generate 1 over f noise at time 0 */
if (aof) {
if (here->VSRCncount==0) {
// add 10 steps for start up sequence
nosteps = (long)((ckt->CKTfinalTime)/TS) + 10;
// generate number of steps as power of 2
while(newsteps < nosteps) {
newsteps <<= 1;
newexp++;
}
here->VSRConeof = TMALLOC(float, newsteps); //(float *)tmalloc(sizeof(float) * newsteps);
#ifdef PRVAL
printf("ALPHA: %f, GAIN: %e\n", NALPHA, NAMP);
#endif
f_alpha(newsteps, newexp, here->VSRConeof, NAMP, NALPHA);
#ifdef PRVAL
printf("Noi1: %e, Noi2: %e\n", here->VSRConeof[10], here->VSRConeof[100]);
#endif
here->VSRCprevVal += here->VSRConeof[here->VSRCncount];
here->VSRCncount++;
here->VSRCnewVal += here->VSRConeof[here->VSRCncount];
here->VSRCncount++;
value = newval;
// add DC
here->VSRCprevVal += here->VSRCdcValue;
here->VSRCnewVal += here->VSRCdcValue;
value = here->VSRCprevVal;
#ifdef PRVAL
printf("start1, time: %e, outp: %e, rnd: %e\n", time, newval, testval);
#endif
} else { // here->VSRCncount > 0
// add DC
here->VSRCprevVal += here->VSRCdcValue;
here->VSRCnewVal += here->VSRCdcValue;
value = here->VSRCprevVal;
#ifdef PRVAL
printf("start2, time: %e, outp: %e, rnd: %e\n", time, here->VSRCprevVal, testval);
#endif
}
#ifdef PRVAL
printf("time 0 value: %e for %s\n", here->VSRCprevVal, here->VSRCname);
#endif
goto loadDone;
} //aof
// add DC
here->VSRCprevVal += here->VSRCdcValue;
here->VSRCnewVal += here->VSRCdcValue;
value = here->VSRCprevVal;
here->VSRCprevTime = 0.;
goto loadDone;
} // time < 0
V1 = here->VSRCprevVal;
V2 = here->VSRCnewVal;
if (here->VSRCprevTime == ckt->CKTtime) {
value = here->VSRCprevVal;
goto noiDone;
}
if (time > 0 && time < TS) {
value = V1 + (V2 - V1) * (time) / TS;
}
else if (time >= TS) {
/* repeating signal - figure out where we are in period */
/* numerical correction to avoid basetime less than
next step, e.g. 4.99999999999999995 delivers a floor
of 4 instead of 5 */
basetime = TS * floor(time*1.000000000001/TS);
time -= basetime;
#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
#define NTIMETOL 1e-7
if NSAMETIME(time,0.) {
/* get new random number */
#ifdef FastRand
// use FastNorm3
newval = scalef * FastNorm;
#elif defined (WaGauss)
// use WallaceHV
newval = scalef * GaussWa;
#else
// make use of two random variables per call to rgauss()
if (here->VSRCsecRand == 2.0) {
rgauss(&ra1, &ra2);
newval = scalef * ra1;
here->VSRCsecRand = scalef * ra2;
}
else {
newval = here->VSRCsecRand;
here->VSRCsecRand = 2.0;
}
#endif
V1 = here->VSRCprevVal = here->VSRCnewVal;
V2 = newval; // scale factor t.b.d.
if(here->VSRCdcGiven) V2 += here->VSRCdcValue;
if (aof) {
V2 += here->VSRConeof[here->VSRCncount];
#ifdef PRVAL
printf("aof: %d\n", here->VSRCncount);
#endif
}
here->VSRCncount++;
value = V1;
here->VSRCnewVal = V2;
} else if (time > 0 && time < TS) {
value = V1 + (V2 - V1) * (time) / TS;
#ifdef PRVAL
printf("if1, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime,
V1 + (V2 - V1) * (time) / TS, V2);
#endif
} else { /* time > TS should be never reached */
value = V1 + (V2 - V1) * (time-TS) / TS;
#ifdef PRVAL
printf("if2, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime,
V1 + (V2 - V1) * (time-TS) / TS, V2);
#endif
}
here->VSRCprevTime = ckt->CKTtime;
}
noiDone:
if (time >=ckt->CKTfinalTime) {
/* free the 1of memory */
if (here->VSRConeof) tfree(here->VSRConeof);
/* reset the 1of counter */
here->VSRCncount = 0;
}
goto loadDone;
} // case
break;
} // switch
}
loadDone:
/* gtri - begin - wbk - modify for supply ramping option */
@ -346,11 +551,12 @@ loadDone:
value *= ckt->CKTsrcFact;
value *= cm_analog_ramp_factor();
#else
if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
*(ckt->CKTrhs + (here->VSRCbranch)) += value;
if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
/* load the new voltage value into the matrix */
*(ckt->CKTrhs + (here->VSRCbranch)) += value;
#endif
/* gtri - end - wbk - modify to process srcFact, etc. for all sources */
}
}
} // for loop instances
} // for loop models
return(OK);
}

View File

@ -169,6 +169,13 @@ VSRCparam(int param, IFvalue *value, GENinstance *inst, IFvalue *select)
return(E_BADPARM);
}
break;
case VSRC_TRNOISE:
here->VSRCfunctionType = TRNOISE;
here->VSRCfuncTGiven = TRUE;
here->VSRCcoeffs = value->v.vec.rVec;
here->VSRCfunctionOrder = value->v.numValue;
here->VSRCcoeffsGiven = TRUE;
break;
default:
return(E_BADPARM);
}

File diff suppressed because it is too large Load Diff