transient noise simulation
This commit is contained in:
parent
eaadf5d2bc
commit
5e1ed023c6
|
|
@ -1073,10 +1073,12 @@ AC_CONFIG_FILES([Makefile
|
|||
src/frontend/help/Makefile
|
||||
src/frontend/parser/Makefile
|
||||
src/frontend/plotting/Makefile
|
||||
src/frontend/trannoise/Makefile
|
||||
src/frontend/wdisp/Makefile
|
||||
src/include/Makefile
|
||||
src/maths/Makefile
|
||||
src/maths/cmaths/Makefile
|
||||
src/maths/fft/Makefile
|
||||
src/maths/misc/Makefile
|
||||
src/maths/ni/Makefile
|
||||
src/maths/deriv/Makefile
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
* 51 stage Ring-Osc. BSIM3, transient noise
|
||||
* will need about 45 min on an i7 860 with 4 threads
|
||||
|
||||
* closes the loop between inverters xiinv1 and xiinv5
|
||||
vin in out dc 0.5 pulse 0.5 0 0.1n 5n 1 1 1
|
||||
|
||||
vdd dd 0 dc 0 pulse 0 2.2 0 1n 1 1 1
|
||||
|
||||
vss ss 0 dc 0
|
||||
ve sub 0 dc 0
|
||||
|
||||
vpe well 0 2.2
|
||||
|
||||
* noisy inverters
|
||||
xiinv2 dd ss sub well out25 out50 inv253
|
||||
xiinv1 dd ss sub well in out25 inv253
|
||||
|
||||
*very noisy inverter
|
||||
xiinv5 dd ss sub well out50 out inv1_2
|
||||
*output amplifier
|
||||
xiinv11 dd ss sub well out25 bufout inv1
|
||||
cout bufout ss 0.2pF
|
||||
|
||||
.option itl1=500 gmin=1e-15 itl4=10 noacct
|
||||
|
||||
* .dc vdd 0 2 0.01
|
||||
.tran 0.01n 500n
|
||||
|
||||
.save in bufout v(t1)
|
||||
|
||||
.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.nmos
|
||||
.include D:\Spice_Win\Exam_BSIM3\Modelcards\modelcard.pmos
|
||||
|
||||
.include noilib-demo.h
|
||||
|
||||
.control
|
||||
unset ngdebug
|
||||
* first run
|
||||
save bufout $ needed for restricting memory usage
|
||||
rusage
|
||||
tran 8p 10000n
|
||||
rusage
|
||||
plot bufout xlimit 90n 95n
|
||||
linearize
|
||||
fft bufout
|
||||
* next run
|
||||
reset
|
||||
save bufout
|
||||
alter @v.xiinv5.vn1[trnoise] = [ 0 0 0 0 ] $ no noise
|
||||
tran 8p 10000n
|
||||
rusage
|
||||
plot bufout xlimit 90n 95n
|
||||
linearize
|
||||
fft bufout
|
||||
plot mag(bufout) mag(sp2.bufout) xlimit 0 2G ylimit 1e-11 0.1 ylog
|
||||
.endc
|
||||
|
||||
|
||||
.end
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
* simple sample & hold, transient noise
|
||||
|
||||
* switch control
|
||||
* PULSE(V1 V2 TD TR TF PW PER)
|
||||
vgate1 ga1 0 dc 0 pulse (0 1 0 10n 10n 90n 200n)
|
||||
|
||||
Switch1 1 2 ga1 0 smodel1
|
||||
|
||||
* noisy input
|
||||
* rms value white, time step, exponent < 2, rms value 1/f
|
||||
vin 1 0 dc 0 trnoise 0.1m 0.2n 1 0.1m
|
||||
*vin 1 0 dc 0 trnoise 0.1m 0.2n 0 0.1m
|
||||
|
||||
* output
|
||||
c2 2 0 10p
|
||||
|
||||
* second S&H
|
||||
vgate2 ga2 0 dc 0 pulse (0 1 140n 10n 10n 30n 200n)
|
||||
*Buffer EXXXXXXX N+ N- NC+ NC- VALUE
|
||||
e1 4 0 2 0 1
|
||||
Switch2 4 3 ga2 0 smodel2
|
||||
c3 3 0 10p
|
||||
|
||||
.option itl1=500 gmin=1e-15 itl4=10 acct
|
||||
|
||||
.model smodel1 sw vt=0.5 ron=100
|
||||
.model smodel2 sw vt=0.5 ron=100
|
||||
|
||||
.tran 0.4n 100u
|
||||
|
||||
|
||||
.control
|
||||
unset ngdebug
|
||||
set filetype=ascii
|
||||
rusage
|
||||
run
|
||||
rusage all
|
||||
write noi_test.out v(1)
|
||||
plot v(2) v(3) xlimit 4u 5u
|
||||
plot v(ga1) v(ga2) xlimit 4u 5u
|
||||
linearize
|
||||
*rms v(1)
|
||||
fft v(3)
|
||||
plot mag(v(3)) loglog xlimit 1e4 1e8 ylimit 1e-10 1e-4
|
||||
setplot tran1
|
||||
linearize
|
||||
psd 101 v(3)
|
||||
plot mag(v(3)) xlimit 0 3e7 ylimit 0 10u
|
||||
|
||||
.endc
|
||||
|
||||
|
||||
.end
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
* standard inverter made noisy
|
||||
*.subckt inv1 dd ss sub well in out
|
||||
*vn1 out outi dc 0 noise 0.1 0.3n 1.0 0.1
|
||||
*mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
|
||||
*mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
|
||||
*.ends inv1
|
||||
|
||||
* standard inverter
|
||||
.subckt inv1 dd ss sub well in out
|
||||
mn1 out in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
|
||||
mp1 out in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
|
||||
.ends inv1
|
||||
|
||||
* very noisy inverter (noise on vdd and well)
|
||||
.subckt inv1_1 dd ss sub well in out
|
||||
vn1 dd idd dc 0 trnoise 0.05 0.05n 1 0.05
|
||||
vn2 well iwell dc 0 trnoise 0.05 0.05n 1 0.05
|
||||
mn1 out in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
|
||||
mp1 out in idd iwell p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
|
||||
*Cout out 0 0.1p
|
||||
.ends inv1_1
|
||||
|
||||
|
||||
* another very noisy inverter
|
||||
.subckt inv1_2 dd ss sub well in out
|
||||
vn1 out outi dc 0 trnoise 0.05 8p 1.0 0.001
|
||||
mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
|
||||
mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
|
||||
*Cout out 0 0.1p
|
||||
.ends inv1_2
|
||||
|
||||
* another very noisy inverter with current sources parallel to transistor
|
||||
.subckt inv13 dd ss sub well in outi
* Inverter with one noise current source in parallel to each transistor.
* The sources use the TRNOISE function (the bare keyword "noise" is not a
* source function used anywhere else in these decks).
* NOTE(review): argument order assumed to be
*   rms value white, time step, 1/f exponent, rms value 1/f  -- confirm.
in1 ss outi dc 0 trnoise 200u 0.05n 1.0 50u
mn1 outi in ss sub n1 w=2u l=0.25u AS=3p AD=3p PS=4u PD=4u
in2 dd outi dc 0 trnoise 200u 0.05n 1.0 50u
mp1 outi in dd well p1 w=4u l=0.25u AS=7p AD=7p PS=6u PD=6u
*Cout out 0 0.1p
.ends inv13
|
||||
|
||||
.subckt inv53 dd ss sub well in out
|
||||
xinv1 dd ss sub well in 1 inv1
|
||||
xinv2 dd ss sub well 1 2 inv1
|
||||
xinv3 dd ss sub well 2 3 inv1
|
||||
xinv4 dd ss sub well 3 4 inv1
|
||||
xinv5 dd ss sub well 4 out inv1
|
||||
.ends inv53
|
||||
|
||||
.subckt inv253 dd ss sub well in out
|
||||
xinv1 dd ss sub well in 1 inv53
|
||||
xinv2 dd ss sub well 1 2 inv53
|
||||
xinv3 dd ss sub well 2 3 inv53
|
||||
xinv4 dd ss sub well 3 4 inv53
|
||||
xinv5 dd ss sub well 4 out inv53
|
||||
.ends inv253
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
* Shot noise test with B source, diode
|
||||
* voltage on device (diode, forward)
|
||||
Vdev out 0 DC 0 PULSE(0.4 0.45 10u)
|
||||
* diode, forward direction, to be modeled with noise
|
||||
D1 mess 0 DMOD
|
||||
.model DMOD D IS=1e-14 N=1
|
||||
X1 0 mess out ishot
|
||||
* device between 1 and 2
|
||||
* new output terminals of device including noise: 1 and 3
|
||||
.subckt ishot 1 2 3
|
||||
* white noise source with rms 1V
|
||||
VNG 0 11 DC 0 TRNOISE(1 1n 0 0)
|
||||
*measure the current i(v1)
|
||||
V1 2 3 DC 0
|
||||
* calculate the shot noise
|
||||
* sqrt(2*current*q*bandwidth)
|
||||
BI 1 3 I=sqrt(2*abs(i(v1))*1.6e-19*1e7)*v(11)
|
||||
.ends ishot
|
||||
* 20000 sample points
|
||||
.tran 1n 20u
|
||||
.control
|
||||
run
|
||||
plot (-1)*i(vdev)
|
||||
meas tran vdev_rms avg i(vdev) from=0u to=9.9u
|
||||
meas tran vdev_rms avg i(vdev) from=10.1u to=20u
|
||||
.endc
|
||||
.end
|
||||
|
|
@ -156,6 +156,7 @@ endif
|
|||
ngspice_LDADD += \
|
||||
frontend/parser/libparser.la \
|
||||
frontend/numparam/libnumparam.la \
|
||||
frontend/trannoise/libtrannoise.la \
|
||||
spicelib/parser/libinp.la
|
||||
|
||||
if CIDER_WANTED
|
||||
|
|
@ -170,6 +171,7 @@ ngspice_LDADD += \
|
|||
maths/deriv/libderiv.la \
|
||||
maths/cmaths/libcmaths.la \
|
||||
maths/misc/libmathmisc.la \
|
||||
maths/fft/libmathfft.la \
|
||||
maths/poly/libpoly.la \
|
||||
maths/ni/libni.la \
|
||||
maths/sparse/libsparse.la \
|
||||
|
|
@ -208,8 +210,10 @@ ngnutmeg_LDADD += \
|
|||
frontend/plotting/libplotting.la \
|
||||
frontend/parser/libparser.la \
|
||||
frontend/numparam/libnumparam.la \
|
||||
frontend/trannoise/libtrannoise.la \
|
||||
maths/cmaths/libcmaths.la \
|
||||
maths/misc/libmathmisc.la \
|
||||
maths/fft/libmathfft.la \
|
||||
maths/poly/libpoly.la \
|
||||
misc/libmisc.la \
|
||||
spicelib/parser/libinp.la
|
||||
|
|
@ -384,6 +388,7 @@ libspice_la_LIBADD += \
|
|||
maths/deriv/libderiv.la \
|
||||
maths/cmaths/libcmaths.la \
|
||||
maths/misc/libmathmisc.la \
|
||||
maths/fft/libmathfft.la \
|
||||
maths/poly/libpoly.la \
|
||||
maths/ni/libni.la \
|
||||
maths/sparse/libsparse.la \
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
## Process this file with automake to produce Makefile.in
|
||||
## $Id$
|
||||
|
||||
SUBDIRS = plotting help parser wdisp numparam
|
||||
DIST_SUBDIRS = plotting help parser wdisp numparam
|
||||
SUBDIRS = plotting help parser wdisp numparam trannoise
|
||||
DIST_SUBDIRS = plotting help parser wdisp numparam trannoise
|
||||
EXTRA_DIST = testcommands.c parse-bison.y
|
||||
## For Windows with Visual Studio
|
||||
EXTRA_DIST += parse-bison.c parse-bison.h
|
||||
|
|
|
|||
|
|
@ -16,10 +16,10 @@ Author: 2008 Holger Vogt
|
|||
#include "variable.h"
|
||||
#include "parse.h"
|
||||
#include "../misc/misc_time.h"
|
||||
#include "../maths/fft/fftext.h"
|
||||
|
||||
static void fftext(double*, double*, long int, long int, int);
|
||||
|
||||
|
||||
void
|
||||
com_fft(wordlist *wl)
|
||||
{
|
||||
|
|
@ -250,6 +250,302 @@ com_fft(wordlist *wl)
|
|||
tfree(win);
|
||||
}
|
||||
|
||||
void
|
||||
com_psd(wordlist *wl)
|
||||
{
|
||||
ngcomplex_t **fdvec;
|
||||
double **tdvec;
|
||||
double *freq, *win, *time, *ave;
|
||||
double delta_t, span, noipower;
|
||||
int ngood, mm;
|
||||
unsigned long fpts, i, j, tlen, jj, smooth, hsmooth;
|
||||
char *s;
|
||||
struct dvec *f, *vlist, *lv, *vec;
|
||||
struct pnode *names, *first_name;
|
||||
|
||||
float *reald, *imagd;
|
||||
int size, sign, isreal;
|
||||
double scaling, sum;
|
||||
int order;
|
||||
double scale, sigma;
|
||||
|
||||
if (!plot_cur || !plot_cur->pl_scale) {
|
||||
fprintf(cp_err, "Error: no vectors loaded.\n");
|
||||
return;
|
||||
}
|
||||
if (!isreal(plot_cur->pl_scale) ||
|
||||
((plot_cur->pl_scale)->v_type != SV_TIME)) {
|
||||
fprintf(cp_err, "Error: fft needs real time scale\n");
|
||||
return;
|
||||
}
|
||||
|
||||
tlen = (plot_cur->pl_scale)->v_length;
|
||||
time = (plot_cur->pl_scale)->v_realdata;
|
||||
span = time[tlen-1] - time[0];
|
||||
delta_t = span/(tlen - 1);
|
||||
|
||||
// get filter length from parameter input
|
||||
s = wl->wl_word;
|
||||
if (!(ave = ft_numparse(&s, FALSE)) || (*ave < 1.0)) {
|
||||
fprintf(cp_out, "Number of averaged data points: %d\n", 1);
|
||||
smooth = 1;
|
||||
}
|
||||
else smooth = (int)(*ave);
|
||||
wl = wl->wl_next;
|
||||
|
||||
// size of input vector is power of two and larger than spice vector
|
||||
size = 1;
|
||||
mm = 0;
|
||||
while (size < tlen) {
|
||||
size <<= 1;
|
||||
mm++;
|
||||
}
|
||||
|
||||
// output vector has length of size/2
|
||||
fpts = size>>1;
|
||||
|
||||
// window function
|
||||
win = TMALLOC(double, tlen);
|
||||
{
|
||||
char window[BSIZE_SP];
|
||||
double maxt = time[tlen-1];
|
||||
if (!cp_getvar("specwindow", CP_STRING, window))
|
||||
strcpy(window,"blackman");
|
||||
if (eq(window, "none"))
|
||||
for(i=0; i<tlen; i++) {
|
||||
win[i] = 1;
|
||||
}
|
||||
else if (eq(window, "rectangular"))
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = 1;
|
||||
}
|
||||
}
|
||||
else if (eq(window, "hanning") || eq(window, "cosine"))
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = 1 - cos(2*M_PI*(time[i]-maxt)/span);
|
||||
}
|
||||
}
|
||||
else if (eq(window, "hamming"))
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = 1 - 0.92/1.08*cos(2*M_PI*(time[i]-maxt)/span);
|
||||
}
|
||||
}
|
||||
else if (eq(window, "triangle") || eq(window, "bartlet"))
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = 2 - fabs(2+4*(time[i]-maxt)/span);
|
||||
}
|
||||
}
|
||||
else if (eq(window, "blackman")) {
|
||||
int order;
|
||||
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
|
||||
if (order < 2) order = 2; /* only order 2 supported here */
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = 1;
|
||||
win[i] -= 0.50/0.42*cos(2*M_PI*(time[i]-maxt)/span);
|
||||
win[i] += 0.08/0.42*cos(4*M_PI*(time[i]-maxt)/span);
|
||||
}
|
||||
}
|
||||
} else if (eq(window, "gaussian")) {
|
||||
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
|
||||
if (order < 2) order = 2;
|
||||
sigma=1.0/order;
|
||||
scale=0.83/sigma;
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = scale*exp(-0.5*pow((time[i]-maxt/2)/(sigma*maxt/2),2));
|
||||
}
|
||||
}
|
||||
/* int order;
|
||||
double scale;
|
||||
extern double erfc(double);
|
||||
if (!cp_getvar("specwindoworder", CP_NUM, &order)) order = 2;
|
||||
if (order < 2) order = 2;
|
||||
scale = pow(2*M_PI/order,0.5)*(0.5-erfc(pow(order,0.5)));
|
||||
for(i=0; i<tlen; i++) {
|
||||
if (maxt-time[i] > span) {
|
||||
win[i] = 0;
|
||||
} else {
|
||||
win[i] = exp(-0.5*order*(1-2*(maxt-time[i])/span)
|
||||
*(1-2*(maxt-time[i])/span))/scale;
|
||||
}
|
||||
}
|
||||
*/
|
||||
} else {
|
||||
fprintf(cp_err, "Warning: unknown window type %s\n", window);
|
||||
tfree(win);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
names = ft_getpnames(wl, TRUE);
|
||||
first_name = names;
|
||||
vlist = NULL;
|
||||
ngood = 0;
|
||||
while (names) {
|
||||
vec = ft_evaluate(names);
|
||||
names = names->pn_next;
|
||||
while (vec) {
|
||||
if (vec->v_length != tlen) {
|
||||
fprintf(cp_err, "Error: lengths of %s vectors don't match: %d, %d\n",
|
||||
vec->v_name, vec->v_length, tlen);
|
||||
vec = vec->v_link2;
|
||||
continue;
|
||||
}
|
||||
if (!isreal(vec)) {
|
||||
fprintf(cp_err, "Error: %s isn't real!\n",
|
||||
vec->v_name);
|
||||
vec = vec->v_link2;
|
||||
continue;
|
||||
}
|
||||
if (vec->v_type == SV_TIME) {
|
||||
vec = vec->v_link2;
|
||||
continue;
|
||||
}
|
||||
if (!vlist)
|
||||
vlist = vec;
|
||||
else
|
||||
lv->v_link2 = vec;
|
||||
lv = vec;
|
||||
vec = vec->v_link2;
|
||||
ngood++;
|
||||
}
|
||||
}
|
||||
free_pnode(first_name);
|
||||
if (!ngood) {
|
||||
return;
|
||||
}
|
||||
|
||||
plot_cur = plot_alloc("spectrum");
|
||||
plot_cur->pl_next = plot_list;
|
||||
plot_list = plot_cur;
|
||||
plot_cur->pl_title = copy((plot_cur->pl_next)->pl_title);
|
||||
plot_cur->pl_name = copy("PSD");
|
||||
plot_cur->pl_date = copy(datestring( ));
|
||||
|
||||
freq = (double *) tmalloc(fpts * sizeof(double));
|
||||
f = alloc(struct dvec);
|
||||
ZERO(f, struct dvec);
|
||||
f->v_name = copy("frequency");
|
||||
f->v_type = SV_FREQUENCY;
|
||||
f->v_flags = (VF_REAL | VF_PERMANENT | VF_PRINT);
|
||||
f->v_length = fpts;
|
||||
f->v_realdata = freq;
|
||||
vec_new(f);
|
||||
|
||||
for (i = 0; i<fpts; i++) freq[i] = i*1./span*tlen/size;
|
||||
|
||||
tdvec = TMALLOC(double*, ngood);
|
||||
fdvec = TMALLOC(ngcomplex_t*, ngood);
|
||||
for (i = 0, vec = vlist; i<ngood; i++) {
|
||||
tdvec[i] = vec->v_realdata; /* real input data */
|
||||
fdvec[i] = TMALLOC(ngcomplex_t, fpts); /* complex output data */
|
||||
f = alloc(struct dvec);
|
||||
ZERO(f, struct dvec);
|
||||
f->v_name = vec_basename(vec);
|
||||
f->v_type = SV_NOTYPE; //vec->v_type;
|
||||
f->v_flags = (VF_COMPLEX | VF_PERMANENT);
|
||||
f->v_length = fpts;
|
||||
f->v_compdata = fdvec[i];
|
||||
vec_new(f);
|
||||
vec = vec->v_link2;
|
||||
}
|
||||
|
||||
printf("PSD: Time span: %g s, input length: %d, zero padding: %d\n", span, size, size-tlen);
|
||||
printf("PSD: Freq. resolution: %g Hz, output length: %d\n", 1.0/span*tlen/size, fpts);
|
||||
|
||||
sign = 1;
|
||||
isreal = 1;
|
||||
|
||||
reald = TMALLOC(float, size);
|
||||
imagd = TMALLOC(float, size);
|
||||
|
||||
// scale = 0.66;
|
||||
|
||||
for (i = 0; i<ngood; i++) {
|
||||
for (j = 0; j < tlen; j++){
|
||||
reald[j] = tdvec[i][j]*win[j];
|
||||
imagd[j] = 0.;
|
||||
}
|
||||
for (j = tlen; j < size; j++){
|
||||
reald[j] = 0.;
|
||||
imagd[j] = 0.;
|
||||
}
|
||||
|
||||
// Green's FFT
|
||||
fftInit(mm);
|
||||
rffts(reald, mm, 1);
|
||||
fftFree();
|
||||
scaling = size*0.3;
|
||||
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
noipower = fdvec[i][0].cx_real = (double)reald[0]*(double)reald[0];
|
||||
fdvec[i][fpts-1].cx_real = (double)reald[1]*(double)reald[1];
|
||||
noipower += fdvec[i][fpts-1].cx_real;
|
||||
for (j=1; j<(fpts - 1); j++){
|
||||
jj = j<<1;
|
||||
fdvec[i][j].cx_real = ((double)reald[jj]*(double)reald[jj] + (double)reald[jj + 1]*(double)reald[jj + 1]);
|
||||
fdvec[i][j].cx_imag = 0;
|
||||
noipower += fdvec[i][j].cx_real;
|
||||
}
|
||||
printf("Total noise power up to Nyquist frequency %5.3e Hz:\n%e V^2 (or A^2), \nnoise voltage or current %e V (or A)\n",
|
||||
freq[fpts-1],noipower/span*tlen/size/scaling, sqrt(noipower/span*tlen/size/scaling));
|
||||
/* for (j=0; j<fpts ; j++)
|
||||
fdvec[i][j].cx_real = sqrt(fdvec[i][j].cx_real)/scaling;
|
||||
*/
|
||||
/* smoothing with rectangular window of width "smooth",
|
||||
plotting V/sqrt(Hz) or I/sqrt(Hz) */
|
||||
hsmooth = smooth>>1;
|
||||
for (j=0; j<hsmooth; j++){
|
||||
sum = 0.;
|
||||
for (jj = 0; jj < hsmooth + j; jj++)
|
||||
sum += fdvec[i][jj].cx_real;
|
||||
sum /= (double)(hsmooth + j);
|
||||
reald[j] = sqrt(sum)/scaling;
|
||||
}
|
||||
for (j=hsmooth; j<fpts-hsmooth; j++){
|
||||
sum = 0.;
|
||||
for (jj = 0; jj < smooth; jj++)
|
||||
sum += fdvec[i][j-hsmooth+jj].cx_real;
|
||||
sum /= (double)smooth;
|
||||
reald[j] = sqrt(sum)/scaling;
|
||||
}
|
||||
for (j=fpts-hsmooth; j<fpts; j++){
|
||||
sum = 0.;
|
||||
for (jj = 0; jj < smooth; jj++)
|
||||
sum += fdvec[i][j-hsmooth+jj].cx_real;
|
||||
sum /= (double)(fpts - j + hsmooth - 1);
|
||||
reald[j] = sqrt(sum)/scaling;
|
||||
}
|
||||
for (j=0; j<fpts; j++)
|
||||
fdvec[i][j].cx_real = reald[j];
|
||||
}
|
||||
|
||||
free(reald);
|
||||
free(imagd);
|
||||
|
||||
tfree(tdvec);
|
||||
tfree(fdvec);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void fftext(double* x, double* y, long int n, long int nn, int dir)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,5 +7,6 @@
|
|||
#define FFT_H_INCLUDED
|
||||
|
||||
void com_fft(wordlist *wl);
|
||||
void com_psd(wordlist *wl);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -315,7 +315,11 @@ struct comm spcp_coms[] = {
|
|||
{ "fft", com_fft, FALSE, TRUE,
|
||||
{ 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
|
||||
NULL,
|
||||
"vector ... : Create a frequency domain plot with FFT." } ,
|
||||
"vector ... : Create a frequency domain plot with FFT." } ,
|
||||
{ "psd", com_psd, FALSE, TRUE,
      /* at least two arguments: the averaging count, then one or more vectors */
      { 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
      NULL,
      "navg vector ... : Create a power spectral density plot with FFT." } ,
|
||||
{ "fourier", com_fourier, FALSE, TRUE,
|
||||
{ 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
|
||||
NULL,
|
||||
|
|
@ -713,7 +717,11 @@ struct comm nutcp_coms[] = {
|
|||
{ "fft", com_fft, FALSE, TRUE,
|
||||
{ 0, 0, 0, 0 }, E_DEFHMASK, 1, LOTS,
|
||||
NULL,
|
||||
"vector ... : Create a frequency domain plot with FFT." } ,
|
||||
"vector ... : Create a frequency domain plot with FFT." } ,
|
||||
{ "psd", com_psd, FALSE, TRUE,
      /* at least two arguments: the averaging count, then one or more vectors */
      { 0, 0, 0, 0 }, E_DEFHMASK, 2, LOTS,
      NULL,
      "navg vector ... : Create a power spectral density plot with FFT." } ,
|
||||
{ "fourier", com_fourier, FALSE, TRUE,
|
||||
{ 0, 040000, 040000, 040000 }, E_DEFHMASK, 1, LOTS,
|
||||
NULL,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
/* Copyright: Holger Vogt, 2008
|
||||
Generates 1/f noise values according to:
|
||||
"Discrete simulation of colored noise and stochastic
|
||||
processes and 1/f^alpha power law noise generation"
|
||||
Kasdin, N.J.;
|
||||
Proceedings of the IEEE
|
||||
Volume 83, Issue 5, May 1995 Page(s):802 - 827
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h> // var. argumente
|
||||
#include "1-f-code.h"
|
||||
#include "ngspice.h"
|
||||
|
||||
#include "fftext.h"
|
||||
#include "wallace.h"
|
||||
|
||||
|
||||
void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
|
||||
float alpha)
|
||||
{
|
||||
int i;
|
||||
float *hfa, *wfa;
|
||||
float ha;
|
||||
|
||||
ha = alpha/2.0f ;
|
||||
// Q_d = sqrt(Q_d); /* find the deviation of the noise */
|
||||
hfa = TMALLOC(float,n_pts);
|
||||
wfa = TMALLOC(float,n_pts);
|
||||
hfa[0] = 1.0f;
|
||||
wfa[0] = Q_d * (float)GaussWa;
|
||||
/* generate the coefficients hk */
|
||||
for (i=1 ; i < n_pts; i++) {
|
||||
/* generate the coefficients hk */
|
||||
hfa[i] = hfa[i-1] * (ha + (float)(i-1)) / ( (float)(i) );
|
||||
/* fill the sequence wk with white noise */
|
||||
wfa[i] = Q_d * (float)GaussWa;
|
||||
}
|
||||
|
||||
// for (i=0 ; i < n_pts; i++)
|
||||
// printf("rnd %e, hk %e\n", wfa[i], hfa[i]);
|
||||
|
||||
/* perform the discrete Fourier transform */
|
||||
fftInit(n_exp);
|
||||
rffts(hfa, n_exp, 1);
|
||||
rffts(wfa, n_exp, 1) ;
|
||||
|
||||
/* multiply the two complex vectors */
|
||||
rspectprod(hfa, wfa, X, n_pts);
|
||||
/* inverse transform */
|
||||
riffts(X, n_exp, 1);
|
||||
|
||||
free(hfa) ;
|
||||
free(wfa);
|
||||
/* fft tables will be freed in vsrcaccept.c and isrcaccept.c
|
||||
fftFree(); */
|
||||
fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/* Copyright: Holger Vogt, 2008
|
||||
Discrete simulation of colored noise and stochastic
|
||||
processes and 1/f^alpha power law noise generation
|
||||
Kasdin, N.J.;
|
||||
Proceedings of the IEEE
|
||||
Volume 83, Issue 5, May 1995 Page(s):802 - 827
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h> // var. argumente
|
||||
#include "1-f-code.h"
|
||||
#include "ngspice.h"
|
||||
|
||||
#include "fftext.h"
|
||||
#include "wallace.h"
|
||||
|
||||
|
||||
void f_alpha(int n_pts, int n_exp, double X[], double Q_d,
|
||||
double alpha)
|
||||
{
|
||||
unsigned int i;
|
||||
double *hfa, *wfa;
|
||||
double ha;
|
||||
|
||||
ha = alpha/2.0f ;
|
||||
// Q_d = sqrt(Q_d); /* find the deviation of the noise */
|
||||
hfa = TMALLOC(double,n_pts);
|
||||
wfa = TMALLOC(double,n_pts);
|
||||
hfa[0] = 1.0f;
|
||||
wfa[0] = Q_d * GaussWa;
|
||||
/* generate the coefficients hk */
|
||||
for (i=1 ; i < n_pts; i++) {
|
||||
/* generate the coefficients hk */
|
||||
hfa[i] = hfa[i-1] * (ha + (double)(i-1)) / ( (double)(i) );
|
||||
/* fill the sequence wk with white noise */
|
||||
wfa[i] = Q_d * GaussWa;
|
||||
}
|
||||
|
||||
// for (i=0 ; i < n_pts; i++)
|
||||
// printf("rnd %e, hk %e\n", wfa[i], hfa[i]);
|
||||
|
||||
/* perform the discrete Fourier transform */
|
||||
fftInit(n_exp);
|
||||
rffts(hfa, n_exp, 1);
|
||||
rffts(wfa, n_exp, 1) ;
|
||||
|
||||
/* multiply the two complex vectors */
|
||||
rspectprod(hfa, wfa, X, n_pts);
|
||||
/* inverse transform */
|
||||
riffts(X, n_exp, 1);
|
||||
|
||||
free(hfa) ;
|
||||
free(wfa);
|
||||
/* fft tables will be freed in vsrcaccept.c and isrcaccept.c
|
||||
fftFree(); */
|
||||
fprintf(stdout,"%d (2e%d) one over f values created\n", n_pts, n_exp);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,846 @@
|
|||
/* This is file FastNorm3.c */
|
||||
/* SUPERSEDES FastNorm.c, FastNorm2.c. Use with FastNorm3.h */
|
||||
/* 24 June 2003 */
|
||||
|
||||
/* A package containing a very fast generator of pseudo-random
|
||||
Unit NORMAL variates, and some fairly high-quality UNIFORM
|
||||
generators. It also contains a straightforward implementation of
|
||||
a ChiSquared and Gamma generator copied from Ahrens and Dieter.
|
||||
*/
|
||||
|
||||
/* Version 3 with double transformations and controllable extension
|
||||
to repeat the double transformations for higher quality at lower
|
||||
speed.
|
||||
Dated 17 May 2003.
|
||||
Copyright Christopher Stewart Wallace.
|
||||
*/
|
||||
/*
|
||||
%A C. S. Wallace
|
||||
%T Fast Pseudo-Random Generators for Normal and Exponential Variates.
|
||||
%J ACM Trans. Math. Software
|
||||
%V 22
|
||||
%N 1
|
||||
%P 119-127
|
||||
%M MAR
|
||||
%D 1996
|
||||
%O TR 94/197, May 1994, Dept. Computer Science, Monash University
|
||||
%K CSW, CSWallace, Monash, pseudo random number generator, algorithm,
|
||||
jrnl, TOMS, numbers, normal, probability, distribution, PRNG, RNG, Gaussian,
|
||||
distribution, jrnl, ACM, TOMS, TR 94 197, TR197, c1996, c199x, c19xx
|
||||
*/
|
||||
/* Use of this package requires the file "FastNorm3.h" which must be
|
||||
#include-ed in any C files using this package.
|
||||
|
||||
The main purpose of this package is to provide a very fast source
|
||||
of pseudo-random variates from the Unit Normal N(0,1) distribution, having
|
||||
the density function
|
||||
|
||||
f(x) = (1/sqrt(2*PI)) * exp (-0.5 * x^2)
|
||||
|
||||
Variates are obtained not by calling a function, but by use of a macro
|
||||
"FastNorm" defined in FastNorm3.h. In a C program, this macro may appear
|
||||
anywhere a (double) expression could appear, e.g in statements like
|
||||
z += FastNorm;
|
||||
if (FastNorm < 1.1) .....
|
||||
q = fabs (FastNorm); etc.
|
||||
|
||||
The revision history, and a reference to the method description, is given
|
||||
later in this file under the heading "Revision history Fastnorm".
|
||||
|
||||
Major sections of this file, such as the revision history and the
|
||||
major subroutines, are all headed by a line containing a row of minus signs (-)
|
||||
and the name of the section or subroutine.
|
||||
|
||||
The generators included are:
|
||||
a Uniform source of integers, unsigned integers and doubles.
|
||||
Chi-sq(N) (based on Ahrens and Dieter)
|
||||
Gamma(N) (= 0.5 * Chi-sq(2N))
|
||||
Normal (a very fast routine)
|
||||
*/
|
||||
|
||||
/* ----------------- inclusions and some definitions ------------ */
|
||||
#include <math.h>
|
||||
#ifndef NOSPICE
|
||||
#include "ngspice.h"
|
||||
#endif
|
||||
#include "FastNorm3.h"
|
||||
|
||||
|
||||
/* --------------- (Uniform) c7rand, irandm, urandm ---------- */
|
||||
/*
|
||||
c A random number generator called as a function by
|
||||
c c7rand (iseed) or irandm (iseed) or urandm (iseed)
|
||||
|
||||
c The parameter should be a pointer to a 2-element Sw vector.
|
||||
c The first call gives a double uniform in 0 .. 1.
|
||||
c The second gives an Sw integer uniform in 0 .. 2**31-1
|
||||
c The third gives an Sw integer with 32 bits, so unif in
|
||||
c -2**31 .. 2**31-1 if used in 32-bit signed arithmetic.
|
||||
c All update iseed[] in exactly the same way.
|
||||
c iseed[] must be a 2-element Sw vector.
|
||||
c The initial value of iseed[1] may be any 32-bit integer.
|
||||
c The initial value of iseed[0] may be any 32-bit integer except -1.
|
||||
c
|
||||
c The period of the random sequence is 2**32 * (2**32-1)
|
||||
c Its quality is quite good. It is based on the mixed multiplicative
|
||||
c congruential (Lehmer) generator
|
||||
x[n+1] = (69069 * x[n] + odd constant) MOD 2^32
|
||||
c but avoids most of the well-known defects of this type of generator
|
||||
c by, in effect, generating x[n+k] from x[n] as defined by the
|
||||
c sequence above, where k is chosen randomly in 1 ... 128 with the
|
||||
c help of a subsidiary Tausworthe-type generator.
|
||||
c For the positive integer generator irandm, the less
|
||||
c significant digits are more random than is usual for a Lehmer
|
||||
c generator. The last n<31 digits do not repeat with a period of 2^n.
|
||||
c This is also true of the unsigned integer generator urandm, but less
|
||||
c so.
|
||||
|
||||
c This is an implementation in C of the algorithm described in
|
||||
c Technical Report "A Long-Period Pseudo-Random Generator"
|
||||
c TR89/123, Computer Science, Monash University,
|
||||
c Clayton, Vic 3168 AUSTRALIA
|
||||
c by
|
||||
c
|
||||
c C.S.Wallace csw@cs.monash.edu.au
|
||||
|
||||
c The table mt[0:127] is defined by mt[i] = 69069 ** (128-i)
|
||||
*/
|
||||
|
||||
#define MASK ((Sw) 0x12DD4922)
|
||||
/* or in decimal, 316492066 */
|
||||
#define SCALE ((double) 1.0 / (1024.0 * 1024.0 * 1024.0 * 2.0))
|
||||
/* i.e. 2 to power -31 */
|
||||
|
||||
static Sw mt [128] = {
|
||||
902906369,
|
||||
2030498053,
|
||||
-473499623,
|
||||
1640834941,
|
||||
723406961,
|
||||
1993558325,
|
||||
-257162999,
|
||||
-1627724755,
|
||||
913952737,
|
||||
278845029,
|
||||
1327502073,
|
||||
-1261253155,
|
||||
981676113,
|
||||
-1785280363,
|
||||
1700077033,
|
||||
366908557,
|
||||
-1514479167,
|
||||
-682799163,
|
||||
141955545,
|
||||
-830150595,
|
||||
317871153,
|
||||
1542036469,
|
||||
-946413879,
|
||||
-1950779155,
|
||||
985397153,
|
||||
626515237,
|
||||
530871481,
|
||||
783087261,
|
||||
-1512358895,
|
||||
1031357269,
|
||||
-2007710807,
|
||||
-1652747955,
|
||||
-1867214463,
|
||||
928251525,
|
||||
1243003801,
|
||||
-2132510467,
|
||||
1874683889,
|
||||
-717013323,
|
||||
218254473,
|
||||
-1628774995,
|
||||
-2064896159,
|
||||
69678053,
|
||||
281568889,
|
||||
-2104168611,
|
||||
-165128239,
|
||||
1536495125,
|
||||
-39650967,
|
||||
546594317,
|
||||
-725987007,
|
||||
1392966981,
|
||||
1044706649,
|
||||
687331773,
|
||||
-2051306575,
|
||||
1544302965,
|
||||
-758494647,
|
||||
-1243934099,
|
||||
-75073759,
|
||||
293132965,
|
||||
-1935153095,
|
||||
118929437,
|
||||
807830417,
|
||||
-1416222507,
|
||||
-1550074071,
|
||||
-84903219,
|
||||
1355292929,
|
||||
-380482555,
|
||||
-1818444007,
|
||||
-204797315,
|
||||
170442609,
|
||||
-1636797387,
|
||||
868931593,
|
||||
-623503571,
|
||||
1711722209,
|
||||
381210981,
|
||||
-161547783,
|
||||
-272740131,
|
||||
-1450066095,
|
||||
2116588437,
|
||||
1100682473,
|
||||
358442893,
|
||||
-1529216831,
|
||||
2116152005,
|
||||
-776333095,
|
||||
1265240893,
|
||||
-482278607,
|
||||
1067190005,
|
||||
333444553,
|
||||
86502381,
|
||||
753481377,
|
||||
39000101,
|
||||
1779014585,
|
||||
219658653,
|
||||
-920253679,
|
||||
2029538901,
|
||||
1207761577,
|
||||
-1515772851,
|
||||
-236195711,
|
||||
442620293,
|
||||
423166617,
|
||||
-1763648515,
|
||||
-398436623,
|
||||
-1749358155,
|
||||
-538598519,
|
||||
-652439379,
|
||||
430550625,
|
||||
-1481396507,
|
||||
2093206905,
|
||||
-1934691747,
|
||||
-962631983,
|
||||
1454463253,
|
||||
-1877118871,
|
||||
-291917555,
|
||||
-1711673279,
|
||||
201201733,
|
||||
-474645415,
|
||||
-96764739,
|
||||
-1587365199,
|
||||
1945705589,
|
||||
1303896393,
|
||||
1744831853,
|
||||
381957665,
|
||||
2135332261,
|
||||
-55996615,
|
||||
-1190135011,
|
||||
1790562961,
|
||||
-1493191723,
|
||||
475559465,
|
||||
69069
|
||||
};
|
||||
|
||||
/* Advance the two-word generator state is[0], is[1] by one step and return
   the new is[1].  This is the update that c7rand, irandm and urandm all
   share; they differ only in how the returned word is presented. */
static Sw c7_advance (Sw *is)
{
    Sw it, leh;

    it = is [0];
    leh = is [1];
    /* Do a 7-place right cyclic shift of it */
    it = ((it >> 7) & 0x01FFFFFF) + ((it & 0x7F) << 25);
    /* top bit clear: scramble with MASK */
    if (!(it & 0x80000000))  it = it ^ MASK;
    /* multiplier is picked from mt[] by the low 7 bits of it */
    leh = (leh * mt[it & 127] + it) & 0xFFFFFFFF;
    is [0] = it;    is [1] = leh;
    return (leh);
}


/* Uniform double: the folded 31-bit value scaled by 2^-31.
   is points to the 2-element seed vector, which is updated. */
double c7rand (Sw *is)
{
    Sw leh;

    leh = c7_advance (is);
    /* fold values with bit 31 set by complementing the low 32 bits */
    if (leh & 0x80000000)  leh = leh ^ 0xFFFFFFFF;
    return (SCALE * leh);
}



/* Same sequence as c7rand, returned as the folded integer itself.
   is points to the 2-element seed vector, which is updated. */
Sw irandm (Sw *is)
{
    Sw leh;

    leh = c7_advance (is);
    /* fold values with bit 31 set by complementing the low 32 bits */
    if (leh & 0x80000000)  leh = leh ^ 0xFFFFFFFF;
    return (leh);
}


/* Same sequence again, returned without folding as an unsigned int.
   is points to the 2-element seed vector, which is updated. */
unsigned int urandm (Sw *is)
{
    return (c7_advance (is));
}
|
||||
|
||||
|
||||
/* --------------- (Chi-squared) adchi ----------------------- */
|
||||
/* Simple implementation of Ahrens and Dieter method for a chi-sq
|
||||
random variate of order a >> 1. Uses c7rand, maths library */
|
||||
/* 13 July 1998 */
|
||||
/* Slightly faster if 'a' is the same as on previous call */
|
||||
/* This routine is no longer used in the fastnorm code, but is included
|
||||
because it may be useful */
|
||||
|
||||
|
||||
static double gorder, gm, rt2gm, aold;
|
||||
|
||||
double adchi (double a, int *is)
|
||||
{
|
||||
double x, y, z, sq;
|
||||
|
||||
if (a != aold) {
|
||||
aold = a; gorder = 0.5 * a;
|
||||
gm = gorder - 1.0;
|
||||
rt2gm = sqrt (aold - 1.0);
|
||||
}
|
||||
|
||||
polar:
|
||||
x = 2.0 * c7rand(is) - 1.0; z = c7rand(is);
|
||||
sq = x*x + z*z;
|
||||
if ((sq > 1.0) || (sq < 0.25)) goto polar;
|
||||
y = x / z;
|
||||
x = rt2gm * y + gm;
|
||||
if (x < 0.0) goto polar;
|
||||
|
||||
z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
|
||||
if (c7rand(is) > z) goto polar;
|
||||
|
||||
return (2.0 * x);
|
||||
}
|
||||
|
||||
/* -------------------- (Gamma) rgamma (g, is) ----------- */
|
||||
|
||||
double rgamma (double g, int *is)
|
||||
{
|
||||
double x, y, z, sq;
|
||||
|
||||
if (g != gorder) {
|
||||
gorder = g;
|
||||
gm = gorder - 1.0; aold = 2.0 * gorder;
|
||||
rt2gm = sqrt (aold - 1.0);
|
||||
}
|
||||
|
||||
polar:
|
||||
x = 2.0 * c7rand(is) - 1.0; z = c7rand(is);
|
||||
sq = x*x + z*z;
|
||||
if ((sq > 1.0) || (sq < 0.25)) goto polar;
|
||||
y = x / z;
|
||||
x = rt2gm * y + gm;
|
||||
if (x < 0.0) goto polar;
|
||||
|
||||
z = (1.0 + y*y) * exp (gm * log(x/gm) - rt2gm * y);
|
||||
if (c7rand(is) > z) goto polar;
|
||||
|
||||
return (x);
|
||||
}
|
||||
|
||||
|
||||
/* ------------------ Revision history Fastnorm ------------- */
|
||||
/* Items in this revision history appear in chronological order,
|
||||
so the most recent revision appears last.
|
||||
Revision items are separated by a line of '+' characters.
|
||||
|
||||
++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
This is a revised version of the algorithm described in
|
||||
|
||||
ACM Transactions on Mathematical Software, Vol 22, No 1
|
||||
March 1996, pp 119-127.
|
||||
|
||||
A fast generator of pseudo-random variates from the unit Normal
|
||||
distribution. It keeps a pool of about 1000 variates, and generates new
|
||||
ones by picking 4 from the pool, rotating the 4-vector with these as its
|
||||
components, and replacing the old variates with the components of the
|
||||
rotated vector.
|
||||
|
||||
The program should initialize the generator by calling initnorm(seed)
|
||||
with seed a Sw integer seed value. Different seed values will give
|
||||
different sequences of Normals. Seed may be any 32-bit integer.
|
||||
BUT SEE REVISION of 17 May 2003 for initnorm() parameters.
|
||||
The revised initnorm requires two integer parameters, iseed and
|
||||
quoll, the latter specifying a tradeoff between speed and
|
||||
quality.
|
||||
Then, wherever the program needs a new Normal variate, it should
|
||||
use the macro FastNorm, e.g. in statements like:
|
||||
x = FastNorm; (Sets x to a random Normal value)
|
||||
or
|
||||
x += a + FastNorm * b; (Adds a normal with mean a, SD b, to x)
|
||||
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Changed basic formula, which was:
|
||||
t = (p+q+r+s)*0.5; p = p-t; q = t-q; r = t-r; s = t-s;
|
||||
This gives sum of new p+q+r+s = 2p(old) which may not be a great
|
||||
choice. The new version is:
|
||||
t = (p+q+r+s)*0.5; p = p-t; q = q-t; r = t-r; s = t-s;
|
||||
which gives new p+q+r+s = p+q-r-s (old) which may be better.
|
||||
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
Revision 14 November 1998
|
||||
The older version "FastNorm" which was available via ftp was found
|
||||
to have a defect which could affect some applications.
|
||||
|
||||
Dr Christine Rueb, (Max Planck Institut fur Informatik,
|
||||
Im Stadtwald W 66123 Saarbrucken, F.G.R.,
|
||||
(rueb@mpi-sb.mpg.de)
|
||||
|
||||
found that if a large number N of consecutive variates were summed to give
|
||||
a variate S with nominally N(0,N) distribution, the variance of S was in some
|
||||
cases too small. The effect was noticed with N=400, and was particularly strong
|
||||
for N=1023 if the first several (about 128) variates from FastNorm were
|
||||
discarded. Dr. Rueb traced the effect to an unexpected tendency of FastNorm
|
||||
to concentrate values with an anomalous correlation into the first 128
|
||||
elements of the variate pool.
|
||||
With the help of her analysis, the algorithm has been revised in a
|
||||
way which appears to overcome the problem, at the cost of about a 19%
|
||||
reduction in speed (which still leaves the method very fast.)
|
||||
|
||||
IT MUST BE RECOGNISED THAT THIS ALGORITHM IS NOVEL
|
||||
AND WHILE IT PASSES A NUMBER OF STANDARD TESTS FOR DISTRIBUTIONAL
|
||||
FORM, LACK OF SERIAL CORRELATION ETC., IT MAY STILL HAVE DEFECTS.
|
||||
|
||||
RECALL THE NUMBER OF YEARS WHICH IT TOOK FOR THE LIMITATIONS OF
|
||||
THE LEHMER GENERATOR FOR UNIFORM VARIATES TO BECOME APPARENT !!!
|
||||
|
||||
UNTIL MORE EXPERIENCE IS GAINED WITH THIS TYPE OF GENERATOR, IT
|
||||
WOULD BE WISE IN ANY CRITICAL APPLICATION TO COMPARE RESULTS
|
||||
OBTAINED USING IT WITH RESULTS OBTAINED USING A "STANDARD" FORM
|
||||
OF GENERATOR OF NORMAL VARIATES COUPLED WITH A WELL-DOCUMENTED
|
||||
GENERATOR OF UNIFORM VARIATES.
|
||||
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
Revision 1 April 2003.
|
||||
Trying a scanning process proposed by R.P.Brent. It needs 2 pool
|
||||
vectors, as it cannot update in-situ, but may be more robust.
|
||||
It is a bit slower on a 133 Mhz PC but just as fast on a newer PC
|
||||
(moggie) at about 16 ns per call in the 'speed.c' test.
|
||||
The extreme-value defect is the same on old and new versions.
|
||||
If one finds a value 'A' such that a batch of B genuine Normal variates has
|
||||
probability 0.2 of containing a variate with absolute value greater than A,
|
||||
then the probability that both of two consecutive batches of B will contain
|
||||
such a value should be 0.2 times 0.2, or 0.04. Instead, both versions give
|
||||
the extreme value prob. as 0.200 (over a million batches) but give the
|
||||
consecutive-pair prob as 0.050 for batch size B = 1024.
|
||||
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
Revision 17 May 2003.
|
||||
The fundamental defect of the method, namely an inadequate 'mixing'
|
||||
of squared value ('energy') between one generation of the pool and the next,
|
||||
cannot readily be removed. In going from one pool to the next, the energy
|
||||
in an old variate is shared among just 4 variates in the new pool. Hence it
|
||||
takes many generations before the energy of some original variate can be
|
||||
distributed across the whole pool. The number of generations needed cannot
|
||||
be less than the log to base 4 of the pool size, or 5 for a pool size of
|
||||
1024. In fact, the pseudo-random indexing of the pool means that rather
|
||||
more generations are needed on average.
|
||||
The defect is readily revealed by the following test. One picks a
|
||||
"batch size" comparable to the pool size, say 500 or 1000. One then
|
||||
computes a value A such that a batch will with probability 0.2 contain one
|
||||
or more variates with absolute value exceeding A.
|
||||
One then draws batches from FastNorm,
|
||||
and tests each batch to see if it contains such an extreme value.
|
||||
Over many batches, one counts the frequency of such 'extreme' batches,
|
||||
and finds (with FastNorm2) that it is indeed about 0.2. However, when one counts
|
||||
the frequency with which successive batches are both extreme, one finds it to
|
||||
be higher than the proper value (0.2)^2 = 0.04. For batch sizes round the pool
|
||||
size, it can be as high as 0.05. That is, although the frequency of extreme
|
||||
values is about right, their occurrence in the stream is correlated over a
|
||||
scale of the order of the pool size.
|
||||
The same correlation effect is seen in the average 4th moment of
|
||||
successive batches.
|
||||
Since this inter-generational correlation cannot be avoided,
|
||||
this revision seeks to reduce it by performing at least two simple
|
||||
rotations of the pool at each generation. Obviously, some speed is lost,
|
||||
but the correlations are reduced.
|
||||
To allow the user to trade off speed and quality, the initialization
|
||||
function initnorm() now provides a QUALITY parameter 'quoll' which controls
|
||||
how many double-rotations are done for each generation.
|
||||
See the comments in initnorm() for more detail.
|
||||
++++++++++ End of revision notes +++++++++ */
|
||||
|
||||
|
||||
|
||||
/* ----------------- Some test results ------------------------ */
|
||||
/*
|
||||
General form:
|
||||
Some simple tests were conducted by transforming FastNorm variates
|
||||
in several ways to yield a variable nominally uniformly distributed in 0 ... 1.
|
||||
Uniformity of the derived variate was then tested by a ChiSquared test on a
|
||||
100-cell histogram with cell counts around 10000. These tests are crude, but
|
||||
showed no untoward results on the present version.
|
||||
Transformations included:
|
||||
y = 0.5 * (1.0 + erf (n1 / sqrt(2)))
|
||||
|
||||
y = 0.5 * (n1 / (n1^2 + n2^2 + n3^2) - 1)
|
||||
|
||||
y = exp (-0.5 * (n1^2 + n2^2))
|
||||
|
||||
y = (n1^2 + n2^2) / (n1^2 + n2^2 + n3^2 + n4^2)
|
||||
|
||||
where n1, n2 etc are successive Normal variates.
|
||||
It may be noted that some of these are sensitive to serial correlation if
|
||||
present.
|
||||
|
||||
Fourth moment of batches:
|
||||
Extensive tests for correlation among the fourth moments of successive
|
||||
batches of variates were made, with batch sizes comparable to or (worst case)
|
||||
equal to the size of the variate pool (4096 in this revision).
|
||||
With 'quality' 1, significant correlation appears after 10^6 batches
|
||||
of worst-case size.
|
||||
With quality 2, no significant correlation is evident after 10^7
|
||||
batches. A just-significant correlation appears after 3.6*10^7 batches.
|
||||
As this requires some 1.4*10^11 deviates to be drawn, it may be irrelevant
|
||||
for many applications. The observed correlation coefficient was 0.0008.
|
||||
With quality 3, results are OK after 10^8 batches, or more than
|
||||
4*10^11 variates.
|
||||
No tests have been done with quality 4 as yet.
|
||||
|
||||
Speed:
|
||||
Speed tests were done on a PC running RedHat Linux, using "-O"
|
||||
compiler optimization. The test loop was
|
||||
for (i = 0; i < 500000000; i++) {
|
||||
a += FastNorm; a -= FastNorm;
|
||||
}
|
||||
Thus the test makes 10^9 uses of FastNorm. The time taken, (which
|
||||
includes time for a call in 'initnorm' and the loop overhead) depends on
|
||||
the 'quality' set by initnorm.
|
||||
Quality 1: 21.5 sec
|
||||
Quality 2: 32.1 sec
|
||||
Quality 3: 42.5 sec
|
||||
Quality 4: 53.1 sec
|
||||
|
||||
By way of comparison, the same 10^9 call loop was timed with the Unix library
|
||||
"random()" routine substituted for FastNorm, and the variable 'a' defined as
|
||||
integer rather than double. Also, since most use of a Uniform generator such
|
||||
as "random()" requires that the returned integer be scaled into a floating-
|
||||
point number in 0 ... 1, the timing was repeated with
|
||||
"a += random" ('a' integer) replaced by "a += Scale*random()" where
|
||||
'a' is double and Scale = 2^(-31). The times obtained were:
|
||||
Random (integer): 44.1 sec
|
||||
Random (double) : 47.7 sec
|
||||
|
||||
It can be seen that FastNorm (even at quality 3) is faster than a
|
||||
commonly-used Uniform generator. To some extent, this result may vary on
|
||||
different computers and compilers. Since FastNorm (at least for qualities
|
||||
above 1) no doubt does more arithmetic per variate than "random()", much of
|
||||
its speed advantage must come from its replacement of a function call per
|
||||
variate by a macro which makes only one function call every 4095 variates.
|
||||
Computers with lower 'call' overheads than the PC used here might show
|
||||
different results.
|
||||
Incidentally, the Uniform generator 'c7rand()' included in this
|
||||
package, which returns a double uniform in 0 ... 1, and is of fairly high
|
||||
quality, gives a time in the same test of 36.8 sec, a little faster than
|
||||
'random()'.
|
||||
*/
|
||||
|
||||
|
||||
/* ----------------- globals ------------------------- */
|
||||
/* A pool must have a length which is a multiple of 4.
|
||||
* During regeneration of a new pool, the pool is treated as 4
|
||||
* consecutive vectors, each of length VL.
|
||||
*/
|
||||
|
||||
#define VE 10
|
||||
#define VL (1 << VE)
|
||||
#define VM (VL-1)
|
||||
#define WL (4*VL)
|
||||
#define WM (WL-1)
|
||||
|
||||
Sw gaussfaze;
|
||||
Sf *gausssave;
|
||||
Sf GScale;
|
||||
/* GScale,fastnorm,gaussfaze, -save must be visible to callers*/
|
||||
static Sf chic1, chic2; /* Constants used in getting ChiSq_WL */
|
||||
Sw gslew; /* Counts generations */
|
||||
static Sw qual; /* Sets number of double transforms per generation. */
|
||||
static Sw c7g [2]; /* seed values for c7rand */
|
||||
|
||||
Sf wk1 [WL], wk2 [WL]; /* Pools of variates. */
|
||||
|
||||
|
||||
/* ------------------ regen ---------------------- */
|
||||
/* Takes variates from wk1[], transforms to wk2[], then back to wk1[].
|
||||
*/
|
||||
void regen ()
|
||||
{
|
||||
Sw i, j, k, m;
|
||||
Sf p, q, r, s, t;
|
||||
Sw topv[6], ord[4], *top;
|
||||
Sf *ppt[4], *ptn;
|
||||
|
||||
/* Choose 4 random start points in the wk1[] vector
|
||||
I want them all different. */
|
||||
|
||||
top = topv + 1;
|
||||
/* Set limiting values in top[-1], top[4] */
|
||||
top[-1] = VL; top[4] = 0;
|
||||
reran1:
|
||||
m = irandm (c7g); /* positive 32-bit random */
|
||||
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
||||
m = m >> (31 - 2*VE);
|
||||
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
|
||||
m = irandm (c7g); /* positive 32-bit random */
|
||||
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
||||
m = m >> (31 - 2*VE);
|
||||
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
|
||||
for (i = 0; i < 4; i++) ord[i] = i;
|
||||
/* Sort in decreasing size */
|
||||
for (i = 2; i >= 0; i--) {
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (top[j] < top[j+1]) {
|
||||
k = top[j]; top[j] = top[j+1];
|
||||
top[j+1] = k;
|
||||
k = ord[j]; ord[j] = ord[j+1];
|
||||
ord[j+1] = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Ensure all different */
|
||||
for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
|
||||
|
||||
/* Set pt pointers to their start values for the first chunk. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
j = ord[i];
|
||||
ppt[j] = wk2 + j * VL + top[i];
|
||||
}
|
||||
|
||||
/* Set ptn to point into wk1 */
|
||||
ptn = wk1;
|
||||
|
||||
/* Now ready to do five chunks. The length of chunk i is
|
||||
top[i-1] - top[i] (I hope)
|
||||
At the end of chunk i, pointer ord[i] should have reached the end
|
||||
of its part, and need to be wrapped down to the start of its part.
|
||||
*/
|
||||
i = 0;
|
||||
|
||||
chunk:
|
||||
j = top[i] - top[i-1]; /* Minus the chunk length */
|
||||
for (; j < 0; j++) {
|
||||
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
|
||||
t = (p + q + r + s) * 0.5;
|
||||
*ppt[0]++ = t - p;
|
||||
*ppt[1]++ = t - q;
|
||||
*ppt[2]++ = r - t;
|
||||
*ppt[3]++ = s - t;
|
||||
}
|
||||
/* This should end the chunk. See if all done */
|
||||
if (i == 4) goto passdone;
|
||||
|
||||
/* The pointer for part ord[i] should have passed its end */
|
||||
j = ord[i];
|
||||
#ifdef dddd
|
||||
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-wk2);
|
||||
#endif
|
||||
ppt[j] -= VL;
|
||||
i++;
|
||||
goto chunk;
|
||||
|
||||
passdone:
|
||||
/* wk1[] values have been transformed and placed in wk2[]
|
||||
Transform from wk2 to wk1 with a simple shuffle */
|
||||
m = (irandm (c7g) >> (29 - VE)) & WM;
|
||||
j = 0;
|
||||
for (i = 0; i < 4; i++) ppt[i] = wk1 + i * VL;
|
||||
for (i = 0; i < VL; i++) {
|
||||
p = wk2[j^m]; j++;
|
||||
s = wk2[j^m]; j++;
|
||||
q = wk2[j^m]; j++;
|
||||
r = wk2[j^m]; j++;
|
||||
t = (p + q + r + s) * 0.5;
|
||||
*ppt[0]++ = t - p;
|
||||
*ppt[1]++ = q - t;
|
||||
*ppt[2]++ = t - r;
|
||||
*ppt[3]++ = s - t;
|
||||
}
|
||||
|
||||
/* We have a new lot of variates in wk1 */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------- renormalize --------------------------- */
|
||||
/* Rescales wk1[] so sum of squares = WL */
|
||||
/* Returns the original sum-of-squares */
|
||||
Sf renormalize (void)
|
||||
{
|
||||
Sf ts, vv;
|
||||
Sw i;
|
||||
|
||||
ts = 0.0;
|
||||
for (i = 0; i < WL; i++) {
|
||||
ts += wk1[i] * wk1[i];
|
||||
}
|
||||
vv = sqrt (WL / ts);
|
||||
for (i = 0; i < WL; i++) wk1[i] *= vv;
|
||||
return (ts);
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ BoxMuller ---------------------- */
|
||||
/* Fills block gvec of length ll with proper normals */
|
||||
/* boxmuller: fill gvec[0 .. ll-1] with conventional Normal variates.
 *
 *   gvec  destination block
 *   ll    number of values to generate; nothing is written when ll <= 0
 *
 * A direction (tx, ty) is drawn uniformly from the annulus
 * 0.25 <= r^2 <= 1 and a separate uniform draw supplies the radius.
 * Uniform values come from c7rand(c7g).
 *
 * Values are produced in pairs; when ll is odd the second value of the
 * last pair is discarded rather than written past the end of gvec.
 * A zero radius draw is redrawn so that log() is never given zero.
 */
void boxmuller (Sf *gvec, Sw ll)
{
    Sw i;
    Sf tx, ty, tr, tz, u;

    i = 0;
    while (i < ll) {
        tx = 2.0 * c7rand(c7g) - 1.0;   /* Uniform in -1..1 */
        ty = 2.0 * c7rand(c7g) - 1.0;   /* Uniform in -1..1 */
        tr = tx * tx + ty * ty;
        if ((tr > 1.0) || (tr < 0.25))
            continue;

        do {
            u = c7rand(c7g);
        } while (u <= 0.0);

        tz = -2.0 * log (u);            /* Sum of squares */
        tz = sqrt ( tz / tr );

        gvec [i++] = tx * tz;
        if (i < ll)
            gvec [i++] = ty * tz;
    }
}
|
||||
|
||||
|
||||
/* ------------------------- initnorm ---------------------- */
|
||||
/* To initialize, given a seed integer and a quality level.
|
||||
The seed can be any integer. The quality level quoll should be
|
||||
between 1 and 4. Quoll = 1 gives high speed, but leaves some
|
||||
correlation between the 4th moments of successive batches of values.
|
||||
Higher values of quoll give lower speed but less correlation.
|
||||
|
||||
If called with quoll = 0, initnorm performs a check that the
|
||||
most crucial routine (regen) is performing correctly. In this
|
||||
case, the value of 'iseed' is ignored. Initnorm will report the
|
||||
results of the test, which compares pool values with check17 and
|
||||
check98, which are defined below.
|
||||
When a check call is made, a proper call on initnorm must then
|
||||
be made before using the FastNorm macro. A check call does not
|
||||
properly initialize the routines even if it succeeds.
|
||||
*/
|
||||
static Sf check17 = 0.1255789;
|
||||
static Sf check98 = -0.7113293;
|
||||
|
||||
/* initnorm: initialise the generator, or run the installation check.
 *
 *   seed   any integer; ignored when quoll == 0
 *   quoll  1..4 selects the number of regen() passes per generation
 *          (stored in 'qual'); 0 requests the self-check of regen().
 *
 * On a normal call the pool wk1[] is filled by boxmuller(), renormalised,
 * and GScale is set so the original sum of squares is restored.
 * On a check call (quoll == 0) the pool is loaded with a fixed pattern,
 * regen() is run 60 times and wk1[17], wk1[98] are compared with check17
 * and check98; the result is printed.  A check call does not leave the
 * generator properly initialised.
 * An out-of-range quoll is reported and leaves 'qual', the seeds and the
 * pool untouched, so a rejected value cannot change how many passes
 * fastnorm() performs.
 */
void initnorm (Sw seed, Sw quoll)
{
    Sw i;

    /* At one stage, we need to generate a random variable Z such that
       (WL * Z*Z) has a Chi-squared-WL density.  Now, a var with
       an approximate Chi-sq-K distn can be got as
           (A + B*n)**2   where n has unit Normal distn,
       A**2 = K * sqrt (1 - 1/K),  A**2 + B**2 = K.  (For large K)
       So we form Z as (1/sqrt(WL)) * (A + B*n)
       or chic1 + chic2 * n  where
           chic1 = A / sqrt(WL),  chic2 = B / sqrt(WL).
       Hence
           chic1 = sqrt (A*A / WL) = sqrt ( sqrt (1 - 1/WL)),
           chic2 = sqrt (1 - chic1*chic1)
    */
    chic1 = sqrt ( sqrt (1.0 - 1.0 / WL));
    chic2 = sqrt (1.0 - chic1 * chic1);

    /* Set regen counter "gslew" which will affect renormalizations.
       Since pools are OK already, we won't need to renorm for a while */
    gslew = 1;

    /* Finally, set "gaussfaze" to return all of wk1
     * except the last entry at WL-1 */
    gaussfaze = WL-1;
    gausssave = wk1;

    /* If quoll = 0, do a check on installation */
    if (quoll == 0) {
        /* Set a simple pattern in wk1[] and test results of regen */
        for (i = 0; i < WL; i++)
            wk1[i] = wk2[i] = 0.0;
        wk1[0] = sqrt ((double) WL);
        c7g[0] = 1234567;
        c7g[1] = 9876543;
        for (i = 0; i < 60; i++)
            regen();

        /* Check a couple of values */
        if ((fabs (wk1[17] - check17) > 0.00001) ||
            (fabs (wk1[98] - check98) > 0.00001)) {
            printf ("\nInitnorm check failed.\n");
            printf ("Expected %8.5f got %10.7f\n", check17, wk1[17]);
            printf ("Expected %8.5f got %10.7f\n", check98, wk1[98]);
        }
        else printf ("\nInitnorm check OK\n");
        return;
    }

    /* Check sensible values for quoll, say 1 to 4, before touching state */
    if ((quoll < 0) || (quoll > 4)) {
        printf ("From initnorm(): quoll parameter %d out of range 1 to 4\n",
                quoll);
        return;
    }
    qual = quoll;

    c7g[0] = seed;
    c7g[1] = -3337792;

    /* Fill wk1[] with good normals */
    boxmuller (wk1, WL);

    /* Scale so sum-of-squares = WL; GScale restores the original
       ChiSq_WL sum-of-squares */
    GScale = sqrt (renormalize () / WL);
}
|
||||
|
||||
|
||||
/* ---------------------- fastnorm -------------------------- */
|
||||
/* If gslew shows time is ripe, renormalizes the pool
|
||||
fastnorm() returns the value GScale*gausssave[0].
|
||||
*/
|
||||
|
||||
Sf fastnorm ()
|
||||
{
|
||||
Sf sos;
|
||||
Sw n1;
|
||||
|
||||
if (! (gslew & 0xFFFF)) {
|
||||
sos = renormalize ();
|
||||
}
|
||||
|
||||
/* The last entry of gausssave, at WL-1, will not have been used.
|
||||
Use it to get an approx. to sqrt (ChiSq_WL / WL).
|
||||
See initnorm() code for details */
|
||||
GScale = chic1 + chic2 * GScale * gausssave [WL-1];
|
||||
for (n1 = 0; n1 < qual; n1++) regen ();
|
||||
gslew++;
|
||||
|
||||
gaussfaze = WL - 1;
|
||||
|
||||
return (GScale * gausssave [0]);
|
||||
}
|
||||
|
||||
|
||||
/* --------------------- (test) main ------------------------- */
|
||||
#ifdef Main
#include "FastNorm3.h"
/* Stand-alone test driver, built only when Main is defined.
 * Runs the installation check (initnorm (0, 0)), initialises the generator
 * with seed 77 at quality 2, then prints the pool sum-of-squares, 200
 * variates drawn through the FastNorm macro, and the sum-of-squares again.
 * Returns 0 so a completed run is not reported as a failure.
 */
int main(void)
{
    Sf x;  Sw i;

    initnorm (0, 0);
    initnorm (77, 2);
    printf ("SoS %20.6f\n", renormalize());
    for (i = 0; i < 200; i++) {
        x = FastNorm;
        printf("%d\t%f\n", i, x);
    }
    printf ("SoS %20.6f\n", renormalize());
    return 0;
}
#endif
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
noinst_LTLIBRARIES = libtrannoise.la
|
||||
|
||||
libtrannoise_la_SOURCES = \
|
||||
FastNorm3.c \
|
||||
1-f-code.c \
|
||||
wallace.c
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/frontend
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
@ -0,0 +1,532 @@
|
|||
/* Wallace generator for normally distributed random variates
|
||||
Copyright: Holger Vogt, 2008
|
||||
|
||||
*/
|
||||
|
||||
//#define FASTNORM_ORIG
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <process.h>
|
||||
#define getpid _getpid
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include "wallace.h"
|
||||
#include "FastNorm3.h"
|
||||
|
||||
#ifdef HasMain
|
||||
#include <sys/timeb.h>
|
||||
#else
|
||||
#ifndef NOSPICE
|
||||
#include "ngspice.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define POOLSIZE 4096
|
||||
#define LPOOLSIZE 12
|
||||
#define NOTRANS 3 /* number of (dual) transformations */
|
||||
|
||||
#define VE 10
|
||||
#define VL (1 << VE)
|
||||
#define VM (VL-1)
|
||||
#define WL (4*VL)
|
||||
#define WM (WL-1)
|
||||
|
||||
double *outgauss; /* output vector for user access */
|
||||
unsigned int variate_used; /* actual index of variate called by user */
|
||||
double ScaleGauss;
|
||||
|
||||
static double *pool1;
|
||||
static double *pool2;
|
||||
static unsigned int *addrif, *addrib;
|
||||
static unsigned n = POOLSIZE;
|
||||
static double chi1, chi2; /* chi^2 correction values */
|
||||
static unsigned int newpools;
|
||||
|
||||
extern double drand(void);
|
||||
extern unsigned int CombLCGTausInt(void);
|
||||
extern void TausSeed(void);
|
||||
extern unsigned int CombLCGTausInt2(void);
|
||||
|
||||
|
||||
/* PolarGauss: generate two normally distributed values by the polar
 * (Marsaglia) method and store them in *py1 and *py2.
 *
 * Two uniform values are drawn with drand() until the point (x1, x2) lies
 * inside the unit circle; w = x1^2 + x2^2 then supplies the radius.
 * NOTE(review): drand() is declared elsewhere; this relies on it returning
 * values uniform in -1 .. 1 -- confirm.
 *
 * Only w > 1 and w == 0 are rejected.  Because w itself supplies the
 * radius sqrt(-2 ln w), also rejecting w < 0.25 would cap every result at
 * |y| <= sqrt(2 ln 4), about 1.665, and truncate the tails.
 */
void PolarGauss(double* py1, double* py2)
{
    double x1, x2, w;

    do {
        x1 = drand();
        x2 = drand();
        w = x1 * x1 + x2 * x2;
    } while ((w > 1.0) || (w == 0.0));

    w = sqrt((-2.0 * log(w)) / w);

    *py1 = (double)(x1 * w);
    *py2 = (double)(x2 * w);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void initw(void)
|
||||
{
|
||||
unsigned i;
|
||||
double totsqr, nomsqr;
|
||||
unsigned long int coa, cob, s;
|
||||
|
||||
/* initialize the uniform generator */
|
||||
srand(getpid());
|
||||
// srand(17);
|
||||
TausSeed();
|
||||
|
||||
ScaleGauss = 1.;
|
||||
newpools = 1;
|
||||
|
||||
/* set up the two pools */
|
||||
pool1 = TMALLOC(double, n); //(double*)malloc(n * sizeof(double));
|
||||
pool2 = TMALLOC(double, n); //(double*)malloc(n * sizeof(double));
|
||||
addrif = TMALLOC(unsigned int, (n + NOTRANS)); //(unsigned int*)malloc((n + NOTRANS) * sizeof(unsigned int));
|
||||
addrib = TMALLOC(unsigned int, (n + NOTRANS)); //(unsigned int*)malloc((n + NOTRANS) * sizeof(unsigned int));
|
||||
|
||||
/* fill the first pool with normally distributed values */
|
||||
PolarGauss(&pool1[0], &pool1[1]);
|
||||
for (i = 1; i < n>>1; i++) {
|
||||
PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]);
|
||||
}
|
||||
/* normalize pool content */
|
||||
/* totsqr = totsum = 0.0;
|
||||
for (i = 0; i < n; i++) {
|
||||
totsqr += pool1[i] * pool1[i];
|
||||
totsum += pool1[i];
|
||||
}
|
||||
totsum = totsum/n;
|
||||
for (i = 0; i < n; i++) {
|
||||
totsqr += (pool1[i] - totsum) * (pool1[i] - totsum);
|
||||
}
|
||||
nomsqr = sqrt(n / totsqr);
|
||||
for (i = 0; i < n; i++)
|
||||
pool1[i] = (pool1[i] - totsum) * nomsqr;
|
||||
*/
|
||||
totsqr = 0.0;
|
||||
for (i = 0; i < n; i++)
|
||||
totsqr += pool1[i] * pool1[i];
|
||||
nomsqr = sqrt(n / totsqr);
|
||||
for (i = 0; i < n; i++)
|
||||
pool1[i] *= nomsqr;
|
||||
|
||||
/* calculate ch^2 value */
|
||||
chi1 = sqrt ( sqrt (1.0 - 1.0/n));
|
||||
chi2 = sqrt ( 1.0 - chi1*chi1);
|
||||
|
||||
/* first scaling, based on unused pool1[n-2] */
|
||||
ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2];
|
||||
/* access to first pool */
|
||||
outgauss = pool1;
|
||||
/* set data counter, we return n-2 values here */
|
||||
variate_used = n - 2;
|
||||
|
||||
/* generate random reading addresses using a LCG */
|
||||
s = 0;
|
||||
coa = 241;
|
||||
cob = 59;
|
||||
for (i=0; i < (n + NOTRANS); i++) {
|
||||
// addrif[i] = s = (s * coa + cob) % ( n );
|
||||
coa = CombLCGTausInt();
|
||||
addrif[i] = coa >> (32 - LPOOLSIZE);
|
||||
// printf ("Random add:\t%ld\n" , s);
|
||||
}
|
||||
s = 0;
|
||||
coa = 193;
|
||||
cob = 15;
|
||||
for (i=0; i < (n + NOTRANS); i++) {
|
||||
// addrib[i] = s = (s * coa + cob) % ( n );
|
||||
coa = CombLCGTausInt();
|
||||
addrib[i] = coa >> (32 - LPOOLSIZE);
|
||||
// printf ("Random add:\t%ld\n" , addrib[i]);
|
||||
}
|
||||
|
||||
// printf("norm for orig. Gauss: %e, chi^2 scale: %e\n", nomsqr, ScaleGauss);
|
||||
// NewWa();
|
||||
}
|
||||
|
||||
/* original FastNorm3.c code */
|
||||
#ifdef FASTNORM_ORIG
|
||||
float NewWa ()
|
||||
{
|
||||
int i, j, k, m;
|
||||
float p, q, r, s, t;
|
||||
int topv[6], ord[4], *top;
|
||||
float *ppt[4], *ptn;
|
||||
|
||||
float nulval, endval;
|
||||
float totsqr, nomsqr;
|
||||
nulval = ScaleGauss * pool1[0];
|
||||
endval = pool1[n-1];
|
||||
|
||||
/* Choose 4 random start points in the wk1[] vector
|
||||
I want them all different. */
|
||||
|
||||
top = topv + 1;
|
||||
/* Set limiting values in top[-1], top[4] */
|
||||
top[-1] = VL; top[4] = 0;
|
||||
reran1:
|
||||
m = CombLCGTausInt(); /* positive 32-bit random */
|
||||
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
||||
m = m >> (31 - 2*VE);
|
||||
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
|
||||
m = CombLCGTausInt(); /* positive 32-bit random */
|
||||
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
||||
m = m >> (31 - 2*VE);
|
||||
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
|
||||
for (i = 0; i < 4; i++) ord[i] = i;
|
||||
/* Sort in decreasing size */
|
||||
for (i = 2; i >= 0; i--) {
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (top[j] < top[j+1]) {
|
||||
k = top[j]; top[j] = top[j+1];
|
||||
top[j+1] = k;
|
||||
k = ord[j]; ord[j] = ord[j+1];
|
||||
ord[j+1] = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Ensure all different */
|
||||
for (i = 0; i < 3; i++) { if (top[i] == top[i+1]) goto reran1; }
|
||||
|
||||
/* Set pt pointers to their start values for the first chunk. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
j = ord[i];
|
||||
ppt[j] = pool2 + j * VL + top[i];
|
||||
}
|
||||
|
||||
/* Set ptn to point into wk1 */
|
||||
ptn = pool1;
|
||||
|
||||
/* Now ready to do five chunks. The length of chunk i is
|
||||
top[i-1] - top[i] (I hope)
|
||||
At the end of chunk i, pointer ord[i] should have reached the end
|
||||
of its part, and need to be wrapped down to the start of its part.
|
||||
*/
|
||||
i = 0;
|
||||
|
||||
chunk:
|
||||
j = top[i] - top[i-1]; /* Minus the chunk length */
|
||||
for (; j < 0; j++) {
|
||||
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
|
||||
t = (p + q + r + s) * 0.5;
|
||||
*ppt[0]++ = t - p;
|
||||
*ppt[1]++ = t - q;
|
||||
*ppt[2]++ = r - t;
|
||||
*ppt[3]++ = s - t;
|
||||
}
|
||||
/* This should end the chunk. See if all done */
|
||||
if (i == 4) goto passdone;
|
||||
|
||||
/* The pointer for part ord[i] should have passed its end */
|
||||
j = ord[i];
|
||||
#ifdef dddd
|
||||
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2);
|
||||
#endif
|
||||
ppt[j] -= VL;
|
||||
i++;
|
||||
goto chunk;
|
||||
|
||||
passdone:
|
||||
/* wk1[] values have been transformed and placed in wk2[]
|
||||
Transform from wk2 to wk1 with a simple shuffle */
|
||||
m = (CombLCGTausInt2() >> (29 - VE)) & WM;
|
||||
j = 0;
|
||||
for (i = 0; i < 4; i++) ppt[i] = pool1 + i * VL;
|
||||
for (i = 0; i < VL; i++) {
|
||||
p = pool2[j^m]; j++;
|
||||
s = pool2[j^m]; j++;
|
||||
q = pool2[j^m]; j++;
|
||||
r = pool2[j^m]; j++;
|
||||
t = (p + q + r + s) * 0.5;
|
||||
*ppt[0]++ = t - p;
|
||||
*ppt[1]++ = q - t;
|
||||
*ppt[2]++ = t - r;
|
||||
*ppt[3]++ = s - t;
|
||||
}
|
||||
|
||||
/* renormalize again if number of pools beyond limit */
|
||||
if (!(newpools & 0xFFFF)) {
|
||||
totsqr = 0.0;
|
||||
for (i = 0; i < n; i++)
|
||||
totsqr += pool1[i] * pool1[i];
|
||||
nomsqr = sqrt(n / totsqr);
|
||||
for (i = 0; i < n; i++)
|
||||
pool1[i] *= nomsqr;
|
||||
}
|
||||
|
||||
outgauss = pool1;
|
||||
/* reset data counter */
|
||||
variate_used = n - 1;
|
||||
|
||||
/* set counter counting nomber of pools made */
|
||||
newpools++;
|
||||
|
||||
/* new scale factor using ch^2 correction,
|
||||
using pool1[n-1] from last pool */
|
||||
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
||||
|
||||
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
||||
|
||||
return nulval; /* use old scale */
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Simplified code according to an algorithm published by C. S. Wallace:
|
||||
"Fast Pseudorandom Generators for Normal and Exponential Variates",
|
||||
ACM Transactions on Mathematical Software, Vol. 22, No. 1, March 1996, pp. 119-127.
|
||||
Transform pool1 to pool2 and back to pool1 NOTRANS times
|
||||
by orthogonal 4 x 4 Hadamard-Matrix.
|
||||
Mixing of values is very important: Any value in the pool should contribute to
|
||||
every value in the new pools, at least after several passes (number of passes
|
||||
is set by NOTRANS to 2 or 3).
|
||||
4 values are read in a continuous sequence from the total of POOLSIZE values.
|
||||
Values are stored in steps modulo POOLSIZE/4.
|
||||
During backward transformation the values are shuffled by a random number jj.
|
||||
*/
|
||||
|
||||
double NewWa(void)
|
||||
{
|
||||
double nulval, endval;
|
||||
double bl1, bl2, bl3, bl4; /* the four values to be transformed */
|
||||
double bsum;
|
||||
double totsqr, nomsqr;
|
||||
unsigned int i, j, jj, m, mm, mmm;
|
||||
|
||||
nulval = ScaleGauss * pool1[0];
|
||||
endval = pool1[n-1];
|
||||
m = n >> 2;
|
||||
// printf("New pool after next value\n");
|
||||
|
||||
/* generate new pool by transformation
|
||||
Transformation is repeated NOTRANS times */
|
||||
for (i=0; i < NOTRANS; i++) {
|
||||
mm = m << 1;
|
||||
mmm = mm + m;
|
||||
/* forward transformation */
|
||||
// for (j=0; j < n; j += 4) {
|
||||
for (j=0; j < m; j++) {
|
||||
bl1 = pool1[j];
|
||||
bl2 = pool1[j+m];
|
||||
bl3 = pool1[j+mm];
|
||||
bl4 = pool1[j+mmm];
|
||||
/* Hadamard-Matrix */
|
||||
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
|
||||
jj = j<<2;
|
||||
pool2[jj] = bl1 - bsum;
|
||||
pool2[jj+1] = bl2 - bsum;
|
||||
pool2[jj+2] = bsum - bl3;
|
||||
pool2[jj+3] = bsum - bl4;
|
||||
}
|
||||
/* backward transformation */
|
||||
jj = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1);
|
||||
for (j=0; j < m; j++) {
|
||||
bl1 = pool2[j^jj];
|
||||
bl2 = pool2[(j+m)^jj];
|
||||
bl3 = pool2[(j+mm)^jj];
|
||||
bl4 = pool2[(j+mmm)^jj];
|
||||
/* Hadamard-Matrix */
|
||||
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
|
||||
jj = j<<2;
|
||||
pool1[jj] = bl1 - bsum;
|
||||
pool1[jj+1] = bl2 - bsum;
|
||||
pool1[jj+2] = bsum - bl3;
|
||||
pool1[jj+3] = bsum - bl4;
|
||||
}
|
||||
}
|
||||
|
||||
/* renormalize again if number of pools beyond limit */
|
||||
if (!(newpools & 0xFFFF)) {
|
||||
totsqr = 0.0;
|
||||
for (i = 0; i < n; i++)
|
||||
totsqr += pool1[i] * pool1[i];
|
||||
nomsqr = sqrt(n / totsqr);
|
||||
for (i = 0; i < n; i++)
|
||||
pool1[i] *= nomsqr;
|
||||
}
|
||||
|
||||
outgauss = pool1;
|
||||
/* reset data counter */
|
||||
variate_used = n - 1;
|
||||
|
||||
/* set counter counting nomber of pools made */
|
||||
newpools++;
|
||||
|
||||
/* new scale factor using ch^2 correction,
|
||||
using pool1[n-1] from previous pool */
|
||||
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
||||
|
||||
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
||||
|
||||
return nulval; /* use old scale */
|
||||
// return pool1[0]; /* use new scale */
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef FASTNORMTEST
|
||||
float NewWa_not(void)
|
||||
{
|
||||
float nulval, endval;
|
||||
float bl1, bl2, bl3, bl4; /* the four values to be transformed */
|
||||
float bsum;
|
||||
float totsqr, nomsqr;
|
||||
unsigned int i, j, jj;
|
||||
nulval = ScaleGauss * pool1[0];
|
||||
endval = pool1[n-1];
|
||||
|
||||
// printf("New pool after next value\n");
|
||||
|
||||
/* generate new pool by transformation
|
||||
Transformation is repeated NOTRANS times */
|
||||
for (i=0; i < NOTRANS; i++) {
|
||||
|
||||
/* forward transformation */
|
||||
for (j=0; j < n; j += 4) {
|
||||
jj = j + i;
|
||||
bl1 = pool1[addrif[jj]];
|
||||
bl2 = pool1[addrif[jj+1]];
|
||||
bl3 = pool1[addrif[jj+2]];
|
||||
bl4 = pool1[addrif[jj+3]];
|
||||
/* s = (s*coa + cob) & (n - 1);
|
||||
bl1 = pool1[s];
|
||||
s = (s*coa + cob) & (n - 1);
|
||||
bl2 = pool1[s + 1];
|
||||
s = (s*coa + cob) & (n - 1);
|
||||
bl3 = pool1[s + 2];
|
||||
s = (s*coa + cob) & (n - 1);
|
||||
bl4 = pool1[s + 3]; */
|
||||
/* jj = j + i;
|
||||
bl1 = pool1[addrif[jj]];
|
||||
bl2 = pool1[addrif[jj+1]];
|
||||
bl3 = pool1[addrif[jj+2]];
|
||||
bl4 = pool1[addrif[jj+3]]; */
|
||||
/* bl1 = pool1[j];
|
||||
bl2 = pool1[j+1];
|
||||
bl3 = pool1[j+2];
|
||||
bl4 = pool1[j+3]; */
|
||||
/* Hadamard-Matrix */
|
||||
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
|
||||
/* pool2[j] = bl1 - bsum;
|
||||
pool2[j+1] = bl2 - bsum;
|
||||
pool2[j+2] = bsum - bl3;
|
||||
pool2[j+3] = bsum - bl4; */
|
||||
pool2[addrib[jj]] = bl1 - bsum;
|
||||
pool2[addrib[jj+1]] = bl2 - bsum;
|
||||
pool2[addrib[jj+2]] = bsum - bl3;
|
||||
pool2[addrib[jj+3]] = bsum - bl4;
|
||||
}
|
||||
/* backward transformation */
|
||||
for (j=0; j < n; j += 4) {
|
||||
bl1 = pool2[j];
|
||||
bl2 = pool2[j+1];
|
||||
bl3 = pool2[j+2];
|
||||
bl4 = pool2[j+3];
|
||||
/* bl1 = pool2[addrib[j]];
|
||||
bl2 = pool2[addrib[j+1]];
|
||||
bl3 = pool2[addrib[j+2]];
|
||||
bl4 = pool2[addrib[j+3]]; */
|
||||
/* Hadamard-Matrix */
|
||||
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
|
||||
pool1[j] = bl1 - bsum;
|
||||
pool1[j+1] = bl2 - bsum;
|
||||
pool1[j+2] = bsum - bl3;
|
||||
pool1[j+3] = bsum - bl4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* renormalize again if number of pools beyond limit */
|
||||
if (!(newpools & 0xFFFF)) {
|
||||
totsqr = 0.0;
|
||||
for (i = 0; i < n; i++)
|
||||
totsqr += pool1[i] * pool1[i];
|
||||
nomsqr = sqrt(n / totsqr);
|
||||
for (i = 0; i < n; i++)
|
||||
pool1[i] *= nomsqr;
|
||||
}
|
||||
|
||||
outgauss = pool1;
|
||||
/* reset data counter */
|
||||
variate_used = n - 1;
|
||||
|
||||
/* set counter counting nomber of pools made */
|
||||
newpools++;
|
||||
|
||||
/* new scale factor using ch^2 correction,
|
||||
using pool1[n-1] from last pool */
|
||||
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
||||
|
||||
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
||||
|
||||
return nulval; /* use old scale */
|
||||
// return pool1[0]; /* use new scale */
|
||||
}
|
||||
#endif
|
||||
|
||||
/* --------------------- (test) main ------------------------- */
|
||||
/* gcc -Wall -g -DHasMain -I../../include wallace.c CombTaus.o -o watest.exe */
|
||||
#ifdef HasMain
|
||||
#include "wallace.h"
|
||||
|
||||
struct timeb timenow;
|
||||
struct timeb timebegin;
|
||||
int sec, msec;
|
||||
|
||||
/* Compute the elapsed time between two ftime() samples.
 *
 * now, begin : end and start time stamps
 * sec, msec  : receive the difference now - begin, split into whole
 *              seconds and a millisecond remainder; a negative
 *              millisecond difference borrows one second
 */
void timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec)
{
    int ms_delta = now->millitm - begin->millitm;
    int s_delta = (int) (now->time - begin->time);

    /* borrow one second when the millisecond part underflows */
    if (ms_delta < 0) {
        ms_delta += 1000;
        s_delta--;
    }

    *msec = ms_delta;
    *sec = s_delta;
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
float x;
|
||||
unsigned int i;
|
||||
long int count;
|
||||
|
||||
initw();
|
||||
ftime(&timebegin);
|
||||
count = 100000000;
|
||||
for (i = 0; i < count; i++) {
|
||||
x = GaussWa;
|
||||
// printf("%d\t%f\n", i, x);
|
||||
}
|
||||
ftime(&timenow);
|
||||
timediff(&timenow, &timebegin, &sec, &msec);
|
||||
printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
|
||||
|
||||
initnorm(0, 0);
|
||||
initnorm(77, 3);
|
||||
ftime(&timebegin);
|
||||
count = 100000000;
|
||||
for (i = 0; i < count; i++) {
|
||||
x = FastNorm;
|
||||
// printf("%d\t%f\n", i, x);
|
||||
}
|
||||
ftime(&timenow);
|
||||
timediff(&timenow, &timebegin, &sec, &msec);
|
||||
printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
|
||||
|
||||
return (1);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
|
||||
|
||||
void f_alpha(int n_pts, int n_exp, float X[], float Q_d,
|
||||
float alpha);
|
||||
|
||||
void rvfft(float X[], unsigned long int n);
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
/* Last revised 28-1-1999 */
|
||||
/* This is the header file FastNorm3.h to be included in code files
|
||||
using FastNorm3.c */
|
||||
/* I M P O R T A N T ! ! ! ! !
|
||||
|
||||
The definition below should be altered to ensure that integer
|
||||
arithmetic is done on 32-bit words. It may need to be changed from int to
|
||||
long on some platforms. The 32-bit requirement arises from the use of
|
||||
a Uniform pseudo-random generator in part of the code, which assumes 32-bit
|
||||
twos-complement arithmetic. In dire need, replace this generator with
|
||||
another more suitable for the platform. The rest of the code assumes only
|
||||
that signed integers up to a bit less than 2^31 can be handled.
|
||||
*/
|
||||
|
||||
#define Sw int /* MUST define Sw as a 32-bit integer or longer */
|
||||
#define Sf double
|
||||
|
||||
extern int gaussfaze;
|
||||
extern int gaussmask;
|
||||
extern double *gausssave;
|
||||
extern double GScale;
|
||||
|
||||
#define FastNorm ((--gaussfaze)?GScale*gausssave[gaussfaze]:fastnorm())
|
||||
|
||||
void initnorm(Sw seed, Sw quoll);
|
||||
Sf fastnorm (void);
|
||||
Sf c7rand(Sw*);
|
||||
Sw irandm(Sw*);
|
||||
unsigned Sw urandm(Sw*);
|
||||
double adchi (double a, int *is);
|
||||
double rgamma (double g, int *is);
|
||||
Sf renormalize(void);
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
#ifndef _BOOL_H
|
||||
#define _BOOL_H
|
||||
|
||||
typedef unsigned char bool;
|
||||
//typedef unsigned char bool;
|
||||
typedef int bool;
|
||||
|
||||
typedef int BOOL ;
|
||||
|
||||
#define BOOLEAN int
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
/*******************************************************************
|
||||
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
|
||||
for you (including mallocs and free's). Call the init routine for each fft size you will
|
||||
be using. Then you can call the fft routines below which will make the fftlib library
|
||||
call with the appropriate tables passed. When you are done with all fft's you can call
|
||||
fftFree to release the storage for the tables. Note that you can call fftInit repeatedly
|
||||
with the same size, the extra calls will be ignored. So, you could make a macro to call
|
||||
fftInit every time you call ffts. For example you could have something like:
|
||||
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
|
||||
*******************************************************************/
|
||||
|
||||
int fftInit(long M);
|
||||
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
|
||||
/* INPUTS */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* OUTPUTS */
|
||||
/* private cosine and bit reversed tables */
|
||||
|
||||
void fftFree(void);
// release storage for all private cosine and bit reversed tables
// (declared with an explicit (void) prototype so that calls are type checked)
|
||||
|
||||
void ffts(float *data, long M, long Rows);
|
||||
/* Compute in-place complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void iffts(float *data, long M, long Rows);
|
||||
/* Compute in-place inverse complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void rffts(float *data, long M, long Rows);
|
||||
/* Compute in-place real fft on the rows of the input array */
|
||||
/* The result is the complex spectra of the positive frequencies */
|
||||
/* except the location for the first complex number contains the real */
|
||||
/* values for DC and Nyquest */
|
||||
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
|
||||
/* INPUTS */
|
||||
/* *ioptr = real input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array in the following order */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
|
||||
void riffts(float *data, long M, long Rows);
|
||||
/* Compute in-place real ifft on the rows of the input array */
|
||||
/* data order as from rffts */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array in the following order */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = real output data array */
|
||||
|
||||
void rspectprod(float *data1, float *data2, float *outdata, long N);
|
||||
// When multiplying a pair of spectra from rfft care must be taken to multiply the
|
||||
// two real values separately from the complex ones. This routine does it correctly.
|
||||
// the result can be stored in-place over one of the inputs
|
||||
/* INPUTS */
|
||||
/* *data1 = input data array first spectra */
|
||||
/* *data2 = input data array second spectra */
|
||||
/* N = fft input size for both data1 and data2 */
|
||||
/* OUTPUTS */
|
||||
/* *outdata = output data array spectra */
|
||||
|
||||
|
||||
/* The following is FYI
|
||||
|
||||
|
||||
Note that most of the fft routines require full matrices, ie Rsiz==Ncols
|
||||
This is how I like to define a real matrix:
|
||||
struct matrix { // real matrix
|
||||
float *d; // pointer to data
|
||||
long Nrows; // number of rows in the matrix
|
||||
long Ncols; // number of columns in the matrix (can be less than Rsiz)
|
||||
long Rsiz; // number of floats from one row to the next
|
||||
};
|
||||
typedef struct matrix matrix;
|
||||
|
||||
|
||||
|
||||
CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
|
||||
arrays that start exactly on a cache line start.
|
||||
First we CACHEFILLMALLOC a void * (use this void * when free'ing),
|
||||
then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
|
||||
example:
|
||||
aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
|
||||
a = (float *) CEILCACHELINE(aInit);
|
||||
... main body of code ...
|
||||
free(aInit);
|
||||
|
||||
To disable this alignment, set CACHELINESIZE to 1
|
||||
#define CACHELINESIZE 32 // Bytes per cache line
|
||||
#define CACHELINEFILL (CACHELINESIZE-1)
|
||||
#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
|
||||
#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)
|
||||
*/
|
||||
|
||||
|
|
@ -174,15 +174,10 @@
|
|||
#define inline _inline
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifndef HAVE_RANDOM
|
||||
#define srandom(a) srand(a)
|
||||
#define random rand
|
||||
#define RR_MAX RAND_MAX
|
||||
#else
|
||||
#define RR_MAX LONG_MAX
|
||||
#endif
|
||||
*/
|
||||
|
||||
/* Fast random number generator */
|
||||
//#define FastRand
|
||||
#define WaGauss
|
||||
#define RR_MAX RAND_MAX
|
||||
|
||||
#ifdef HAVE_INDEX
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
/* Wallace generator for normally distributed random variates
|
||||
Copyright Holger Vogt, 2008
|
||||
|
||||
Calling sequence:
|
||||
initw(void); initialize using srand(seed)
|
||||
double x = GaussWa; returns normally distributed random variate
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
extern double *outgauss; /*output vector for user access */
|
||||
extern unsigned int variate_used; /* actual index of variate called by user */
|
||||
extern double ScaleGauss; /* scale factor, including chi square correction */
|
||||
|
||||
double NewWa(void); /* generate new pool, return outgauss[0] */
|
||||
|
||||
#define GaussWa ((--variate_used)?(outgauss[variate_used]*ScaleGauss):NewWa())
|
||||
|
||||
void initw(void); /* initialization of Wallace generator */
|
||||
|
||||
void PolarGauss(double* py1, double* py2);
|
||||
25
src/main.c
25
src/main.c
|
|
@ -208,6 +208,8 @@ extern int OUTbeginDomain(void *,IFuid,int,IFvalue *);
|
|||
extern int OUTendDomain(void *), OUTstopnow(void), OUTerror(int,char *,IFuid *);
|
||||
extern int OUTattributes(void *,IFuid,int,IFvalue *);
|
||||
|
||||
extern void initw(void);
|
||||
|
||||
IFfrontEnd nutmeginfo = {
|
||||
IFnewUid,
|
||||
IFdelUid,
|
||||
|
|
@ -757,8 +759,9 @@ xmain(int argc, char **argv)
|
|||
main(int argc, char **argv)
|
||||
#endif /* HAS_WINDOWS */
|
||||
{
|
||||
int c;
|
||||
int err;
|
||||
int c, err;
|
||||
unsigned int rseed;
|
||||
time_t acttime;
|
||||
bool gotone = FALSE;
|
||||
char* copystring;
|
||||
bool addctrlsect = TRUE; /* PN: for autorun */
|
||||
|
|
@ -1106,6 +1109,24 @@ bot:
|
|||
err = 0;
|
||||
|
||||
#ifdef SIMULATOR
|
||||
#ifdef FastRand
|
||||
// initialization and seed for FastNorm Gaussian random generator
|
||||
initnorm (0, 0);
|
||||
rseed = 66;
|
||||
if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
|
||||
acttime = time(NULL);
|
||||
rseed = (int)acttime;
|
||||
}
|
||||
initnorm (rseed, 2);
|
||||
fprintf (cp_out, "SoS %f, seed value: %ld\n", renormalize(), rseed);
|
||||
#elif defined (WaGauss)
|
||||
if (!cp_getvar("rndseed", CP_NUM, (char *) &rseed)) {
|
||||
acttime = time(NULL);
|
||||
rseed = (int)acttime;
|
||||
}
|
||||
srand(rseed);
|
||||
initw();
|
||||
#endif
|
||||
if (!ft_servermode && !ft_nutmeg) {
|
||||
/* Concatenate all non-option arguments into a temporary file
|
||||
and load that file into the spice core.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
## Process this file with automake
|
||||
|
||||
SUBDIRS = cmaths ni sparse poly deriv misc
|
||||
DIST_SUBDIRS = cmaths ni sparse poly deriv misc
|
||||
SUBDIRS = cmaths ni sparse poly deriv misc fft
|
||||
DIST_SUBDIRS = cmaths ni sparse poly deriv misc fft
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
|
|||
|
|
@ -0,0 +1,13 @@
|
|||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
noinst_LTLIBRARIES = libmathfft.la
|
||||
|
||||
libmathfft_la_SOURCES = \
|
||||
fftext.c \
|
||||
fftlib.c \
|
||||
matlib.c
|
||||
|
||||
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/src/include
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
Subject: FFT for RISC 2.0
|
||||
To: macgifts@sumex-aim.stanford.edu
|
||||
Enclosure: FFTs-for-RISC-2.sit
|
||||
|
||||
Enclosed is a stuffit archive of version 2.0 of my 'C' source code fft library.
|
||||
|
||||
Very-Fast Fourier Transform routines. Routines are provided for real and complex
|
||||
forward and inverse 1d and 2d fourier transforms and 3d complex forward and inverse ffts.
|
||||
I coded these to optimize execution speed on Risc processors like the PowerPC.
|
||||
All fft sizes must still be a power of two.
|
||||
Test programs based on the Numerical Recipes in C routines are provided.
|
||||
Also included are some simple applications with source code which time the FFTs.
|
||||
See the enclosed read me file for more information.
|
||||
|
||||
Revision version 2.0:
|
||||
Rewrote code to rely more on compiler optimization (and be less ugly.)
|
||||
Removed restrictions on too small or too large ffts.
|
||||
Provided a library extension that manages memory for cosine and bit
|
||||
reversed counter tables.
|
||||
Added 2d and 3d complex and 2d real ffts.
|
||||
Speeded routines for data too large to fit in primary cache.
|
||||
Changed most testing from Matlab to Numerical Recipes based (because it's cheaper.)
|
||||
Changed call parameters (watch out.)
|
||||
Revision version 1.21:
|
||||
line 126 of rfftTest.c corrected.
|
||||
Revisions version 1.2:
|
||||
I now store the Nyquist point of the real transform where the 0 for the DC term's
|
||||
imaginary part used to be. !! WATCH OUT FOR THIS IF YOU USE rfft !!
|
||||
Added the real inverse Fourier transform.
|
||||
|
||||
Revisions version 1.1:
|
||||
Re-arranged to put fft routines in a shared library and changed source file name to fftlib.c.
|
||||
Removed some ugly optimizations that are no longer needed for CodeWarrior.
|
||||
|
||||
This code is public domain, do anything you want to with it.
|
||||
|
||||
[Moderators- This file should replace ffts-for-risc-121-c.hqx and can be included on any CD]
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
This directory contains a public domain FFT library which was optimized
|
||||
for speed on RISC processors such as the PowerPC. All ffts
|
||||
use single precision floats, for double precision just use a
|
||||
global search and replace to change float to double in all
|
||||
source files.
|
||||
CodeWarrior Pro 1.0 project files are also supplied.
|
||||
|
||||
** Warning ** Perform rigorous testing to
|
||||
your own standards before using this code.
|
||||
|
||||
(John Green) green_jt@vsdec.npt.nuwc.navy.mil
|
||||
|
||||
files:
|
||||
fftTiming
|
||||
Application to time complex ffts
|
||||
|
||||
rfftTiming
|
||||
Application to time real ffts
|
||||
|
||||
// Directory: fft libraries
|
||||
|
||||
files:
|
||||
|
||||
fftext.c
|
||||
Library of in-place fast fourier transforms. Contains forward
|
||||
and inverse complex and real transforms. The real fft's expect the
|
||||
frequency domain data to have the real part of the fsamp/2 bin (which
|
||||
has a 0 imaginary part) to be stored in the location for the imaginary
|
||||
part of the DC bin (the DC bin of real data is also strictly real.)
|
||||
You must first call an initialization routine fftInit before calling
|
||||
the fft computation routines ffts, iffts, rffts and riffts.
|
||||
The init routines malloc the memory to store the cosine and
|
||||
bit reversed counter tables as well as initializing their values.
|
||||
|
||||
fftlib.c
|
||||
Lower level library of in-place fast fourier transforms. Same as fftext.c but you
|
||||
need to manage the mallocs for the cosine and bit reversed tables yourself.
|
||||
|
||||
|
||||
fft2d.c
|
||||
Library of 2d and 3d complex and 2d real in-place fast fourier transforms.
|
||||
The init routine fft2dInit must be called before using the 2d routines and
|
||||
fft3dInit must be called before using the 3d routines. These init routines
|
||||
will also call the appropriate 1d init routines in fftext.c
|
||||
|
||||
matlib.c
|
||||
Matrix transpose routines used by fft2d.c and complex vector multiply
|
||||
for forming the product of two spectra.
|
||||
|
||||
dxpose.c
|
||||
Double precision matrix transpose for quick single precision complex transposing
|
||||
|
||||
// Directory: timing code
|
||||
This directory contains the source to fftTiming and rfftTiming
|
||||
|
||||
// Directory: Numerical Recipes testing
|
||||
This directory contains files used to test the various fft routines using
|
||||
the Numerical Recipes in C routines as a baseline. These routines can be purchased
|
||||
in PeeCee (after expanding you can move them to a Mac) format from:
|
||||
http://cfata2.harvard.edu/numerical-recipes/
|
||||
Unfortunately Numerical Recipes defines its forward and inverse fft's backwards.
|
||||
For complex fft's I just use their inverse fft as a forward one, but for real ffts
|
||||
their forward fft followed by my inverse fft reverses the data. They also have ugly matrix
|
||||
and tensor data types and start their indices with one, Fortran style, but these are
|
||||
minor annoyances.
|
||||
|
||||
// Directory: Matlab testing
|
||||
This directory contains files to test fast 1d and 2d convolution with Matlab used to
|
||||
verify the results. An example of using Matlab to test the fft library routines is
|
||||
also given for the 2d real fft.
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/*******************************************************************
|
||||
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
|
||||
for you (including mallocs and free's). Call the init routine for each fft size you will
|
||||
be using. Then you can call the fft routines below which will make the fftlib library
|
||||
call with the appropriate tables passed. When you are done with all fft's you can call
|
||||
fftFree to release the storage for the tables. Note that you can call fftInit repeatedly
|
||||
with the same size, the extra calls will be ignored. So, you could make a macro to call
|
||||
fftInit every time you call ffts. For example you could have something like:
|
||||
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
|
||||
*******************************************************************/
|
||||
#include <stdlib.h>
|
||||
#include "fftlib.h"
|
||||
#include "matlib.h"
|
||||
#include "fftext.h"
|
||||
|
||||
// pointers to storage of Utbl's and BRLow's
|
||||
static float *UtblArray[8*sizeof(long)] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
|
||||
static short *BRLowArray[8*sizeof(long)/2] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
|
||||
|
||||
int fftInit(long M){
|
||||
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
|
||||
/* INPUTS */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* OUTPUTS */
|
||||
/* private cosine and bit reversed tables */
|
||||
|
||||
int theError = 1;
|
||||
/*** I did NOT test cases with M>27 ***/
|
||||
if ((M >= 0) && (M < 8*sizeof(long))){
|
||||
theError = 0;
|
||||
if (UtblArray[M] == 0){ // have we not inited this size fft yet?
|
||||
// init cos table
|
||||
UtblArray[M] = (float *) malloc( (POW2(M)/4+1)*sizeof(float) );
|
||||
if (UtblArray[M] == 0)
|
||||
theError = 2;
|
||||
else{
|
||||
fftCosInit(M, UtblArray[M]);
|
||||
}
|
||||
if (M > 1){
|
||||
if (BRLowArray[M/2] == 0){ // init bit reversed table for cmplx fft
|
||||
BRLowArray[M/2] = (short *) malloc( POW2(M/2-1)*sizeof(short) );
|
||||
if (BRLowArray[M/2] == 0)
|
||||
theError = 2;
|
||||
else{
|
||||
fftBRInit(M, BRLowArray[M/2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (M > 2){
|
||||
if (BRLowArray[(M-1)/2] == 0){ // init bit reversed table for real fft
|
||||
BRLowArray[(M-1)/2] = (short *) malloc( POW2((M-1)/2-1)*sizeof(short) );
|
||||
if (BRLowArray[(M-1)/2] == 0)
|
||||
theError = 2;
|
||||
else{
|
||||
fftBRInit(M-1, BRLowArray[(M-1)/2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
return theError;
|
||||
}
|
||||
|
||||
void fftFree(void){
|
||||
// release storage for all private cosine and bit reversed tables
|
||||
long i1;
|
||||
for (i1=8*sizeof(long)/2-1; i1>=0; i1--){
|
||||
if (BRLowArray[i1] != 0){
|
||||
free(BRLowArray[i1]);
|
||||
BRLowArray[i1] = 0;
|
||||
};
|
||||
};
|
||||
for (i1=8*sizeof(long)-1; i1>=0; i1--){
|
||||
if (UtblArray[i1] != 0){
|
||||
free(UtblArray[i1]);
|
||||
UtblArray[i1] = 0;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
The following calls are easier than calling to fftlib directly.
|
||||
Just make sure fftInit has been called for each M first.
|
||||
**************************************************/
|
||||
|
||||
void ffts(float *data, long M, long Rows){
|
||||
/* Compute in-place complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
ffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
|
||||
}
|
||||
|
||||
void iffts(float *data, long M, long Rows){
|
||||
/* Compute in-place inverse complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
iffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
|
||||
}
|
||||
|
||||
void rffts(float *data, long M, long Rows){
|
||||
/* Compute in-place real fft on the rows of the input array */
|
||||
/* The result is the complex spectra of the positive frequencies */
|
||||
/* except the location for the first complex number contains the real */
|
||||
/* values for DC and Nyquest */
|
||||
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
|
||||
/* INPUTS */
|
||||
/* *ioptr = real input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array in the following order */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
rffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
|
||||
}
|
||||
|
||||
void riffts(float *data, long M, long Rows){
|
||||
/* Compute in-place real ifft on the rows of the input array */
|
||||
/* data order as from rffts */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array in the following order */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = real output data array */
|
||||
riffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
|
||||
}
|
||||
|
||||
/* Multiply two spectra as produced by rffts.
 * In that layout the first two floats are the purely real DC and Nyquist
 * values, so they are multiplied separately from the complex bins.
 * The result can be stored in-place over one of the inputs.
 *
 * INPUTS
 *   data1 = input data array, first spectrum
 *   data2 = input data array, second spectrum
 *   N     = fft input size for both data1 and data2
 * OUTPUTS
 *   outdata = output data array spectrum
 */
void rspectprod(float *data1, float *data2, float *outdata, long N){
    /* the zero freq values are multiplied for every N */
    outdata[0] = data1[0] * data2[0];

    /* a spectrum of size 1 or less has nothing else to multiply */
    if (N <= 1)
        return;

    /* the nyquist freq values */
    outdata[1] = data1[1] * data2[1];

    /* the other positive freq values, as complex numbers */
    cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1);
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*******************************************************************
|
||||
This file extends the fftlib with calls to maintain the cosine and bit reversed tables
|
||||
for you (including mallocs and free's). Call the init routine for each fft size you will
|
||||
be using. Then you can call the fft routines below which will make the fftlib library
|
||||
call with the appropriate tables passed. When you are done with all fft's you can call
|
||||
fftFree to release the storage for the tables. Note that you can call fftInit repeatedly
|
||||
with the same size, the extra calls will be ignored. So, you could make a macro to call
|
||||
fftInit every time you call ffts. For example you could have something like:
|
||||
#define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
|
||||
*******************************************************************/
|
||||
|
||||
int fftInit(long M);
|
||||
// malloc and init cosine and bit reversed tables for a given size fft, ifft, rfft, rifft
|
||||
/* INPUTS */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* OUTPUTS */
|
||||
/* private cosine and bit reversed tables */
|
||||
|
||||
void fftFree(void);
|
||||
// release storage for all private cosine and bit reversed tables
|
||||
|
||||
void ffts(float *data, long M, long Rows);
|
||||
/* Compute in-place complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void iffts(float *data, long M, long Rows);
|
||||
/* Compute in-place inverse complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void rffts(float *data, long M, long Rows);
|
||||
/* Compute in-place real fft on the rows of the input array */
|
||||
/* The result is the complex spectra of the positive frequencies */
|
||||
/* except the location for the first complex number contains the real */
|
||||
/* values for DC and Nyquist */
|
||||
/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */
|
||||
/* INPUTS */
|
||||
/* *ioptr = real input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array in the following order */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
|
||||
void riffts(float *data, long M, long Rows);
|
||||
/* Compute in-place real ifft on the rows of the input array */
|
||||
/* data order as from rffts */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array in the following order */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = real output data array */
|
||||
|
||||
void rspectprod(float *data1, float *data2, float *outdata, long N);
|
||||
// When multiplying a pair of spectra from rfft care must be taken to multiply the
|
||||
// two real values separately from the complex ones. This routine does it correctly.
|
||||
// the result can be stored in-place over one of the inputs
|
||||
/* INPUTS */
|
||||
/* *data1 = input data array first spectra */
|
||||
/* *data2 = input data array second spectra */
|
||||
/* N = fft input size for both data1 and data2 */
|
||||
/* OUTPUTS */
|
||||
/* *outdata = output data array spectra */
|
||||
|
||||
|
||||
// The following is FYI
|
||||
|
||||
|
||||
//Note that most of the fft routines require full matrices, ie Rsiz==Ncols
|
||||
//This is how I like to define a real matrix:
|
||||
//struct matrix { // real matrix
|
||||
// float *d; // pointer to data
|
||||
// long Nrows; // number of rows in the matrix
|
||||
// long Ncols; // number of columns in the matrix (can be less than Rsiz)
|
||||
// long Rsiz; // number of floats from one row to the next
|
||||
//};
|
||||
//typedef struct matrix matrix;
|
||||
|
||||
|
||||
|
||||
// CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
|
||||
// arrays that start exactly on a cache line start.
|
||||
// First we CACHEFILLMALLOC a void * (use this void * when free'ing),
|
||||
// then we set our array pointer equal to the properly cast CEILCACHELINE of this void *
|
||||
// example:
|
||||
// aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
|
||||
// a = (float *) CEILCACHELINE(ainit);
|
||||
// ... main body of code ...
|
||||
// free(aInit);
|
||||
//
|
||||
// To disable this alignment, set CACHELINESIZE to 1
|
||||
//#define CACHELINESIZE 32 // Bytes per cache line
|
||||
//#define CACHELINEFILL (CACHELINESIZE-1)
|
||||
//#define CEILCACHELINE(p) ((((unsigned long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
|
||||
//#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,76 @@
|
|||
#define MYRECIPLN2 1.442695040888963407359924681001892137426 // 1.0/log(2)
|
||||
|
||||
/* some useful conversions between a number and its power of 2 */
|
||||
#define LOG2(a) (MYRECIPLN2*log(a)) // floating point logarithm base 2
|
||||
#define POW2(m) ((unsigned long) 1 << (m)) // integer power of 2 for m<32
|
||||
|
||||
/*******************************************************************
|
||||
lower level fft stuff called by routines in fftext.c and fft2d.c
|
||||
*******************************************************************/
|
||||
|
||||
void fftCosInit(long M, float *Utbl);
|
||||
/* Compute Utbl, the cosine table for ffts */
|
||||
/* of size (pow(2,M)/4 +1) */
|
||||
/* INPUTS */
|
||||
/* M = log2 of fft size */
|
||||
/* OUTPUTS */
|
||||
/* *Utbl = cosine table */
|
||||
|
||||
void fftBRInit(long M, short *BRLow);
|
||||
/* Compute BRLow, the bit reversed table for ffts */
|
||||
/* of size pow(2,M/2 -1) */
|
||||
/* INPUTS */
|
||||
/* M = log2 of fft size */
|
||||
/* OUTPUTS */
|
||||
/* *BRLow = bit reversed counter table */
|
||||
|
||||
void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
|
||||
/* Compute in-place complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
|
||||
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
|
||||
/* *Utbl = cosine table */
|
||||
/* *BRLow = bit reversed counter table */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
|
||||
/* Compute in-place inverse complex fft on the rows of the input array */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array */
|
||||
/* M = log2 of fft size */
|
||||
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
|
||||
/* *Utbl = cosine table */
|
||||
/* *BRLow = bit reversed counter table */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array */
|
||||
|
||||
void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
|
||||
/* Compute in-place real fft on the rows of the input array */
|
||||
/* The result is the complex spectra of the positive frequencies */
|
||||
/* except the location for the first complex number contains the real */
|
||||
/* values for DC and Nyquist */
|
||||
/* INPUTS */
|
||||
/* *ioptr = real input data array */
|
||||
/* M = log2 of fft size */
|
||||
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
|
||||
/* *Utbl = cosine table */
|
||||
/* *BRLow = bit reversed counter table */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = output data array in the following order */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
|
||||
|
||||
void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
|
||||
/* Compute in-place real ifft on the rows of the input array */
|
||||
/* data order as from rffts1 */
|
||||
/* INPUTS */
|
||||
/* *ioptr = input data array in the following order */
|
||||
/* M = log2 of fft size */
|
||||
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
|
||||
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
|
||||
/* *Utbl = cosine table */
|
||||
/* *BRLow = bit reversed counter table */
|
||||
/* OUTPUTS */
|
||||
/* *ioptr = real output data array */
|
||||
|
|
@ -0,0 +1,297 @@
|
|||
/* a few routines from a vector/matrix library */
|
||||
#include "matlib.h"
|
||||
|
||||
void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
/* Out-of-place transpose of a real (float) matrix.			*/
/* INPUTS								*/
/*   *indata  = input matrix, Nrows x Ncols				*/
/*   iRsiz    = distance in floats between consecutive input rows	*/
/*   oRsiz    = distance in floats between consecutive output rows	*/
/*   Nrows    = number of rows of the input matrix			*/
/*   Ncols    = number of columns of the input matrix			*/
/* OUTPUTS								*/
/*   *outdata = transposed matrix, Ncols x Nrows			*/

    const long fullBlocks = Nrows/8;	/* number of complete 8-row strips */
    const long tailRows   = Nrows%8;	/* rows left after the strips */
    float *src = indata;		/* first row of the current strip */
    float *dst = outdata;		/* first output column of the current strip */
    long blk;
    long col;
    long k;

    /* Main part: walk the input in strips of 8 rows. For every column the	*/
    /* 8 values of the strip are gathered first and then stored as 8		*/
    /* consecutive floats of one output row.					*/
    for (blk = 0; blk < fullBlocks; blk++){
        for (col = 0; col < Ncols; col++){
            float strip[8];
            float *in  = src + col;
            float *out = dst + col*oRsiz;

            for (k = 0; k < 8; k++)
                strip[k] = in[k*iRsiz];
            for (k = 0; k < 8; k++)
                out[k] = strip[k];
        }
        src += 8*iRsiz;
        dst += 8;
    }

    /* Remaining (Nrows mod 8) rows are copied one element at a time.	*/
    /* Nothing is written when tailRows is not positive.			*/
    for (col = 0; col < Ncols; col++){
        for (k = 0; k < tailRows; k++)
            dst[col*oRsiz + k] = src[col + k*iRsiz];
    }
}
|
||||
|
||||
void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
/* Out-of-place transpose of a complex matrix stored as interleaved	*/
/* (real, imag) float pairs.						*/
/* INPUTS								*/
/*   *indata  = input matrix, Nrows x Ncols complex values		*/
/*   iRsiz    = distance in complex values between input rows		*/
/*   oRsiz    = distance in complex values between output rows		*/
/*   Nrows    = number of rows of the input matrix			*/
/*   Ncols    = number of columns of the input matrix			*/
/* OUTPUTS								*/
/*   *outdata = transposed matrix, Ncols x Nrows complex values		*/

    const long fullBlocks = Nrows/4;	/* number of complete 4-row strips */
    const long tailRows   = Nrows%4;	/* rows left after the strips */
    float *src = indata;		/* first row of the current strip */
    float *dst = outdata;		/* first output column of the current strip */
    long blk;
    long col;
    long k;

    /* Main part: strips of 4 rows. For every column the 4 complex values	*/
    /* are gathered first and then written as 8 consecutive floats.		*/
    for (blk = 0; blk < fullBlocks; blk++){
        for (col = 0; col < Ncols; col++){
            float strip[8];
            float *in  = src + 2*col;
            float *out = dst + 2*col*oRsiz;

            for (k = 0; k < 4; k++){
                strip[2*k]   = in[2*k*iRsiz];
                strip[2*k+1] = in[2*k*iRsiz + 1];
            }
            for (k = 0; k < 8; k++)
                out[k] = strip[k];
        }
        src += 8*iRsiz;		/* 4 rows of iRsiz complex values each */
        dst += 8;		/* 4 complex values further along the output row */
    }

    /* Remaining (Nrows mod 4) rows, one complex value at a time.		*/
    /* Nothing is written when tailRows is not positive.			*/
    for (col = 0; col < Ncols; col++){
        for (k = 0; k < tailRows; k++){
            const float re = src[2*col + 2*k*iRsiz];
            const float im = src[2*col + 2*k*iRsiz + 1];

            dst[2*col*oRsiz + 2*k]     = re;
            dst[2*col*oRsiz + 2*k + 1] = im;
        }
    }
}
|
||||
|
||||
/* Multiply four interleaved complex pairs: prod[k] = av[k] * bv[k].	*/
/* Private helper of cvprod; all three arrays hold 8 floats.		*/
static void cvprod_group(const float *av, const float *bv, float *prod){
    int k;

    for (k = 0; k < 8; k += 2){
        float re = av[k] * bv[k];
        float im = (av[k] * bv[k+1]);

        re -= (av[k+1] * bv[k+1]);
        im = av[k+1] * bv[k] + im;
        prod[k]   = re;
        prod[k+1] = im;
    }
}

void cvprod(float *a, float *b, float *out, long N){
/* Element-wise product of two complex vectors, out[k] = a[k] * b[k].	*/
/* May be used in-place (out equal to a or b).				*/
/* INPUTS								*/
/*   N    = number of complex values in each vector			*/
/*   *a   = first factor, N interleaved (real, imag) pairs		*/
/*   *b   = second factor, N interleaved (real, imag) pairs		*/
/* OUTPUTS								*/
/*   *out = product, N interleaved (real, imag) pairs			*/

    float av[8];	/* group of 4 complex values loaded from a */
    float bv[8];	/* group of 4 complex values loaded from b */
    float pend[8];	/* products computed but not yet stored */
    long grp;
    long left;
    int k;

    if (N >= 4){
        /* Prime the pipeline with the first group of four. */
        for (k = 0; k < 8; k++){
            av[k] = a[k];
            bv[k] = b[k];
        }
        cvprod_group(av, bv, pend);

        /* Steady state: the next group is loaded BEFORE the pending	*/
        /* products are stored, then the new products are computed.	*/
        for (grp = N/4 - 1; grp > 0; grp--){
            a += 8;
            b += 8;
            for (k = 0; k < 8; k++){
                av[k] = a[k];
                bv[k] = b[k];
            }
            for (k = 0; k < 8; k++)
                out[k] = pend[k];
            cvprod_group(av, bv, pend);
            out += 8;
        }

        /* Drain the pipeline: store the last pending group. */
        a += 8;
        b += 8;
        for (k = 0; k < 8; k++)
            out[k] = pend[k];
        out += 8;
    }

    /* Remaining (N mod 4) values, one complex product at a time. */
    for (left = N%4; left > 0; left--){
        float ar, ai, br, bi, re, im;

        ar = *a++;
        br = *b++;
        ai = *a++;
        bi = *b++;
        re = ar * br;
        im = (ar * bi);
        re -= (ai * bi);
        im = ai * br + im;
        *out++ = re;
        *out++ = im;
    }
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
/* a few routines from a vector/matrix library */
|
||||
|
||||
void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
|
||||
/* not in-place matrix transpose */
|
||||
/* INPUTS */
|
||||
/* *indata = input data array */
|
||||
/* iRsiz = offset to between rows of input data array */
|
||||
/* oRsiz = offset to between rows of output data array */
|
||||
/* Nrows = number of rows in input data array */
|
||||
/* Ncols = number of columns in input data array */
|
||||
/* OUTPUTS */
|
||||
/* *outdata = output data array */
|
||||
|
||||
void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols);
|
||||
/* not in-place complex matrix transpose */
|
||||
/* INPUTS */
|
||||
/* *indata = input data array */
|
||||
/* iRsiz = offset to between rows of input data array */
|
||||
/* oRsiz = offset to between rows of output data array */
|
||||
/* Nrows = number of rows in input data array */
|
||||
/* Ncols = number of columns in input data array */
|
||||
/* OUTPUTS */
|
||||
/* *outdata = output data array */
|
||||
|
||||
void cvprod(float *a, float *b, float *out, long N);
|
||||
/* complex vector product, can be in-place */
|
||||
/* product of complex vector *a times complex vector *b */
|
||||
/* INPUTS */
|
||||
/* N vector length */
|
||||
/* *a complex vector length N complex numbers */
|
||||
/* *b complex vector length N complex numbers */
|
||||
/* OUTPUTS */
|
||||
/* *out complex vector length N */
|
||||
|
|
@ -216,3 +216,27 @@ double gauss(void)
|
|||
return glgset;
|
||||
}
|
||||
}
|
||||
|
||||
/* Polar form of the Box-Muller generator for Gaussian distributed
|
||||
random variates.
|
||||
Generator will be fed with two uniformly distributed random variates.
|
||||
Delivers two values per call
|
||||
*/
|
||||
|
||||
void rgauss(double* py1, double* py2)
{
    /* Polar (Marsaglia) form of the Box-Muller transform.
       Two uniform variates from CombLCGTaus() are mapped to the square
       [-1,1) x [-1,1) and rejected until the point lies strictly inside
       the unit circle; the accepted pair is then scaled into two
       Gaussian variates, returned through *py1 and *py2. */
    double x1, x2, w;

    do {
        x1 = 2.0 * CombLCGTaus() - 1.0;
        x2 = 2.0 * CombLCGTaus() - 1.0;
        w = x1 * x1 + x2 * x2;
        /* w == 0.0 must be rejected as well: log(0) is -inf and the
           division by w would yield inf, so x * w becomes 0 * inf = NaN. */
    } while ( w >= 1.0 || w == 0.0 );

    w = sqrt( (-2.0 * log( w ) ) / w );

    *py1 = x1 * w;
    *py2 = x2 * w;
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -21,8 +21,9 @@ IFparm VSRCpTable[] = { /* parameters */
|
|||
IOP ("pwl", VSRC_PWL, IF_REALVEC,"Piecewise linear description"),
|
||||
IOP ("sffm", VSRC_SFFM, IF_REALVEC,"Single freq. FM descripton"),
|
||||
IOP ("am", VSRC_AM, IF_REALVEC,"Amplitude modulation descripton"),
|
||||
IOP ("trnoise", VSRC_TRNOISE, IF_REALVEC,"Transient noise descripton"),
|
||||
|
||||
OPU ("pos_node",VSRC_POS_NODE, IF_INTEGER,"Positive node of source"),
|
||||
OPU ("pos_node",VSRC_POS_NODE, IF_INTEGER,"Positive node of source"),
|
||||
OPU ("neg_node",VSRC_NEG_NODE, IF_INTEGER,"Negative node of source"),
|
||||
OPU ("function",VSRC_FCN_TYPE, IF_INTEGER,"Function of the source"),
|
||||
OPU ("order", VSRC_FCN_ORDER, IF_INTEGER,"Order of the source function"),
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ Author: 1985 Thomas L. Quarles
|
|||
#include "suffix.h"
|
||||
#include "missing_math.h"
|
||||
|
||||
extern int fftInit(long M);
|
||||
extern void fftFree(void);
|
||||
extern void rffts(float *data, long M, long Rows);
|
||||
|
||||
#define SAMETIME(a,b) (fabs((a)-(b))<= TIMETOL * PW)
|
||||
#define TIMETOL 1e-7
|
||||
|
||||
|
|
@ -74,6 +78,7 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
|
|||
/* offset time by delay */
|
||||
time = ckt->CKTtime - TD;
|
||||
tshift = TD;
|
||||
|
||||
#ifdef XSPICE
|
||||
/* normalize phase to 0 - 360° */
|
||||
/* normalize phase to cycles */
|
||||
|
|
@ -180,6 +185,52 @@ VSRCaccept(CKTcircuit *ckt, GENmodel *inModel)
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/**** transient noise routines:
|
||||
VNoi2 2 0 DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
|
||||
rms value, time step, 0 0
|
||||
VNoi1 1 0 DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
|
||||
0, time step, exponent < 2, rms value
|
||||
*/
|
||||
case TRNOISE: {
|
||||
double NA, NT, TS, time, basetime = 0.;
|
||||
|
||||
#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
|
||||
#define NTIMETOL 1e-7
|
||||
|
||||
NA = here->VSRCcoeffs[0]; // input is rms value
|
||||
NT = here->VSRCcoeffs[1]; // time step
|
||||
if (NT == 0.) // no further breakpoint if value not given
|
||||
break;
|
||||
// TS = NT > ckt->CKTstep ? NT : ckt->CKTstep;
|
||||
TS = NT;
|
||||
time = ckt->CKTtime;
|
||||
|
||||
if(time >= TS) {
|
||||
/* repeating signal - figure out where we are
|
||||
in period */
|
||||
basetime = TS * floor(time*1.000000000001/TS);
|
||||
// basetime = TS * floor(time/TS);
|
||||
// basetime = TS * here->VSRCncount;
|
||||
time -= basetime;
|
||||
}
|
||||
if(ckt->CKTbreak && NSAMETIME(time,0)) {
|
||||
/* set next breakpoint */
|
||||
// error = CKTsetBreak(ckt, TS * ((double)here->VSRCncount + 1.));
|
||||
error = CKTsetBreak(ckt, basetime + TS);
|
||||
if(error) return(error);
|
||||
}
|
||||
/* else if (ckt->CKTbreak && NSAMETIME(time,TS)) {
|
||||
// set next breakpoint
|
||||
error = CKTsetBreak(ckt, basetime + TS + TS);
|
||||
if(error) return(error);
|
||||
} */
|
||||
if (ckt->CKTtime == 0.) {
|
||||
// printf("VSRC: free fft tables\n");
|
||||
fftFree();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
bkptset: ;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ VSRCask(CKTcircuit *ckt, GENinstance *inst, int which, IFvalue *value, IFvalue *
|
|||
case VSRC_PWL:
|
||||
case VSRC_SFFM:
|
||||
case VSRC_AM:
|
||||
case VSRC_TRNOISE:
|
||||
case VSRC_FCN_COEFFS:
|
||||
temp = value->v.numValue = here->VSRCfunctionOrder;
|
||||
v = value->v.vec.rVec = TMALLOC(double, here->VSRCfunctionOrder);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,16 @@ typedef struct sVSRCinstance {
|
|||
double VSRCdF2mag; /* distortion f2 magnitude */
|
||||
double VSRCdF1phase; /* distortion f1 phase */
|
||||
double VSRCdF2phase; /* distortion f2 phase */
|
||||
|
||||
|
||||
/*transient noise*/
|
||||
double VSRCprevTime; /*last time a new random value was issued*/
|
||||
double VSRCprevVal; /*last value issued at prevTime*/
|
||||
double VSRCnewVal; /*new value issued at prevTime*/
|
||||
double VSRCsecRand; /*second random value not yet used*/
|
||||
float *VSRConeof; /*pointer to array of 1 over f noise values */
|
||||
long int VSRCncount; /* counter to retrieve noise values */
|
||||
/*end of noise*/
|
||||
|
||||
double VSRCr; /* pwl repeat */
|
||||
double VSRCrdelay; /* pwl delay period */
|
||||
double *VSRCposIbrptr; /* pointer to sparse matrix element at
|
||||
|
|
@ -93,6 +102,7 @@ typedef struct sVSRCmodel {
|
|||
#define SFFM 4
|
||||
#define PWL 5
|
||||
#define AM 6
|
||||
#define TRNOISE 7
|
||||
#endif /*PULSE*/
|
||||
|
||||
/* device parameters */
|
||||
|
|
@ -121,6 +131,7 @@ typedef struct sVSRCmodel {
|
|||
#define VSRC_AM 22
|
||||
#define VSRC_R 23
|
||||
#define VSRC_TD 24
|
||||
#define VSRC_TRNOISE 25
|
||||
|
||||
/* model parameters */
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,15 @@ $Id$
|
|||
#include "trandefs.h"
|
||||
#include "sperror.h"
|
||||
#include "suffix.h"
|
||||
#undef WaGauss
|
||||
#ifdef FastRand
|
||||
#include "FastNorm3.h"
|
||||
#elif defined (WaGauss)
|
||||
#include "wallace.h"
|
||||
#else
|
||||
extern void rgauss(double* py1, double* py2);
|
||||
#endif
|
||||
#include "1-f-code.h"
|
||||
|
||||
#ifdef XSPICE_EXP
|
||||
/* gtri - begin - wbk - modify for supply ramping option */
|
||||
|
|
@ -27,7 +36,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
VSRCmodel *model = (VSRCmodel *)inModel;
|
||||
VSRCinstance *here;
|
||||
double time;
|
||||
double value;
|
||||
double value = 0.0;
|
||||
|
||||
/* loop through all the voltage source models */
|
||||
for( ; model != NULL; model = model->VSRCnextModel ) {
|
||||
|
|
@ -35,7 +44,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
/* loop through all the instances of the model */
|
||||
for (here = model->VSRCinstances; here != NULL ;
|
||||
here=here->VSRCnextInstance) {
|
||||
if (here->VSRCowner != ARCHme) continue;
|
||||
if (here->VSRCowner != ARCHme) continue;
|
||||
|
||||
*(here->VSRCposIbrptr) += 1.0 ;
|
||||
*(here->VSRCnegIbrptr) -= 1.0 ;
|
||||
|
|
@ -63,29 +72,29 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
}
|
||||
|
||||
case PULSE: {
|
||||
double V1, V2, TD, TR, TF, PW, PER;
|
||||
double basetime = 0;
|
||||
double V1, V2, TD, TR, TF, PW, PER;
|
||||
double basetime = 0;
|
||||
#ifdef XSPICE
|
||||
double PHASE;
|
||||
double phase;
|
||||
double deltat;
|
||||
#endif
|
||||
V1 = here->VSRCcoeffs[0];
|
||||
V2 = here->VSRCcoeffs[1];
|
||||
TD = here->VSRCfunctionOrder > 2
|
||||
? here->VSRCcoeffs[2] : 0.0;
|
||||
TR = here->VSRCfunctionOrder > 3
|
||||
&& here->VSRCcoeffs[3] != 0.0
|
||||
? here->VSRCcoeffs[3] : ckt->CKTstep;
|
||||
TF = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4] != 0.0
|
||||
? here->VSRCcoeffs[4] : ckt->CKTstep;
|
||||
PW = here->VSRCfunctionOrder > 5
|
||||
&& here->VSRCcoeffs[5] != 0.0
|
||||
? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
|
||||
PER = here->VSRCfunctionOrder > 6
|
||||
&& here->VSRCcoeffs[6] != 0.0
|
||||
? here->VSRCcoeffs[6] : ckt->CKTfinalTime;
|
||||
V1 = here->VSRCcoeffs[0];
|
||||
V2 = here->VSRCcoeffs[1];
|
||||
TD = here->VSRCfunctionOrder > 2
|
||||
? here->VSRCcoeffs[2] : 0.0;
|
||||
TR = here->VSRCfunctionOrder > 3
|
||||
&& here->VSRCcoeffs[3] != 0.0
|
||||
? here->VSRCcoeffs[3] : ckt->CKTstep;
|
||||
TF = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4] != 0.0
|
||||
? here->VSRCcoeffs[4] : ckt->CKTstep;
|
||||
PW = here->VSRCfunctionOrder > 5
|
||||
&& here->VSRCcoeffs[5] != 0.0
|
||||
? here->VSRCcoeffs[5] : ckt->CKTfinalTime;
|
||||
PER = here->VSRCfunctionOrder > 6
|
||||
&& here->VSRCcoeffs[6] != 0.0
|
||||
? here->VSRCcoeffs[6] : ckt->CKTfinalTime;
|
||||
|
||||
/* shift time by delay time TD */
|
||||
time -= TD;
|
||||
|
|
@ -126,25 +135,25 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
|
||||
case SINE: {
|
||||
|
||||
double VO, VA, FREQ, TD, THETA;
|
||||
/* gtri - begin - wbk - add PHASE parameter */
|
||||
double VO, VA, FREQ, TD, THETA;
|
||||
/* gtri - begin - wbk - add PHASE parameter */
|
||||
#ifdef XSPICE
|
||||
double PHASE;
|
||||
double phase;
|
||||
double phase;
|
||||
|
||||
PHASE = here->VSRCfunctionOrder > 5
|
||||
? here->VSRCcoeffs[5] : 0.0;
|
||||
? here->VSRCcoeffs[5] : 0.0;
|
||||
|
||||
/* compute phase in radians */
|
||||
/* compute phase in radians */
|
||||
phase = PHASE * M_PI / 180.0;
|
||||
#endif
|
||||
VO = here->VSRCcoeffs[0];
|
||||
VA = here->VSRCcoeffs[1];
|
||||
VA = here->VSRCcoeffs[1];
|
||||
FREQ = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2] != 0.0
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
TD = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
&& here->VSRCcoeffs[2] != 0.0
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
TD = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
THETA = here->VSRCfunctionOrder > 4
|
||||
? here->VSRCcoeffs[4] : 0.0;
|
||||
|
||||
|
|
@ -155,12 +164,12 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
} else {
|
||||
|
||||
value = VO + VA * sin(FREQ*time * 2.0 * M_PI + phase) *
|
||||
exp(-time*THETA);
|
||||
exp(-time*THETA);
|
||||
#else
|
||||
value = VO;
|
||||
} else {
|
||||
value = VO + VA * sin(FREQ * time * 2.0 * M_PI) *
|
||||
exp(-(time*THETA));
|
||||
exp(-(time*THETA));
|
||||
#endif
|
||||
/* gtri - end - wbk - add PHASE parameter */
|
||||
}
|
||||
|
|
@ -168,24 +177,23 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
break;
|
||||
|
||||
case EXP: {
|
||||
double V1, V2, TD1, TD2, TAU1, TAU2;
|
||||
double V1, V2, TD1, TD2, TAU1, TAU2;
|
||||
|
||||
V1 = here->VSRCcoeffs[0];
|
||||
V2 = here->VSRCcoeffs[1];
|
||||
TD1 = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2] != 0.0
|
||||
? here->VSRCcoeffs[2] : ckt->CKTstep;
|
||||
TAU1 = here->VSRCfunctionOrder > 3
|
||||
&& here->VSRCcoeffs[3] != 0.0
|
||||
? here->VSRCcoeffs[3] : ckt->CKTstep;
|
||||
V2 = here->VSRCcoeffs[1];
|
||||
TD1 = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2] != 0.0
|
||||
? here->VSRCcoeffs[2] : ckt->CKTstep;
|
||||
TAU1 = here->VSRCfunctionOrder > 3
|
||||
&& here->VSRCcoeffs[3] != 0.0
|
||||
? here->VSRCcoeffs[3] : ckt->CKTstep;
|
||||
TD2 = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4] != 0.0
|
||||
? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
|
||||
&& here->VSRCcoeffs[4] != 0.0
|
||||
? here->VSRCcoeffs[4] : TD1 + ckt->CKTstep;
|
||||
TAU2 = here->VSRCfunctionOrder > 5
|
||||
&& here->VSRCcoeffs[5]
|
||||
? here->VSRCcoeffs[5] : ckt->CKTstep;
|
||||
|
||||
|
||||
&& here->VSRCcoeffs[5]
|
||||
? here->VSRCcoeffs[5] : ckt->CKTstep;
|
||||
|
||||
if(time <= TD1) {
|
||||
value = V1;
|
||||
} else if (time <= TD2) {
|
||||
|
|
@ -199,7 +207,7 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
|
||||
case SFFM:{
|
||||
|
||||
double VO, VA, FC, MDI, FS;
|
||||
double VO, VA, FC, MDI, FS;
|
||||
/* gtri - begin - wbk - add PHASE parameters */
|
||||
#ifdef XSPICE
|
||||
|
||||
|
|
@ -208,25 +216,24 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
double phases;
|
||||
|
||||
PHASEC = here->VSRCfunctionOrder > 5
|
||||
? here->VSRCcoeffs[5] : 0.0;
|
||||
? here->VSRCcoeffs[5] : 0.0;
|
||||
PHASES = here->VSRCfunctionOrder > 6
|
||||
? here->VSRCcoeffs[6] : 0.0;
|
||||
? here->VSRCcoeffs[6] : 0.0;
|
||||
|
||||
/* compute phases in radians */
|
||||
phasec = PHASEC * M_PI / 180.0;
|
||||
phases = PHASES * M_PI / 180.0;
|
||||
|
||||
#endif
|
||||
VO = here->VSRCcoeffs[0];
|
||||
VA = here->VSRCcoeffs[1];
|
||||
FC = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2]
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
MDI = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
FS = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4]
|
||||
? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
|
||||
VO = here->VSRCcoeffs[0];
|
||||
VA = here->VSRCcoeffs[1];
|
||||
FC = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2]
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
MDI = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
FS = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4]
|
||||
? here->VSRCcoeffs[4] : (1/ckt->CKTfinalTime);
|
||||
#ifdef XSPICE
|
||||
/* compute waveform value */
|
||||
value = VO + VA *
|
||||
|
|
@ -242,10 +249,9 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
break;
|
||||
case AM:{
|
||||
|
||||
double VA, FC, MF, VO, TD;
|
||||
double VA, FC, MF, VO, TD;
|
||||
/* gtri - begin - wbk - add PHASE parameters */
|
||||
#ifdef XSPICE
|
||||
|
||||
double PHASEC, PHASES;
|
||||
double phasec;
|
||||
double phases;
|
||||
|
|
@ -260,49 +266,40 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
phases = PHASES * M_PI / 180.0;
|
||||
|
||||
#endif
|
||||
|
||||
VA = here->VSRCcoeffs[0];
|
||||
VO = here->VSRCcoeffs[1];
|
||||
MF = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2]
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
FC = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
TD = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4]
|
||||
? here->VSRCcoeffs[4] : 0.0;
|
||||
VA = here->VSRCcoeffs[0];
|
||||
VO = here->VSRCcoeffs[1];
|
||||
MF = here->VSRCfunctionOrder > 2
|
||||
&& here->VSRCcoeffs[2]
|
||||
? here->VSRCcoeffs[2] : (1/ckt->CKTfinalTime);
|
||||
FC = here->VSRCfunctionOrder > 3
|
||||
? here->VSRCcoeffs[3] : 0.0;
|
||||
TD = here->VSRCfunctionOrder > 4
|
||||
&& here->VSRCcoeffs[4]
|
||||
? here->VSRCcoeffs[4] : 0.0;
|
||||
|
||||
time -= TD;
|
||||
if (time <= 0) {
|
||||
value = 0;
|
||||
} else {
|
||||
#ifdef XSPICE
|
||||
/* compute waveform value */
|
||||
value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
|
||||
sin(2 * M_PI * FC * time + phases);
|
||||
/* compute waveform value */
|
||||
value = VA * (VO + sin(2.0 * M_PI * MF * time + phases )) *
|
||||
sin(2 * M_PI * FC * time + phases);
|
||||
|
||||
#else /* XSPICE */
|
||||
value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
|
||||
sin(2 * M_PI * FC * time);
|
||||
value = VA * (VO + sin(2.0 * M_PI * MF * time)) *
|
||||
sin(2 * M_PI * FC * time);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* gtri - end - wbk - add PHASE parameters */
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case PWL: {
|
||||
int i = 0, num_repeat = 0, ii = 0;
|
||||
double foo, repeat_time = 0, end_time, breakpt_time, itime;
|
||||
|
||||
time -= here->VSRCrdelay;
|
||||
// if(time > PER) {
|
||||
/* repeating signal - figure out where we are */
|
||||
/* in period */
|
||||
// basetime = PER * floor(time/PER);
|
||||
// time -= basetime;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
if(time < *(here->VSRCcoeffs)) {
|
||||
foo = *(here->VSRCcoeffs + 1) ;
|
||||
|
|
@ -310,35 +307,243 @@ VSRCload(GENmodel *inModel, CKTcircuit *ckt)
|
|||
goto loadDone;
|
||||
}
|
||||
|
||||
do {
|
||||
for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
|
||||
itime = *(here->VSRCcoeffs+2*i);
|
||||
if ( AlmostEqualUlps(itime+repeat_time, time, 3 )) {
|
||||
// if ( fabs( (*(here->VSRCcoeffs+2*i)+repeat_time) - time ) < 1e-20 ) {
|
||||
foo = *(here->VSRCcoeffs+2*i+1);
|
||||
value = foo;
|
||||
goto loadDone;
|
||||
} else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time) && (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
|
||||
foo = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
|
||||
(*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
|
||||
do {
|
||||
for(i=ii ; i<(here->VSRCfunctionOrder/2)-1; i++ ) {
|
||||
itime = *(here->VSRCcoeffs+2*i);
|
||||
if ( AlmostEqualUlps(itime+repeat_time, time, 3 )) {
|
||||
foo = *(here->VSRCcoeffs+2*i+1);
|
||||
value = foo;
|
||||
goto loadDone;
|
||||
} else if ( (*(here->VSRCcoeffs+2*i)+repeat_time < time)
|
||||
&& (*(here->VSRCcoeffs+2*(i+1))+repeat_time > time) ) {
|
||||
foo = *(here->VSRCcoeffs+2*i+1) + (((time-(*(here->VSRCcoeffs+2*i)+repeat_time))/
|
||||
(*(here->VSRCcoeffs+2*(i+1)) - *(here->VSRCcoeffs+2*i))) *
|
||||
(*(here->VSRCcoeffs+2*i+3) - *(here->VSRCcoeffs+2*i+1)));
|
||||
value = foo;
|
||||
goto loadDone;
|
||||
}
|
||||
}
|
||||
foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
|
||||
value = foo;
|
||||
value = foo;
|
||||
goto loadDone;
|
||||
}
|
||||
}
|
||||
foo = *(here->VSRCcoeffs+ here->VSRCfunctionOrder-1) ;
|
||||
value = foo;
|
||||
|
||||
if ( !here->VSRCrGiven ) goto loadDone;
|
||||
if ( !here->VSRCrGiven ) goto loadDone;
|
||||
|
||||
end_time = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
|
||||
breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
|
||||
repeat_time = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
|
||||
ii = here->VSRCrBreakpt/2;
|
||||
} while ( here->VSRCrGiven );
|
||||
end_time = *(here->VSRCcoeffs + here->VSRCfunctionOrder-2);
|
||||
breakpt_time = *(here->VSRCcoeffs + here->VSRCrBreakpt);
|
||||
repeat_time = end_time + (end_time - breakpt_time)*num_repeat++ - breakpt_time;
|
||||
ii = here->VSRCrBreakpt/2;
|
||||
} while ( here->VSRCrGiven );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**** transient noise routines:
|
||||
VNoi2 2 0 DC 0 TRNOISE(10n 0.5n 0 0n) : generate gaussian distributed noise
|
||||
rms value, time step, 0 0
|
||||
VNoi1 1 0 DC 0 TRNOISE(0n 0.5n 1 10n) : generate 1/f noise
|
||||
0, time step, exponent < 2, rms value
|
||||
*/
|
||||
case TRNOISE: {
|
||||
/* Generate voltage point every TS with amplitude NA * ra,
|
||||
where ra is drawn from a random number generator with
|
||||
gaussian distribution with mean 0 and standard deviation 1
|
||||
*/
|
||||
|
||||
//#define PRVAL
|
||||
// typedef int bool;
|
||||
|
||||
double newval=0.0, lastval=0.0, lasttime=0.0;
|
||||
double NA, NT, TS;
|
||||
double V1, V2, basetime = 0.;
|
||||
double scalef, ra1, ra2;
|
||||
float NALPHA, NAMP;
|
||||
|
||||
long int nosteps, newsteps = 1, newexp = 0;
|
||||
|
||||
bool aof = FALSE;
|
||||
|
||||
NA = here->VSRCcoeffs[0]; // input is rms value
|
||||
NT = here->VSRCcoeffs[1]; // time step
|
||||
|
||||
scalef = NA;
|
||||
// scalef = NA*1.32;
|
||||
|
||||
NALPHA = here->VSRCfunctionOrder > 2
|
||||
? (float)here->VSRCcoeffs[2] : 0.0f;
|
||||
NAMP = here->VSRCfunctionOrder > 3
|
||||
&& here->VSRCcoeffs[3] != 0.0
|
||||
&& here->VSRCcoeffs[2] != 0.0
|
||||
? (float)here->VSRCcoeffs[3] : 0.0f;
|
||||
|
||||
if ((NT == 0.) || ((NA == 0.) && (NAMP == 0.))) {
|
||||
value = here->VSRCdcValue;
|
||||
goto noiDone;
|
||||
}
|
||||
else
|
||||
TS = NT; /* time step for noise */
|
||||
|
||||
if ((NALPHA > 0.0) && (NAMP > 0.0)) aof = TRUE;
|
||||
|
||||
lasttime = here->VSRCprevTime;
|
||||
lastval = here->VSRCprevVal;
|
||||
newval = here->VSRCnewVal;
|
||||
/* set all data: DC, white, 1of */
|
||||
if (time <= 0 /*ckt->CKTstep*/) {
|
||||
/* data are already set */
|
||||
if ((here->VSRCprevVal != 0) || (here->VSRCnewVal != 0)) {
|
||||
value = here->VSRCprevVal;
|
||||
goto noiDone;
|
||||
}
|
||||
lasttime = 0.0;
|
||||
here->VSRCsecRand = 2.; /* > 1, invalid number out of the random number range */
|
||||
/* get two random samples */
|
||||
#ifdef FastRand
|
||||
// use FastNorm3
|
||||
here->VSRCprevVal = scalef * GaussWa;
|
||||
here->VSRCnewVal = scalef * GaussWa;
|
||||
#elif defined (WaGauss)
|
||||
// use WallaceHV
|
||||
here->VSRCprevVal = scalef * GaussWa;
|
||||
here->VSRCnewVal = scalef * GaussWa;
|
||||
#else
|
||||
// make use of two random variables per call to rgauss()
|
||||
rgauss(&ra1, &ra2);
|
||||
here->VSRCprevVal = scalef * ra1;
|
||||
// choose to set start value to 0
|
||||
here->VSRCprevVal = 0;
|
||||
here->VSRCnewVal = scalef * ra2;
|
||||
#endif
|
||||
/* generate 1 over f noise at time 0 */
|
||||
if (aof) {
|
||||
if (here->VSRCncount==0) {
|
||||
// add 10 steps for start up sequence
|
||||
nosteps = (long)((ckt->CKTfinalTime)/TS) + 10;
|
||||
// generate number of steps as power of 2
|
||||
while(newsteps < nosteps) {
|
||||
newsteps <<= 1;
|
||||
newexp++;
|
||||
}
|
||||
here->VSRConeof = TMALLOC(float, newsteps); //(float *)tmalloc(sizeof(float) * newsteps);
|
||||
#ifdef PRVAL
|
||||
printf("ALPHA: %f, GAIN: %e\n", NALPHA, NAMP);
|
||||
#endif
|
||||
f_alpha(newsteps, newexp, here->VSRConeof, NAMP, NALPHA);
|
||||
#ifdef PRVAL
|
||||
printf("Noi1: %e, Noi2: %e\n", here->VSRConeof[10], here->VSRConeof[100]);
|
||||
#endif
|
||||
here->VSRCprevVal += here->VSRConeof[here->VSRCncount];
|
||||
here->VSRCncount++;
|
||||
here->VSRCnewVal += here->VSRConeof[here->VSRCncount];
|
||||
here->VSRCncount++;
|
||||
value = newval;
|
||||
// add DC
|
||||
here->VSRCprevVal += here->VSRCdcValue;
|
||||
here->VSRCnewVal += here->VSRCdcValue;
|
||||
value = here->VSRCprevVal;
|
||||
#ifdef PRVAL
|
||||
printf("start1, time: %e, outp: %e, rnd: %e\n", time, newval, testval);
|
||||
#endif
|
||||
} else { // here->VSRCncount > 0
|
||||
// add DC
|
||||
here->VSRCprevVal += here->VSRCdcValue;
|
||||
here->VSRCnewVal += here->VSRCdcValue;
|
||||
value = here->VSRCprevVal;
|
||||
#ifdef PRVAL
|
||||
printf("start2, time: %e, outp: %e, rnd: %e\n", time, here->VSRCprevVal, testval);
|
||||
#endif
|
||||
}
|
||||
#ifdef PRVAL
|
||||
printf("time 0 value: %e for %s\n", here->VSRCprevVal, here->VSRCname);
|
||||
#endif
|
||||
goto loadDone;
|
||||
} //aof
|
||||
// add DC
|
||||
here->VSRCprevVal += here->VSRCdcValue;
|
||||
here->VSRCnewVal += here->VSRCdcValue;
|
||||
value = here->VSRCprevVal;
|
||||
here->VSRCprevTime = 0.;
|
||||
goto loadDone;
|
||||
} // time <= 0
|
||||
|
||||
V1 = here->VSRCprevVal;
|
||||
V2 = here->VSRCnewVal;
|
||||
if (here->VSRCprevTime == ckt->CKTtime) {
|
||||
value = here->VSRCprevVal;
|
||||
goto noiDone;
|
||||
}
|
||||
|
||||
if (time > 0 && time < TS) {
|
||||
value = V1 + (V2 - V1) * (time) / TS;
|
||||
}
|
||||
else if (time >= TS) {
|
||||
/* repeating signal - figure out where we are in period */
|
||||
/* numerical correction to avoid basetime less than
|
||||
next step, e.g. 4.99999999999999995 delivers a floor
|
||||
of 4 instead of 5 */
|
||||
basetime = TS * floor(time*1.000000000001/TS);
|
||||
time -= basetime;
|
||||
|
||||
#define NSAMETIME(a,b) (fabs((a)-(b))<= NTIMETOL * TS)
|
||||
#define NTIMETOL 1e-7
|
||||
|
||||
if NSAMETIME(time,0.) {
|
||||
|
||||
/* get new random number */
|
||||
#ifdef FastRand
|
||||
// use FastNorm3
|
||||
newval = scalef * FastNorm;
|
||||
#elif defined (WaGauss)
|
||||
// use WallaceHV
|
||||
newval = scalef * GaussWa;
|
||||
#else
|
||||
// make use of two random variables per call to rgauss()
|
||||
if (here->VSRCsecRand == 2.0) {
|
||||
rgauss(&ra1, &ra2);
|
||||
newval = scalef * ra1;
|
||||
here->VSRCsecRand = scalef * ra2;
|
||||
}
|
||||
else {
|
||||
newval = here->VSRCsecRand;
|
||||
here->VSRCsecRand = 2.0;
|
||||
}
|
||||
#endif
|
||||
V1 = here->VSRCprevVal = here->VSRCnewVal;
|
||||
V2 = newval; // scale factor t.b.d.
|
||||
if(here->VSRCdcGiven) V2 += here->VSRCdcValue;
|
||||
if (aof) {
|
||||
V2 += here->VSRConeof[here->VSRCncount];
|
||||
#ifdef PRVAL
|
||||
printf("aof: %d\n", here->VSRCncount);
|
||||
#endif
|
||||
}
|
||||
here->VSRCncount++;
|
||||
value = V1;
|
||||
here->VSRCnewVal = V2;
|
||||
} else if (time > 0 && time < TS) {
|
||||
value = V1 + (V2 - V1) * (time) / TS;
|
||||
#ifdef PRVAL
|
||||
printf("if1, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime,
|
||||
V1 + (V2 - V1) * (time) / TS, V2);
|
||||
#endif
|
||||
} else { /* time > TS should never be reached */
|
||||
value = V1 + (V2 - V1) * (time-TS) / TS;
|
||||
#ifdef PRVAL
|
||||
printf("if2, time: %e, outp: %e, rnd: %e\n", ckt->CKTtime,
|
||||
V1 + (V2 - V1) * (time-TS) / TS, V2);
|
||||
#endif
|
||||
}
|
||||
here->VSRCprevTime = ckt->CKTtime;
|
||||
}
|
||||
noiDone:
|
||||
if (time >=ckt->CKTfinalTime) {
|
||||
/* free the 1of memory */
|
||||
if (here->VSRConeof) tfree(here->VSRConeof);
|
||||
/* reset the 1of counter */
|
||||
here->VSRCncount = 0;
|
||||
}
|
||||
goto loadDone;
|
||||
} // case
|
||||
break;
|
||||
} // switch
|
||||
}
|
||||
loadDone:
|
||||
/* gtri - begin - wbk - modify for supply ramping option */
|
||||
|
|
@ -346,11 +551,12 @@ loadDone:
|
|||
value *= ckt->CKTsrcFact;
|
||||
value *= cm_analog_ramp_factor();
|
||||
#else
|
||||
if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
|
||||
*(ckt->CKTrhs + (here->VSRCbranch)) += value;
|
||||
if (ckt->CKTmode & MODETRANOP) value *= ckt->CKTsrcFact;
|
||||
/* load the new voltage value into the matrix */
|
||||
*(ckt->CKTrhs + (here->VSRCbranch)) += value;
|
||||
#endif
|
||||
/* gtri - end - wbk - modify to process srcFact, etc. for all sources */
|
||||
}
|
||||
}
|
||||
} // for loop instances
|
||||
} // for loop models
|
||||
return(OK);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,6 +169,13 @@ VSRCparam(int param, IFvalue *value, GENinstance *inst, IFvalue *select)
|
|||
return(E_BADPARM);
|
||||
}
|
||||
break;
|
||||
case VSRC_TRNOISE:
|
||||
here->VSRCfunctionType = TRNOISE;
|
||||
here->VSRCfuncTGiven = TRUE;
|
||||
here->VSRCcoeffs = value->v.vec.rVec;
|
||||
here->VSRCfunctionOrder = value->v.numValue;
|
||||
here->VSRCcoeffsGiven = TRUE;
|
||||
break;
|
||||
default:
|
||||
return(E_BADPARM);
|
||||
}
|
||||
|
|
|
|||
17882
visualc/vngspice.vcproj
17882
visualc/vngspice.vcproj
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue