ngspice/src/frontend/trannoise/wallace.c

544 lines
15 KiB
C

/* Wallace generator for normally distributed random variates
Copyright: Holger Vogt, 2008
*/
//#define FASTNORM_ORIG
#ifdef HasMain
#include <sys/timeb.h>
#else
#ifndef NOSPICE
#include "ngspice/ngspice.h"
#endif
#endif
#ifdef _MSC_VER
#include <process.h>
#define getpid _getpid
#else
#include <unistd.h>
#endif
#include <math.h>
#include "ngspice/wallace.h"
#include "ngspice/FastNorm3.h"
#define POOLSIZE 4096
#define LPOOLSIZE 12
#define NOTRANS 3 /* number of (dual) transformations */
#define VE 10
#define VL (1 << VE)
#define VM (VL-1)
#define WL (4*VL)
#define WM (WL-1)
double *outgauss; /* output vector for user access */
unsigned int variate_used; /* actual index of variate called by user */
double ScaleGauss;
static double *pool1;
static double *pool2;
static unsigned int *addrif, *addrib;
static unsigned n = POOLSIZE;
static double chi1, chi2; /* chi^2 correction values */
static unsigned int newpools;
extern double drand(void);
extern unsigned int CombLCGTausInt(void);
extern void TausSeed(void);
extern unsigned int CombLCGTausInt2(void);
void
PolarGauss(double* py1, double* py2)
{
double x1, x2, w;
do {
x1 = drand();
x2 = drand();
w = x1 * x1 + x2 * x2;
} while ((w > 1.0) || (w < 0.25));
w = sqrt((-2.0 * log(w)) / w);
*py1 = (double)(x1 * w);
*py2 = (double)(x2 * w);
}
static void
destroy_wallace(void)
{
tfree(pool1);
tfree(pool2);
tfree(addrif);
tfree(addrib);
}
void
initw(void)
{
unsigned i;
double totsqr, nomsqr;
unsigned long int coa, cob, s;
/* initialize the uniform generator */
srand((unsigned int) getpid());
// srand(17);
TausSeed();
ScaleGauss = 1.;
newpools = 1;
/* set up the two pools */
pool1 = TMALLOC(double, n);
pool2 = TMALLOC(double, n);
addrif = TMALLOC(unsigned int, (n + NOTRANS));
addrib = TMALLOC(unsigned int, (n + NOTRANS));
atexit(destroy_wallace);
/* fill the first pool with normally distributed values */
PolarGauss(&pool1[0], &pool1[1]);
for (i = 1; i < n>>1; i++)
PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]);
/* normalize pool content */
/* totsqr = totsum = 0.0;
* for (i = 0; i < n; i++) {
* totsqr += pool1[i] * pool1[i];
* totsum += pool1[i];
* }
* totsum = totsum/n;
* for (i = 0; i < n; i++) {
* totsqr += (pool1[i] - totsum) * (pool1[i] - totsum);
* }
* nomsqr = sqrt(n / totsqr);
* for (i = 0; i < n; i++)
* pool1[i] = (pool1[i] - totsum) * nomsqr;
*/
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
/* calculate ch^2 value */
chi1 = sqrt(sqrt(1.0 - 1.0/n));
chi2 = sqrt(1.0 - chi1*chi1);
/* first scaling, based on unused pool1[n-2] */
ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2];
/* access to first pool */
outgauss = pool1;
/* set data counter, we return n-2 values here */
variate_used = n - 2;
/* generate random reading addresses using a LCG */
s = 0;
coa = 241;
cob = 59;
for (i = 0; i < (n + NOTRANS); i++) {
// addrif[i] = s = (s * coa + cob) % (n);
coa = CombLCGTausInt();
addrif[i] = coa >> (32 - LPOOLSIZE);
// printf ("Random add:\t%ld\n" , s);
}
s = 0;
coa = 193;
cob = 15;
for (i = 0; i < (n + NOTRANS); i++) {
// addrib[i] = s = (s * coa + cob) % (n);
coa = CombLCGTausInt();
addrib[i] = coa >> (32 - LPOOLSIZE);
// printf ("Random add:\t%ld\n" , addrib[i]);
}
// printf("norm for orig. Gauss: %e, chi^2 scale: %e\n", nomsqr, ScaleGauss);
// NewWa();
}
/* original FastNorm3.c code */
#ifdef FASTNORM_ORIG
float
NewWa()
{
int i, j, k, m;
float p, q, r, s, t;
int topv[6], ord[4], *top;
float *ppt[4], *ptn;
float nulval, endval;
float totsqr, nomsqr;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
/* Choose 4 random start points in the wk1[] vector
I want them all different. */
top = topv + 1;
/* Set limiting values in top[-1], top[4] */
top[-1] = VL; top[4] = 0;
reran1:
m = CombLCGTausInt(); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
m = CombLCGTausInt(); /* positive 32-bit random */
/* Extract two VE-sized randoms from m, which has 31 useable digits */
m = m >> (31 - 2*VE);
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
for (i = 0; i < 4; i++)
ord[i] = i;
/* Sort in decreasing size */
for (i = 2; i >= 0; i--)
for (j = 0; j <= i; j++)
if (top[j] < top[j+1]) {
SWAP(int, top[j], top[j+1]);
SWAP(int, ord[j], ord[j+1]);
}
/* Ensure all different */
for (i = 0; i < 3; i++)
if (top[i] == top[i+1])
goto reran1;
/* Set pt pointers to their start values for the first chunk. */
for (i = 0; i < 4; i++) {
j = ord[i];
ppt[j] = pool2 + j * VL + top[i];
}
/* Set ptn to point into wk1 */
ptn = pool1;
/* Now ready to do five chunks. The length of chunk i is
top[i-1] - top[i] (I hope)
At the end of chunk i, pointer ord[i] should have reached the end
of its part, and need to be wrapped down to the start of its part.
*/
i = 0;
chunk:
j = top[i] - top[i-1]; /* Minus the chunk length */
for (; j < 0; j++) {
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = t - q;
*ppt[2]++ = r - t;
*ppt[3]++ = s - t;
}
/* This should end the chunk. See if all done */
if (i == 4)
goto passdone;
/* The pointer for part ord[i] should have passed its end */
j = ord[i];
#ifdef dddd
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2);
#endif
ppt[j] -= VL;
i++;
goto chunk;
passdone:
/* wk1[] values have been transformed and placed in wk2[]
Transform from wk2 to wk1 with a simple shuffle */
m = (CombLCGTausInt2() >> (29 - VE)) & WM;
j = 0;
for (i = 0; i < 4; i++)
ppt[i] = pool1 + i * VL;
for (i = 0; i < VL; i++) {
p = pool2[j^m]; j++;
s = pool2[j^m]; j++;
q = pool2[j^m]; j++;
r = pool2[j^m]; j++;
t = (p + q + r + s) * 0.5;
*ppt[0]++ = t - p;
*ppt[1]++ = q - t;
*ppt[2]++ = t - r;
*ppt[3]++ = s - t;
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from last pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
}
#else
/* Simplified code according to an algorithm published by C. S. Wallace:
"Fast Pseudorandom Generators for Normal and Exponential Variates",
ACM Transactions on Mathmatical Software, Vol. 22, No. 1, March 1996, pp. 119-127.
Transform pool1 to pool2 and back to pool1 NOTRANS times
by orthogonal 4 x 4 Hadamard-Matrix.
Mixing of values is very important: Any value in the pool should contribute to
every value in the new pools, at least after several passes (number of passes
is set by NOTRANS to 2 or 3).
4 values are read in a continuous sequence from the total of POOLSIZE values.
Values are stored in steps modulo POOLSIZE/4.
During backward transformation the values are shuffled by a random number jj.
*/
double
NewWa(void)
{
double nulval, endval;
double bl1, bl2, bl3, bl4; /* the four values to be transformed */
double bsum;
double totsqr, nomsqr;
unsigned int i, j, jj, m, mm, mmm;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
m = n >> 2;
// printf("New pool after next value\n");
/* generate new pool by transformation
Transformation is repeated NOTRANS times */
for (i = 0; i < NOTRANS; i++) {
mm = m << 1;
mmm = mm + m;
/* forward transformation */
// for (j = 0; j < n; j += 4) {
for (j = 0; j < m; j++) {
bl1 = pool1[j];
bl2 = pool1[j+m];
bl3 = pool1[j+mm];
bl4 = pool1[j+mmm];
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
jj = j<<2;
pool2[jj] = bl1 - bsum;
pool2[jj+1] = bl2 - bsum;
pool2[jj+2] = bsum - bl3;
pool2[jj+3] = bsum - bl4;
}
/* backward transformation */
jj = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1);
for (j = 0; j < m; j++) {
bl1 = pool2[j^jj];
bl2 = pool2[(j+m)^jj];
bl3 = pool2[(j+mm)^jj];
bl4 = pool2[(j+mmm)^jj];
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
jj = j<<2;
pool1[jj] = bl1 - bsum;
pool1[jj+1] = bl2 - bsum;
pool1[jj+2] = bsum - bl3;
pool1[jj+3] = bsum - bl4;
}
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from previous pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
// return pool1[0]; /* use new scale */
}
#endif
#ifdef FASTNORMTEST
float
NewWa_not(void)
{
float nulval, endval;
float bl1, bl2, bl3, bl4; /* the four values to be transformed */
float bsum;
float totsqr, nomsqr;
unsigned int i, j, jj;
nulval = ScaleGauss * pool1[0];
endval = pool1[n-1];
// printf("New pool after next value\n");
/* generate new pool by transformation
Transformation is repeated NOTRANS times */
for (i = 0; i < NOTRANS; i++) {
/* forward transformation */
for (j = 0; j < n; j += 4) {
jj = j + i;
bl1 = pool1[addrif[jj]];
bl2 = pool1[addrif[jj+1]];
bl3 = pool1[addrif[jj+2]];
bl4 = pool1[addrif[jj+3]];
/* s = (s*coa + cob) & (n - 1);
bl1 = pool1[s];
s = (s*coa + cob) & (n - 1);
bl2 = pool1[s + 1];
s = (s*coa + cob) & (n - 1);
bl3 = pool1[s + 2];
s = (s*coa + cob) & (n - 1);
bl4 = pool1[s + 3]; */
/* jj = j + i;
bl1 = pool1[addrif[jj]];
bl2 = pool1[addrif[jj+1]];
bl3 = pool1[addrif[jj+2]];
bl4 = pool1[addrif[jj+3]]; */
/* bl1 = pool1[j];
bl2 = pool1[j+1];
bl3 = pool1[j+2];
bl4 = pool1[j+3]; */
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
/* pool2[j] = bl1 - bsum;
pool2[j+1] = bl2 - bsum;
pool2[j+2] = bsum - bl3;
pool2[j+3] = bsum - bl4; */
pool2[addrib[jj]] = bl1 - bsum;
pool2[addrib[jj+1]] = bl2 - bsum;
pool2[addrib[jj+2]] = bsum - bl3;
pool2[addrib[jj+3]] = bsum - bl4;
}
/* backward transformation */
for (j = 0; j < n; j += 4) {
bl1 = pool2[j];
bl2 = pool2[j+1];
bl3 = pool2[j+2];
bl4 = pool2[j+3];
/* bl1 = pool2[addrib[j]];
bl2 = pool2[addrib[j+1]];
bl3 = pool2[addrib[j+2]];
bl4 = pool2[addrib[j+3]]; */
/* Hadamard-Matrix */
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
pool1[j] = bl1 - bsum;
pool1[j+1] = bl2 - bsum;
pool1[j+2] = bsum - bl3;
pool1[j+3] = bsum - bl4;
}
}
/* renormalize again if number of pools beyond limit */
if (!(newpools & 0xFFFF)) {
totsqr = 0.0;
for (i = 0; i < n; i++)
totsqr += pool1[i] * pool1[i];
nomsqr = sqrt(n / totsqr);
for (i = 0; i < n; i++)
pool1[i] *= nomsqr;
}
outgauss = pool1;
/* reset data counter */
variate_used = n - 1;
/* set counter counting nomber of pools made */
newpools++;
/* new scale factor using ch^2 correction,
using pool1[n-1] from last pool */
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
return nulval; /* use old scale */
// return pool1[0]; /* use new scale */
}
#endif
/* --------------------- (test) main ------------------------- */
/* gcc -Wall -g -DHasMain -I../../include wallace.c CombTaus.o -o watest.exe */
#ifdef HasMain
#include "ngspice/wallace.h"
struct timeb timenow;
struct timeb timebegin;
int sec, msec;
void
timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec)
{
*msec = now->millitm - begin->millitm;
*sec = now->time - begin->time;
if (*msec < 0) {
*msec += 1000;
(*sec)--;
}
}
int
main()
{
float x;
unsigned int i;
long int count;
initw();
ftime(&timebegin);
count = 100000000;
for (i = 0; i < count; i++) {
x = GaussWa;
// printf("%d\t%f\n", i, x);
}
ftime(&timenow);
timediff(&timenow, &timebegin, &sec, &msec);
printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
initnorm(0, 0);
initnorm(77, 3);
ftime(&timebegin);
count = 100000000;
for (i = 0; i < count; i++) {
x = FastNorm;
// printf("%d\t%f\n", i, x);
}
ftime(&timenow);
timediff(&timenow, &timebegin, &sec, &msec);
printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
return (1);
}
#endif