544 lines
15 KiB
C
544 lines
15 KiB
C
/* Wallace generator for normally distributed random variates
|
|
Copyright: Holger Vogt, 2008
|
|
*/
|
|
|
|
//#define FASTNORM_ORIG
|
|
|
|
#ifdef HasMain
|
|
#include <sys/timeb.h>
|
|
#else
|
|
#ifndef NOSPICE
|
|
#include "ngspice/ngspice.h"
|
|
#endif
|
|
#endif
|
|
#ifdef _MSC_VER
|
|
#include <process.h>
|
|
#define getpid _getpid
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <math.h>
|
|
#include "ngspice/wallace.h"
|
|
#include "ngspice/FastNorm3.h"
|
|
|
|
#define POOLSIZE 4096
|
|
#define LPOOLSIZE 12
|
|
#define NOTRANS 3 /* number of (dual) transformations */
|
|
|
|
#define VE 10
|
|
#define VL (1 << VE)
|
|
#define VM (VL-1)
|
|
#define WL (4*VL)
|
|
#define WM (WL-1)
|
|
|
|
double *outgauss; /* output vector for user access */
|
|
unsigned int variate_used; /* actual index of variate called by user */
|
|
double ScaleGauss;
|
|
|
|
static double *pool1;
|
|
static double *pool2;
|
|
static unsigned int *addrif, *addrib;
|
|
static unsigned n = POOLSIZE;
|
|
static double chi1, chi2; /* chi^2 correction values */
|
|
static unsigned int newpools;
|
|
|
|
extern double drand(void);
|
|
extern unsigned int CombLCGTausInt(void);
|
|
extern void TausSeed(void);
|
|
extern unsigned int CombLCGTausInt2(void);
|
|
|
|
|
|
void
|
|
PolarGauss(double* py1, double* py2)
|
|
{
|
|
double x1, x2, w;
|
|
|
|
do {
|
|
x1 = drand();
|
|
x2 = drand();
|
|
w = x1 * x1 + x2 * x2;
|
|
} while ((w > 1.0) || (w < 0.25));
|
|
|
|
w = sqrt((-2.0 * log(w)) / w);
|
|
|
|
*py1 = (double)(x1 * w);
|
|
*py2 = (double)(x2 * w);
|
|
}
|
|
|
|
|
|
static void
|
|
destroy_wallace(void)
|
|
{
|
|
tfree(pool1);
|
|
tfree(pool2);
|
|
tfree(addrif);
|
|
tfree(addrib);
|
|
}
|
|
|
|
|
|
void
|
|
initw(void)
|
|
{
|
|
unsigned i;
|
|
double totsqr, nomsqr;
|
|
unsigned long int coa, cob, s;
|
|
|
|
/* initialize the uniform generator */
|
|
srand((unsigned int) getpid());
|
|
// srand(17);
|
|
TausSeed();
|
|
|
|
ScaleGauss = 1.;
|
|
newpools = 1;
|
|
|
|
/* set up the two pools */
|
|
pool1 = TMALLOC(double, n);
|
|
pool2 = TMALLOC(double, n);
|
|
addrif = TMALLOC(unsigned int, (n + NOTRANS));
|
|
addrib = TMALLOC(unsigned int, (n + NOTRANS));
|
|
|
|
atexit(destroy_wallace);
|
|
|
|
/* fill the first pool with normally distributed values */
|
|
PolarGauss(&pool1[0], &pool1[1]);
|
|
for (i = 1; i < n>>1; i++)
|
|
PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]);
|
|
|
|
/* normalize pool content */
|
|
/* totsqr = totsum = 0.0;
|
|
* for (i = 0; i < n; i++) {
|
|
* totsqr += pool1[i] * pool1[i];
|
|
* totsum += pool1[i];
|
|
* }
|
|
* totsum = totsum/n;
|
|
* for (i = 0; i < n; i++) {
|
|
* totsqr += (pool1[i] - totsum) * (pool1[i] - totsum);
|
|
* }
|
|
* nomsqr = sqrt(n / totsqr);
|
|
* for (i = 0; i < n; i++)
|
|
* pool1[i] = (pool1[i] - totsum) * nomsqr;
|
|
*/
|
|
totsqr = 0.0;
|
|
for (i = 0; i < n; i++)
|
|
totsqr += pool1[i] * pool1[i];
|
|
nomsqr = sqrt(n / totsqr);
|
|
for (i = 0; i < n; i++)
|
|
pool1[i] *= nomsqr;
|
|
|
|
/* calculate ch^2 value */
|
|
chi1 = sqrt(sqrt(1.0 - 1.0/n));
|
|
chi2 = sqrt(1.0 - chi1*chi1);
|
|
|
|
/* first scaling, based on unused pool1[n-2] */
|
|
ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2];
|
|
/* access to first pool */
|
|
outgauss = pool1;
|
|
/* set data counter, we return n-2 values here */
|
|
variate_used = n - 2;
|
|
|
|
/* generate random reading addresses using a LCG */
|
|
s = 0;
|
|
coa = 241;
|
|
cob = 59;
|
|
for (i = 0; i < (n + NOTRANS); i++) {
|
|
// addrif[i] = s = (s * coa + cob) % (n);
|
|
coa = CombLCGTausInt();
|
|
addrif[i] = coa >> (32 - LPOOLSIZE);
|
|
// printf ("Random add:\t%ld\n" , s);
|
|
}
|
|
s = 0;
|
|
coa = 193;
|
|
cob = 15;
|
|
for (i = 0; i < (n + NOTRANS); i++) {
|
|
// addrib[i] = s = (s * coa + cob) % (n);
|
|
coa = CombLCGTausInt();
|
|
addrib[i] = coa >> (32 - LPOOLSIZE);
|
|
// printf ("Random add:\t%ld\n" , addrib[i]);
|
|
}
|
|
|
|
// printf("norm for orig. Gauss: %e, chi^2 scale: %e\n", nomsqr, ScaleGauss);
|
|
// NewWa();
|
|
}
|
|
|
|
|
|
/* original FastNorm3.c code */
|
|
#ifdef FASTNORM_ORIG
|
|
float
|
|
NewWa()
|
|
{
|
|
int i, j, k, m;
|
|
float p, q, r, s, t;
|
|
int topv[6], ord[4], *top;
|
|
float *ppt[4], *ptn;
|
|
|
|
float nulval, endval;
|
|
float totsqr, nomsqr;
|
|
nulval = ScaleGauss * pool1[0];
|
|
endval = pool1[n-1];
|
|
|
|
/* Choose 4 random start points in the wk1[] vector
|
|
I want them all different. */
|
|
|
|
top = topv + 1;
|
|
/* Set limiting values in top[-1], top[4] */
|
|
top[-1] = VL; top[4] = 0;
|
|
reran1:
|
|
m = CombLCGTausInt(); /* positive 32-bit random */
|
|
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
|
m = m >> (31 - 2*VE);
|
|
top[0] = m & VM; m = m >> VE; top[1] = m & VM;
|
|
m = CombLCGTausInt(); /* positive 32-bit random */
|
|
/* Extract two VE-sized randoms from m, which has 31 useable digits */
|
|
m = m >> (31 - 2*VE);
|
|
top[2] = m & VM; m = m >> VE; top[3] = m & VM;
|
|
for (i = 0; i < 4; i++)
|
|
ord[i] = i;
|
|
/* Sort in decreasing size */
|
|
for (i = 2; i >= 0; i--)
|
|
for (j = 0; j <= i; j++)
|
|
if (top[j] < top[j+1]) {
|
|
SWAP(int, top[j], top[j+1]);
|
|
SWAP(int, ord[j], ord[j+1]);
|
|
}
|
|
|
|
/* Ensure all different */
|
|
for (i = 0; i < 3; i++)
|
|
if (top[i] == top[i+1])
|
|
goto reran1;
|
|
|
|
/* Set pt pointers to their start values for the first chunk. */
|
|
for (i = 0; i < 4; i++) {
|
|
j = ord[i];
|
|
ppt[j] = pool2 + j * VL + top[i];
|
|
}
|
|
|
|
/* Set ptn to point into wk1 */
|
|
ptn = pool1;
|
|
|
|
/* Now ready to do five chunks. The length of chunk i is
|
|
top[i-1] - top[i] (I hope)
|
|
At the end of chunk i, pointer ord[i] should have reached the end
|
|
of its part, and need to be wrapped down to the start of its part.
|
|
*/
|
|
i = 0;
|
|
|
|
chunk:
|
|
j = top[i] - top[i-1]; /* Minus the chunk length */
|
|
for (; j < 0; j++) {
|
|
p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++;
|
|
t = (p + q + r + s) * 0.5;
|
|
*ppt[0]++ = t - p;
|
|
*ppt[1]++ = t - q;
|
|
*ppt[2]++ = r - t;
|
|
*ppt[3]++ = s - t;
|
|
}
|
|
/* This should end the chunk. See if all done */
|
|
if (i == 4)
|
|
goto passdone;
|
|
|
|
/* The pointer for part ord[i] should have passed its end */
|
|
j = ord[i];
|
|
#ifdef dddd
|
|
printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2);
|
|
#endif
|
|
ppt[j] -= VL;
|
|
i++;
|
|
goto chunk;
|
|
|
|
passdone:
|
|
/* wk1[] values have been transformed and placed in wk2[]
|
|
Transform from wk2 to wk1 with a simple shuffle */
|
|
m = (CombLCGTausInt2() >> (29 - VE)) & WM;
|
|
j = 0;
|
|
for (i = 0; i < 4; i++)
|
|
ppt[i] = pool1 + i * VL;
|
|
for (i = 0; i < VL; i++) {
|
|
p = pool2[j^m]; j++;
|
|
s = pool2[j^m]; j++;
|
|
q = pool2[j^m]; j++;
|
|
r = pool2[j^m]; j++;
|
|
t = (p + q + r + s) * 0.5;
|
|
*ppt[0]++ = t - p;
|
|
*ppt[1]++ = q - t;
|
|
*ppt[2]++ = t - r;
|
|
*ppt[3]++ = s - t;
|
|
}
|
|
|
|
/* renormalize again if number of pools beyond limit */
|
|
if (!(newpools & 0xFFFF)) {
|
|
totsqr = 0.0;
|
|
for (i = 0; i < n; i++)
|
|
totsqr += pool1[i] * pool1[i];
|
|
nomsqr = sqrt(n / totsqr);
|
|
for (i = 0; i < n; i++)
|
|
pool1[i] *= nomsqr;
|
|
}
|
|
|
|
outgauss = pool1;
|
|
/* reset data counter */
|
|
variate_used = n - 1;
|
|
|
|
/* set counter counting nomber of pools made */
|
|
newpools++;
|
|
|
|
/* new scale factor using ch^2 correction,
|
|
using pool1[n-1] from last pool */
|
|
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
|
|
|
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
|
|
|
return nulval; /* use old scale */
|
|
}
|
|
|
|
#else
|
|
|
|
/* Simplified code according to an algorithm published by C. S. Wallace:
|
|
"Fast Pseudorandom Generators for Normal and Exponential Variates",
|
|
ACM Transactions on Mathmatical Software, Vol. 22, No. 1, March 1996, pp. 119-127.
|
|
Transform pool1 to pool2 and back to pool1 NOTRANS times
|
|
by orthogonal 4 x 4 Hadamard-Matrix.
|
|
Mixing of values is very important: Any value in the pool should contribute to
|
|
every value in the new pools, at least after several passes (number of passes
|
|
is set by NOTRANS to 2 or 3).
|
|
4 values are read in a continuous sequence from the total of POOLSIZE values.
|
|
Values are stored in steps modulo POOLSIZE/4.
|
|
During backward transformation the values are shuffled by a random number jj.
|
|
*/
|
|
|
|
double
|
|
NewWa(void)
|
|
{
|
|
double nulval, endval;
|
|
double bl1, bl2, bl3, bl4; /* the four values to be transformed */
|
|
double bsum;
|
|
double totsqr, nomsqr;
|
|
unsigned int i, j, jj, m, mm, mmm;
|
|
|
|
nulval = ScaleGauss * pool1[0];
|
|
endval = pool1[n-1];
|
|
m = n >> 2;
|
|
// printf("New pool after next value\n");
|
|
|
|
/* generate new pool by transformation
|
|
Transformation is repeated NOTRANS times */
|
|
for (i = 0; i < NOTRANS; i++) {
|
|
mm = m << 1;
|
|
mmm = mm + m;
|
|
/* forward transformation */
|
|
// for (j = 0; j < n; j += 4) {
|
|
for (j = 0; j < m; j++) {
|
|
bl1 = pool1[j];
|
|
bl2 = pool1[j+m];
|
|
bl3 = pool1[j+mm];
|
|
bl4 = pool1[j+mmm];
|
|
/* Hadamard-Matrix */
|
|
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
|
|
jj = j<<2;
|
|
pool2[jj] = bl1 - bsum;
|
|
pool2[jj+1] = bl2 - bsum;
|
|
pool2[jj+2] = bsum - bl3;
|
|
pool2[jj+3] = bsum - bl4;
|
|
}
|
|
/* backward transformation */
|
|
jj = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1);
|
|
for (j = 0; j < m; j++) {
|
|
bl1 = pool2[j^jj];
|
|
bl2 = pool2[(j+m)^jj];
|
|
bl3 = pool2[(j+mm)^jj];
|
|
bl4 = pool2[(j+mmm)^jj];
|
|
/* Hadamard-Matrix */
|
|
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f;
|
|
jj = j<<2;
|
|
pool1[jj] = bl1 - bsum;
|
|
pool1[jj+1] = bl2 - bsum;
|
|
pool1[jj+2] = bsum - bl3;
|
|
pool1[jj+3] = bsum - bl4;
|
|
}
|
|
}
|
|
|
|
/* renormalize again if number of pools beyond limit */
|
|
if (!(newpools & 0xFFFF)) {
|
|
totsqr = 0.0;
|
|
for (i = 0; i < n; i++)
|
|
totsqr += pool1[i] * pool1[i];
|
|
nomsqr = sqrt(n / totsqr);
|
|
for (i = 0; i < n; i++)
|
|
pool1[i] *= nomsqr;
|
|
}
|
|
|
|
outgauss = pool1;
|
|
/* reset data counter */
|
|
variate_used = n - 1;
|
|
|
|
/* set counter counting nomber of pools made */
|
|
newpools++;
|
|
|
|
/* new scale factor using ch^2 correction,
|
|
using pool1[n-1] from previous pool */
|
|
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
|
|
|
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
|
|
|
return nulval; /* use old scale */
|
|
// return pool1[0]; /* use new scale */
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
#ifdef FASTNORMTEST
|
|
float
|
|
NewWa_not(void)
|
|
{
|
|
float nulval, endval;
|
|
float bl1, bl2, bl3, bl4; /* the four values to be transformed */
|
|
float bsum;
|
|
float totsqr, nomsqr;
|
|
unsigned int i, j, jj;
|
|
nulval = ScaleGauss * pool1[0];
|
|
endval = pool1[n-1];
|
|
|
|
// printf("New pool after next value\n");
|
|
|
|
/* generate new pool by transformation
|
|
Transformation is repeated NOTRANS times */
|
|
for (i = 0; i < NOTRANS; i++) {
|
|
|
|
/* forward transformation */
|
|
for (j = 0; j < n; j += 4) {
|
|
jj = j + i;
|
|
bl1 = pool1[addrif[jj]];
|
|
bl2 = pool1[addrif[jj+1]];
|
|
bl3 = pool1[addrif[jj+2]];
|
|
bl4 = pool1[addrif[jj+3]];
|
|
/* s = (s*coa + cob) & (n - 1);
|
|
bl1 = pool1[s];
|
|
s = (s*coa + cob) & (n - 1);
|
|
bl2 = pool1[s + 1];
|
|
s = (s*coa + cob) & (n - 1);
|
|
bl3 = pool1[s + 2];
|
|
s = (s*coa + cob) & (n - 1);
|
|
bl4 = pool1[s + 3]; */
|
|
/* jj = j + i;
|
|
bl1 = pool1[addrif[jj]];
|
|
bl2 = pool1[addrif[jj+1]];
|
|
bl3 = pool1[addrif[jj+2]];
|
|
bl4 = pool1[addrif[jj+3]]; */
|
|
/* bl1 = pool1[j];
|
|
bl2 = pool1[j+1];
|
|
bl3 = pool1[j+2];
|
|
bl4 = pool1[j+3]; */
|
|
/* Hadamard-Matrix */
|
|
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
|
|
/* pool2[j] = bl1 - bsum;
|
|
pool2[j+1] = bl2 - bsum;
|
|
pool2[j+2] = bsum - bl3;
|
|
pool2[j+3] = bsum - bl4; */
|
|
pool2[addrib[jj]] = bl1 - bsum;
|
|
pool2[addrib[jj+1]] = bl2 - bsum;
|
|
pool2[addrib[jj+2]] = bsum - bl3;
|
|
pool2[addrib[jj+3]] = bsum - bl4;
|
|
}
|
|
/* backward transformation */
|
|
for (j = 0; j < n; j += 4) {
|
|
bl1 = pool2[j];
|
|
bl2 = pool2[j+1];
|
|
bl3 = pool2[j+2];
|
|
bl4 = pool2[j+3];
|
|
/* bl1 = pool2[addrib[j]];
|
|
bl2 = pool2[addrib[j+1]];
|
|
bl3 = pool2[addrib[j+2]];
|
|
bl4 = pool2[addrib[j+3]]; */
|
|
/* Hadamard-Matrix */
|
|
bsum = (bl1 + bl2 + bl3 + bl4) * 0.5;
|
|
pool1[j] = bl1 - bsum;
|
|
pool1[j+1] = bl2 - bsum;
|
|
pool1[j+2] = bsum - bl3;
|
|
pool1[j+3] = bsum - bl4;
|
|
}
|
|
}
|
|
|
|
/* renormalize again if number of pools beyond limit */
|
|
if (!(newpools & 0xFFFF)) {
|
|
totsqr = 0.0;
|
|
for (i = 0; i < n; i++)
|
|
totsqr += pool1[i] * pool1[i];
|
|
nomsqr = sqrt(n / totsqr);
|
|
for (i = 0; i < n; i++)
|
|
pool1[i] *= nomsqr;
|
|
}
|
|
|
|
outgauss = pool1;
|
|
/* reset data counter */
|
|
variate_used = n - 1;
|
|
|
|
/* set counter counting nomber of pools made */
|
|
newpools++;
|
|
|
|
/* new scale factor using ch^2 correction,
|
|
using pool1[n-1] from last pool */
|
|
ScaleGauss = chi1 + chi2 * ScaleGauss * endval;
|
|
|
|
// printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss);
|
|
|
|
return nulval; /* use old scale */
|
|
// return pool1[0]; /* use new scale */
|
|
}
|
|
#endif
|
|
|
|
/* --------------------- (test) main ------------------------- */
|
|
/* gcc -Wall -g -DHasMain -I../../include wallace.c CombTaus.o -o watest.exe */
|
|
#ifdef HasMain
|
|
#include "ngspice/wallace.h"
|
|
|
|
struct timeb timenow;
|
|
struct timeb timebegin;
|
|
int sec, msec;
|
|
|
|
void
|
|
timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec)
|
|
{
|
|
|
|
*msec = now->millitm - begin->millitm;
|
|
*sec = now->time - begin->time;
|
|
if (*msec < 0) {
|
|
*msec += 1000;
|
|
(*sec)--;
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
main()
|
|
{
|
|
float x;
|
|
unsigned int i;
|
|
long int count;
|
|
|
|
initw();
|
|
ftime(&timebegin);
|
|
count = 100000000;
|
|
for (i = 0; i < count; i++) {
|
|
x = GaussWa;
|
|
// printf("%d\t%f\n", i, x);
|
|
}
|
|
ftime(&timenow);
|
|
timediff(&timenow, &timebegin, &sec, &msec);
|
|
printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
|
|
|
|
initnorm(0, 0);
|
|
initnorm(77, 3);
|
|
ftime(&timebegin);
|
|
count = 100000000;
|
|
for (i = 0; i < count; i++) {
|
|
x = FastNorm;
|
|
// printf("%d\t%f\n", i, x);
|
|
}
|
|
ftime(&timenow);
|
|
timediff(&timenow, &timebegin, &sec, &msec);
|
|
printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0);
|
|
|
|
return (1);
|
|
}
|
|
#endif
|