diff --git a/ChangeLog b/ChangeLog index fb7b32de0..1dd2c4ade 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2011-08-08 Holger Vogt + * src/frontend/com_fft.c, src/maths/fft/fftext.c, fftext.h, + fftlib.c, fftlib.h, matlib.c, matlib.h: convert float to double + * winmain.c: increase text output buffer + 2011-08-08 Robert Larice * src/xspice/cm/cmutil.c : ngspice.h always must be the first included file diff --git a/src/frontend/com_fft.c b/src/frontend/com_fft.c index 8f6a82248..3dae15d54 100644 --- a/src/frontend/com_fft.c +++ b/src/frontend/com_fft.c @@ -257,14 +257,14 @@ com_psd(wordlist *wl) double **tdvec; double *freq, *win, *time, *ave; double delta_t, span, noipower; - int ngood, mm; - unsigned long fpts, i, j, tlen, jj, smooth, hsmooth; + int mm; + unsigned long size, ngood, fpts, i, j, tlen, jj, smooth, hsmooth; char *s; struct dvec *f, *vlist, *lv = NULL, *vec; struct pnode *names, *first_name; - float *reald, *imagd; - int size, sign, isreal; + double *reald, *imagd; + int sign, isreal; double scaling, sum; int order; double scale, sigma; @@ -403,7 +403,7 @@ com_psd(wordlist *wl) vec = ft_evaluate(names); names = names->pn_next; while (vec) { - if (vec->v_length != tlen) { + if (vec->v_length != (int)tlen) { fprintf(cp_err, "Error: lengths of %s vectors don't match: %d, %d\n", vec->v_name, vec->v_length, tlen); vec = vec->v_link2; @@ -428,7 +428,7 @@ com_psd(wordlist *wl) ngood++; } } - free_pnode(first_name); + free_pnode_o(first_name); if (!ngood) { return; } @@ -440,7 +440,7 @@ com_psd(wordlist *wl) plot_cur->pl_name = copy("PSD"); plot_cur->pl_date = copy(datestring( )); - freq = (double *) tmalloc(fpts * sizeof(double)); + freq = TMALLOC(double, fpts); f = alloc(struct dvec); ZERO(f, struct dvec); f->v_name = copy("frequency"); @@ -474,14 +474,14 @@ com_psd(wordlist *wl) sign = 1; isreal = 1; - reald = TMALLOC(float, size); - imagd = TMALLOC(float, size); + reald = TMALLOC(double, size); + imagd = TMALLOC(double, size); // scale = 0.66; for (i = 0; i27 ***/ -if ((M >= 0) && (M < 8*sizeof(long))){ - theError = 0; - if (UtblArray[M] == 0){ // have we not inited this size fft yet? - // init cos table - UtblArray[M] = (float *) malloc( (POW2(M)/4+1)*sizeof(float) ); - if (UtblArray[M] == 0) - theError = 2; - else{ - fftCosInit(M, UtblArray[M]); - } - if (M > 1){ - if (BRLowArray[M/2] == 0){ // init bit reversed table for cmplx fft - BRLowArray[M/2] = (short *) malloc( POW2(M/2-1)*sizeof(short) ); - if (BRLowArray[M/2] == 0) - theError = 2; - else{ - fftBRInit(M, BRLowArray[M/2]); - } - } - } - if (M > 2){ - if (BRLowArray[(M-1)/2] == 0){ // init bit reversed table for real fft - BRLowArray[(M-1)/2] = (short *) malloc( POW2((M-1)/2-1)*sizeof(short) ); - if (BRLowArray[(M-1)/2] == 0) - theError = 2; - else{ - fftBRInit(M-1, BRLowArray[(M-1)/2]); - } - } - } - } -}; -return theError; + int theError = 1; + /*** I did NOT test cases with M>27 ***/ + if ((M >= 0) && (M < 8*sizeof(long))) { + theError = 0; + if (UtblArray[M] == 0) { // have we not inited this size fft yet? + // init cos table + UtblArray[M] = (double *) malloc( (POW2(M)/4+1)*sizeof(double) ); + if (UtblArray[M] == 0) + theError = 2; + else { + fftCosInit(M, UtblArray[M]); + } + if (M > 1) { + if (BRLowArray[M/2] == 0) { // init bit reversed table for cmplx fft + BRLowArray[M/2] = (short *) malloc( POW2(M/2-1)*sizeof(short) ); + if (BRLowArray[M/2] == 0) + theError = 2; + else { + fftBRInit(M, BRLowArray[M/2]); + } + } + } + if (M > 2) { + if (BRLowArray[(M-1)/2] == 0) { // init bit reversed table for real fft + BRLowArray[(M-1)/2] = (short *) malloc( POW2((M-1)/2-1)*sizeof(short) ); + if (BRLowArray[(M-1)/2] == 0) + theError = 2; + else { + fftBRInit(M-1, BRLowArray[(M-1)/2]); + } + } + } + } + }; + return theError; } -void fftFree(void){ +void fftFree(void) +{ // release storage for all private cosine and bit reversed tables -long i1; -for (i1=8*sizeof(long)/2-1; i1>=0; i1--){ - if (BRLowArray[i1] != 0){ - free(BRLowArray[i1]); - BRLowArray[i1] = 0; - }; -}; -for (i1=8*sizeof(long)-1; i1>=0; i1--){ - if (UtblArray[i1] != 0){ - free(UtblArray[i1]); - UtblArray[i1] = 0; - }; -}; + long i1; + for (i1=8*sizeof(long)/2-1; i1>=0; i1--) { + if (BRLowArray[i1] != 0) { + free(BRLowArray[i1]); + BRLowArray[i1] = 0; + }; + }; + for (i1=8*sizeof(long)-1; i1>=0; i1--) { + if (UtblArray[i1] != 0) { + free(UtblArray[i1]); + UtblArray[i1] = 0; + }; + }; } /************************************************* @@ -84,73 +87,77 @@ for (i1=8*sizeof(long)-1; i1>=0; i1--){ Just make sure fftlib has been called for each M first. **************************************************/ -void ffts(float *data, long M, long Rows){ -/* Compute in-place complex fft on the rows of the input array */ -/* INPUTS */ -/* *ioptr = input data array */ -/* M = log2 of fft size (ex M=10 for 1024 point fft) */ -/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ -/* OUTPUTS */ -/* *ioptr = output data array */ - ffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]); +void ffts(double *data, long M, long Rows) +{ + /* Compute in-place complex fft on the rows of the input array */ + /* INPUTS */ + /* *ioptr = input data array */ + /* M = log2 of fft size (ex M=10 for 1024 point fft) */ + /* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ + /* OUTPUTS */ + /* *ioptr = output data array */ + ffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]); } -void iffts(float *data, long M, long Rows){ -/* Compute in-place inverse complex fft on the rows of the input array */ -/* INPUTS */ -/* *ioptr = input data array */ -/* M = log2 of fft size (ex M=10 for 1024 point fft) */ -/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ -/* OUTPUTS */ -/* *ioptr = output data array */ - iffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]); +void iffts(double *data, long M, long Rows) +{ + /* Compute in-place inverse complex fft on the rows of the input array */ + /* INPUTS */ + /* *ioptr = input data array */ + /* M = log2 of fft size (ex M=10 for 1024 point fft) */ + /* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ + /* OUTPUTS */ + /* *ioptr = output data array */ + iffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]); } -void rffts(float *data, long M, long Rows){ -/* Compute in-place real fft on the rows of the input array */ -/* The result is the complex spectra of the positive frequencies */ -/* except the location for the first complex number contains the real */ -/* values for DC and Nyquest */ -/* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */ -/* INPUTS */ -/* *ioptr = real input data array */ -/* M = log2 of fft size (ex M=10 for 1024 point fft) */ -/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ -/* OUTPUTS */ -/* *ioptr = output data array in the following order */ -/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ - rffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]); +void rffts(double *data, long M, long Rows) +{ + /* Compute in-place real fft on the rows of the input array */ + /* The result is the complex spectra of the positive frequencies */ + /* except the location for the first complex number contains the real */ + /* values for DC and Nyquest */ + /* See rspectprod for multiplying two of these spectra together- ex. for fast convolution */ + /* INPUTS */ + /* *ioptr = real input data array */ + /* M = log2 of fft size (ex M=10 for 1024 point fft) */ + /* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ + /* OUTPUTS */ + /* *ioptr = output data array in the following order */ + /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ + rffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]); } -void riffts(float *data, long M, long Rows){ -/* Compute in-place real ifft on the rows of the input array */ -/* data order as from rffts */ -/* INPUTS */ -/* *ioptr = input data array in the following order */ -/* M = log2 of fft size (ex M=10 for 1024 point fft) */ -/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ -/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ -/* OUTPUTS */ -/* *ioptr = real output data array */ - riffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]); +void riffts(double *data, long M, long Rows) +{ + /* Compute in-place real ifft on the rows of the input array */ + /* data order as from rffts */ + /* INPUTS */ + /* *ioptr = input data array in the following order */ + /* M = log2 of fft size (ex M=10 for 1024 point fft) */ + /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ + /* Rows = number of rows in ioptr array (use 1 for Rows for a single fft) */ + /* OUTPUTS */ + /* *ioptr = real output data array */ + riffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]); } -void rspectprod(float *data1, float *data2, float *outdata, long N){ +void rspectprod(double *data1, double *data2, double *outdata, long N) +{ // When multiplying a pair of spectra from rfft care must be taken to multiply the // two real values seperately from the complex ones. This routine does it correctly. // the result can be stored in-place over one of the inputs -/* INPUTS */ -/* *data1 = input data array first spectra */ -/* *data2 = input data array second spectra */ -/* N = fft input size for both data1 and data2 */ -/* OUTPUTS */ -/* *outdata = output data array spectra */ -if(N>1){ - outdata[0] = data1[0] * data2[0]; // multiply the zero freq values - outdata[1] = data1[1] * data2[1]; // multiply the nyquest freq values - cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1); // multiply the other positive freq values -} -else{ - outdata[0] = data1[0] * data2[0]; -} + /* INPUTS */ + /* *data1 = input data array first spectra */ + /* *data2 = input data array second spectra */ + /* N = fft input size for both data1 and data2 */ + /* OUTPUTS */ + /* *outdata = output data array spectra */ + if(N>1) { + outdata[0] = data1[0] * data2[0]; // multiply the zero freq values + outdata[1] = data1[1] * data2[1]; // multiply the nyquest freq values + cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1); // multiply the other positive freq values + } else { + outdata[0] = data1[0] * data2[0]; + } } diff --git a/src/maths/fft/fftext.h b/src/maths/fft/fftext.h index 1db007464..b17f60969 100644 --- a/src/maths/fft/fftext.h +++ b/src/maths/fft/fftext.h @@ -19,7 +19,7 @@ int fftInit(long M); void fftFree(void); // release storage for all private cosine and bit reversed tables -void ffts(float *data, long M, long Rows); +void ffts(double *data, long M, long Rows); /* Compute in-place complex fft on the rows of the input array */ /* INPUTS */ /* *ioptr = input data array */ @@ -28,7 +28,7 @@ void ffts(float *data, long M, long Rows); /* OUTPUTS */ /* *ioptr = output data array */ -void iffts(float *data, long M, long Rows); +void iffts(double *data, long M, long Rows); /* Compute in-place inverse complex fft on the rows of the input array */ /* INPUTS */ /* *ioptr = input data array */ @@ -37,7 +37,7 @@ void iffts(float *data, long M, long Rows); /* OUTPUTS */ /* *ioptr = output data array */ -void rffts(float *data, long M, long Rows); +void rffts(double *data, long M, long Rows); /* Compute in-place real fft on the rows of the input array */ /* The result is the complex spectra of the positive frequencies */ /* except the location for the first complex number contains the real */ @@ -51,7 +51,7 @@ void rffts(float *data, long M, long Rows); /* *ioptr = output data array in the following order */ /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ -void riffts(float *data, long M, long Rows); +void riffts(double *data, long M, long Rows); /* Compute in-place real ifft on the rows of the input array */ /* data order as from rffts */ /* INPUTS */ @@ -62,7 +62,7 @@ void riffts(float *data, long M, long Rows); /* OUTPUTS */ /* *ioptr = real output data array */ -void rspectprod(float *data1, float *data2, float *outdata, long N); +void rspectprod(double *data1, double *data2, double *outdata, long N); // When multiplying a pair of spectra from rfft care must be taken to multiply the // two real values seperately from the complex ones. This routine does it correctly. // the result can be stored in-place over one of the inputs @@ -80,10 +80,10 @@ void rspectprod(float *data1, float *data2, float *outdata, long N); //Note that most of the fft routines require full matrices, ie Rsiz==Ncols //This is how I like to define a real matrix: //struct matrix { // real matrix -// float *d; // pointer to data +// double *d; // pointer to data // long Nrows; // number of rows in the matrix // long Ncols; // number of columns in the matrix (can be less than Rsiz) -// long Rsiz; // number of floats from one row to the next +// long Rsiz; // number of doubles from one row to the next //}; //typedef struct matrix matrix; diff --git a/src/maths/fft/fftlib.c b/src/maths/fft/fftlib.c index 66b36ae76..e71549466 100644 --- a/src/maths/fft/fftlib.c +++ b/src/maths/fft/fftlib.c @@ -3,7 +3,7 @@ lower level fft stuff including routines called in fftext.c and fft2d.c *******************************************************************/ #include "fftlib.h" #include -#define MCACHE (11-(sizeof(float)/8)) // fft's with M bigger than this bust primary cache +#define MCACHE (11-(sizeof(double)/8)) // fft's with M bigger than this bust primary cache // some math constants to 40 decimal places #define MYPI 3.141592653589793238462643383279502884197 // pi @@ -20,3156 +20,3188 @@ lower level fft stuff including routines called in fftext.c and fft2d.c routines to initialize tables used by fft routines **************************************************/ -void fftCosInit(long M, float *Utbl){ -/* Compute Utbl, the cosine table for ffts */ -/* of size (pow(2,M)/4 +1) */ -/* INPUTS */ -/* M = log2 of fft size */ -/* OUTPUTS */ -/* *Utbl = cosine table */ -unsigned long fftN = POW2(M); -unsigned long i1; - Utbl[0] = 1.0; - for (i1 = 1; i1 < fftN/4; i1++) - Utbl[i1] = cos( (2.0 * MYPI * i1) / fftN ); - Utbl[fftN/4] = 0.0; +void fftCosInit(long M, double *Utbl) +{ + /* Compute Utbl, the cosine table for ffts */ + /* of size (pow(2,M)/4 +1) */ + /* INPUTS */ + /* M = log2 of fft size */ + /* OUTPUTS */ + /* *Utbl = cosine table */ + unsigned long fftN = POW2(M); + unsigned long i1; + Utbl[0] = 1.0; + for (i1 = 1; i1 < fftN/4; i1++) + Utbl[i1] = cos( (2.0 * MYPI * i1) / fftN ); + Utbl[fftN/4] = 0.0; } -void fftBRInit(long M, short *BRLow){ -/* Compute BRLow, the bit reversed table for ffts */ -/* of size pow(2,M/2 -1) */ -/* INPUTS */ -/* M = log2 of fft size */ -/* OUTPUTS */ -/* *BRLow = bit reversed counter table */ -long Mroot_1 = M / 2 - 1; -long Nroot_1 = POW2(Mroot_1); -long i1; -long bitsum; -long bitmask; -long bit; -for (i1 = 0; i1 < Nroot_1; i1++){ - bitsum = 0; - bitmask = 1; - for (bit=1; bit <= Mroot_1; bitmask<<=1, bit++) - if (i1 & bitmask) - bitsum = bitsum + (Nroot_1 >> bit); - BRLow[i1] = bitsum; -}; +void fftBRInit(long M, short *BRLow) +{ + /* Compute BRLow, the bit reversed table for ffts */ + /* of size pow(2,M/2 -1) */ + /* INPUTS */ + /* M = log2 of fft size */ + /* OUTPUTS */ + /* *BRLow = bit reversed counter table */ + long Mroot_1 = M / 2 - 1; + long Nroot_1 = POW2(Mroot_1); + long i1; + long bitsum; + long bitmask; + long bit; + for (i1 = 0; i1 < Nroot_1; i1++) { + bitsum = 0; + bitmask = 1; + for (bit=1; bit <= Mroot_1; bitmask<<=1, bit++) + if (i1 & bitmask) + bitsum = bitsum + (Nroot_1 >> bit); + BRLow[i1] = bitsum; + }; } /************************************************ parts of ffts1 *************************************************/ -static inline void bitrevR2(float *ioptr, long M, short *BRLow); -static inline void bitrevR2(float *ioptr, long M, short *BRLow){ -/*** bit reverse and first radix 2 stage of forward or inverse fft ***/ -float f0r; -float f0i; -float f1r; -float f1i; -float f2r; -float f2i; -float f3r; -float f3i; -float f4r; -float f4i; -float f5r; -float f5i; -float f6r; -float f6i; -float f7r; -float f7i; -float t0r; -float t0i; -float t1r; -float t1i; -float *p0r; -float *p1r; -float *IOP; -float *iolimit; -long Colstart; -long iCol; -unsigned long posA; -unsigned long posAi; -unsigned long posB; -unsigned long posBi; +static inline void bitrevR2(double *ioptr, long M, short *BRLow); +static inline void bitrevR2(double *ioptr, long M, short *BRLow) +{ + /*** bit reverse and first radix 2 stage of forward or inverse fft ***/ + double f0r; + double f0i; + double f1r; + double f1i; + double f2r; + double f2i; + double f3r; + double f3i; + double f4r; + double f4i; + double f5r; + double f5i; + double f6r; + double f6i; + double f7r; + double f7i; + double t0r; + double t0i; + double t1r; + double t1i; + double *p0r; + double *p1r; + double *IOP; + double *iolimit; + long Colstart; + long iCol; + unsigned long posA; + unsigned long posAi; + unsigned long posB; + unsigned long posBi; -const unsigned long Nrems2 = POW2((M+3)/2); -const unsigned long Nroot_1_ColInc = POW2(M)-Nrems2; -const unsigned long Nroot_1 = POW2(M/2-1)-1; -const unsigned long ColstartShift = (M+1)/2 +1; -posA = POW2(M); // 1/2 of POW2(M) complexes -posAi = posA + 1; -posB = posA + 2; -posBi = posB + 1; + const unsigned long Nrems2 = POW2((M+3)/2); + const unsigned long Nroot_1_ColInc = POW2(M)-Nrems2; + const unsigned long Nroot_1 = POW2(M/2-1)-1; + const unsigned long ColstartShift = (M+1)/2 +1; + posA = POW2(M); // 1/2 of POW2(M) complexes + posAi = posA + 1; + posB = posA + 2; + posBi = posB + 1; -iolimit = ioptr + Nrems2; -for (; ioptr < iolimit; ioptr += POW2(M/2+1)){ - for (Colstart = Nroot_1; Colstart >= 0; Colstart--){ - iCol = Nroot_1; - p0r = ioptr+ Nroot_1_ColInc + BRLow[Colstart]*4; - IOP = ioptr + (Colstart << ColstartShift); - p1r = IOP + BRLow[iCol]*4; - f0r = *(p0r); - f0i = *(p0r+1); - f1r = *(p0r+posA); - f1i = *(p0r+posAi); - for (; iCol > Colstart;){ - f2r = *(p0r+2); - f2i = *(p0r+(2+1)); - f3r = *(p0r+posB); - f3i = *(p0r+posBi); - f4r = *(p1r); - f4i = *(p1r+1); - f5r = *(p1r+posA); - f5i = *(p1r+posAi); - f6r = *(p1r+2); - f6i = *(p1r+(2+1)); - f7r = *(p1r+posB); - f7i = *(p1r+posBi); - - t0r = f0r + f1r; - t0i = f0i + f1i; - f1r = f0r - f1r; - f1i = f0i - f1i; - t1r = f2r + f3r; - t1i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; - f0r = f4r + f5r; - f0i = f4i + f5i; - f5r = f4r - f5r; - f5i = f4i - f5i; - f2r = f6r + f7r; - f2i = f6i + f7i; - f7r = f6r - f7r; - f7i = f6i - f7i; + iolimit = ioptr + Nrems2; + for (; ioptr < iolimit; ioptr += POW2(M/2+1)) { + for (Colstart = Nroot_1; Colstart >= 0; Colstart--) { + iCol = Nroot_1; + p0r = ioptr+ Nroot_1_ColInc + BRLow[Colstart]*4; + IOP = ioptr + (Colstart << ColstartShift); + p1r = IOP + BRLow[iCol]*4; + f0r = *(p0r); + f0i = *(p0r+1); + f1r = *(p0r+posA); + f1i = *(p0r+posAi); + for (; iCol > Colstart;) { + f2r = *(p0r+2); + f2i = *(p0r+(2+1)); + f3r = *(p0r+posB); + f3i = *(p0r+posBi); + f4r = *(p1r); + f4i = *(p1r+1); + f5r = *(p1r+posA); + f5i = *(p1r+posAi); + f6r = *(p1r+2); + f6i = *(p1r+(2+1)); + f7r = *(p1r+posB); + f7i = *(p1r+posBi); - *(p1r) = t0r; - *(p1r+1) = t0i; - *(p1r+2) = f1r; - *(p1r+(2+1)) = f1i; - *(p1r+posA) = t1r; - *(p1r+posAi) = t1i; - *(p1r+posB) = f3r; - *(p1r+posBi) = f3i; - *(p0r) = f0r; - *(p0r+1) = f0i; - *(p0r+2) = f5r; - *(p0r+(2+1)) = f5i; - *(p0r+posA) = f2r; - *(p0r+posAi) = f2i; - *(p0r+posB) = f7r; - *(p0r+posBi) = f7i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + f0r = f4r + f5r; + f0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + f2r = f6r + f7r; + f2i = f6i + f7i; + f7r = f6r - f7r; + f7i = f6i - f7i; - p0r -= Nrems2; - f0r = *(p0r); - f0i = *(p0r+1); - f1r = *(p0r+posA); - f1i = *(p0r+posAi); - iCol -= 1; - p1r = IOP + BRLow[iCol]*4; - }; - f2r = *(p0r+2); - f2i = *(p0r+(2+1)); - f3r = *(p0r+posB); - f3i = *(p0r+posBi); + *(p1r) = t0r; + *(p1r+1) = t0i; + *(p1r+2) = f1r; + *(p1r+(2+1)) = f1i; + *(p1r+posA) = t1r; + *(p1r+posAi) = t1i; + *(p1r+posB) = f3r; + *(p1r+posBi) = f3i; + *(p0r) = f0r; + *(p0r+1) = f0i; + *(p0r+2) = f5r; + *(p0r+(2+1)) = f5i; + *(p0r+posA) = f2r; + *(p0r+posAi) = f2i; + *(p0r+posB) = f7r; + *(p0r+posBi) = f7i; - t0r = f0r + f1r; - t0i = f0i + f1i; - f1r = f0r - f1r; - f1i = f0i - f1i; - t1r = f2r + f3r; - t1i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; + p0r -= Nrems2; + f0r = *(p0r); + f0i = *(p0r+1); + f1r = *(p0r+posA); + f1i = *(p0r+posAi); + iCol -= 1; + p1r = IOP + BRLow[iCol]*4; + }; + f2r = *(p0r+2); + f2i = *(p0r+(2+1)); + f3r = *(p0r+posB); + f3i = *(p0r+posBi); - *(p0r) = t0r; - *(p0r+1) = t0i; - *(p0r+2) = f1r; - *(p0r+(2+1)) = f1i; - *(p0r+posA) = t1r; - *(p0r+posAi) = t1i; - *(p0r+posB) = f3r; - *(p0r+posBi) = f3i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; - }; -}; + *(p0r) = t0r; + *(p0r+1) = t0i; + *(p0r+2) = f1r; + *(p0r+(2+1)) = f1i; + *(p0r+posA) = t1r; + *(p0r+posAi) = t1i; + *(p0r+posB) = f3r; + *(p0r+posBi) = f3i; + + }; + }; } -static inline void fft2pt(float *ioptr); -static inline void fft2pt(float *ioptr){ -/*** RADIX 2 fft ***/ -float f0r, f0i, f1r, f1i; -float t0r, t0i; +static inline void fft2pt(double *ioptr); +static inline void fft2pt(double *ioptr) +{ + /*** RADIX 2 fft ***/ + double f0r, f0i, f1r, f1i; + double t0r, t0i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[2]; -f1i = ioptr[3]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; - /* Butterflys */ - /* - f0 - - t0 - f1 - 1 - f1 - */ + /* Butterflys */ + /* + f0 - - t0 + f1 - 1 - f1 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; - /* store result */ -ioptr[0] = t0r; -ioptr[1] = t0i; -ioptr[2] = f1r; -ioptr[3] = f1i; + /* store result */ + ioptr[0] = t0r; + ioptr[1] = t0i; + ioptr[2] = f1r; + ioptr[3] = f1i; } -static inline void fft4pt(float *ioptr); -static inline void fft4pt(float *ioptr){ -/*** RADIX 4 fft ***/ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float t0r, t0i, t1r, t1i; +static inline void fft4pt(double *ioptr); +static inline void fft4pt(double *ioptr) +{ + /*** RADIX 4 fft ***/ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double t0r, t0i, t1r, t1i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[4]; -f1i = ioptr[5]; -f2r = ioptr[2]; -f2i = ioptr[3]; -f3r = ioptr[6]; -f3i = ioptr[7]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - f1 - 1 - f1 - - f1 - f2 - - f2 - 1 - f2 - f3 - 1 - t1 - -i - f3 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 + f1 - 1 - f1 - - f1 + f2 - - f2 - 1 - f2 + f3 - 1 - t1 - -i - f3 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r - t1i; -f3i = f1i + t1r; -f1r = f1r + t1i; -f1i = f1i - t1r; + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; - /* store result */ -ioptr[0] = f0r; -ioptr[1] = f0i; -ioptr[2] = f1r; -ioptr[3] = f1i; -ioptr[4] = f2r; -ioptr[5] = f2i; -ioptr[6] = f3r; -ioptr[7] = f3i; + /* store result */ + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[6] = f3r; + ioptr[7] = f3i; } -static inline void fft8pt(float *ioptr); -static inline void fft8pt(float *ioptr){ -/*** RADIX 8 fft ***/ -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; +static inline void fft8pt(double *ioptr); +static inline void fft8pt(double *ioptr) +{ + /*** RADIX 8 fft ***/ + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[8]; -f1i = ioptr[9]; -f2r = ioptr[4]; -f2i = ioptr[5]; -f3r = ioptr[12]; -f3i = ioptr[13]; -f4r = ioptr[2]; -f4i = ioptr[3]; -f5r = ioptr[10]; -f5i = ioptr[11]; -f6r = ioptr[6]; -f6i = ioptr[7]; -f7r = ioptr[14]; -f7i = ioptr[15]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1 - 1 - f1 - - f1 - - f1 - f2 - - f2 - 1 - f2 - - f2 - f3 - 1 - t1 - -i - f3 - - f3 - f4 - - t0 - - f4 - 1 - t0 - f5 - 1 - f5 - - f5 - w3 - f4 - f6 - - f6 - 1 - f6 - -i - t1 - f7 - 1 - t1 - -i - f7 - iw3- f6 - */ + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1 - 1 - f1 - - f1 - - f1 + f2 - - f2 - 1 - f2 - - f2 + f3 - 1 - t1 - -i - f3 - - f3 + f4 - - t0 - - f4 - 1 - t0 + f5 - 1 - f5 - - f5 - w3 - f4 + f6 - - f6 - 1 - f6 - -i - t1 + f7 - 1 - t1 - -i - f7 - iw3- f6 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r - t1i; -f3i = f1i + t1r; -f1r = f1r + t1i; -f1i = f1i - t1r; + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; -t0r = f4r + f5r; -t0i = f4i + f5i; -f5r = f4r - f5r; -f5i = f4i - f5i; + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; -t1r = f6r - f7r; -t1i = f6i - f7i; -f6r = f6r + f7r; -f6i = f6i + f7i; + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; -f4r = t0r + f6r; -f4i = t0i + f6i; -f6r = t0r - f6r; -f6i = t0i - f6i; + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; -f7r = f5r - t1i; -f7i = f5i + t1r; -f5r = f5r + t1i; -f5i = f5i - t1r; + f7r = f5r - t1i; + f7i = f5i + t1r; + f5r = f5r + t1i; + f5i = f5i - t1r; -t0r = f0r - f4r; -t0i = f0i - f4i; -f0r = f0r + f4r; -f0i = f0i + f4i; + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; -t1r = f2r - f6i; -t1i = f2i + f6r; -f2r = f2r + f6i; -f2i = f2i - f6r; + t1r = f2r - f6i; + t1i = f2i + f6r; + f2r = f2r + f6i; + f2i = f2i - f6r; -f4r = f1r - f5r * w0r - f5i * w0r; -f4i = f1i + f5r * w0r - f5i * w0r; -f1r = f1r * Two - f4r; -f1i = f1i * Two - f4i; + f4r = f1r - f5r * w0r - f5i * w0r; + f4i = f1i + f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; -f6r = f3r + f7r * w0r - f7i * w0r; -f6i = f3i + f7r * w0r + f7i * w0r; -f3r = f3r * Two - f6r; -f3i = f3i * Two - f6i; + f6r = f3r + f7r * w0r - f7i * w0r; + f6i = f3i + f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; - /* store result */ -ioptr[0] = f0r; -ioptr[1] = f0i; -ioptr[2] = f1r; -ioptr[3] = f1i; -ioptr[4] = f2r; -ioptr[5] = f2i; -ioptr[6] = f3r; -ioptr[7] = f3i; -ioptr[8] = t0r; -ioptr[9] = t0i; -ioptr[10] = f4r; -ioptr[11] = f4i; -ioptr[12] = t1r; -ioptr[13] = t1i; -ioptr[14] = f6r; -ioptr[15] = f6i; + /* store result */ + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[6] = f3r; + ioptr[7] = f3i; + ioptr[8] = t0r; + ioptr[9] = t0i; + ioptr[10] = f4r; + ioptr[11] = f4i; + ioptr[12] = t1r; + ioptr[13] = t1i; + ioptr[14] = f6r; + ioptr[15] = f6i; } -static inline void bfR2(float *ioptr, long M, long NDiffU); -static inline void bfR2(float *ioptr, long M, long NDiffU){ -/*** 2nd radix 2 stage ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long NSameU; -unsigned long SameUCnt; +static inline void bfR2(double *ioptr, long M, long NDiffU); +static inline void bfR2(double *ioptr, long M, long NDiffU) +{ + /*** 2nd radix 2 stage ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long NSameU; + unsigned long SameUCnt; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -/* const float Two = 2.0; */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + /* const double Two = 2.0; */ -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 4; -pos = 2; -posi = pos+1; -NSameU = POW2(M) / 4 /NDiffU; // 4 Us at a time -pstrt = ioptr; -p0r = pstrt; -p1r = pstrt+pinc; -p2r = p1r+pinc; -p3r = p2r+pinc; + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 4; + pos = 2; + posi = pos+1; + NSameU = POW2(M) / 4 /NDiffU; // 4 Us at a time + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - f4 - f1 - 1 - f5 - f2 - - f6 - f3 - 1 - f7 - */ - /* Butterflys */ - /* - f0 - - f4 - f1 - 1 - f5 - f2 - - f6 - f3 - 1 - f7 - */ + /* Butterflys */ + /* + f0 - - f4 + f1 - 1 - f5 + f2 - - f6 + f3 - 1 - f7 + */ + /* Butterflys */ + /* + f0 - - f4 + f1 - 1 - f5 + f2 - - f6 + f3 - 1 - f7 + */ -for (SameUCnt = NSameU; SameUCnt > 0 ; SameUCnt--){ + for (SameUCnt = NSameU; SameUCnt > 0 ; SameUCnt--) { - f0r = *p0r; - f1r = *p1r; - f0i = *(p0r + 1); - f1i = *(p1r + 1); - f2r = *p2r; - f3r = *p3r; - f2i = *(p2r + 1); - f3i = *(p3r + 1); + f0r = *p0r; + f1r = *p1r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2r = *p2r; + f3r = *p3r; + f2i = *(p2r + 1); + f3i = *(p3r + 1); - f4r = f0r + f1r; - f4i = f0i + f1i; - f5r = f0r - f1r; - f5i = f0i - f1i; + f4r = f0r + f1r; + f4i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; - f6r = f2r + f3r; - f6i = f2i + f3i; - f7r = f2r - f3r; - f7i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f7r = f2r - f3r; + f7i = f2i - f3i; - *p0r = f4r; - *(p0r + 1) = f4i; - *p1r = f5r; - *(p1r + 1) = f5i; - *p2r = f6r; - *(p2r + 1) = f6i; - *p3r = f7r; - *(p3r + 1) = f7i; + *p0r = f4r; + *(p0r + 1) = f4i; + *p1r = f5r; + *(p1r + 1) = f5i; + *p2r = f6r; + *(p2r + 1) = f6i; + *p3r = f7r; + *(p3r + 1) = f7i; - f0r = *(p0r + pos); - f1i = *(p1r + posi); - f0i = *(p0r + posi); - f1r = *(p1r + pos); - f2r = *(p2r + pos); - f3i = *(p3r + posi); - f2i = *(p2r + posi); - f3r = *(p3r + pos); + f0r = *(p0r + pos); + f1i = *(p1r + posi); + f0i = *(p0r + posi); + f1r = *(p1r + pos); + f2r = *(p2r + pos); + f3i = *(p3r + posi); + f2i = *(p2r + posi); + f3r = *(p3r + pos); - f4r = f0r + f1i; - f4i = f0i - f1r; - f5r = f0r - f1i; - f5i = f0i + f1r; + f4r = f0r + f1i; + f4i = f0i - f1r; + f5r = f0r - f1i; + f5i = f0i + f1r; - f6r = f2r + f3i; - f6i = f2i - f3r; - f7r = f2r - f3i; - f7i = f2i + f3r; + f6r = f2r + f3i; + f6i = f2i - f3r; + f7r = f2r - f3i; + f7i = f2i + f3r; - *(p0r + pos) = f4r; - *(p0r + posi) = f4i; - *(p1r + pos) = f5r; - *(p1r + posi) = f5i; - *(p2r + pos) = f6r; - *(p2r + posi) = f6i; - *(p3r + pos) = f7r; - *(p3r + posi) = f7i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; - p0r += pnext; - p1r += pnext; - p2r += pnext; - p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; + p3r += pnext; -} + } } -static inline void bfR4(float *ioptr, long M, long NDiffU); -static inline void bfR4(float *ioptr, long M, long NDiffU){ -/*** 1 radix 4 stage ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long pnexti; -unsigned long NSameU; -unsigned long SameUCnt; +static inline void bfR4(double *ioptr, long M, long NDiffU); +static inline void bfR4(double *ioptr, long M, long NDiffU) +{ + /*** 1 radix 4 stage ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long pnexti; + unsigned long NSameU; + unsigned long SameUCnt; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; -float w1r = 1.0/MYROOT2; /* cos(pi/4) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t1r, t1i; -const float Two = 2.0; + double w1r = 1.0/MYROOT2; /* cos(pi/4) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t1r, t1i; + const double Two = 2.0; -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 4; -pnexti = pnext + 1; -pos = 2; -posi = pos+1; -NSameU = POW2(M) / 4 /NDiffU; // 4 pts per butterfly -pstrt = ioptr; -p0r = pstrt; -p1r = pstrt+pinc; -p2r = p1r+pinc; -p3r = p2r+pinc; + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 4; + pnexti = pnext + 1; + pos = 2; + posi = pos+1; + NSameU = POW2(M) / 4 /NDiffU; // 4 pts per butterfly + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - f0 - - f4 - f1 - 1 - f5 - - f5 - f2 - - f6 - 1 - f6 - f3 - 1 - f3 - -i - f7 - */ - /* Butterflys */ - /* - f0 - - f4 - - f4 - f1 - -i - t1 - - f5 - f2 - - f2 - w1 - f6 - f3 - -i - f7 - iw1- f7 - */ + /* Butterflys */ + /* + f0 - - f0 - - f4 + f1 - 1 - f5 - - f5 + f2 - - f6 - 1 - f6 + f3 - 1 - f3 - -i - f7 + */ + /* Butterflys */ + /* + f0 - - f4 - - f4 + f1 - -i - t1 - - f5 + f2 - - f2 - w1 - f6 + f3 - -i - f7 - iw1- f7 + */ -f0r = *p0r; -f1r = *p1r; -f2r = *p2r; -f3r = *p3r; -f0i = *(p0r + 1); -f1i = *(p1r + 1); -f2i = *(p2r + 1); -f3i = *(p3r + 1); + f0r = *p0r; + f1r = *p1r; + f2r = *p2r; + f3r = *p3r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2i = *(p2r + 1); + f3i = *(p3r + 1); -f5r = f0r - f1r; -f5i = f0i - f1i; -f0r = f0r + f1r; -f0i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; -f6r = f2r + f3r; -f6i = f2i + f3i; -f3r = f2r - f3r; -f3i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; -for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--){ + for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--) { - f7r = f5r - f3i; - f7i = f5i + f3r; - f5r = f5r + f3i; - f5i = f5i - f3r; + f7r = f5r - f3i; + f7i = f5i + f3r; + f5r = f5r + f3i; + f5i = f5i - f3r; - f4r = f0r + f6r; - f4i = f0i + f6i; - f6r = f0r - f6r; - f6i = f0i - f6i; + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; - f2r = *(p2r + pos); - f2i = *(p2r + posi); - f1r = *(p1r + pos); - f1i = *(p1r + posi); - f3i = *(p3r + posi); - f0r = *(p0r + pos); - f3r = *(p3r + pos); - f0i = *(p0r + posi); + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); - *p3r = f7r; - *p0r = f4r; - *(p3r + 1) = f7i; - *(p0r + 1) = f4i; - *p1r = f5r; - *p2r = f6r; - *(p1r + 1) = f5i; - *(p2r + 1) = f6i; + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; - f7r = f2r - f3i; - f7i = f2i + f3r; - f2r = f2r + f3i; - f2i = f2i - f3r; + f7r = f2r - f3i; + f7i = f2i + f3r; + f2r = f2r + f3i; + f2i = f2i - f3r; - f4r = f0r + f1i; - f4i = f0i - f1r; - t1r = f0r - f1i; - t1i = f0i + f1r; + f4r = f0r + f1i; + f4i = f0i - f1r; + t1r = f0r - f1i; + t1i = f0i + f1r; - f5r = t1r - f7r * w1r + f7i * w1r; - f5i = t1i - f7r * w1r - f7i * w1r; - f7r = t1r * Two - f5r; - f7i = t1i * Two - f5i; + f5r = t1r - f7r * w1r + f7i * w1r; + f5i = t1i - f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; - f6r = f4r - f2r * w1r - f2i * w1r; - f6i = f4i + f2r * w1r - f2i * w1r; - f4r = f4r * Two - f6r; - f4i = f4i * Two - f6i; + f6r = f4r - f2r * w1r - f2i * w1r; + f6i = f4i + f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; - f3r = *(p3r + pnext); - f0r = *(p0r + pnext); - f3i = *(p3r + pnexti); - f0i = *(p0r + pnexti); - f2r = *(p2r + pnext); - f2i = *(p2r + pnexti); - f1r = *(p1r + pnext); - f1i = *(p1r + pnexti); + f3r = *(p3r + pnext); + f0r = *(p0r + pnext); + f3i = *(p3r + pnexti); + f0i = *(p0r + pnexti); + f2r = *(p2r + pnext); + f2i = *(p2r + pnexti); + f1r = *(p1r + pnext); + f1i = *(p1r + pnexti); - *(p2r + pos) = f6r; - *(p1r + pos) = f5r; - *(p2r + posi) = f6i; - *(p1r + posi) = f5i; - *(p3r + pos) = f7r; - *(p0r + pos) = f4r; - *(p3r + posi) = f7i; - *(p0r + posi) = f4i; + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; - f6r = f2r + f3r; - f6i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; - f5r = f0r - f1r; - f5i = f0i - f1i; - f0r = f0r + f1r; - f0i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; - p3r += pnext; - p0r += pnext; - p1r += pnext; - p2r += pnext; + p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; -} -f7r = f5r - f3i; -f7i = f5i + f3r; -f5r = f5r + f3i; -f5i = f5i - f3r; + } + f7r = f5r - f3i; + f7i = f5i + f3r; + f5r = f5r + f3i; + f5i = f5i - f3r; -f4r = f0r + f6r; -f4i = f0i + f6i; -f6r = f0r - f6r; -f6i = f0i - f6i; + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; -f2r = *(p2r + pos); -f2i = *(p2r + posi); -f1r = *(p1r + pos); -f1i = *(p1r + posi); -f3i = *(p3r + posi); -f0r = *(p0r + pos); -f3r = *(p3r + pos); -f0i = *(p0r + posi); + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); -*p3r = f7r; -*p0r = f4r; -*(p3r + 1) = f7i; -*(p0r + 1) = f4i; -*p1r = f5r; -*p2r = f6r; -*(p1r + 1) = f5i; -*(p2r + 1) = f6i; + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; -f7r = f2r - f3i; -f7i = f2i + f3r; -f2r = f2r + f3i; -f2i = f2i - f3r; + f7r = f2r - f3i; + f7i = f2i + f3r; + f2r = f2r + f3i; + f2i = f2i - f3r; -f4r = f0r + f1i; -f4i = f0i - f1r; -t1r = f0r - f1i; -t1i = f0i + f1r; + f4r = f0r + f1i; + f4i = f0i - f1r; + t1r = f0r - f1i; + t1i = f0i + f1r; -f5r = t1r - f7r * w1r + f7i * w1r; -f5i = t1i - f7r * w1r - f7i * w1r; -f7r = t1r * Two - f5r; -f7i = t1i * Two - f5i; + f5r = t1r - f7r * w1r + f7i * w1r; + f5i = t1i - f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; -f6r = f4r - f2r * w1r - f2i * w1r; -f6i = f4i + f2r * w1r - f2i * w1r; -f4r = f4r * Two - f6r; -f4i = f4i * Two - f6i; + f6r = f4r - f2r * w1r - f2i * w1r; + f6i = f4i + f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; -*(p2r + pos) = f6r; -*(p1r + pos) = f5r; -*(p2r + posi) = f6i; -*(p1r + posi) = f5i; -*(p3r + pos) = f7r; -*(p0r + pos) = f4r; -*(p3r + posi) = f7i; -*(p0r + posi) = f4i; + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; } -static inline void bfstages(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt); -static inline void bfstages(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt){ -/*** RADIX 8 Stages ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long NSameU; -unsigned long Uinc; -unsigned long Uinc2; -unsigned long Uinc4; -unsigned long DiffUCnt; -unsigned long SameUCnt; -unsigned long U2toU3; +static inline void bfstages(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt); +static inline void bfstages(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt) +{ + /*** RADIX 8 Stages ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long NSameU; + unsigned long Uinc; + unsigned long Uinc2; + unsigned long Uinc4; + unsigned long DiffUCnt; + unsigned long SameUCnt; + unsigned long U2toU3; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; -float *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; + double *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; -float w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; + double w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 8; -pos = pinc * 4; -posi = pos + 1; -NSameU = POW2(M) / 8 /NDiffU; // 8 pts per butterfly -Uinc = NSameU * Ustride; -Uinc2 = Uinc * 2; -Uinc4 = Uinc * 4; -U2toU3 = (POW2(M) / 8)*Ustride; -for (; StageCnt > 0 ; StageCnt--){ + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 8; + pos = pinc * 4; + posi = pos + 1; + NSameU = POW2(M) / 8 /NDiffU; // 8 pts per butterfly + Uinc = NSameU * Ustride; + Uinc2 = Uinc * 2; + Uinc4 = Uinc * 4; + U2toU3 = (POW2(M) / 8)*Ustride; + for (; StageCnt > 0 ; StageCnt--) { - u0r = &Utbl[0]; - u0i = &Utbl[POW2(M-2)*Ustride]; - u1r = u0r; - u1i = u0i; - u2r = u0r; - u2i = u0i; + u0r = &Utbl[0]; + u0i = &Utbl[POW2(M-2)*Ustride]; + u1r = u0r; + u1i = u0i; + u2r = u0r; + u2i = u0i; - w0r = *u0r; - w0i = *u0i; - w1r = *u1r; - w1i = *u1i; - w2r = *u2r; - w2i = *u2i; - w3r = *(u2r+U2toU3); - w3i = *(u2i-U2toU3); + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + w2r = *u2r; + w2i = *u2i; + w3r = *(u2r+U2toU3); + w3i = *(u2i-U2toU3); - pstrt = ioptr; + pstrt = ioptr; - p0r = pstrt; - p1r = pstrt+pinc; - p2r = p1r+pinc; - p3r = p2r+pinc; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1 - w0- f1 - - f1 - - f1 - f2 - - f2 - w1- f2 - - f4 - f3 - w0- t1 - iw1- f3 - - f5 + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1 - w0- f1 - - f1 - - f1 + f2 - - f2 - w1- f2 - - f4 + f3 - w0- t1 - iw1- f3 - - f5 - f4 - - t0 - - f4 - w2- t0 - f5 - w0- f5 - - f5 - w3- t1 - f6 - - f6 - w1- f6 - iw2- f6 - f7 - w0- t1 - iw1- f7 - iw3- f7 - */ + f4 - - t0 - - f4 - w2- t0 + f5 - w0- f5 - - f5 - w3- t1 + f6 - - f6 - w1- f6 - iw2- f6 + f7 - w0- t1 - iw1- f7 - iw3- f7 + */ - for (DiffUCnt = NDiffU; DiffUCnt > 0 ; DiffUCnt--){ - f0r = *p0r; - f0i = *(p0r + 1); - f1r = *p1r; - f1i = *(p1r + 1); - for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--){ - f2r = *p2r; - f2i = *(p2r + 1); - f3r = *p3r; - f3i = *(p3r + 1); + for (DiffUCnt = NDiffU; DiffUCnt > 0 ; DiffUCnt--) { + f0r = *p0r; + f0i = *(p0r + 1); + f1r = *p1r; + f1i = *(p1r + 1); + for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--) { + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); - t0r = f0r + f1r * w0r + f1i * w0i; - t0i = f0i - f1r * w0i + f1i * w0r; - f1r = f0r * Two - t0r; - f1i = f0i * Two - t0i; + t0r = f0r + f1r * w0r + f1i * w0i; + t0i = f0i - f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; - f4r = *(p0r + pos); - f4i = *(p0r + posi); - f5r = *(p1r + pos); - f5i = *(p1r + posi); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - f6r = *(p2r + pos); - f6i = *(p2r + posi); - f7r = *(p3r + pos); - f7i = *(p3r + posi); + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); - t1r = f2r - f3r * w0r - f3i * w0i; - t1i = f2i + f3r * w0i - f3i * w0r; - f2r = f2r * Two - t1r; - f2i = f2i * Two - t1i; + t1r = f2r - f3r * w0r - f3i * w0i; + t1i = f2i + f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; - f0r = t0r + f2r * w1r + f2i * w1i; - f0i = t0i - f2r * w1i + f2i * w1r; - f2r = t0r * Two - f0r; - f2i = t0i * Two - f0i; + f0r = t0r + f2r * w1r + f2i * w1i; + f0i = t0i - f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; - f3r = f1r + t1r * w1i - t1i * w1r; - f3i = f1i + t1r * w1r + t1i * w1i; - f1r = f1r * Two - f3r; - f1i = f1i * Two - f3i; + f3r = f1r + t1r * w1i - t1i * w1r; + f3i = f1i + t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; - t0r = f4r + f5r * w0r + f5i * w0i; - t0i = f4i - f5r * w0i + f5i * w0r; - f5r = f4r * Two - t0r; - f5i = f4i * Two - t0i; + t0r = f4r + f5r * w0r + f5i * w0i; + t0i = f4i - f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; - t1r = f6r - f7r * w0r - f7i * w0i; - t1i = f6i + f7r * w0i - f7i * w0r; - f6r = f6r * Two - t1r; - f6i = f6i * Two - t1i; + t1r = f6r - f7r * w0r - f7i * w0i; + t1i = f6i + f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; - f4r = t0r + f6r * w1r + f6i * w1i; - f4i = t0i - f6r * w1i + f6i * w1r; - f6r = t0r * Two - f4r; - f6i = t0i * Two - f4i; + f4r = t0r + f6r * w1r + f6i * w1i; + f4i = t0i - f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; - f7r = f5r + t1r * w1i - t1i * w1r; - f7i = f5i + t1r * w1r + t1i * w1i; - f5r = f5r * Two - f7r; - f5i = f5i * Two - f7i; + f7r = f5r + t1r * w1i - t1i * w1r; + f7i = f5i + t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; - t0r = f0r - f4r * w2r - f4i * w2i; - t0i = f0i + f4r * w2i - f4i * w2r; - f0r = f0r * Two - t0r; - f0i = f0i * Two - t0i; + t0r = f0r - f4r * w2r - f4i * w2i; + t0i = f0i + f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; - t1r = f1r - f5r * w3r - f5i * w3i; - t1i = f1i + f5r * w3i - f5i * w3r; - f1r = f1r * Two - t1r; - f1i = f1i * Two - t1i; + t1r = f1r - f5r * w3r - f5i * w3i; + t1i = f1i + f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; - *(p0r + pos) = t0r; - *(p1r + pos) = t1r; - *(p0r + posi) = t0i; - *(p1r + posi) = t1i; - *p0r = f0r; - *p1r = f1r; - *(p0r + 1) = f0i; - *(p1r + 1) = f1i; + *(p0r + pos) = t0r; + *(p1r + pos) = t1r; + *(p0r + posi) = t0i; + *(p1r + posi) = t1i; + *p0r = f0r; + *p1r = f1r; + *(p0r + 1) = f0i; + *(p1r + 1) = f1i; - p0r += pnext; - f0r = *p0r; - f0i = *(p0r + 1); + p0r += pnext; + f0r = *p0r; + f0i = *(p0r + 1); - p1r += pnext; + p1r += pnext; - f1r = *p1r; - f1i = *(p1r + 1); + f1r = *p1r; + f1i = *(p1r + 1); - f4r = f2r - f6r * w2i + f6i * w2r; - f4i = f2i - f6r * w2r - f6i * w2i; - f6r = f2r * Two - f4r; - f6i = f2i * Two - f4i; + f4r = f2r - f6r * w2i + f6i * w2r; + f4i = f2i - f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; - f5r = f3r - f7r * w3i + f7i * w3r; - f5i = f3i - f7r * w3r - f7i * w3i; - f7r = f3r * Two - f5r; - f7i = f3i * Two - f5i; + f5r = f3r - f7r * w3i + f7i * w3r; + f5i = f3i - f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; - *p2r = f4r; - *p3r = f5r; - *(p2r + 1) = f4i; - *(p3r + 1) = f5i; - *(p2r + pos) = f6r; - *(p3r + pos) = f7r; - *(p2r + posi) = f6i; - *(p3r + posi) = f7i; + *p2r = f4r; + *p3r = f5r; + *(p2r + 1) = f4i; + *(p3r + 1) = f5i; + *(p2r + pos) = f6r; + *(p3r + pos) = f7r; + *(p2r + posi) = f6i; + *(p3r + posi) = f7i; - p2r += pnext; - p3r += pnext; + p2r += pnext; + p3r += pnext; - } - - f2r = *p2r; - f2i = *(p2r + 1); - f3r = *p3r; - f3i = *(p3r + 1); + } - t0r = f0r + f1r * w0r + f1i * w0i; - t0i = f0i - f1r * w0i + f1i * w0r; - f1r = f0r * Two - t0r; - f1i = f0i * Two - t0i; + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); - f4r = *(p0r + pos); - f4i = *(p0r + posi); - f5r = *(p1r + pos); - f5i = *(p1r + posi); + t0r = f0r + f1r * w0r + f1i * w0i; + t0i = f0i - f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; - f6r = *(p2r + pos); - f6i = *(p2r + posi); - f7r = *(p3r + pos); - f7i = *(p3r + posi); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - t1r = f2r - f3r * w0r - f3i * w0i; - t1i = f2i + f3r * w0i - f3i * w0r; - f2r = f2r * Two - t1r; - f2i = f2i * Two - t1i; + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); - f0r = t0r + f2r * w1r + f2i * w1i; - f0i = t0i - f2r * w1i + f2i * w1r; - f2r = t0r * Two - f0r; - f2i = t0i * Two - f0i; + t1r = f2r - f3r * w0r - f3i * w0i; + t1i = f2i + f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; - f3r = f1r + t1r * w1i - t1i * w1r; - f3i = f1i + t1r * w1r + t1i * w1i; - f1r = f1r * Two - f3r; - f1i = f1i * Two - f3i; + f0r = t0r + f2r * w1r + f2i * w1i; + f0i = t0i - f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; - if (DiffUCnt == NDiffU/2) - Uinc4 = -Uinc4; + f3r = f1r + t1r * w1i - t1i * w1r; + f3i = f1i + t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; - u0r += Uinc4; - u0i -= Uinc4; - u1r += Uinc2; - u1i -= Uinc2; - u2r += Uinc; - u2i -= Uinc; + if (DiffUCnt == NDiffU/2) + Uinc4 = -Uinc4; - pstrt += 2; + u0r += Uinc4; + u0i -= Uinc4; + u1r += Uinc2; + u1i -= Uinc2; + u2r += Uinc; + u2i -= Uinc; - t0r = f4r + f5r * w0r + f5i * w0i; - t0i = f4i - f5r * w0i + f5i * w0r; - f5r = f4r * Two - t0r; - f5i = f4i * Two - t0i; + pstrt += 2; - t1r = f6r - f7r * w0r - f7i * w0i; - t1i = f6i + f7r * w0i - f7i * w0r; - f6r = f6r * Two - t1r; - f6i = f6i * Two - t1i; + t0r = f4r + f5r * w0r + f5i * w0i; + t0i = f4i - f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; - f4r = t0r + f6r * w1r + f6i * w1i; - f4i = t0i - f6r * w1i + f6i * w1r; - f6r = t0r * Two - f4r; - f6i = t0i * Two - f4i; + t1r = f6r - f7r * w0r - f7i * w0i; + t1i = f6i + f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; - f7r = f5r + t1r * w1i - t1i * w1r; - f7i = f5i + t1r * w1r + t1i * w1i; - f5r = f5r * Two - f7r; - f5i = f5i * Two - f7i; + f4r = t0r + f6r * w1r + f6i * w1i; + f4i = t0i - f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; - w0r = *u0r; - w0i = *u0i; - w1r = *u1r; - w1i = *u1i; + f7r = f5r + t1r * w1i - t1i * w1r; + f7i = f5i + t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; - if (DiffUCnt <= NDiffU/2) - w0r = -w0r; + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; - t0r = f0r - f4r * w2r - f4i * w2i; - t0i = f0i + f4r * w2i - f4i * w2r; - f0r = f0r * Two - t0r; - f0i = f0i * Two - t0i; + if (DiffUCnt <= NDiffU/2) + w0r = -w0r; - f4r = f2r - f6r * w2i + f6i * w2r; - f4i = f2i - f6r * w2r - f6i * w2i; - f6r = f2r * Two - f4r; - f6i = f2i * Two - f4i; + t0r = f0r - f4r * w2r - f4i * w2i; + t0i = f0i + f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; - *(p0r + pos) = t0r; - *p2r = f4r; - *(p0r + posi) = t0i; - *(p2r + 1) = f4i; - w2r = *u2r; - w2i = *u2i; - *p0r = f0r; - *(p2r + pos) = f6r; - *(p0r + 1) = f0i; - *(p2r + posi) = f6i; + f4r = f2r - f6r * w2i + f6i * w2r; + f4i = f2i - f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; - p0r = pstrt; - p2r = pstrt + pinc + pinc; + *(p0r + pos) = t0r; + *p2r = f4r; + *(p0r + posi) = t0i; + *(p2r + 1) = f4i; + w2r = *u2r; + w2i = *u2i; + *p0r = f0r; + *(p2r + pos) = f6r; + *(p0r + 1) = f0i; + *(p2r + posi) = f6i; - t1r = f1r - f5r * w3r - f5i * w3i; - t1i = f1i + f5r * w3i - f5i * w3r; - f1r = f1r * Two - t1r; - f1i = f1i * Two - t1i; + p0r = pstrt; + p2r = pstrt + pinc + pinc; - f5r = f3r - f7r * w3i + f7i * w3r; - f5i = f3i - f7r * w3r - f7i * w3i; - f7r = f3r * Two - f5r; - f7i = f3i * Two - f5i; + t1r = f1r - f5r * w3r - f5i * w3i; + t1i = f1i + f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; - *(p1r + pos) = t1r; - *p3r = f5r; - *(p1r + posi) = t1i; - *(p3r + 1) = f5i; - w3r = *(u2r+U2toU3); - w3i = *(u2i-U2toU3); - *p1r = f1r; - *(p3r + pos) = f7r; - *(p1r + 1) = f1i; - *(p3r + posi) = f7i; + f5r = f3r - f7r * w3i + f7i * w3r; + f5i = f3i - f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; - p1r = pstrt + pinc; - p3r = p2r + pinc; - } - NSameU /= 8; - Uinc /= 8; - Uinc2 /= 8; - Uinc4 = Uinc * 4; - NDiffU *= 8; - pinc *= 8; - pnext *= 8; - pos *= 8; - posi = pos + 1; -} + *(p1r + pos) = t1r; + *p3r = f5r; + *(p1r + posi) = t1i; + *(p3r + 1) = f5i; + w3r = *(u2r+U2toU3); + w3i = *(u2i-U2toU3); + *p1r = f1r; + *(p3r + pos) = f7r; + *(p1r + 1) = f1i; + *(p3r + posi) = f7i; + + p1r = pstrt + pinc; + p3r = p2r + pinc; + } + NSameU /= 8; + Uinc /= 8; + Uinc2 /= 8; + Uinc4 = Uinc * 4; + NDiffU *= 8; + pinc *= 8; + pnext *= 8; + pos *= 8; + posi = pos + 1; + } } -void fftrecurs(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt); -void fftrecurs(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt){ -/* recursive bfstages calls to maximize on chip cache efficiency */ -long i1; -if (M <= MCACHE) // fits on chip ? - bfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); /* RADIX 8 Stages */ -else{ - for (i1=0; i1<8; i1++){ - fftrecurs(&ioptr[i1*POW2(M-3)*2], M-3, Utbl, 8*Ustride, NDiffU, StageCnt-1); /* RADIX 8 Stages */ - } - bfstages(ioptr, M, Utbl, Ustride, POW2(M-3), 1); /* RADIX 8 Stage */ -} +void fftrecurs(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt); +void fftrecurs(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt) +{ + /* recursive bfstages calls to maximize on chip cache efficiency */ + long i1; + if (M <= MCACHE) // fits on chip ? + bfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); /* RADIX 8 Stages */ + else { + for (i1=0; i1<8; i1++) { + fftrecurs(&ioptr[i1*POW2(M-3)*2], M-3, Utbl, 8*Ustride, NDiffU, StageCnt-1); /* RADIX 8 Stages */ + } + bfstages(ioptr, M, Utbl, Ustride, POW2(M-3), 1); /* RADIX 8 Stage */ + } } -void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow){ -/* Compute in-place complex fft on the rows of the input array */ -/* INPUTS */ -/* *ioptr = input data array */ -/* M = log2 of fft size (ex M=10 for 1024 point fft) */ -/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ -/* *Utbl = cosine table */ -/* *BRLow = bit reversed counter table */ -/* OUTPUTS */ -/* *ioptr = output data array */ +void ffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow) +{ + /* Compute in-place complex fft on the rows of the input array */ + /* INPUTS */ + /* *ioptr = input data array */ + /* M = log2 of fft size (ex M=10 for 1024 point fft) */ + /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ + /* *Utbl = cosine table */ + /* *BRLow = bit reversed counter table */ + /* OUTPUTS */ + /* *ioptr = output data array */ -long StageCnt; -long NDiffU; + long StageCnt; + long NDiffU; -switch (M){ -case 0: - break; -case 1: - for (;Rows>0;Rows--){ - fft2pt(ioptr); /* a 2 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 2: - for (;Rows>0;Rows--){ - fft4pt(ioptr); /* a 4 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 3: - for (;Rows>0;Rows--){ - fft8pt(ioptr); /* an 8 pt fft */ - ioptr += 2*POW2(M); - } - break; -default: - for (;Rows>0;Rows--){ + switch (M) { + case 0: + break; + case 1: + for (; Rows>0; Rows--) { + fft2pt(ioptr); /* a 2 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 2: + for (; Rows>0; Rows--) { + fft4pt(ioptr); /* a 4 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 3: + for (; Rows>0; Rows--) { + fft8pt(ioptr); /* an 8 pt fft */ + ioptr += 2*POW2(M); + } + break; + default: + for (; Rows>0; Rows--) { - bitrevR2(ioptr, M, BRLow); /* bit reverse and first radix 2 stage */ - - StageCnt = (M-1) / 3; // number of radix 8 stages - NDiffU = 2; // one radix 2 stage already complete + bitrevR2(ioptr, M, BRLow); /* bit reverse and first radix 2 stage */ - if ( (M-1-(StageCnt * 3)) == 1 ){ - bfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ - NDiffU *= 2; - } + StageCnt = (M-1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete - if ( (M-1-(StageCnt * 3)) == 2 ){ - bfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ - NDiffU *= 4; - } + if ( (M-1-(StageCnt * 3)) == 1 ) { + bfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ + NDiffU *= 2; + } - if (M <= MCACHE) - bfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ + if ( (M-1-(StageCnt * 3)) == 2 ) { + bfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ + NDiffU *= 4; + } - else{ - fftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ - } + if (M <= MCACHE) + bfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ - ioptr += 2*POW2(M); - } -} + else { + fftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ + } + + ioptr += 2*POW2(M); + } + } } /************************************************ parts of iffts1 *************************************************/ -static inline void scbitrevR2(float *ioptr, long M, short *BRLow, float scale); -static inline void scbitrevR2(float *ioptr, long M, short *BRLow, float scale){ -/*** scaled bit reverse and first radix 2 stage forward or inverse fft ***/ -float f0r; -float f0i; -float f1r; -float f1i; -float f2r; -float f2i; -float f3r; -float f3i; -float f4r; -float f4i; -float f5r; -float f5i; -float f6r; -float f6i; -float f7r; -float f7i; -float t0r; -float t0i; -float t1r; -float t1i; -float *p0r; -float *p1r; -float *IOP; -float *iolimit; -long Colstart; -long iCol; -unsigned long posA; -unsigned long posAi; -unsigned long posB; -unsigned long posBi; +static inline void scbitrevR2(double *ioptr, long M, short *BRLow, double scale); +static inline void scbitrevR2(double *ioptr, long M, short *BRLow, double scale) +{ + /*** scaled bit reverse and first radix 2 stage forward or inverse fft ***/ + double f0r; + double f0i; + double f1r; + double f1i; + double f2r; + double f2i; + double f3r; + double f3i; + double f4r; + double f4i; + double f5r; + double f5i; + double f6r; + double f6i; + double f7r; + double f7i; + double t0r; + double t0i; + double t1r; + double t1i; + double *p0r; + double *p1r; + double *IOP; + double *iolimit; + long Colstart; + long iCol; + unsigned long posA; + unsigned long posAi; + unsigned long posB; + unsigned long posBi; -const unsigned long Nrems2 = POW2((M+3)/2); -const unsigned long Nroot_1_ColInc = POW2(M)-Nrems2; -const unsigned long Nroot_1 = POW2(M/2-1)-1; -const unsigned long ColstartShift = (M+1)/2 +1; -posA = POW2(M); // 1/2 of POW2(M) complexes -posAi = posA + 1; -posB = posA + 2; -posBi = posB + 1; + const unsigned long Nrems2 = POW2((M+3)/2); + const unsigned long Nroot_1_ColInc = POW2(M)-Nrems2; + const unsigned long Nroot_1 = POW2(M/2-1)-1; + const unsigned long ColstartShift = (M+1)/2 +1; + posA = POW2(M); // 1/2 of POW2(M) complexes + posAi = posA + 1; + posB = posA + 2; + posBi = posB + 1; -iolimit = ioptr + Nrems2; -for (; ioptr < iolimit; ioptr += POW2(M/2+1)){ - for (Colstart = Nroot_1; Colstart >= 0; Colstart--){ - iCol = Nroot_1; - p0r = ioptr+ Nroot_1_ColInc + BRLow[Colstart]*4; - IOP = ioptr + (Colstart << ColstartShift); - p1r = IOP + BRLow[iCol]*4; - f0r = *(p0r); - f0i = *(p0r+1); - f1r = *(p0r+posA); - f1i = *(p0r+posAi); - for (; iCol > Colstart;){ - f2r = *(p0r+2); - f2i = *(p0r+(2+1)); - f3r = *(p0r+posB); - f3i = *(p0r+posBi); - f4r = *(p1r); - f4i = *(p1r+1); - f5r = *(p1r+posA); - f5i = *(p1r+posAi); - f6r = *(p1r+2); - f6i = *(p1r+(2+1)); - f7r = *(p1r+posB); - f7i = *(p1r+posBi); - - t0r = f0r + f1r; - t0i = f0i + f1i; - f1r = f0r - f1r; - f1i = f0i - f1i; - t1r = f2r + f3r; - t1i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; - f0r = f4r + f5r; - f0i = f4i + f5i; - f5r = f4r - f5r; - f5i = f4i - f5i; - f2r = f6r + f7r; - f2i = f6i + f7i; - f7r = f6r - f7r; - f7i = f6i - f7i; + iolimit = ioptr + Nrems2; + for (; ioptr < iolimit; ioptr += POW2(M/2+1)) { + for (Colstart = Nroot_1; Colstart >= 0; Colstart--) { + iCol = Nroot_1; + p0r = ioptr+ Nroot_1_ColInc + BRLow[Colstart]*4; + IOP = ioptr + (Colstart << ColstartShift); + p1r = IOP + BRLow[iCol]*4; + f0r = *(p0r); + f0i = *(p0r+1); + f1r = *(p0r+posA); + f1i = *(p0r+posAi); + for (; iCol > Colstart;) { + f2r = *(p0r+2); + f2i = *(p0r+(2+1)); + f3r = *(p0r+posB); + f3i = *(p0r+posBi); + f4r = *(p1r); + f4i = *(p1r+1); + f5r = *(p1r+posA); + f5i = *(p1r+posAi); + f6r = *(p1r+2); + f6i = *(p1r+(2+1)); + f7r = *(p1r+posB); + f7i = *(p1r+posBi); - *(p1r) = scale*t0r; - *(p1r+1) = scale*t0i; - *(p1r+2) = scale*f1r; - *(p1r+(2+1)) = scale*f1i; - *(p1r+posA) = scale*t1r; - *(p1r+posAi) = scale*t1i; - *(p1r+posB) = scale*f3r; - *(p1r+posBi) = scale*f3i; - *(p0r) = scale*f0r; - *(p0r+1) = scale*f0i; - *(p0r+2) = scale*f5r; - *(p0r+(2+1)) = scale*f5i; - *(p0r+posA) = scale*f2r; - *(p0r+posAi) = scale*f2i; - *(p0r+posB) = scale*f7r; - *(p0r+posBi) = scale*f7i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + f0r = f4r + f5r; + f0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + f2r = f6r + f7r; + f2i = f6i + f7i; + f7r = f6r - f7r; + f7i = f6i - f7i; - p0r -= Nrems2; - f0r = *(p0r); - f0i = *(p0r+1); - f1r = *(p0r+posA); - f1i = *(p0r+posAi); - iCol -= 1; - p1r = IOP + BRLow[iCol]*4; - }; - f2r = *(p0r+2); - f2i = *(p0r+(2+1)); - f3r = *(p0r+posB); - f3i = *(p0r+posBi); + *(p1r) = scale*t0r; + *(p1r+1) = scale*t0i; + *(p1r+2) = scale*f1r; + *(p1r+(2+1)) = scale*f1i; + *(p1r+posA) = scale*t1r; + *(p1r+posAi) = scale*t1i; + *(p1r+posB) = scale*f3r; + *(p1r+posBi) = scale*f3i; + *(p0r) = scale*f0r; + *(p0r+1) = scale*f0i; + *(p0r+2) = scale*f5r; + *(p0r+(2+1)) = scale*f5i; + *(p0r+posA) = scale*f2r; + *(p0r+posAi) = scale*f2i; + *(p0r+posB) = scale*f7r; + *(p0r+posBi) = scale*f7i; - t0r = f0r + f1r; - t0i = f0i + f1i; - f1r = f0r - f1r; - f1i = f0i - f1i; - t1r = f2r + f3r; - t1i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; + p0r -= Nrems2; + f0r = *(p0r); + f0i = *(p0r+1); + f1r = *(p0r+posA); + f1i = *(p0r+posAi); + iCol -= 1; + p1r = IOP + BRLow[iCol]*4; + }; + f2r = *(p0r+2); + f2i = *(p0r+(2+1)); + f3r = *(p0r+posB); + f3i = *(p0r+posBi); - *(p0r) = scale*t0r; - *(p0r+1) = scale*t0i; - *(p0r+2) = scale*f1r; - *(p0r+(2+1)) = scale*f1i; - *(p0r+posA) = scale*t1r; - *(p0r+posAi) = scale*t1i; - *(p0r+posB) = scale*f3r; - *(p0r+posBi) = scale*f3i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; - }; -}; + *(p0r) = scale*t0r; + *(p0r+1) = scale*t0i; + *(p0r+2) = scale*f1r; + *(p0r+(2+1)) = scale*f1i; + *(p0r+posA) = scale*t1r; + *(p0r+posAi) = scale*t1i; + *(p0r+posB) = scale*f3r; + *(p0r+posBi) = scale*f3i; + + }; + }; } -static inline void ifft2pt(float *ioptr, float scale); -static inline void ifft2pt(float *ioptr, float scale){ -/*** RADIX 2 ifft ***/ -float f0r, f0i, f1r, f1i; -float t0r, t0i; +static inline void ifft2pt(double *ioptr, double scale); +static inline void ifft2pt(double *ioptr, double scale) +{ + /*** RADIX 2 ifft ***/ + double f0r, f0i, f1r, f1i; + double t0r, t0i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[2]; -f1i = ioptr[3]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; - /* Butterflys */ - /* - f0 - - t0 - f1 - 1 - f1 - */ + /* Butterflys */ + /* + f0 - - t0 + f1 - 1 - f1 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; - /* store result */ -ioptr[0] = scale*t0r; -ioptr[1] = scale*t0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; + /* store result */ + ioptr[0] = scale*t0r; + ioptr[1] = scale*t0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; } -static inline void ifft4pt(float *ioptr, float scale); -static inline void ifft4pt(float *ioptr, float scale){ -/*** RADIX 4 ifft ***/ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float t0r, t0i, t1r, t1i; +static inline void ifft4pt(double *ioptr, double scale); +static inline void ifft4pt(double *ioptr, double scale) +{ + /*** RADIX 4 ifft ***/ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double t0r, t0i, t1r, t1i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[4]; -f1i = ioptr[5]; -f2r = ioptr[2]; -f2i = ioptr[3]; -f3r = ioptr[6]; -f3i = ioptr[7]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - f1 - 1 - f1 - - f1 - f2 - - f2 - 1 - f2 - f3 - 1 - t1 - i - f3 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 + f1 - 1 - f1 - - f1 + f2 - - f2 - 1 - f2 + f3 - 1 - t1 - i - f3 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r + t1i; -f3i = f1i - t1r; -f1r = f1r - t1i; -f1i = f1i + t1r; + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; - /* store result */ -ioptr[0] = scale*f0r; -ioptr[1] = scale*f0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[4] = scale*f2r; -ioptr[5] = scale*f2i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; + /* store result */ + ioptr[0] = scale*f0r; + ioptr[1] = scale*f0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[4] = scale*f2r; + ioptr[5] = scale*f2i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; } -static inline void ifft8pt(float *ioptr, float scale); -static inline void ifft8pt(float *ioptr, float scale){ -/*** RADIX 8 ifft ***/ -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; +static inline void ifft8pt(double *ioptr, double scale); +static inline void ifft8pt(double *ioptr, double scale) +{ + /*** RADIX 8 ifft ***/ + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[8]; -f1i = ioptr[9]; -f2r = ioptr[4]; -f2i = ioptr[5]; -f3r = ioptr[12]; -f3i = ioptr[13]; -f4r = ioptr[2]; -f4i = ioptr[3]; -f5r = ioptr[10]; -f5i = ioptr[11]; -f6r = ioptr[6]; -f6i = ioptr[7]; -f7r = ioptr[14]; -f7i = ioptr[15]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1 - 1 - f1 - - f1 - - f1 - f2 - - f2 - 1 - f2 - - f2 - f3 - 1 - t1 - i - f3 - - f3 - f4 - - t0 - - f4 - 1 - t0 - f5 - 1 - f5 - - f5 - w3 - f4 - f6 - - f6 - 1 - f6 - i - t1 - f7 - 1 - t1 - i - f7 - iw3- f6 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1 - 1 - f1 - - f1 - - f1 + f2 - - f2 - 1 - f2 - - f2 + f3 - 1 - t1 - i - f3 - - f3 + f4 - - t0 - - f4 - 1 - t0 + f5 - 1 - f5 - - f5 - w3 - f4 + f6 - - f6 - 1 - f6 - i - t1 + f7 - 1 - t1 - i - f7 - iw3- f6 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r + t1i; -f3i = f1i - t1r; -f1r = f1r - t1i; -f1i = f1i + t1r; + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; -t0r = f4r + f5r; -t0i = f4i + f5i; -f5r = f4r - f5r; -f5i = f4i - f5i; + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; -t1r = f6r - f7r; -t1i = f6i - f7i; -f6r = f6r + f7r; -f6i = f6i + f7i; + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; -f4r = t0r + f6r; -f4i = t0i + f6i; -f6r = t0r - f6r; -f6i = t0i - f6i; + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; -f7r = f5r + t1i; -f7i = f5i - t1r; -f5r = f5r - t1i; -f5i = f5i + t1r; + f7r = f5r + t1i; + f7i = f5i - t1r; + f5r = f5r - t1i; + f5i = f5i + t1r; -t0r = f0r - f4r; -t0i = f0i - f4i; -f0r = f0r + f4r; -f0i = f0i + f4i; + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; -t1r = f2r + f6i; -t1i = f2i - f6r; -f2r = f2r - f6i; -f2i = f2i + f6r; + t1r = f2r + f6i; + t1i = f2i - f6r; + f2r = f2r - f6i; + f2i = f2i + f6r; -f4r = f1r - f5r * w0r + f5i * w0r; -f4i = f1i - f5r * w0r - f5i * w0r; -f1r = f1r * Two - f4r; -f1i = f1i * Two - f4i; + f4r = f1r - f5r * w0r + f5i * w0r; + f4i = f1i - f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; -f6r = f3r + f7r * w0r + f7i * w0r; -f6i = f3i - f7r * w0r + f7i * w0r; -f3r = f3r * Two - f6r; -f3i = f3i * Two - f6i; + f6r = f3r + f7r * w0r + f7i * w0r; + f6i = f3i - f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; - /* store result */ -ioptr[0] = scale*f0r; -ioptr[1] = scale*f0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[4] = scale*f2r; -ioptr[5] = scale*f2i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; -ioptr[8] = scale*t0r; -ioptr[9] = scale*t0i; -ioptr[10] = scale*f4r; -ioptr[11] = scale*f4i; -ioptr[12] = scale*t1r; -ioptr[13] = scale*t1i; -ioptr[14] = scale*f6r; -ioptr[15] = scale*f6i; + /* store result */ + ioptr[0] = scale*f0r; + ioptr[1] = scale*f0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[4] = scale*f2r; + ioptr[5] = scale*f2i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; + ioptr[8] = scale*t0r; + ioptr[9] = scale*t0i; + ioptr[10] = scale*f4r; + ioptr[11] = scale*f4i; + ioptr[12] = scale*t1r; + ioptr[13] = scale*t1i; + ioptr[14] = scale*f6r; + ioptr[15] = scale*f6i; } -static inline void ibfR2(float *ioptr, long M, long NDiffU); -static inline void ibfR2(float *ioptr, long M, long NDiffU){ -/*** 2nd radix 2 stage ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long NSameU; -unsigned long SameUCnt; +static inline void ibfR2(double *ioptr, long M, long NDiffU); +static inline void ibfR2(double *ioptr, long M, long NDiffU) +{ + /*** 2nd radix 2 stage ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long NSameU; + unsigned long SameUCnt; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -/* const float Two = 2.0; */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + /* const double Two = 2.0; */ -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 4; -pos = 2; -posi = pos+1; -NSameU = POW2(M) / 4 /NDiffU; // 4 Us at a time -pstrt = ioptr; -p0r = pstrt; -p1r = pstrt+pinc; -p2r = p1r+pinc; -p3r = p2r+pinc; + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 4; + pos = 2; + posi = pos+1; + NSameU = POW2(M) / 4 /NDiffU; // 4 Us at a time + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - f4 - f1 - 1 - f5 - f2 - - f6 - f3 - 1 - f7 - */ - /* Butterflys */ - /* - f0 - - f4 - f1 - 1 - f5 - f2 - - f6 - f3 - 1 - f7 - */ + /* Butterflys */ + /* + f0 - - f4 + f1 - 1 - f5 + f2 - - f6 + f3 - 1 - f7 + */ + /* Butterflys */ + /* + f0 - - f4 + f1 - 1 - f5 + f2 - - f6 + f3 - 1 - f7 + */ -for (SameUCnt = NSameU; SameUCnt > 0 ; SameUCnt--){ + for (SameUCnt = NSameU; SameUCnt > 0 ; SameUCnt--) { - f0r = *p0r; - f1r = *p1r; - f0i = *(p0r + 1); - f1i = *(p1r + 1); - f2r = *p2r; - f3r = *p3r; - f2i = *(p2r + 1); - f3i = *(p3r + 1); + f0r = *p0r; + f1r = *p1r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2r = *p2r; + f3r = *p3r; + f2i = *(p2r + 1); + f3i = *(p3r + 1); - f4r = f0r + f1r; - f4i = f0i + f1i; - f5r = f0r - f1r; - f5i = f0i - f1i; + f4r = f0r + f1r; + f4i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; - f6r = f2r + f3r; - f6i = f2i + f3i; - f7r = f2r - f3r; - f7i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f7r = f2r - f3r; + f7i = f2i - f3i; - *p0r = f4r; - *(p0r + 1) = f4i; - *p1r = f5r; - *(p1r + 1) = f5i; - *p2r = f6r; - *(p2r + 1) = f6i; - *p3r = f7r; - *(p3r + 1) = f7i; + *p0r = f4r; + *(p0r + 1) = f4i; + *p1r = f5r; + *(p1r + 1) = f5i; + *p2r = f6r; + *(p2r + 1) = f6i; + *p3r = f7r; + *(p3r + 1) = f7i; - f0r = *(p0r + pos); - f1i = *(p1r + posi); - f0i = *(p0r + posi); - f1r = *(p1r + pos); - f2r = *(p2r + pos); - f3i = *(p3r + posi); - f2i = *(p2r + posi); - f3r = *(p3r + pos); + f0r = *(p0r + pos); + f1i = *(p1r + posi); + f0i = *(p0r + posi); + f1r = *(p1r + pos); + f2r = *(p2r + pos); + f3i = *(p3r + posi); + f2i = *(p2r + posi); + f3r = *(p3r + pos); - f4r = f0r - f1i; - f4i = f0i + f1r; - f5r = f0r + f1i; - f5i = f0i - f1r; + f4r = f0r - f1i; + f4i = f0i + f1r; + f5r = f0r + f1i; + f5i = f0i - f1r; - f6r = f2r - f3i; - f6i = f2i + f3r; - f7r = f2r + f3i; - f7i = f2i - f3r; + f6r = f2r - f3i; + f6i = f2i + f3r; + f7r = f2r + f3i; + f7i = f2i - f3r; - *(p0r + pos) = f4r; - *(p0r + posi) = f4i; - *(p1r + pos) = f5r; - *(p1r + posi) = f5i; - *(p2r + pos) = f6r; - *(p2r + posi) = f6i; - *(p3r + pos) = f7r; - *(p3r + posi) = f7i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; - p0r += pnext; - p1r += pnext; - p2r += pnext; - p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; + p3r += pnext; -} + } } -static inline void ibfR4(float *ioptr, long M, long NDiffU); -static inline void ibfR4(float *ioptr, long M, long NDiffU){ -/*** 1 radix 4 stage ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long pnexti; -unsigned long NSameU; -unsigned long SameUCnt; +static inline void ibfR4(double *ioptr, long M, long NDiffU); +static inline void ibfR4(double *ioptr, long M, long NDiffU) +{ + /*** 1 radix 4 stage ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long pnexti; + unsigned long NSameU; + unsigned long SameUCnt; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; -float w1r = 1.0/MYROOT2; /* cos(pi/4) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t1r, t1i; -const float Two = 2.0; + double w1r = 1.0/MYROOT2; /* cos(pi/4) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t1r, t1i; + const double Two = 2.0; -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 4; -pnexti = pnext + 1; -pos = 2; -posi = pos+1; -NSameU = POW2(M) / 4 /NDiffU; // 4 pts per butterfly -pstrt = ioptr; -p0r = pstrt; -p1r = pstrt+pinc; -p2r = p1r+pinc; -p3r = p2r+pinc; + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 4; + pnexti = pnext + 1; + pos = 2; + posi = pos+1; + NSameU = POW2(M) / 4 /NDiffU; // 4 pts per butterfly + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - f0 - - f4 - f1 - 1 - f5 - - f5 - f2 - - f6 - 1 - f6 - f3 - 1 - f3 - -i - f7 - */ - /* Butterflys */ - /* - f0 - - f4 - - f4 - f1 - -i - t1 - - f5 - f2 - - f2 - w1 - f6 - f3 - -i - f7 - iw1- f7 - */ + /* Butterflys */ + /* + f0 - - f0 - - f4 + f1 - 1 - f5 - - f5 + f2 - - f6 - 1 - f6 + f3 - 1 - f3 - -i - f7 + */ + /* Butterflys */ + /* + f0 - - f4 - - f4 + f1 - -i - t1 - - f5 + f2 - - f2 - w1 - f6 + f3 - -i - f7 - iw1- f7 + */ -f0r = *p0r; -f1r = *p1r; -f2r = *p2r; -f3r = *p3r; -f0i = *(p0r + 1); -f1i = *(p1r + 1); -f2i = *(p2r + 1); -f3i = *(p3r + 1); + f0r = *p0r; + f1r = *p1r; + f2r = *p2r; + f3r = *p3r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2i = *(p2r + 1); + f3i = *(p3r + 1); -f5r = f0r - f1r; -f5i = f0i - f1i; -f0r = f0r + f1r; -f0i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; -f6r = f2r + f3r; -f6i = f2i + f3i; -f3r = f2r - f3r; -f3i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; -for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--){ + for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--) { - f7r = f5r + f3i; - f7i = f5i - f3r; - f5r = f5r - f3i; - f5i = f5i + f3r; + f7r = f5r + f3i; + f7i = f5i - f3r; + f5r = f5r - f3i; + f5i = f5i + f3r; - f4r = f0r + f6r; - f4i = f0i + f6i; - f6r = f0r - f6r; - f6i = f0i - f6i; + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; - f2r = *(p2r + pos); - f2i = *(p2r + posi); - f1r = *(p1r + pos); - f1i = *(p1r + posi); - f3i = *(p3r + posi); - f0r = *(p0r + pos); - f3r = *(p3r + pos); - f0i = *(p0r + posi); + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); - *p3r = f7r; - *p0r = f4r; - *(p3r + 1) = f7i; - *(p0r + 1) = f4i; - *p1r = f5r; - *p2r = f6r; - *(p1r + 1) = f5i; - *(p2r + 1) = f6i; + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; - f7r = f2r + f3i; - f7i = f2i - f3r; - f2r = f2r - f3i; - f2i = f2i + f3r; + f7r = f2r + f3i; + f7i = f2i - f3r; + f2r = f2r - f3i; + f2i = f2i + f3r; - f4r = f0r - f1i; - f4i = f0i + f1r; - t1r = f0r + f1i; - t1i = f0i - f1r; + f4r = f0r - f1i; + f4i = f0i + f1r; + t1r = f0r + f1i; + t1i = f0i - f1r; - f5r = t1r - f7r * w1r - f7i * w1r; - f5i = t1i + f7r * w1r - f7i * w1r; - f7r = t1r * Two - f5r; - f7i = t1i * Two - f5i; + f5r = t1r - f7r * w1r - f7i * w1r; + f5i = t1i + f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; - f6r = f4r - f2r * w1r + f2i * w1r; - f6i = f4i - f2r * w1r - f2i * w1r; - f4r = f4r * Two - f6r; - f4i = f4i * Two - f6i; + f6r = f4r - f2r * w1r + f2i * w1r; + f6i = f4i - f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; - f3r = *(p3r + pnext); - f0r = *(p0r + pnext); - f3i = *(p3r + pnexti); - f0i = *(p0r + pnexti); - f2r = *(p2r + pnext); - f2i = *(p2r + pnexti); - f1r = *(p1r + pnext); - f1i = *(p1r + pnexti); + f3r = *(p3r + pnext); + f0r = *(p0r + pnext); + f3i = *(p3r + pnexti); + f0i = *(p0r + pnexti); + f2r = *(p2r + pnext); + f2i = *(p2r + pnexti); + f1r = *(p1r + pnext); + f1i = *(p1r + pnexti); - *(p2r + pos) = f6r; - *(p1r + pos) = f5r; - *(p2r + posi) = f6i; - *(p1r + posi) = f5i; - *(p3r + pos) = f7r; - *(p0r + pos) = f4r; - *(p3r + posi) = f7i; - *(p0r + posi) = f4i; + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; - f6r = f2r + f3r; - f6i = f2i + f3i; - f3r = f2r - f3r; - f3i = f2i - f3i; + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; - f5r = f0r - f1r; - f5i = f0i - f1i; - f0r = f0r + f1r; - f0i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; - p3r += pnext; - p0r += pnext; - p1r += pnext; - p2r += pnext; + p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; -} -f7r = f5r + f3i; -f7i = f5i - f3r; -f5r = f5r - f3i; -f5i = f5i + f3r; + } + f7r = f5r + f3i; + f7i = f5i - f3r; + f5r = f5r - f3i; + f5i = f5i + f3r; -f4r = f0r + f6r; -f4i = f0i + f6i; -f6r = f0r - f6r; -f6i = f0i - f6i; + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; -f2r = *(p2r + pos); -f2i = *(p2r + posi); -f1r = *(p1r + pos); -f1i = *(p1r + posi); -f3i = *(p3r + posi); -f0r = *(p0r + pos); -f3r = *(p3r + pos); -f0i = *(p0r + posi); + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); -*p3r = f7r; -*p0r = f4r; -*(p3r + 1) = f7i; -*(p0r + 1) = f4i; -*p1r = f5r; -*p2r = f6r; -*(p1r + 1) = f5i; -*(p2r + 1) = f6i; + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; -f7r = f2r + f3i; -f7i = f2i - f3r; -f2r = f2r - f3i; -f2i = f2i + f3r; + f7r = f2r + f3i; + f7i = f2i - f3r; + f2r = f2r - f3i; + f2i = f2i + f3r; -f4r = f0r - f1i; -f4i = f0i + f1r; -t1r = f0r + f1i; -t1i = f0i - f1r; + f4r = f0r - f1i; + f4i = f0i + f1r; + t1r = f0r + f1i; + t1i = f0i - f1r; -f5r = t1r - f7r * w1r - f7i * w1r; -f5i = t1i + f7r * w1r - f7i * w1r; -f7r = t1r * Two - f5r; -f7i = t1i * Two - f5i; + f5r = t1r - f7r * w1r - f7i * w1r; + f5i = t1i + f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; -f6r = f4r - f2r * w1r + f2i * w1r; -f6i = f4i - f2r * w1r - f2i * w1r; -f4r = f4r * Two - f6r; -f4i = f4i * Two - f6i; + f6r = f4r - f2r * w1r + f2i * w1r; + f6i = f4i - f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; -*(p2r + pos) = f6r; -*(p1r + pos) = f5r; -*(p2r + posi) = f6i; -*(p1r + posi) = f5i; -*(p3r + pos) = f7r; -*(p0r + pos) = f4r; -*(p3r + posi) = f7i; -*(p0r + posi) = f4i; + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; } -static inline void ibfstages(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt); -static inline void ibfstages(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt){ -/*** RADIX 8 Stages ***/ -unsigned long pos; -unsigned long posi; -unsigned long pinc; -unsigned long pnext; -unsigned long NSameU; -unsigned long Uinc; -unsigned long Uinc2; -unsigned long Uinc4; -unsigned long DiffUCnt; -unsigned long SameUCnt; -unsigned long U2toU3; +static inline void ibfstages(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt); +static inline void ibfstages(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt) +{ + /*** RADIX 8 Stages ***/ + unsigned long pos; + unsigned long posi; + unsigned long pinc; + unsigned long pnext; + unsigned long NSameU; + unsigned long Uinc; + unsigned long Uinc2; + unsigned long Uinc4; + unsigned long DiffUCnt; + unsigned long SameUCnt; + unsigned long U2toU3; -float *pstrt; -float *p0r, *p1r, *p2r, *p3r; -float *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; + double *pstrt; + double *p0r, *p1r, *p2r, *p3r; + double *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; -float w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; + double w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; -pinc = NDiffU * 2; // 2 floats per complex -pnext = pinc * 8; -pos = pinc * 4; -posi = pos + 1; -NSameU = POW2(M) / 8 /NDiffU; // 8 pts per butterfly -Uinc = NSameU * Ustride; -Uinc2 = Uinc * 2; -Uinc4 = Uinc * 4; -U2toU3 = (POW2(M) / 8)*Ustride; -for (; StageCnt > 0 ; StageCnt--){ + pinc = NDiffU * 2; // 2 doubles per complex + pnext = pinc * 8; + pos = pinc * 4; + posi = pos + 1; + NSameU = POW2(M) / 8 /NDiffU; // 8 pts per butterfly + Uinc = NSameU * Ustride; + Uinc2 = Uinc * 2; + Uinc4 = Uinc * 4; + U2toU3 = (POW2(M) / 8)*Ustride; + for (; StageCnt > 0 ; StageCnt--) { - u0r = &Utbl[0]; - u0i = &Utbl[POW2(M-2)*Ustride]; - u1r = u0r; - u1i = u0i; - u2r = u0r; - u2i = u0i; + u0r = &Utbl[0]; + u0i = &Utbl[POW2(M-2)*Ustride]; + u1r = u0r; + u1i = u0i; + u2r = u0r; + u2i = u0i; - w0r = *u0r; - w0i = *u0i; - w1r = *u1r; - w1i = *u1i; - w2r = *u2r; - w2i = *u2i; - w3r = *(u2r+U2toU3); - w3i = *(u2i-U2toU3); + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + w2r = *u2r; + w2i = *u2i; + w3r = *(u2r+U2toU3); + w3i = *(u2i-U2toU3); - pstrt = ioptr; + pstrt = ioptr; - p0r = pstrt; - p1r = pstrt+pinc; - p2r = p1r+pinc; - p3r = p2r+pinc; + p0r = pstrt; + p1r = pstrt+pinc; + p2r = p1r+pinc; + p3r = p2r+pinc; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1 - w0- f1 - - f1 - - f1 - f2 - - f2 - w1- f2 - - f4 - f3 - w0- t1 - iw1- f3 - - f5 + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1 - w0- f1 - - f1 - - f1 + f2 - - f2 - w1- f2 - - f4 + f3 - w0- t1 - iw1- f3 - - f5 - f4 - - t0 - - f4 - w2- t0 - f5 - w0- f5 - - f5 - w3- t1 - f6 - - f6 - w1- f6 - iw2- f6 - f7 - w0- t1 - iw1- f7 - iw3- f7 - */ + f4 - - t0 - - f4 - w2- t0 + f5 - w0- f5 - - f5 - w3- t1 + f6 - - f6 - w1- f6 - iw2- f6 + f7 - w0- t1 - iw1- f7 - iw3- f7 + */ - for (DiffUCnt = NDiffU; DiffUCnt > 0 ; DiffUCnt--){ - f0r = *p0r; - f0i = *(p0r + 1); - f1r = *p1r; - f1i = *(p1r + 1); - for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--){ - f2r = *p2r; - f2i = *(p2r + 1); - f3r = *p3r; - f3i = *(p3r + 1); + for (DiffUCnt = NDiffU; DiffUCnt > 0 ; DiffUCnt--) { + f0r = *p0r; + f0i = *(p0r + 1); + f1r = *p1r; + f1i = *(p1r + 1); + for (SameUCnt = NSameU-1; SameUCnt > 0 ; SameUCnt--) { + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); - t0r = f0r + f1r * w0r - f1i * w0i; - t0i = f0i + f1r * w0i + f1i * w0r; - f1r = f0r * Two - t0r; - f1i = f0i * Two - t0i; + t0r = f0r + f1r * w0r - f1i * w0i; + t0i = f0i + f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; - f4r = *(p0r + pos); - f4i = *(p0r + posi); - f5r = *(p1r + pos); - f5i = *(p1r + posi); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - f6r = *(p2r + pos); - f6i = *(p2r + posi); - f7r = *(p3r + pos); - f7i = *(p3r + posi); + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); - t1r = f2r - f3r * w0r + f3i * w0i; - t1i = f2i - f3r * w0i - f3i * w0r; - f2r = f2r * Two - t1r; - f2i = f2i * Two - t1i; + t1r = f2r - f3r * w0r + f3i * w0i; + t1i = f2i - f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; - f0r = t0r + f2r * w1r - f2i * w1i; - f0i = t0i + f2r * w1i + f2i * w1r; - f2r = t0r * Two - f0r; - f2i = t0i * Two - f0i; + f0r = t0r + f2r * w1r - f2i * w1i; + f0i = t0i + f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; - f3r = f1r + t1r * w1i + t1i * w1r; - f3i = f1i - t1r * w1r + t1i * w1i; - f1r = f1r * Two - f3r; - f1i = f1i * Two - f3i; + f3r = f1r + t1r * w1i + t1i * w1r; + f3i = f1i - t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; - t0r = f4r + f5r * w0r - f5i * w0i; - t0i = f4i + f5r * w0i + f5i * w0r; - f5r = f4r * Two - t0r; - f5i = f4i * Two - t0i; + t0r = f4r + f5r * w0r - f5i * w0i; + t0i = f4i + f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; - t1r = f6r - f7r * w0r + f7i * w0i; - t1i = f6i - f7r * w0i - f7i * w0r; - f6r = f6r * Two - t1r; - f6i = f6i * Two - t1i; + t1r = f6r - f7r * w0r + f7i * w0i; + t1i = f6i - f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; - f4r = t0r + f6r * w1r - f6i * w1i; - f4i = t0i + f6r * w1i + f6i * w1r; - f6r = t0r * Two - f4r; - f6i = t0i * Two - f4i; + f4r = t0r + f6r * w1r - f6i * w1i; + f4i = t0i + f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; - f7r = f5r + t1r * w1i + t1i * w1r; - f7i = f5i - t1r * w1r + t1i * w1i; - f5r = f5r * Two - f7r; - f5i = f5i * Two - f7i; + f7r = f5r + t1r * w1i + t1i * w1r; + f7i = f5i - t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; - t0r = f0r - f4r * w2r + f4i * w2i; - t0i = f0i - f4r * w2i - f4i * w2r; - f0r = f0r * Two - t0r; - f0i = f0i * Two - t0i; + t0r = f0r - f4r * w2r + f4i * w2i; + t0i = f0i - f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; - t1r = f1r - f5r * w3r + f5i * w3i; - t1i = f1i - f5r * w3i - f5i * w3r; - f1r = f1r * Two - t1r; - f1i = f1i * Two - t1i; + t1r = f1r - f5r * w3r + f5i * w3i; + t1i = f1i - f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; - *(p0r + pos) = t0r; - *(p0r + posi) = t0i; - *p0r = f0r; - *(p0r + 1) = f0i; + *(p0r + pos) = t0r; + *(p0r + posi) = t0i; + *p0r = f0r; + *(p0r + 1) = f0i; - p0r += pnext; - f0r = *p0r; - f0i = *(p0r + 1); + p0r += pnext; + f0r = *p0r; + f0i = *(p0r + 1); - *(p1r + pos) = t1r; - *(p1r + posi) = t1i; - *p1r = f1r; - *(p1r + 1) = f1i; + *(p1r + pos) = t1r; + *(p1r + posi) = t1i; + *p1r = f1r; + *(p1r + 1) = f1i; - p1r += pnext; + p1r += pnext; - f1r = *p1r; - f1i = *(p1r + 1); + f1r = *p1r; + f1i = *(p1r + 1); - f4r = f2r - f6r * w2i - f6i * w2r; - f4i = f2i + f6r * w2r - f6i * w2i; - f6r = f2r * Two - f4r; - f6i = f2i * Two - f4i; + f4r = f2r - f6r * w2i - f6i * w2r; + f4i = f2i + f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; - f5r = f3r - f7r * w3i - f7i * w3r; - f5i = f3i + f7r * w3r - f7i * w3i; - f7r = f3r * Two - f5r; - f7i = f3i * Two - f5i; + f5r = f3r - f7r * w3i - f7i * w3r; + f5i = f3i + f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; - *p2r = f4r; - *(p2r + 1) = f4i; - *(p2r + pos) = f6r; - *(p2r + posi) = f6i; + *p2r = f4r; + *(p2r + 1) = f4i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; - p2r += pnext; + p2r += pnext; - *p3r = f5r; - *(p3r + 1) = f5i; - *(p3r + pos) = f7r; - *(p3r + posi) = f7i; + *p3r = f5r; + *(p3r + 1) = f5i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; - p3r += pnext; + p3r += pnext; - } - - f2r = *p2r; - f2i = *(p2r + 1); - f3r = *p3r; - f3i = *(p3r + 1); + } - t0r = f0r + f1r * w0r - f1i * w0i; - t0i = f0i + f1r * w0i + f1i * w0r; - f1r = f0r * Two - t0r; - f1i = f0i * Two - t0i; + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); - f4r = *(p0r + pos); - f4i = *(p0r + posi); - f5r = *(p1r + pos); - f5i = *(p1r + posi); + t0r = f0r + f1r * w0r - f1i * w0i; + t0i = f0i + f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; - f6r = *(p2r + pos); - f6i = *(p2r + posi); - f7r = *(p3r + pos); - f7i = *(p3r + posi); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - t1r = f2r - f3r * w0r + f3i * w0i; - t1i = f2i - f3r * w0i - f3i * w0r; - f2r = f2r * Two - t1r; - f2i = f2i * Two - t1i; + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); - f0r = t0r + f2r * w1r - f2i * w1i; - f0i = t0i + f2r * w1i + f2i * w1r; - f2r = t0r * Two - f0r; - f2i = t0i * Two - f0i; + t1r = f2r - f3r * w0r + f3i * w0i; + t1i = f2i - f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; - f3r = f1r + t1r * w1i + t1i * w1r; - f3i = f1i - t1r * w1r + t1i * w1i; - f1r = f1r * Two - f3r; - f1i = f1i * Two - f3i; + f0r = t0r + f2r * w1r - f2i * w1i; + f0i = t0i + f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; - if (DiffUCnt == NDiffU/2) - Uinc4 = -Uinc4; + f3r = f1r + t1r * w1i + t1i * w1r; + f3i = f1i - t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; - u0r += Uinc4; - u0i -= Uinc4; - u1r += Uinc2; - u1i -= Uinc2; - u2r += Uinc; - u2i -= Uinc; + if (DiffUCnt == NDiffU/2) + Uinc4 = -Uinc4; - pstrt += 2; + u0r += Uinc4; + u0i -= Uinc4; + u1r += Uinc2; + u1i -= Uinc2; + u2r += Uinc; + u2i -= Uinc; - t0r = f4r + f5r * w0r - f5i * w0i; - t0i = f4i + f5r * w0i + f5i * w0r; - f5r = f4r * Two - t0r; - f5i = f4i * Two - t0i; + pstrt += 2; - t1r = f6r - f7r * w0r + f7i * w0i; - t1i = f6i - f7r * w0i - f7i * w0r; - f6r = f6r * Two - t1r; - f6i = f6i * Two - t1i; + t0r = f4r + f5r * w0r - f5i * w0i; + t0i = f4i + f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; - f4r = t0r + f6r * w1r - f6i * w1i; - f4i = t0i + f6r * w1i + f6i * w1r; - f6r = t0r * Two - f4r; - f6i = t0i * Two - f4i; + t1r = f6r - f7r * w0r + f7i * w0i; + t1i = f6i - f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; - f7r = f5r + t1r * w1i + t1i * w1r; - f7i = f5i - t1r * w1r + t1i * w1i; - f5r = f5r * Two - f7r; - f5i = f5i * Two - f7i; + f4r = t0r + f6r * w1r - f6i * w1i; + f4i = t0i + f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; - w0r = *u0r; - w0i = *u0i; - w1r = *u1r; - w1i = *u1i; + f7r = f5r + t1r * w1i + t1i * w1r; + f7i = f5i - t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; - if (DiffUCnt <= NDiffU/2) - w0r = -w0r; + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; - t0r = f0r - f4r * w2r + f4i * w2i; - t0i = f0i - f4r * w2i - f4i * w2r; - f0r = f0r * Two - t0r; - f0i = f0i * Two - t0i; + if (DiffUCnt <= NDiffU/2) + w0r = -w0r; - f4r = f2r - f6r * w2i - f6i * w2r; - f4i = f2i + f6r * w2r - f6i * w2i; - f6r = f2r * Two - f4r; - f6i = f2i * Two - f4i; + t0r = f0r - f4r * w2r + f4i * w2i; + t0i = f0i - f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; - *(p0r + pos) = t0r; - *p2r = f4r; - *(p0r + posi) = t0i; - *(p2r + 1) = f4i; - w2r = *u2r; - w2i = *u2i; - *p0r = f0r; - *(p2r + pos) = f6r; - *(p0r + 1) = f0i; - *(p2r + posi) = f6i; + f4r = f2r - f6r * w2i - f6i * w2r; + f4i = f2i + f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; - p0r = pstrt; - p2r = pstrt + pinc + pinc; + *(p0r + pos) = t0r; + *p2r = f4r; + *(p0r + posi) = t0i; + *(p2r + 1) = f4i; + w2r = *u2r; + w2i = *u2i; + *p0r = f0r; + *(p2r + pos) = f6r; + *(p0r + 1) = f0i; + *(p2r + posi) = f6i; - t1r = f1r - f5r * w3r + f5i * w3i; - t1i = f1i - f5r * w3i - f5i * w3r; - f1r = f1r * Two - t1r; - f1i = f1i * Two - t1i; + p0r = pstrt; + p2r = pstrt + pinc + pinc; - f5r = f3r - f7r * w3i - f7i * w3r; - f5i = f3i + f7r * w3r - f7i * w3i; - f7r = f3r * Two - f5r; - f7i = f3i * Two - f5i; + t1r = f1r - f5r * w3r + f5i * w3i; + t1i = f1i - f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; - *(p1r + pos) = t1r; - *p3r = f5r; - *(p1r + posi) = t1i; - *(p3r + 1) = f5i; - w3r = *(u2r+U2toU3); - w3i = *(u2i-U2toU3); - *p1r = f1r; - *(p3r + pos) = f7r; - *(p1r + 1) = f1i; - *(p3r + posi) = f7i; + f5r = f3r - f7r * w3i - f7i * w3r; + f5i = f3i + f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; - p1r = pstrt + pinc; - p3r = p2r + pinc; - } - NSameU /= 8; - Uinc /= 8; - Uinc2 /= 8; - Uinc4 = Uinc * 4; - NDiffU *= 8; - pinc *= 8; - pnext *= 8; - pos *= 8; - posi = pos + 1; -} + *(p1r + pos) = t1r; + *p3r = f5r; + *(p1r + posi) = t1i; + *(p3r + 1) = f5i; + w3r = *(u2r+U2toU3); + w3i = *(u2i-U2toU3); + *p1r = f1r; + *(p3r + pos) = f7r; + *(p1r + 1) = f1i; + *(p3r + posi) = f7i; + + p1r = pstrt + pinc; + p3r = p2r + pinc; + } + NSameU /= 8; + Uinc /= 8; + Uinc2 /= 8; + Uinc4 = Uinc * 4; + NDiffU *= 8; + pinc *= 8; + pnext *= 8; + pos *= 8; + posi = pos + 1; + } } -void ifftrecurs(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt); -void ifftrecurs(float *ioptr, long M, float *Utbl, long Ustride, long NDiffU, long StageCnt){ -/* recursive bfstages calls to maximize on chip cache efficiency */ -long i1; -if (M <= MCACHE) - ibfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); /* RADIX 8 Stages */ -else{ - for (i1=0; i1<8; i1++){ - ifftrecurs(&ioptr[i1*POW2(M-3)*2], M-3, Utbl, 8*Ustride, NDiffU, StageCnt-1); /* RADIX 8 Stages */ - } - ibfstages(ioptr, M, Utbl, Ustride, POW2(M-3), 1); /* RADIX 8 Stage */ -} +void ifftrecurs(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt); +void ifftrecurs(double *ioptr, long M, double *Utbl, long Ustride, long NDiffU, long StageCnt) +{ + /* recursive bfstages calls to maximize on chip cache efficiency */ + long i1; + if (M <= MCACHE) + ibfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); /* RADIX 8 Stages */ + else { + for (i1=0; i1<8; i1++) { + ifftrecurs(&ioptr[i1*POW2(M-3)*2], M-3, Utbl, 8*Ustride, NDiffU, StageCnt-1); /* RADIX 8 Stages */ + } + ibfstages(ioptr, M, Utbl, Ustride, POW2(M-3), 1); /* RADIX 8 Stage */ + } } -void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow){ -/* Compute in-place inverse complex fft on the rows of the input array */ -/* INPUTS */ -/* *ioptr = input data array */ -/* M = log2 of fft size */ -/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ -/* *Utbl = cosine table */ -/* *BRLow = bit reversed counter table */ -/* OUTPUTS */ -/* *ioptr = output data array */ +void iffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow) +{ + /* Compute in-place inverse complex fft on the rows of the input array */ + /* INPUTS */ + /* *ioptr = input data array */ + /* M = log2 of fft size */ + /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ + /* *Utbl = cosine table */ + /* *BRLow = bit reversed counter table */ + /* OUTPUTS */ + /* *ioptr = output data array */ -long StageCnt; -long NDiffU; -const float scale = 1.0/POW2(M); + long StageCnt; + long NDiffU; + const double scale = 1.0/POW2(M); -switch (M){ -case 0: - break; -case 1: - for (;Rows>0;Rows--){ - ifft2pt(ioptr, scale); /* a 2 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 2: - for (;Rows>0;Rows--){ - ifft4pt(ioptr, scale); /* a 4 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 3: - for (;Rows>0;Rows--){ - ifft8pt(ioptr, scale); /* an 8 pt fft */ - ioptr += 2*POW2(M); - } - break; -default: - for (;Rows>0;Rows--){ + switch (M) { + case 0: + break; + case 1: + for (; Rows>0; Rows--) { + ifft2pt(ioptr, scale); /* a 2 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 2: + for (; Rows>0; Rows--) { + ifft4pt(ioptr, scale); /* a 4 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 3: + for (; Rows>0; Rows--) { + ifft8pt(ioptr, scale); /* an 8 pt fft */ + ioptr += 2*POW2(M); + } + break; + default: + for (; Rows>0; Rows--) { - scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - - StageCnt = (M-1) / 3; // number of radix 8 stages - NDiffU = 2; // one radix 2 stage already complete + scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - if ( (M-1-(StageCnt * 3)) == 1 ){ - ibfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ - NDiffU *= 2; - } + StageCnt = (M-1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete - if ( (M-1-(StageCnt * 3)) == 2 ){ - ibfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ - NDiffU *= 4; - } + if ( (M-1-(StageCnt * 3)) == 1 ) { + ibfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ + NDiffU *= 2; + } - if (M <= MCACHE) - ibfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ + if ( (M-1-(StageCnt * 3)) == 2 ) { + ibfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ + NDiffU *= 4; + } - else{ - ifftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ - } + if (M <= MCACHE) + ibfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ - ioptr += 2*POW2(M); - } -} + else { + ifftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); /* RADIX 8 Stages */ + } + + ioptr += 2*POW2(M); + } + } } /************************************************ parts of rffts1 *************************************************/ -static inline void rfft1pt(float *ioptr); -static inline void rfft1pt(float *ioptr){ -/*** RADIX 2 rfft ***/ -float f0r, f0i; -float t0r, t0i; +static inline void rfft1pt(double *ioptr); +static inline void rfft1pt(double *ioptr) +{ + /*** RADIX 2 rfft ***/ + double f0r, f0i; + double t0r, t0i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; - /* finish rfft */ -t0r = f0r + f0i; -t0i = f0r - f0i; + /* finish rfft */ + t0r = f0r + f0i; + t0i = f0r - f0i; - /* store result */ -ioptr[0] = t0r; -ioptr[1] = t0i; + /* store result */ + ioptr[0] = t0r; + ioptr[1] = t0i; } -static inline void rfft2pt(float *ioptr); -static inline void rfft2pt(float *ioptr){ -/*** RADIX 4 rfft ***/ -float f0r, f0i, f1r, f1i; -float t0r, t0i; +static inline void rfft2pt(double *ioptr); +static inline void rfft2pt(double *ioptr) +{ + /*** RADIX 4 rfft ***/ + double f0r, f0i, f1r, f1i; + double t0r, t0i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[2]; -f1i = ioptr[3]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; - /* Butterflys */ - /* - f0 - - t0 - f1 - 1 - f1 - */ + /* Butterflys */ + /* + f0 - - t0 + f1 - 1 - f1 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f1i - f0i; - /* finish rfft */ -f0r = t0r + t0i; -f0i = t0r - t0i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f1i - f0i; + /* finish rfft */ + f0r = t0r + t0i; + f0i = t0r - t0i; - /* store result */ -ioptr[0] = f0r; -ioptr[1] = f0i; -ioptr[2] = f1r; -ioptr[3] = f1i; + /* store result */ + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; } -static inline void rfft4pt(float *ioptr); -static inline void rfft4pt(float *ioptr){ -/*** RADIX 8 rfft ***/ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float t0r, t0i, t1r, t1i; -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -const float Two = 2.0; -const float scale = 0.5; +static inline void rfft4pt(double *ioptr); +static inline void rfft4pt(double *ioptr) +{ + /*** RADIX 8 rfft ***/ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double t0r, t0i, t1r, t1i; + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + const double Two = 2.0; + const double scale = 0.5; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[4]; -f1i = ioptr[5]; -f2r = ioptr[2]; -f2i = ioptr[3]; -f3r = ioptr[6]; -f3i = ioptr[7]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - f1 - 1 - f1 - - f1 - f2 - - f2 - 1 - f2 - f3 - 1 - t1 - -i - f3 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 + f1 - 1 - f1 - - f1 + f2 - - f2 - 1 - f2 + f3 - 1 - t1 - -i - f3 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = f2i - t0i; // neg for rfft + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = f2i - t0i; // neg for rfft -f3r = f1r - t1i; -f3i = f1i + t1r; -f1r = f1r + t1i; -f1i = f1i - t1r; + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; - /* finish rfft */ -t0r = f0r + f0i; /* compute Re(x[0]) */ -t0i = f0r - f0i; /* compute Re(x[N/2]) */ + /* finish rfft */ + t0r = f0r + f0i; /* compute Re(x[0]) */ + t0i = f0r - f0i; /* compute Re(x[N/2]) */ -t1r = f1r + f3r; -t1i = f1i - f3i; -f0r = f1i + f3i; -f0i = f3r - f1r; + t1r = f1r + f3r; + t1i = f1i - f3i; + f0r = f1i + f3i; + f0i = f3r - f1r; -f1r = t1r + w0r * f0r + w0r * f0i; -f1i = t1i - w0r * f0r + w0r * f0i; -f3r = Two * t1r - f1r; -f3i = f1i - Two * t1i; + f1r = t1r + w0r * f0r + w0r * f0i; + f1i = t1i - w0r * f0r + w0r * f0i; + f3r = Two * t1r - f1r; + f3i = f1i - Two * t1i; - /* store result */ -ioptr[4] = f2r; -ioptr[5] = f2i; -ioptr[0] = t0r; -ioptr[1] = t0i; + /* store result */ + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[0] = t0r; + ioptr[1] = t0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; } -static inline void rfft8pt(float *ioptr); -static inline void rfft8pt(float *ioptr){ -/*** RADIX 16 rfft ***/ -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -float w1r = MYCOSPID8; /* cos(pi/8) */ -float w1i = MYSINPID8; /* sin(pi/8) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; -const float scale = 0.5; +static inline void rfft8pt(double *ioptr); +static inline void rfft8pt(double *ioptr) +{ + /*** RADIX 16 rfft ***/ + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + double w1r = MYCOSPID8; /* cos(pi/8) */ + double w1i = MYSINPID8; /* sin(pi/8) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; + const double scale = 0.5; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; -f1r = ioptr[8]; -f1i = ioptr[9]; -f2r = ioptr[4]; -f2i = ioptr[5]; -f3r = ioptr[12]; -f3i = ioptr[13]; -f4r = ioptr[2]; -f4i = ioptr[3]; -f5r = ioptr[10]; -f5i = ioptr[11]; -f6r = ioptr[6]; -f6i = ioptr[7]; -f7r = ioptr[14]; -f7i = ioptr[15]; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1 - 1 - f1 - - f1 - - f1 - f2 - - f2 - 1 - f2 - - f2 - f3 - 1 - t1 - -i - f3 - - f3 - f4 - - t0 - - f4 - 1 - t0 - f5 - 1 - f5 - - f5 - w3 - f4 - f6 - - f6 - 1 - f6 - -i - t1 - f7 - 1 - t1 - -i - f7 - iw3- f6 - */ + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1 - 1 - f1 - - f1 - - f1 + f2 - - f2 - 1 - f2 - - f2 + f3 - 1 - t1 - -i - f3 - - f3 + f4 - - t0 - - f4 - 1 - t0 + f5 - 1 - f5 - - f5 - w3 - f4 + f6 - - f6 - 1 - f6 - -i - t1 + f7 - 1 - t1 - -i - f7 - iw3- f6 + */ -t0r = f0r + f1r; -t0i = f0i + f1i; -f1r = f0r - f1r; -f1i = f0i - f1i; + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r - t1i; -f3i = f1i + t1r; -f1r = f1r + t1i; -f1i = f1i - t1r; + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; -t0r = f4r + f5r; -t0i = f4i + f5i; -f5r = f4r - f5r; -f5i = f4i - f5i; + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; -t1r = f6r - f7r; -t1i = f6i - f7i; -f6r = f6r + f7r; -f6i = f6i + f7i; + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; -f4r = t0r + f6r; -f4i = t0i + f6i; -f6r = t0r - f6r; -f6i = t0i - f6i; + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; -f7r = f5r - t1i; -f7i = f5i + t1r; -f5r = f5r + t1i; -f5i = f5i - t1r; + f7r = f5r - t1i; + f7i = f5i + t1r; + f5r = f5r + t1i; + f5i = f5i - t1r; -t0r = f0r - f4r; -t0i = f4i - f0i; // neg for rfft -f0r = f0r + f4r; -f0i = f0i + f4i; + t0r = f0r - f4r; + t0i = f4i - f0i; // neg for rfft + f0r = f0r + f4r; + f0i = f0i + f4i; -t1r = f2r - f6i; -t1i = f2i + f6r; -f2r = f2r + f6i; -f2i = f2i - f6r; + t1r = f2r - f6i; + t1i = f2i + f6r; + f2r = f2r + f6i; + f2i = f2i - f6r; -f4r = f1r - f5r * w0r - f5i * w0r; -f4i = f1i + f5r * w0r - f5i * w0r; -f1r = f1r * Two - f4r; -f1i = f1i * Two - f4i; + f4r = f1r - f5r * w0r - f5i * w0r; + f4i = f1i + f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; -f6r = f3r + f7r * w0r - f7i * w0r; -f6i = f3i + f7r * w0r + f7i * w0r; -f3r = f3r * Two - f6r; -f3i = f3i * Two - f6i; + f6r = f3r + f7r * w0r - f7i * w0r; + f6i = f3i + f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; - /* finish rfft */ -f5r = f0r + f0i; /* compute Re(x[0]) */ -f5i = f0r - f0i; /* compute Re(x[N/2]) */ + /* finish rfft */ + f5r = f0r + f0i; /* compute Re(x[0]) */ + f5i = f0r - f0i; /* compute Re(x[N/2]) */ -f0r = f2r + t1r; -f0i = f2i - t1i; -f7r = f2i + t1i; -f7i = t1r - f2r; + f0r = f2r + t1r; + f0i = f2i - t1i; + f7r = f2i + t1i; + f7i = t1r - f2r; -f2r = f0r + w0r * f7r + w0r * f7i; -f2i = f0i - w0r * f7r + w0r * f7i; -t1r = Two * f0r - f2r; -t1i = f2i - Two * f0i; + f2r = f0r + w0r * f7r + w0r * f7i; + f2i = f0i - w0r * f7r + w0r * f7i; + t1r = Two * f0r - f2r; + t1i = f2i - Two * f0i; -f0r = f1r + f6r; -f0i = f1i - f6i; -f7r = f1i + f6i; -f7i = f6r - f1r; + f0r = f1r + f6r; + f0i = f1i - f6i; + f7r = f1i + f6i; + f7i = f6r - f1r; -f1r = f0r + w1r * f7r + w1i * f7i; -f1i = f0i - w1i * f7r + w1r * f7i; -f6r = Two * f0r - f1r; -f6i = f1i - Two * f0i; + f1r = f0r + w1r * f7r + w1i * f7i; + f1i = f0i - w1i * f7r + w1r * f7i; + f6r = Two * f0r - f1r; + f6i = f1i - Two * f0i; -f0r = f3r + f4r; -f0i = f3i - f4i; -f7r = f3i + f4i; -f7i = f4r - f3r; + f0r = f3r + f4r; + f0i = f3i - f4i; + f7r = f3i + f4i; + f7i = f4r - f3r; -f3r = f0r + w1i * f7r + w1r * f7i; -f3i = f0i - w1r * f7r + w1i * f7i; -f4r = Two * f0r - f3r; -f4i = f3i - Two * f0i; + f3r = f0r + w1i * f7r + w1r * f7i; + f3i = f0i - w1r * f7r + w1i * f7i; + f4r = Two * f0r - f3r; + f4i = f3i - Two * f0i; - /* store result */ -ioptr[8] = t0r; -ioptr[9] = t0i; -ioptr[0] = f5r; -ioptr[1] = f5i; + /* store result */ + ioptr[8] = t0r; + ioptr[9] = t0i; + ioptr[0] = f5r; + ioptr[1] = f5i; -ioptr[4] = scale*f2r; -ioptr[5] = scale*f2i; -ioptr[12] = scale*t1r; -ioptr[13] = scale*t1i; + ioptr[4] = scale*f2r; + ioptr[5] = scale*f2i; + ioptr[12] = scale*t1r; + ioptr[13] = scale*t1i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; -ioptr[10] = scale*f4r; -ioptr[11] = scale*f4i; -ioptr[14] = scale*f6r; -ioptr[15] = scale*f6i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; + ioptr[10] = scale*f4r; + ioptr[11] = scale*f4i; + ioptr[14] = scale*f6r; + ioptr[15] = scale*f6i; } -static inline void frstage(float *ioptr, long M, float *Utbl); -static inline void frstage(float *ioptr, long M, float *Utbl){ -/* Finish RFFT */ +static inline void frstage(double *ioptr, long M, double *Utbl); +static inline void frstage(double *ioptr, long M, double *Utbl) +{ + /* Finish RFFT */ -unsigned long pos; -unsigned long posi; -unsigned long diffUcnt; + unsigned long pos; + unsigned long posi; + unsigned long diffUcnt; -float *p0r, *p1r; -float *u0r, *u0i; + double *p0r, *p1r; + double *u0r, *u0i; -float w0r, w0i; -float f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; + double w0r, w0i; + double f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; -pos = POW2(M-1); -posi = pos + 1; + pos = POW2(M-1); + posi = pos + 1; -p0r = ioptr; -p1r = ioptr + pos/2; + p0r = ioptr; + p1r = ioptr + pos/2; -u0r = Utbl + POW2(M-3); + u0r = Utbl + POW2(M-3); -w0r = *u0r, + w0r = *u0r, -f0r = *(p0r); -f0i = *(p0r + 1); -f4r = *(p0r + pos); -f4i = *(p0r + posi); -f1r = *(p1r); -f1i = *(p1r + 1); -f5r = *(p1r + pos); -f5i = *(p1r + posi); + f0r = *(p0r); + f0i = *(p0r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - t0r = Two * f0r + Two * f0i; /* compute Re(x[0]) */ - t0i = Two * f0r - Two * f0i; /* compute Re(x[N/2]) */ - t1r = f4r + f4r; - t1i = -f4i - f4i; + t0r = Two * f0r + Two * f0i; /* compute Re(x[0]) */ + t0i = Two * f0r - Two * f0i; /* compute Re(x[N/2]) */ + t1r = f4r + f4r; + t1i = -f4i - f4i; - f0r = f1r + f5r; - f0i = f1i - f5i; - f4r = f1i + f5i; - f4i = f5r - f1r; + f0r = f1r + f5r; + f0i = f1i - f5i; + f4r = f1i + f5i; + f4i = f5r - f1r; - f1r = f0r + w0r * f4r + w0r * f4i; - f1i = f0i - w0r * f4r + w0r * f4i; - f5r = Two * f0r - f1r; - f5i = f1i - Two * f0i; + f1r = f0r + w0r * f4r + w0r * f4i; + f1i = f0i - w0r * f4r + w0r * f4i; + f5r = Two * f0r - f1r; + f5i = f1i - Two * f0i; -*(p0r) = t0r; -*(p0r + 1) = t0i; -*(p0r + pos) = t1r; -*(p0r + posi) = t1i; -*(p1r) = f1r; -*(p1r + 1) = f1i; -*(p1r + pos) = f5r; -*(p1r + posi) = f5i; + *(p0r) = t0r; + *(p0r + 1) = t0i; + *(p0r + pos) = t1r; + *(p0r + posi) = t1i; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; -u0r = Utbl + 1; -u0i = Utbl + (POW2(M-2)-1); + u0r = Utbl + 1; + u0i = Utbl + (POW2(M-2)-1); -w0r = *u0r, -w0i = *u0i; - -p0r = (ioptr + 2); -p1r = (ioptr + (POW2(M-2)-1)*2); + w0r = *u0r, + w0i = *u0i; - /* Butterflys */ - /* - f0 - t0 - - f0 - f5 - t1 - w0 - f5 + p0r = (ioptr + 2); + p1r = (ioptr + (POW2(M-2)-1)*2); - f1 - t0 - - f1 - f4 - t1 -iw0 - f4 - */ + /* Butterflys */ + /* + f0 - t0 - - f0 + f5 - t1 - w0 - f5 -for (diffUcnt = POW2(M-3)-1; diffUcnt > 0 ; diffUcnt--){ + f1 - t0 - - f1 + f4 - t1 -iw0 - f4 + */ - f0r = *(p0r); - f0i = *(p0r + 1); - f5r = *(p1r + pos); - f5i = *(p1r + posi); - f1r = *(p1r); - f1i = *(p1r + 1); - f4r = *(p0r + pos); - f4i = *(p0r + posi); + for (diffUcnt = POW2(M-3)-1; diffUcnt > 0 ; diffUcnt--) { - t0r = f0r + f5r; - t0i = f0i - f5i; - t1r = f0i + f5i; - t1i = f5r - f0r; + f0r = *(p0r); + f0i = *(p0r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); - f0r = t0r + w0r * t1r + w0i * t1i; - f0i = t0i - w0i * t1r + w0r * t1i; - f5r = Two * t0r - f0r; - f5i = f0i - Two * t0i; + t0r = f0r + f5r; + t0i = f0i - f5i; + t1r = f0i + f5i; + t1i = f5r - f0r; - t0r = f1r + f4r; - t0i = f1i - f4i; - t1r = f1i + f4i; - t1i = f4r - f1r; + f0r = t0r + w0r * t1r + w0i * t1i; + f0i = t0i - w0i * t1r + w0r * t1i; + f5r = Two * t0r - f0r; + f5i = f0i - Two * t0i; - f1r = t0r + w0i * t1r + w0r * t1i; - f1i = t0i - w0r * t1r + w0i * t1i; - f4r = Two * t0r - f1r; - f4i = f1i - Two * t0i; + t0r = f1r + f4r; + t0i = f1i - f4i; + t1r = f1i + f4i; + t1i = f4r - f1r; - *(p0r) = f0r; - *(p0r + 1) = f0i; - *(p1r + pos) = f5r; - *(p1r + posi) = f5i; + f1r = t0r + w0i * t1r + w0r * t1i; + f1i = t0i - w0r * t1r + w0i * t1i; + f4r = Two * t0r - f1r; + f4i = f1i - Two * t0i; - w0r = *++u0r; - w0i = *--u0i; + *(p0r) = f0r; + *(p0r + 1) = f0i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; - *(p1r) = f1r; - *(p1r + 1) = f1i; - *(p0r + pos) = f4r; - *(p0r + posi) = f4i; + w0r = *++u0r; + w0i = *--u0i; - p0r += 2; - p1r -= 2; -}; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + + p0r += 2; + p1r -= 2; + }; } -void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow){ -/* Compute in-place real fft on the rows of the input array */ -/* The result is the complex spectra of the positive frequencies */ -/* except the location for the first complex number contains the real */ -/* values for DC and Nyquest */ -/* INPUTS */ -/* *ioptr = real input data array */ -/* M = log2 of fft size */ -/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ -/* *Utbl = cosine table */ -/* *BRLow = bit reversed counter table */ -/* OUTPUTS */ -/* *ioptr = output data array in the following order */ -/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ +void rffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow) +{ + /* Compute in-place real fft on the rows of the input array */ + /* The result is the complex spectra of the positive frequencies */ + /* except the location for the first complex number contains the real */ + /* values for DC and Nyquest */ + /* INPUTS */ + /* *ioptr = real input data array */ + /* M = log2 of fft size */ + /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ + /* *Utbl = cosine table */ + /* *BRLow = bit reversed counter table */ + /* OUTPUTS */ + /* *ioptr = output data array in the following order */ + /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ -float scale; -long StageCnt; -long NDiffU; + double scale; + long StageCnt; + long NDiffU; -M=M-1; -switch (M){ -case -1: - break; -case 0: - for (;Rows>0;Rows--){ - rfft1pt(ioptr); /* a 2 pt fft */ - ioptr += 2*POW2(M); - } -case 1: - for (;Rows>0;Rows--){ - rfft2pt(ioptr); /* a 4 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 2: - for (;Rows>0;Rows--){ - rfft4pt(ioptr); /* an 8 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 3: - for (;Rows>0;Rows--){ - rfft8pt(ioptr); /* a 16 pt fft */ - ioptr += 2*POW2(M); - } - break; -default: - scale = 0.5; - for (;Rows>0;Rows--){ + M=M-1; + switch (M) { + case -1: + break; + case 0: + for (; Rows>0; Rows--) { + rfft1pt(ioptr); /* a 2 pt fft */ + ioptr += 2*POW2(M); + } + case 1: + for (; Rows>0; Rows--) { + rfft2pt(ioptr); /* a 4 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 2: + for (; Rows>0; Rows--) { + rfft4pt(ioptr); /* an 8 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 3: + for (; Rows>0; Rows--) { + rfft8pt(ioptr); /* a 16 pt fft */ + ioptr += 2*POW2(M); + } + break; + default: + scale = 0.5; + for (; Rows>0; Rows--) { - scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - - StageCnt = (M-1) / 3; // number of radix 8 stages - NDiffU = 2; // one radix 2 stage already complete + scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - if ( (M-1-(StageCnt * 3)) == 1 ){ - bfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ - NDiffU *= 2; - } + StageCnt = (M-1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete - if ( (M-1-(StageCnt * 3)) == 2 ){ - bfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ - NDiffU *= 4; - } + if ( (M-1-(StageCnt * 3)) == 1 ) { + bfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ + NDiffU *= 2; + } - if (M <= MCACHE){ - bfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ - frstage(ioptr, M+1, Utbl); - } + if ( (M-1-(StageCnt * 3)) == 2 ) { + bfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ + NDiffU *= 4; + } - else{ - fftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ - frstage(ioptr, M+1, Utbl); - } + if (M <= MCACHE) { + bfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ + frstage(ioptr, M+1, Utbl); + } - ioptr += 2*POW2(M); - } -} + else { + fftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ + frstage(ioptr, M+1, Utbl); + } + + ioptr += 2*POW2(M); + } + } } /************************************************ parts of riffts1 *************************************************/ -static inline void rifft1pt(float *ioptr, float scale); -static inline void rifft1pt(float *ioptr, float scale){ -/*** RADIX 2 rifft ***/ -float f0r, f0i; -float t0r, t0i; +static inline void rifft1pt(double *ioptr, double scale); +static inline void rifft1pt(double *ioptr, double scale) +{ + /*** RADIX 2 rifft ***/ + double f0r, f0i; + double t0r, t0i; - /* bit reversed load */ -f0r = ioptr[0]; -f0i = ioptr[1]; + /* bit reversed load */ + f0r = ioptr[0]; + f0i = ioptr[1]; - /* finish rfft */ -t0r = f0r + f0i; -t0i = f0r - f0i; + /* finish rfft */ + t0r = f0r + f0i; + t0i = f0r - f0i; - /* store result */ -ioptr[0] = scale*t0r; -ioptr[1] = scale*t0i; + /* store result */ + ioptr[0] = scale*t0r; + ioptr[1] = scale*t0i; } -static inline void rifft2pt(float *ioptr, float scale); -static inline void rifft2pt(float *ioptr, float scale){ -/*** RADIX 4 rifft ***/ -float f0r, f0i, f1r, f1i; -float t0r, t0i; -const float Two = 2.0; +static inline void rifft2pt(double *ioptr, double scale); +static inline void rifft2pt(double *ioptr, double scale) +{ + /*** RADIX 4 rifft ***/ + double f0r, f0i, f1r, f1i; + double t0r, t0i; + const double Two = 2.0; - /* bit reversed load */ -t0r = ioptr[0]; -t0i = ioptr[1]; -f1r = Two * ioptr[2]; -f1i = Two * ioptr[3]; + /* bit reversed load */ + t0r = ioptr[0]; + t0i = ioptr[1]; + f1r = Two * ioptr[2]; + f1i = Two * ioptr[3]; - /* start rifft */ -f0r = t0r + t0i; -f0i = t0r - t0i; - /* Butterflys */ - /* - f0 - - t0 - f1 - 1 - f1 - */ + /* start rifft */ + f0r = t0r + t0i; + f0i = t0r - t0i; + /* Butterflys */ + /* + f0 - - t0 + f1 - 1 - f1 + */ -t0r = f0r + f1r; -t0i = f0i - f1i; -f1r = f0r - f1r; -f1i = f0i + f1i; + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; - /* store result */ -ioptr[0] = scale*t0r; -ioptr[1] = scale*t0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; + /* store result */ + ioptr[0] = scale*t0r; + ioptr[1] = scale*t0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; } -static inline void rifft4pt(float *ioptr, float scale); -static inline void rifft4pt(float *ioptr, float scale){ -/*** RADIX 8 rifft ***/ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float t0r, t0i, t1r, t1i; -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -const float Two = 2.0; +static inline void rifft4pt(double *ioptr, double scale); +static inline void rifft4pt(double *ioptr, double scale) +{ + /*** RADIX 8 rifft ***/ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double t0r, t0i, t1r, t1i; + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + const double Two = 2.0; - /* bit reversed load */ -t0r = ioptr[0]; -t0i = ioptr[1]; -f2r = ioptr[2]; -f2i = ioptr[3]; -f1r = Two * ioptr[4]; -f1i = Two * ioptr[5]; -f3r = ioptr[6]; -f3i = ioptr[7]; + /* bit reversed load */ + t0r = ioptr[0]; + t0i = ioptr[1]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f1r = Two * ioptr[4]; + f1i = Two * ioptr[5]; + f3r = ioptr[6]; + f3i = ioptr[7]; - /* start rfft */ -f0r = t0r + t0i; /* compute Re(x[0]) */ -f0i = t0r - t0i; /* compute Re(x[N/2]) */ + /* start rfft */ + f0r = t0r + t0i; /* compute Re(x[0]) */ + f0i = t0r - t0i; /* compute Re(x[N/2]) */ -t1r = f2r + f3r; -t1i = f2i - f3i; -t0r = f2r - f3r; -t0i = f2i + f3i; + t1r = f2r + f3r; + t1i = f2i - f3i; + t0r = f2r - f3r; + t0i = f2i + f3i; -f2r = t1r - w0r * t0r - w0r * t0i; -f2i = t1i + w0r * t0r - w0r * t0i; -f3r = Two * t1r - f2r; -f3i = f2i - Two * t1i; + f2r = t1r - w0r * t0r - w0r * t0i; + f2i = t1i + w0r * t0r - w0r * t0i; + f3r = Two * t1r - f2r; + f3i = f2i - Two * t1i; - /* Butterflys */ - /* - f0 - - t0 - - f0 - f1 - 1 - f1 - - f1 - f2 - - f2 - 1 - f2 - f3 - 1 - t1 - i - f3 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 + f1 - 1 - f1 - - f1 + f2 - - f2 - 1 - f2 + f3 - 1 - t1 - i - f3 + */ -t0r = f0r + f1r; -t0i = f0i - f1i; -f1r = f0r - f1r; -f1i = f0i + f1i; + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r + t1i; -f3i = f1i - t1r; -f1r = f1r - t1i; -f1i = f1i + t1r; + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; - /* store result */ -ioptr[0] = scale*f0r; -ioptr[1] = scale*f0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[4] = scale*f2r; -ioptr[5] = scale*f2i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; + /* store result */ + ioptr[0] = scale*f0r; + ioptr[1] = scale*f0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[4] = scale*f2r; + ioptr[5] = scale*f2i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; } -static inline void rifft8pt(float *ioptr, float scale); -static inline void rifft8pt(float *ioptr, float scale){ -/*** RADIX 16 rifft ***/ -float w0r = 1.0/MYROOT2; /* cos(pi/4) */ -float w1r = MYCOSPID8; /* cos(pi/8) */ -float w1i = MYSINPID8; /* sin(pi/8) */ -float f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; -float f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; +static inline void rifft8pt(double *ioptr, double scale); +static inline void rifft8pt(double *ioptr, double scale) +{ + /*** RADIX 16 rifft ***/ + double w0r = 1.0/MYROOT2; /* cos(pi/4) */ + double w1r = MYCOSPID8; /* cos(pi/8) */ + double w1i = MYSINPID8; /* sin(pi/8) */ + double f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + double f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; - /* bit reversed load */ -t0r = ioptr[0]; -t0i = ioptr[1]; -f4r = ioptr[2]; -f4i = ioptr[3]; -f2r = ioptr[4]; -f2i = ioptr[5]; -f6r = ioptr[6]; -f6i = ioptr[7]; -f1r = Two * ioptr[8]; -f1i = Two * ioptr[9]; -f5r = ioptr[10]; -f5i = ioptr[11]; -f3r = ioptr[12]; -f3i = ioptr[13]; -f7r = ioptr[14]; -f7i = ioptr[15]; + /* bit reversed load */ + t0r = ioptr[0]; + t0i = ioptr[1]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f1r = Two * ioptr[8]; + f1i = Two * ioptr[9]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f7r = ioptr[14]; + f7i = ioptr[15]; - /* start rfft */ -f0r = t0r + t0i; /* compute Re(x[0]) */ -f0i = t0r - t0i; /* compute Re(x[N/2]) */ + /* start rfft */ + f0r = t0r + t0i; /* compute Re(x[0]) */ + f0i = t0r - t0i; /* compute Re(x[N/2]) */ -t0r = f2r + f3r; -t0i = f2i - f3i; -t1r = f2r - f3r; -t1i = f2i + f3i; + t0r = f2r + f3r; + t0i = f2i - f3i; + t1r = f2r - f3r; + t1i = f2i + f3i; -f2r = t0r - w0r * t1r - w0r * t1i; -f2i = t0i + w0r * t1r - w0r * t1i; -f3r = Two * t0r - f2r; -f3i = f2i - Two * t0i; + f2r = t0r - w0r * t1r - w0r * t1i; + f2i = t0i + w0r * t1r - w0r * t1i; + f3r = Two * t0r - f2r; + f3i = f2i - Two * t0i; -t0r = f4r + f7r; -t0i = f4i - f7i; -t1r = f4r - f7r; -t1i = f4i + f7i; + t0r = f4r + f7r; + t0i = f4i - f7i; + t1r = f4r - f7r; + t1i = f4i + f7i; -f4r = t0r - w1i * t1r - w1r * t1i; -f4i = t0i + w1r * t1r - w1i * t1i; -f7r = Two * t0r - f4r; -f7i = f4i - Two * t0i; + f4r = t0r - w1i * t1r - w1r * t1i; + f4i = t0i + w1r * t1r - w1i * t1i; + f7r = Two * t0r - f4r; + f7i = f4i - Two * t0i; -t0r = f6r + f5r; -t0i = f6i - f5i; -t1r = f6r - f5r; -t1i = f6i + f5i; + t0r = f6r + f5r; + t0i = f6i - f5i; + t1r = f6r - f5r; + t1i = f6i + f5i; -f6r = t0r - w1r * t1r - w1i * t1i; -f6i = t0i + w1i * t1r - w1r * t1i; -f5r = Two * t0r - f6r; -f5i = f6i - Two * t0i; + f6r = t0r - w1r * t1r - w1i * t1i; + f6i = t0i + w1i * t1r - w1r * t1i; + f5r = Two * t0r - f6r; + f5i = f6i - Two * t0i; - /* Butterflys */ - /* - f0 - - t0 - - f0 - - f0 - f1* - 1 - f1 - - f1 - - f1 - f2 - - f2 - 1 - f2 - - f2 - f3 - 1 - t1 - i - f3 - - f3 - f4 - - t0 - - f4 - 1 - t0 - f5 - 1 - f5 - - f5 - w3 - f4 - f6 - - f6 - 1 - f6 - i - t1 - f7 - 1 - t1 - i - f7 - iw3- f6 - */ + /* Butterflys */ + /* + f0 - - t0 - - f0 - - f0 + f1* - 1 - f1 - - f1 - - f1 + f2 - - f2 - 1 - f2 - - f2 + f3 - 1 - t1 - i - f3 - - f3 + f4 - - t0 - - f4 - 1 - t0 + f5 - 1 - f5 - - f5 - w3 - f4 + f6 - - f6 - 1 - f6 - i - t1 + f7 - 1 - t1 - i - f7 - iw3- f6 + */ -t0r = f0r + f1r; -t0i = f0i - f1i; -f1r = f0r - f1r; -f1i = f0i + f1i; + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; -t1r = f2r - f3r; -t1i = f2i - f3i; -f2r = f2r + f3r; -f2i = f2i + f3i; + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; -f0r = t0r + f2r; -f0i = t0i + f2i; -f2r = t0r - f2r; -f2i = t0i - f2i; + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; -f3r = f1r + t1i; -f3i = f1i - t1r; -f1r = f1r - t1i; -f1i = f1i + t1r; + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; -t0r = f4r + f5r; -t0i = f4i + f5i; -f5r = f4r - f5r; -f5i = f4i - f5i; + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; -t1r = f6r - f7r; -t1i = f6i - f7i; -f6r = f6r + f7r; -f6i = f6i + f7i; + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; -f4r = t0r + f6r; -f4i = t0i + f6i; -f6r = t0r - f6r; -f6i = t0i - f6i; + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; -f7r = f5r + t1i; -f7i = f5i - t1r; -f5r = f5r - t1i; -f5i = f5i + t1r; + f7r = f5r + t1i; + f7i = f5i - t1r; + f5r = f5r - t1i; + f5i = f5i + t1r; -t0r = f0r - f4r; -t0i = f0i - f4i; -f0r = f0r + f4r; -f0i = f0i + f4i; + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; -t1r = f2r + f6i; -t1i = f2i - f6r; -f2r = f2r - f6i; -f2i = f2i + f6r; + t1r = f2r + f6i; + t1i = f2i - f6r; + f2r = f2r - f6i; + f2i = f2i + f6r; -f4r = f1r - f5r * w0r + f5i * w0r; -f4i = f1i - f5r * w0r - f5i * w0r; -f1r = f1r * Two - f4r; -f1i = f1i * Two - f4i; + f4r = f1r - f5r * w0r + f5i * w0r; + f4i = f1i - f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; -f6r = f3r + f7r * w0r + f7i * w0r; -f6i = f3i - f7r * w0r + f7i * w0r; -f3r = f3r * Two - f6r; -f3i = f3i * Two - f6i; + f6r = f3r + f7r * w0r + f7i * w0r; + f6i = f3i - f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; - /* store result */ -ioptr[0] = scale*f0r; -ioptr[1] = scale*f0i; -ioptr[2] = scale*f1r; -ioptr[3] = scale*f1i; -ioptr[4] = scale*f2r; -ioptr[5] = scale*f2i; -ioptr[6] = scale*f3r; -ioptr[7] = scale*f3i; -ioptr[8] = scale*t0r; -ioptr[9] = scale*t0i; -ioptr[10] = scale*f4r; -ioptr[11] = scale*f4i; -ioptr[12] = scale*t1r; -ioptr[13] = scale*t1i; -ioptr[14] = scale*f6r; -ioptr[15] = scale*f6i; + /* store result */ + ioptr[0] = scale*f0r; + ioptr[1] = scale*f0i; + ioptr[2] = scale*f1r; + ioptr[3] = scale*f1i; + ioptr[4] = scale*f2r; + ioptr[5] = scale*f2i; + ioptr[6] = scale*f3r; + ioptr[7] = scale*f3i; + ioptr[8] = scale*t0r; + ioptr[9] = scale*t0i; + ioptr[10] = scale*f4r; + ioptr[11] = scale*f4i; + ioptr[12] = scale*t1r; + ioptr[13] = scale*t1i; + ioptr[14] = scale*f6r; + ioptr[15] = scale*f6i; } -static inline void ifrstage(float *ioptr, long M, float *Utbl); -static inline void ifrstage(float *ioptr, long M, float *Utbl){ -/* Start RIFFT */ +static inline void ifrstage(double *ioptr, long M, double *Utbl); +static inline void ifrstage(double *ioptr, long M, double *Utbl) +{ + /* Start RIFFT */ -unsigned long pos; -unsigned long posi; -unsigned long diffUcnt; + unsigned long pos; + unsigned long posi; + unsigned long diffUcnt; -float *p0r, *p1r; -float *u0r, *u0i; + double *p0r, *p1r; + double *u0r, *u0i; -float w0r, w0i; -float f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; -float t0r, t0i, t1r, t1i; -const float Two = 2.0; + double w0r, w0i; + double f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; + double t0r, t0i, t1r, t1i; + const double Two = 2.0; -pos = POW2(M-1); -posi = pos + 1; + pos = POW2(M-1); + posi = pos + 1; -p0r = ioptr; -p1r = ioptr + pos/2; + p0r = ioptr; + p1r = ioptr + pos/2; -u0r = Utbl + POW2(M-3); + u0r = Utbl + POW2(M-3); -w0r = *u0r, + w0r = *u0r, -f0r = *(p0r); -f0i = *(p0r + 1); -f4r = *(p0r + pos); -f4i = *(p0r + posi); -f1r = *(p1r); -f1i = *(p1r + 1); -f5r = *(p1r + pos); -f5i = *(p1r + posi); + f0r = *(p0r); + f0i = *(p0r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); - t0r = f0r + f0i; - t0i = f0r - f0i; - t1r = f4r + f4r; - t1i = -f4i - f4i; + t0r = f0r + f0i; + t0i = f0r - f0i; + t1r = f4r + f4r; + t1i = -f4i - f4i; - f0r = f1r + f5r; - f0i = f1i - f5i; - f4r = f1r - f5r; - f4i = f1i + f5i; + f0r = f1r + f5r; + f0i = f1i - f5i; + f4r = f1r - f5r; + f4i = f1i + f5i; - f1r = f0r - w0r * f4r - w0r * f4i; - f1i = f0i + w0r * f4r - w0r * f4i; - f5r = Two * f0r - f1r; - f5i = f1i - Two * f0i; + f1r = f0r - w0r * f4r - w0r * f4i; + f1i = f0i + w0r * f4r - w0r * f4i; + f5r = Two * f0r - f1r; + f5i = f1i - Two * f0i; -*(p0r) = t0r; -*(p0r + 1) = t0i; -*(p0r + pos) = t1r; -*(p0r + posi) = t1i; -*(p1r) = f1r; -*(p1r + 1) = f1i; -*(p1r + pos) = f5r; -*(p1r + posi) = f5i; + *(p0r) = t0r; + *(p0r + 1) = t0i; + *(p0r + pos) = t1r; + *(p0r + posi) = t1i; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; -u0r = Utbl + 1; -u0i = Utbl + (POW2(M-2)-1); + u0r = Utbl + 1; + u0i = Utbl + (POW2(M-2)-1); -w0r = *u0r, -w0i = *u0i; - -p0r = (ioptr + 2); -p1r = (ioptr + (POW2(M-2)-1)*2); + w0r = *u0r, + w0i = *u0i; - /* Butterflys */ - /* - f0 - t0 - f0 - f1 - t1 -w0- f1 + p0r = (ioptr + 2); + p1r = (ioptr + (POW2(M-2)-1)*2); - f2 - t0 - f2 - f3 - t1 -iw0- f3 - */ + /* Butterflys */ + /* + f0 - t0 - f0 + f1 - t1 -w0- f1 -for (diffUcnt = POW2(M-3)-1; diffUcnt > 0 ; diffUcnt--){ + f2 - t0 - f2 + f3 - t1 -iw0- f3 + */ - f0r = *(p0r); - f0i = *(p0r + 1); - f5r = *(p1r + pos); - f5i = *(p1r + posi); - f1r = *(p1r); - f1i = *(p1r + 1); - f4r = *(p0r + pos); - f4i = *(p0r + posi); + for (diffUcnt = POW2(M-3)-1; diffUcnt > 0 ; diffUcnt--) { - t0r = f0r + f5r; - t0i = f0i - f5i; - t1r = f0r - f5r; - t1i = f0i + f5i; + f0r = *(p0r); + f0i = *(p0r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); - f0r = t0r - w0i * t1r - w0r * t1i; - f0i = t0i + w0r * t1r - w0i * t1i; - f5r = Two * t0r - f0r; - f5i = f0i - Two * t0i; + t0r = f0r + f5r; + t0i = f0i - f5i; + t1r = f0r - f5r; + t1i = f0i + f5i; - t0r = f1r + f4r; - t0i = f1i - f4i; - t1r = f1r - f4r; - t1i = f1i + f4i; + f0r = t0r - w0i * t1r - w0r * t1i; + f0i = t0i + w0r * t1r - w0i * t1i; + f5r = Two * t0r - f0r; + f5i = f0i - Two * t0i; - f1r = t0r - w0r * t1r - w0i * t1i; - f1i = t0i + w0i * t1r - w0r * t1i; - f4r = Two * t0r - f1r; - f4i = f1i - Two * t0i; + t0r = f1r + f4r; + t0i = f1i - f4i; + t1r = f1r - f4r; + t1i = f1i + f4i; - *(p0r) = f0r; - *(p0r + 1) = f0i; - *(p1r + pos) = f5r; - *(p1r + posi) = f5i; + f1r = t0r - w0r * t1r - w0i * t1i; + f1i = t0i + w0i * t1r - w0r * t1i; + f4r = Two * t0r - f1r; + f4i = f1i - Two * t0i; - w0r = *++u0r; - w0i = *--u0i; + *(p0r) = f0r; + *(p0r + 1) = f0i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; - *(p1r) = f1r; - *(p1r + 1) = f1i; - *(p0r + pos) = f4r; - *(p0r + posi) = f4i; + w0r = *++u0r; + w0i = *--u0i; - p0r += 2; - p1r -= 2; -}; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + + p0r += 2; + p1r -= 2; + }; } -void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow){ -/* Compute in-place real ifft on the rows of the input array */ -/* data order as from rffts1 */ -/* INPUTS */ -/* *ioptr = input data array in the following order */ -/* M = log2 of fft size */ -/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ -/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ -/* *Utbl = cosine table */ -/* *BRLow = bit reversed counter table */ -/* OUTPUTS */ -/* *ioptr = real output data array */ +void riffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow) +{ + /* Compute in-place real ifft on the rows of the input array */ + /* data order as from rffts1 */ + /* INPUTS */ + /* *ioptr = input data array in the following order */ + /* M = log2 of fft size */ + /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ + /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ + /* *Utbl = cosine table */ + /* *BRLow = bit reversed counter table */ + /* OUTPUTS */ + /* *ioptr = real output data array */ -float scale; -long StageCnt; -long NDiffU; + double scale; + long StageCnt; + long NDiffU; -scale = 1.0/POW2(M); -M=M-1; -switch (M){ -case -1: - break; -case 0: - for (;Rows>0;Rows--){ - rifft1pt(ioptr, scale); /* a 2 pt fft */ - ioptr += 2*POW2(M); - } -case 1: - for (;Rows>0;Rows--){ - rifft2pt(ioptr, scale); /* a 4 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 2: - for (;Rows>0;Rows--){ - rifft4pt(ioptr, scale); /* an 8 pt fft */ - ioptr += 2*POW2(M); - } - break; -case 3: - for (;Rows>0;Rows--){ - rifft8pt(ioptr, scale); /* a 16 pt fft */ - ioptr += 2*POW2(M); - } - break; -default: - for (;Rows>0;Rows--){ + scale = 1.0/POW2(M); + M=M-1; + switch (M) { + case -1: + break; + case 0: + for (; Rows>0; Rows--) { + rifft1pt(ioptr, scale); /* a 2 pt fft */ + ioptr += 2*POW2(M); + } + case 1: + for (; Rows>0; Rows--) { + rifft2pt(ioptr, scale); /* a 4 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 2: + for (; Rows>0; Rows--) { + rifft4pt(ioptr, scale); /* an 8 pt fft */ + ioptr += 2*POW2(M); + } + break; + case 3: + for (; Rows>0; Rows--) { + rifft8pt(ioptr, scale); /* a 16 pt fft */ + ioptr += 2*POW2(M); + } + break; + default: + for (; Rows>0; Rows--) { - ifrstage(ioptr, M+1, Utbl); + ifrstage(ioptr, M+1, Utbl); - scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - - StageCnt = (M-1) / 3; // number of radix 8 stages - NDiffU = 2; // one radix 2 stage already complete + scbitrevR2(ioptr, M, BRLow, scale); /* bit reverse and first radix 2 stage */ - if ( (M-1-(StageCnt * 3)) == 1 ){ - ibfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ - NDiffU *= 2; - } + StageCnt = (M-1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete - if ( (M-1-(StageCnt * 3)) == 2 ){ - ibfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ - NDiffU *= 4; - } + if ( (M-1-(StageCnt * 3)) == 1 ) { + ibfR2(ioptr, M, NDiffU); /* 1 radix 2 stage */ + NDiffU *= 2; + } - if (M <= MCACHE){ - ibfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ - } + if ( (M-1-(StageCnt * 3)) == 2 ) { + ibfR4(ioptr, M, NDiffU); /* 1 radix 4 stage */ + NDiffU *= 4; + } - else{ - ifftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ - } + if (M <= MCACHE) { + ibfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ + } - ioptr += 2*POW2(M); - } -} + else { + ifftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); /* RADIX 8 Stages */ + } + + ioptr += 2*POW2(M); + } + } } diff --git a/src/maths/fft/fftlib.h b/src/maths/fft/fftlib.h index 86de4acfa..7a87c7bc4 100644 --- a/src/maths/fft/fftlib.h +++ b/src/maths/fft/fftlib.h @@ -1,14 +1,14 @@ #define MYRECIPLN2 1.442695040888963407359924681001892137426 // 1.0/log(2) /* some useful conversions between a number and its power of 2 */ -#define LOG2(a) (MYRECIPLN2*log(a)) // floating point logarithm base 2 +#define LOG2(a) (MYRECIPLN2*log(a)) // doubleing point logarithm base 2 #define POW2(m) ((unsigned long) 1 << (m)) // integer power of 2 for m<32 /******************************************************************* lower level fft stuff called by routines in fftext.c and fft2d.c *******************************************************************/ -void fftCosInit(long M, float *Utbl); +void fftCosInit(long M, double *Utbl); /* Compute Utbl, the cosine table for ffts */ /* of size (pow(2,M)/4 +1) */ /* INPUTS */ @@ -24,7 +24,7 @@ void fftBRInit(long M, short *BRLow); /* OUTPUTS */ /* *BRLow = bit reversed counter table */ -void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); +void ffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow); /* Compute in-place complex fft on the rows of the input array */ /* INPUTS */ /* *ioptr = input data array */ @@ -35,7 +35,7 @@ void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); /* OUTPUTS */ /* *ioptr = output data array */ -void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); +void iffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow); /* Compute in-place inverse complex fft on the rows of the input array */ /* INPUTS */ /* *ioptr = input data array */ @@ -46,7 +46,7 @@ void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); /* OUTPUTS */ /* *ioptr = output data array */ -void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); +void rffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow); /* Compute in-place real fft on the rows of the input array */ /* The result is the complex spectra of the positive frequencies */ /* except the location for the first complex number contains the real */ @@ -62,7 +62,7 @@ void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ -void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow); +void riffts1(double *ioptr, long M, long Rows, double *Utbl, short *BRLow); /* Compute in-place real ifft on the rows of the input array */ /* data order as from rffts1 */ /* INPUTS */ diff --git a/src/maths/fft/matlib.c b/src/maths/fft/matlib.c index 4e8b682e1..1b1832282 100644 --- a/src/maths/fft/matlib.c +++ b/src/maths/fft/matlib.c @@ -1,297 +1,300 @@ /* a few routines from a vector/matrix library */ #include "matlib.h" -void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){ -/* not in-place matrix transpose */ -/* INPUTS */ -/* *indata = input data array */ -/* iRsiz = offset to between rows of input data array */ -/* oRsiz = offset to between rows of output data array */ -/* Nrows = number of rows in input data array */ -/* Ncols = number of columns in input data array */ -/* OUTPUTS */ -/* *outdata = output data array */ +void xpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols) +{ + /* not in-place matrix transpose */ + /* INPUTS */ + /* *indata = input data array */ + /* iRsiz = offset to between rows of input data array */ + /* oRsiz = offset to between rows of output data array */ + /* Nrows = number of rows in input data array */ + /* Ncols = number of columns in input data array */ + /* OUTPUTS */ + /* *outdata = output data array */ -float *irow; /* pointer to input row start */ -float *ocol; /* pointer to output col start */ -float *idata; /* pointer to input data */ -float *odata; /* pointer to output data */ -long RowCnt; /* row counter */ -long ColCnt; /* col counter */ -float T0; /* data storage */ -float T1; /* data storage */ -float T2; /* data storage */ -float T3; /* data storage */ -float T4; /* data storage */ -float T5; /* data storage */ -float T6; /* data storage */ -float T7; /* data storage */ -const long inRsizd1 = iRsiz; -const long inRsizd2 = 2*iRsiz; -const long inRsizd3 = inRsizd2+iRsiz; -const long inRsizd4 = 4*iRsiz; -const long inRsizd5 = inRsizd3+inRsizd2; -const long inRsizd6 = inRsizd4+inRsizd2; -const long inRsizd7 = inRsizd4+inRsizd3; -const long inRsizd8 = 8*iRsiz; + double *irow; /* pointer to input row start */ + double *ocol; /* pointer to output col start */ + double *idata; /* pointer to input data */ + double *odata; /* pointer to output data */ + long RowCnt; /* row counter */ + long ColCnt; /* col counter */ + double T0; /* data storage */ + double T1; /* data storage */ + double T2; /* data storage */ + double T3; /* data storage */ + double T4; /* data storage */ + double T5; /* data storage */ + double T6; /* data storage */ + double T7; /* data storage */ + const long inRsizd1 = iRsiz; + const long inRsizd2 = 2*iRsiz; + const long inRsizd3 = inRsizd2+iRsiz; + const long inRsizd4 = 4*iRsiz; + const long inRsizd5 = inRsizd3+inRsizd2; + const long inRsizd6 = inRsizd4+inRsizd2; + const long inRsizd7 = inRsizd4+inRsizd3; + const long inRsizd8 = 8*iRsiz; -ocol = outdata; -irow = indata; -for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){ - idata = irow; - odata = ocol; - for (ColCnt=Ncols; ColCnt>0; ColCnt--){ - T0 = *idata; - T1 = *(idata+inRsizd1); - T2 = *(idata+inRsizd2); - T3 = *(idata+inRsizd3); - T4 = *(idata+inRsizd4); - T5 = *(idata+inRsizd5); - T6 = *(idata+inRsizd6); - T7 = *(idata+inRsizd7); - *odata = T0; - *(odata+1) = T1; - *(odata+2) = T2; - *(odata+3) = T3; - *(odata+4) = T4; - *(odata+5) = T5; - *(odata+6) = T6; - *(odata+7) = T7; - idata++; - odata += oRsiz; - } - irow += inRsizd8; - ocol += 8; -} -if (Nrows%8 != 0){ - for (ColCnt=Ncols; ColCnt>0; ColCnt--){ - idata = irow++; - odata = ocol; - ocol += oRsiz; - for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){ - T0 = *idata; - *odata++ = T0; - idata += iRsiz; - } - } -} + ocol = outdata; + irow = indata; + for (RowCnt=Nrows/8; RowCnt>0; RowCnt--) { + idata = irow; + odata = ocol; + for (ColCnt=Ncols; ColCnt>0; ColCnt--) { + T0 = *idata; + T1 = *(idata+inRsizd1); + T2 = *(idata+inRsizd2); + T3 = *(idata+inRsizd3); + T4 = *(idata+inRsizd4); + T5 = *(idata+inRsizd5); + T6 = *(idata+inRsizd6); + T7 = *(idata+inRsizd7); + *odata = T0; + *(odata+1) = T1; + *(odata+2) = T2; + *(odata+3) = T3; + *(odata+4) = T4; + *(odata+5) = T5; + *(odata+6) = T6; + *(odata+7) = T7; + idata++; + odata += oRsiz; + } + irow += inRsizd8; + ocol += 8; + } + if (Nrows%8 != 0) { + for (ColCnt=Ncols; ColCnt>0; ColCnt--) { + idata = irow++; + odata = ocol; + ocol += oRsiz; + for (RowCnt=Nrows%8; RowCnt>0; RowCnt--) { + T0 = *idata; + *odata++ = T0; + idata += iRsiz; + } + } + } } -void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){ -/* not in-place complex float matrix transpose */ -/* INPUTS */ -/* *indata = input data array */ -/* iRsiz = offset to between rows of input data array */ -/* oRsiz = offset to between rows of output data array */ -/* Nrows = number of rows in input data array */ -/* Ncols = number of columns in input data array */ -/* OUTPUTS */ -/* *outdata = output data array */ +void cxpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols) +{ + /* not in-place complex double matrix transpose */ + /* INPUTS */ + /* *indata = input data array */ + /* iRsiz = offset to between rows of input data array */ + /* oRsiz = offset to between rows of output data array */ + /* Nrows = number of rows in input data array */ + /* Ncols = number of columns in input data array */ + /* OUTPUTS */ + /* *outdata = output data array */ -float *irow; /* pointer to input row start */ -float *ocol; /* pointer to output col start */ -float *idata; /* pointer to input data */ -float *odata; /* pointer to output data */ -long RowCnt; /* row counter */ -long ColCnt; /* col counter */ -float T0r; /* data storage */ -float T0i; /* data storage */ -float T1r; /* data storage */ -float T1i; /* data storage */ -float T2r; /* data storage */ -float T2i; /* data storage */ -float T3r; /* data storage */ -float T3i; /* data storage */ -const long inRsizd1 = 2*iRsiz; -const long inRsizd1i = 2*iRsiz + 1; -const long inRsizd2 = 4*iRsiz; -const long inRsizd2i = 4*iRsiz + 1; -const long inRsizd3 = inRsizd2+inRsizd1; -const long inRsizd3i = inRsizd2+inRsizd1 + 1; -const long inRsizd4 = 8*iRsiz; + double *irow; /* pointer to input row start */ + double *ocol; /* pointer to output col start */ + double *idata; /* pointer to input data */ + double *odata; /* pointer to output data */ + long RowCnt; /* row counter */ + long ColCnt; /* col counter */ + double T0r; /* data storage */ + double T0i; /* data storage */ + double T1r; /* data storage */ + double T1i; /* data storage */ + double T2r; /* data storage */ + double T2i; /* data storage */ + double T3r; /* data storage */ + double T3i; /* data storage */ + const long inRsizd1 = 2*iRsiz; + const long inRsizd1i = 2*iRsiz + 1; + const long inRsizd2 = 4*iRsiz; + const long inRsizd2i = 4*iRsiz + 1; + const long inRsizd3 = inRsizd2+inRsizd1; + const long inRsizd3i = inRsizd2+inRsizd1 + 1; + const long inRsizd4 = 8*iRsiz; -ocol = outdata; -irow = indata; -for (RowCnt=Nrows/4; RowCnt>0; RowCnt--){ - idata = irow; - odata = ocol; - for (ColCnt=Ncols; ColCnt>0; ColCnt--){ - T0r = *idata; - T0i = *(idata +1); - T1r = *(idata+inRsizd1); - T1i = *(idata+inRsizd1i); - T2r = *(idata+inRsizd2); - T2i = *(idata+inRsizd2i); - T3r = *(idata+inRsizd3); - T3i = *(idata+inRsizd3i); - *odata = T0r; - *(odata+1) = T0i; - *(odata+2) = T1r; - *(odata+3) = T1i; - *(odata+4) = T2r; - *(odata+5) = T2i; - *(odata+6) = T3r; - *(odata+7) = T3i; - idata+=2; - odata += 2*oRsiz; - } - irow += inRsizd4; - ocol += 8; -} -if (Nrows%4 != 0){ - for (ColCnt=Ncols; ColCnt>0; ColCnt--){ - idata = irow; - odata = ocol; - for (RowCnt=Nrows%4; RowCnt>0; RowCnt--){ - T0r = *idata; - T0i = *(idata+1); - *odata = T0r; - *(odata+1) = T0i; - odata+=2; - idata += 2*iRsiz; - } - irow+=2; - ocol += 2*oRsiz; - } -} + ocol = outdata; + irow = indata; + for (RowCnt=Nrows/4; RowCnt>0; RowCnt--) { + idata = irow; + odata = ocol; + for (ColCnt=Ncols; ColCnt>0; ColCnt--) { + T0r = *idata; + T0i = *(idata +1); + T1r = *(idata+inRsizd1); + T1i = *(idata+inRsizd1i); + T2r = *(idata+inRsizd2); + T2i = *(idata+inRsizd2i); + T3r = *(idata+inRsizd3); + T3i = *(idata+inRsizd3i); + *odata = T0r; + *(odata+1) = T0i; + *(odata+2) = T1r; + *(odata+3) = T1i; + *(odata+4) = T2r; + *(odata+5) = T2i; + *(odata+6) = T3r; + *(odata+7) = T3i; + idata+=2; + odata += 2*oRsiz; + } + irow += inRsizd4; + ocol += 8; + } + if (Nrows%4 != 0) { + for (ColCnt=Ncols; ColCnt>0; ColCnt--) { + idata = irow; + odata = ocol; + for (RowCnt=Nrows%4; RowCnt>0; RowCnt--) { + T0r = *idata; + T0i = *(idata+1); + *odata = T0r; + *(odata+1) = T0i; + odata+=2; + idata += 2*iRsiz; + } + irow+=2; + ocol += 2*oRsiz; + } + } } -void cvprod(float *a, float *b, float *out, long N){ -/* complex vector product, can be in-place */ -/* product of complex vector *a times complex vector *b */ -/* INPUTS */ -/* N vector length */ -/* *a complex vector length N complex numbers */ -/* *b complex vector length N complex numbers */ -/* OUTPUTS */ -/* *out complex vector length N */ +void cvprod(double *a, double *b, double *out, long N) +{ + /* complex vector product, can be in-place */ + /* product of complex vector *a times complex vector *b */ + /* INPUTS */ + /* N vector length */ + /* *a complex vector length N complex numbers */ + /* *b complex vector length N complex numbers */ + /* OUTPUTS */ + /* *out complex vector length N */ -long OutCnt; /* output counter */ -float A0R; /* A storage */ -float A0I; /* A storage */ -float A1R; /* A storage */ -float A1I; /* A storage */ -float A2R; /* A storage */ -float A2I; /* A storage */ -float A3R; /* A storage */ -float A3I; /* A storage */ -float B0R; /* B storage */ -float B0I; /* B storage */ -float B1R; /* B storage */ -float B1I; /* B storage */ -float B2R; /* B storage */ -float B2I; /* B storage */ -float B3R; /* B storage */ -float B3I; /* B storage */ -float T0R; /* TMP storage */ -float T0I; /* TMP storage */ -float T1R; /* TMP storage */ -float T1I; /* TMP storage */ -float T2R; /* TMP storage */ -float T2I; /* TMP storage */ -float T3R; /* TMP storage */ -float T3I; /* TMP storage */ + long OutCnt; /* output counter */ + double A0R; /* A storage */ + double A0I; /* A storage */ + double A1R; /* A storage */ + double A1I; /* A storage */ + double A2R; /* A storage */ + double A2I; /* A storage */ + double A3R; /* A storage */ + double A3I; /* A storage */ + double B0R; /* B storage */ + double B0I; /* B storage */ + double B1R; /* B storage */ + double B1I; /* B storage */ + double B2R; /* B storage */ + double B2I; /* B storage */ + double B3R; /* B storage */ + double B3I; /* B storage */ + double T0R; /* TMP storage */ + double T0I; /* TMP storage */ + double T1R; /* TMP storage */ + double T1I; /* TMP storage */ + double T2R; /* TMP storage */ + double T2I; /* TMP storage */ + double T3R; /* TMP storage */ + double T3I; /* TMP storage */ -if (N>=4){ - A0R = *a; - B0R = *b; - A0I = *(a +1); - B0I = *(b +1); - A1R = *(a +2); - B1R = *(b +2); - A1I = *(a +3); - B1I = *(b +3); - A2R = *(a +4); - B2R = *(b +4); - A2I = *(a +5); - B2I = *(b +5); - A3R = *(a +6); - B3R = *(b +6); - A3I = *(a +7); - B3I = *(b +7); - T0R = A0R * B0R; - T0I = (A0R * B0I); - T1R = A1R * B1R; - T1I = (A1R * B1I); - T2R = A2R * B2R; - T2I = (A2R * B2I); - T3R = A3R * B3R; - T3I = (A3R * B3I); - T0R -= (A0I * B0I); - T0I = A0I * B0R + T0I; - T1R -= (A1I * B1I); - T1I = A1I * B1R + T1I; - T2R -= (A2I * B2I); - T2I = A2I * B2R + T2I; - T3R -= (A3I * B3I); - T3I = A3I * B3R + T3I; - for (OutCnt=N/4-1; OutCnt > 0; OutCnt--){ - a += 8; - b += 8; - A0R = *a; - B0R = *b; - A0I = *(a +1); - B0I = *(b +1); - A1R = *(a +2); - B1R = *(b +2); - A1I = *(a +3); - B1I = *(b +3); - A2R = *(a +4); - B2R = *(b +4); - A2I = *(a +5); - B2I = *(b +5); - A3R = *(a +6); - B3R = *(b +6); - A3I = *(a +7); - B3I = *(b +7); - *out = T0R; - *(out +1) = T0I; - *(out +2) = T1R; - *(out +3) = T1I; - *(out +4) = T2R; - *(out +5) = T2I; - *(out +6) = T3R; - *(out +7) = T3I; - T0R = A0R * B0R; - T0I = (A0R * B0I); - T1R = A1R * B1R; - T1I = (A1R * B1I); - T2R = A2R * B2R; - T2I = (A2R * B2I); - T3R = A3R * B3R; - T3I = (A3R * B3I); - T0R -= (A0I * B0I); - T0I = A0I * B0R + T0I; - T1R -= (A1I * B1I); - T1I = A1I * B1R + T1I; - T2R -= (A2I * B2I); - T2I = A2I * B2R + T2I; - T3R -= (A3I * B3I); - T3I = A3I * B3R + T3I; - out += 8; - } - a += 8; - b += 8; - *out = T0R; - *(out +1) = T0I; - *(out +2) = T1R; - *(out +3) = T1I; - *(out +4) = T2R; - *(out +5) = T2I; - *(out +6) = T3R; - *(out +7) = T3I; - out += 8; -} -for (OutCnt=N%4; OutCnt > 0; OutCnt--){ - A0R = *a++; - B0R = *b++; - A0I = *a++; - B0I = *b++; - T0R = A0R * B0R; - T0I = (A0R * B0I); - T0R -= (A0I * B0I); - T0I = A0I * B0R + T0I; - *out++ = T0R; - *out++ = T0I; -} + if (N>=4) { + A0R = *a; + B0R = *b; + A0I = *(a +1); + B0I = *(b +1); + A1R = *(a +2); + B1R = *(b +2); + A1I = *(a +3); + B1I = *(b +3); + A2R = *(a +4); + B2R = *(b +4); + A2I = *(a +5); + B2I = *(b +5); + A3R = *(a +6); + B3R = *(b +6); + A3I = *(a +7); + B3I = *(b +7); + T0R = A0R * B0R; + T0I = (A0R * B0I); + T1R = A1R * B1R; + T1I = (A1R * B1I); + T2R = A2R * B2R; + T2I = (A2R * B2I); + T3R = A3R * B3R; + T3I = (A3R * B3I); + T0R -= (A0I * B0I); + T0I = A0I * B0R + T0I; + T1R -= (A1I * B1I); + T1I = A1I * B1R + T1I; + T2R -= (A2I * B2I); + T2I = A2I * B2R + T2I; + T3R -= (A3I * B3I); + T3I = A3I * B3R + T3I; + for (OutCnt=N/4-1; OutCnt > 0; OutCnt--) { + a += 8; + b += 8; + A0R = *a; + B0R = *b; + A0I = *(a +1); + B0I = *(b +1); + A1R = *(a +2); + B1R = *(b +2); + A1I = *(a +3); + B1I = *(b +3); + A2R = *(a +4); + B2R = *(b +4); + A2I = *(a +5); + B2I = *(b +5); + A3R = *(a +6); + B3R = *(b +6); + A3I = *(a +7); + B3I = *(b +7); + *out = T0R; + *(out +1) = T0I; + *(out +2) = T1R; + *(out +3) = T1I; + *(out +4) = T2R; + *(out +5) = T2I; + *(out +6) = T3R; + *(out +7) = T3I; + T0R = A0R * B0R; + T0I = (A0R * B0I); + T1R = A1R * B1R; + T1I = (A1R * B1I); + T2R = A2R * B2R; + T2I = (A2R * B2I); + T3R = A3R * B3R; + T3I = (A3R * B3I); + T0R -= (A0I * B0I); + T0I = A0I * B0R + T0I; + T1R -= (A1I * B1I); + T1I = A1I * B1R + T1I; + T2R -= (A2I * B2I); + T2I = A2I * B2R + T2I; + T3R -= (A3I * B3I); + T3I = A3I * B3R + T3I; + out += 8; + } + a += 8; + b += 8; + *out = T0R; + *(out +1) = T0I; + *(out +2) = T1R; + *(out +3) = T1I; + *(out +4) = T2R; + *(out +5) = T2I; + *(out +6) = T3R; + *(out +7) = T3I; + out += 8; + } + for (OutCnt=N%4; OutCnt > 0; OutCnt--) { + A0R = *a++; + B0R = *b++; + A0I = *a++; + B0I = *b++; + T0R = A0R * B0R; + T0I = (A0R * B0I); + T0R -= (A0I * B0I); + T0I = A0I * B0R + T0I; + *out++ = T0R; + *out++ = T0I; + } } diff --git a/src/maths/fft/matlib.h b/src/maths/fft/matlib.h index baf9e6e7f..0798798e9 100644 --- a/src/maths/fft/matlib.h +++ b/src/maths/fft/matlib.h @@ -1,6 +1,6 @@ /* a few routines from a vector/matrix library */ -void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols); +void xpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols); /* not in-place matrix transpose */ /* INPUTS */ /* *indata = input data array */ @@ -11,7 +11,7 @@ void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, lo /* OUTPUTS */ /* *outdata = output data array */ -void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols); +void cxpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols); /* not in-place complex matrix transpose */ /* INPUTS */ /* *indata = input data array */ @@ -22,7 +22,7 @@ void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, l /* OUTPUTS */ /* *outdata = output data array */ -void cvprod(float *a, float *b, float *out, long N); +void cvprod(double *a, double *b, double *out, long N); /* complex vector product, can be in-place */ /* product of complex vector *a times complex vector *b */ /* INPUTS */ diff --git a/src/winmain.c b/src/winmain.c index 03bf5b608..156820263 100644 --- a/src/winmain.c +++ b/src/winmain.c @@ -29,7 +29,7 @@ #include "misc/misc_time.h" /* timediff */ /* Constants */ -#define TBufSize 8192 // size of text buffer +#define TBufSize 65536 // size of text buffer #define CR VK_RETURN // Carriage Return #define LF 10 // Line Feed #define SE 0 // String termination