/* This code is a translation from a Fortran program.

   The original Fortran FFT was written by G. D. Bergland and M. T. Dolan,
   and can be found in the book:

   _Programs for Digital Signal Processing_, edited by the DSP Committee,
   IEEE Acoustics, Speech, and Signal Processing Society (IEEE Press, 1979),
   Chapter 1.2, "Fast Fourier Transform Algorithms," p. 1.2-1 - 1.2-18.

   The translation to C was made by Dr. Richard L. Lachance
   (richard.lachance@bomem.com). 

   THIS FILE MAY BE FREELY USED, MODIFIED, AND DISTRIBUTED AS LONG
   AS THESE COMMENTS ARE INCLUDED WITH ANY DERIVATION OF THIS
   SOURCE CODE. */

/* Steven G. Johnson (stevenj@alum.mit.edu): 

   Received this file in personal communication with Dr. Lachance,
   7/97.   Changed so that both direct and inverse ffts are
   unnormalized.  (Also made some cosmetic changes.) */

#include <stdio.h>
#include <math.h>

#include <fftw.h>

/* Largest supported transform size, expressed as a power-of-two exponent:
   bergland_fft1d() rejects any n greater than 2^MAXPOW. */
#define MAXPOW  20

/* Values for the dir argument of bergland_fft(data, n, dir).  Each one equals
   the sign of the exponent in the corresponding transform definition.
   bergland_fft() only tests for DIRECT; every other value is handled as
   INVERSE. */
#define DIRECT  -1
#define INVERSE  1

/* Return codes of bergland_fft() and bergland_fft1d(). */
#define SUCCESS 0
#define FAILURE 1

/* Complex number stored as a (real, imaginary) pair of FFTW_REAL values.
   FFTW_REAL is not defined in this file; presumably it comes from <fftw.h>.
   NOTE(review): the type name `complex` would clash with the `complex` macro
   of the standard header <complex.h> if that header were ever included. */
typedef struct  /* complex type structure */
{
    FFTW_REAL r;   /* real part */
    FFTW_REAL i;   /* imaginary part */
} complex;

/* prototypes: forward declarations of routines that are called in this file
   before their definitions appear. */

/* Power-of-two transform with the + sign in the exponent; returns
   SUCCESS or FAILURE. */
short bergland_fft1d(complex data[], int n);
/* Radix-2 pass over nthpo points; c0 and c1 address the two butterfly legs. */
void r2tx(int nthpo, complex c0[], complex c1[]);
/* Radix-4 pass over nthpo points; c0..c3 address the four butterfly legs. */
void r4tx(int nthpo, complex c0[], complex c1[],
    complex c2[], complex c3[]);
/* Radix-8 pass over nthpo points; c0..c7 address the eight butterfly legs,
   nxtlt is the number of distinct twiddle angles and length the stride
   between butterflies that share one angle. */
void r8tx(int nxtlt, int nthpo, int length, complex c0[], complex c1[],
    complex c2[], complex c3[], complex c4[], complex c5[],
    complex c6[], complex c7[]);

/* 1/sqrt(2): real and imaginary part of the 45-degree rotation used by the
   radix-8 butterfly. */
#define  ISQRT_2    0.70710678118654752440  /* 1/Sqrt[2] */

/* 2*pi: full-circle angle used to build the twiddle factors. */
#define  TWO_PI 6.2831853071795864769252867665590057683943387987502

/* Fractional part of x obtained by truncating toward zero, so the result
   carries the sign of x (FRC(-2.5) is -0.5).  x is evaluated twice and must
   fit in an int. */
#define  FRC(x)     ((x) - (int)(x))    /* ok for x > 0 & x < 0 */

/* Exchanges two FFTW_REAL lvalues.  Wrapped in do { } while (0) so that
   `SWAP(a, b);` is a single statement and can be used as the unbraced body of
   an if/else; the arguments are parenthesized so that any lvalue expression
   can be passed.  Each argument is evaluated twice. */
#define  SWAP(a,b) \
    do { FFTW_REAL xxx_temp; xxx_temp = (a); (a) = (b); (b) = xxx_temp; } while (0)

static void negate_imag(complex v[], int count)
/* Replaces each of v[0] .. v[count-1] by its complex conjugate, i.e. flips
   the sign of every imaginary part in place.  Does nothing for count <= 0. */
{
    int k = 0;

    while (k < count)
    {
        v[k].i = -v[k].i;
        ++k;
    }
}

short bergland_fft(complex data[], int n, int dir)
/* In-place, unnormalized Direct or Inverse Discrete Fourier Transform of the
   n complex samples in data, following these definitions:

                              N-1
                              ---      -2 Pi i t v / N
            Direct:    F_v =  >   f_t e
                              ---
                              t=0

                              N-1
                              ---      +2 Pi i t v / N
           Inverse:    f_t =  >   F_v e
                              ---
                              t=0

   dir selects the transform: DIRECT gives the first sum, any other value
   the second one.

   bergland_fft1d() only evaluates the sum with the + sign.  The direct
   transform is obtained from it by conjugating the data before and after
   the call; both conjugations are applied whatever the call returns.

   Returns the status reported by bergland_fft1d().
*/
{
    const int forward = (dir == DIRECT);
    short status;

    if (forward)
        negate_imag(data, n);

    status = bergland_fft1d(data, n);   /* sum with the + exponent */

    if (forward)
        negate_imag(data, n);

    return status;

} /* end of bergland_fft() */


short bergland_fft1d(complex data[], int n)
/* Computes, in place and without normalization, the discrete Fourier
   transform of a complex data set whose length is a power of two, using the
   plus sign in the exponent:

                              n-1
                              ---      +2 Pi i k l / n
                       H_l =  >   h_k e
                              ---
                              k=0

   data   the n samples, indexed from 0 to n-1; overwritten with the result
   n      number of samples; must be a power of two with 1 <= n <= 2^MAXPOW

   Returns SUCCESS, or FAILURE when n is not an acceptable size.  In the
   failure case a message is written to stderr and data is left untouched.

   The routine performs as many base 8 iterations as possible, finishes with
   a base 4 iteration or base 2 iteration if needed, and finally reorders the
   results with a bit-reversal permutation.

   Enhanced IEEE Radix 8 algorithm of low complexity (5.4 N Log_2[N])
   ((NOTE: THE 5.4 CONSTANT IS FOR ALGORITHMS COMPARISONS ON MY MACHINE))

   Adapted from:  Programs for Digital Signal Processing,
                  IEEE Acoustics, Speech, and Signal Processing Society.

                  (translated from Fortran)

   by: Richard Lachance
*/
{
    int i, ij, ji, bit;
    int n2pow, n4pow, n8pow;
    int nthpo, nxtlt, length;

    /* A positive power of two has exactly one bit set.  Testing that
       directly (instead of doubling a counter until it reaches n) avoids
       signed overflow for values of n above 2^30. */
    if (n < 1 || (n & (n - 1)) != 0)    /* n != 2^m */
    {
        fprintf(stderr, "FFT(): n = %d is not a power of 2", n);
        return FAILURE;
    }

    /* n2pow = log2(n); i reaches n exactly, so the shift cannot overflow */
    for (i = 1, n2pow = 0; i < n; i <<= 1)
        n2pow++;

    if (n2pow > MAXPOW)
    {
        fprintf(stderr, "FFT(): n must be smaller than %lf (2^%d)",
            pow(2.0, MAXPOW), MAXPOW);
        return FAILURE;
    }

    nthpo = n;
    n8pow = n2pow / 3;              /* number of radix 8 passes */
    n4pow = n2pow - n8pow * 3;      /* leftover factor: 0, 1 (x2) or 2 (x4) */

    for (i = 0; i < n8pow; i++)     /* radix 8 passes, if any */
    {
        nxtlt = 1 << (n2pow - 3 * (i + 1));
        length = nxtlt << 3;        /* x 8 */
        r8tx(nxtlt, nthpo, length, data, data + nxtlt, data + 2 * nxtlt,
            data + 3 * nxtlt, data + 4 * nxtlt, data + 5 * nxtlt,
            data + 6 * nxtlt, data + 7 * nxtlt);
    }

    if (n4pow == 2)         /* go through the base 4 iteration */
        r4tx(nthpo, data, data + 1, data + 2, data + 3);

    if (n4pow == 1)         /* go through the base 2 iteration */
        r2tx(nthpo, data, data + 1);

    /* Bit-reversal permutation: element ij trades places with the element
       whose index ji is ij with its n2pow bits in reverse order.  ji is
       maintained as a counter that is incremented from the most significant
       bit downwards, so it works for any power of two and does not depend on
       the value of MAXPOW.  Swapping only when ij > ji exchanges each pair
       exactly once. */
    ji = 0;
    for (ij = 0; ij < n; ij++)
    {
        if (ij > ji)
        {
            complex swapped = data[ij];

            data[ij] = data[ji];
            data[ji] = swapped;
        }

        /* add one to ji in bit-reversed arithmetic: clear the leading run of
           set bits, then set the first clear bit (none left after the last
           index, where bit ends at 0 and ji wraps to 0) */
        for (bit = n >> 1; bit > 0 && (ji & bit) != 0; bit >>= 1)
            ji ^= bit;
        ji |= bit;
    }

    return SUCCESS;

} /* end of bergland_fft1d() */


void r2tx(int nthpo, complex c0[], complex c1[])
/* Radix 2 iteration subroutine.

   For i = 0, 2, 4, ... below nthpo, the pair (c0[i], c1[i]) is replaced by
   its sum (stored in c0[i]) and its difference c0[i] - c1[i] (stored in
   c1[i]).  No twiddle factor is applied. */
{
    int k = 0;

    while (k < nthpo)
    {
        complex *const p0 = c0 + k;
        complex *const p1 = c1 + k;
        const FFTW_REAL sum_re = p0->r + p1->r;
        const FFTW_REAL sum_im = p0->i + p1->i;

        /* the difference must be stored before the sum overwrites p0 */
        p1->r = p0->r - p1->r;
        p0->r = sum_re;

        p1->i = p0->i - p1->i;
        p0->i = sum_im;

        k += 2;
    }

} /* end of r2tx() */


void r4tx(int nthpo, complex c0[], complex c1[],
    complex c2[], complex c3[])
/* Radix 4 iteration subroutine.

   For i = 0, 4, 8, ... below nthpo, the four values c0[i], c1[i], c2[i] and
   c3[i] are replaced by a 4-point butterfly without twiddle factors:

       c0 <- (c0 + c2) + (c1 + c3)
       c1 <- (c0 + c2) - (c1 + c3)
       c2 <- (c0 - c2) + i (c1 - c3)
       c3 <- (c0 - c2) - i (c1 - c3)
*/
{
    int k;

    for (k = 0; k < nthpo; k += 4)
    {
        complex *const p0 = c0 + k;
        complex *const p1 = c1 + k;
        complex *const p2 = c2 + k;
        complex *const p3 = c3 + k;

        /* first stage: sums and differences of legs (0,2) and (1,3) */
        const FFTW_REAL sum02_re = p0->r + p2->r;
        const FFTW_REAL sum02_im = p0->i + p2->i;
        const FFTW_REAL dif02_re = p0->r - p2->r;
        const FFTW_REAL dif02_im = p0->i - p2->i;
        const FFTW_REAL sum13_re = p1->r + p3->r;
        const FFTW_REAL sum13_im = p1->i + p3->i;
        const FFTW_REAL dif13_re = p1->r - p3->r;
        const FFTW_REAL dif13_im = p1->i - p3->i;

        /* second stage: combine; i * (x + i y) = -y + i x */
        p0->r = sum02_re + sum13_re;
        p0->i = sum02_im + sum13_im;

        p1->r = sum02_re - sum13_re;
        p1->i = sum02_im - sum13_im;

        p2->r = dif02_re - dif13_im;
        p2->i = dif02_im + dif13_re;

        p3->r = dif02_re + dif13_im;
        p3->i = dif02_im - dif13_re;
    }

} /* end of r4tx() */


void r8tx(int nxtlt, int nthpo, int length, complex c0[], complex c1[],
    complex c2[], complex c3[], complex c4[], complex c5[],
    complex c6[], complex c7[])
/* Radix 8 iteration subroutine.

   Performs one radix-8 pass, in place, over the nthpo points addressed
   through c0 .. c7.

   nxtlt   number of distinct twiddle angles used by the pass (the outer loop
           runs i = 0 .. nxtlt-1)
   nthpo   total number of points covered by the pass
   length  stride between successive butterflies that share the same angle
   c0..c7  the eight butterfly legs; leg k of a butterfly is ck[j]

   For every i, the angle is arg = 2 Pi i / length and (rk, sk) hold
   (cos(k arg), sin(k arg)) for k = 1 .. 7.  Each butterfly is an 8-point
   transform built from three stages of sums and differences (a*, b*, then
   the stores), whose outputs are multiplied by (rk + i sk), with k in the
   order 0, 4, 2, 6, 1, 5, 3, 7 for c0 .. c7. */
{
    register int i, j;
    FFTW_REAL r1, r2, r3, r4, r5, r6, r7, s1, s2, s3, s4, s5, s6, s7;
    FFTW_REAL ar0, ar1, ar2, ar3, ar4, ar5, ar6, ar7;
    FFTW_REAL ai0, ai1, ai2, ai3, ai4, ai5, ai6, ai7;
    FFTW_REAL br0, br1, br2, br3, br4, br5, br6, br7;
    FFTW_REAL bi0, bi1, bi2, bi3, bi4, bi5, bi6, bi7;
    FFTW_REAL arg, scale, tr, ti;

    /* angle increment between consecutive values of i */
    scale = TWO_PI / length;
    
    for(i = 0; i < nxtlt; i++)
    {
        /* trigonometric functions are not called enough times to justify the
           use of a lookup table: saved time is negligible, at the expense of
           increased memory usage (depends on the platform of implementation)*/
        arg = scale * i;
        r1 = cos(arg);
        s1 = sin(arg);

        /* cos/sin of 2*arg .. 7*arg from the angle-addition formulas, so
           that only one cos() and one sin() call is needed per angle */
        r2 = r1 * r1 - s1 * s1;  s2 = 2.0 * r1 * s1;
        r3 = r1 * r2 - s1 * s2;  s3 =  r2 * s1 + s2 * r1;
        r4 = r2 * r2 - s2 * s2;  s4 = 2.0 * r2 * s2;
        r5 = r2 * r3 - s2 * s3;  s5 =  r3 * s2 + s3 * r2;
        r6 = r3 * r3 - s3 * s3;  s6 = 2.0 * r3 * s3;
        r7 = r3 * r4 - s3 * s4;  s7 =  r4 * s3 + s4 * r3;

        /* every butterfly that uses this angle */
        for(j = i; j < nthpo; j += length)
        {
            /* stage 1: sums and differences of legs k and k+4 */
            ar0 = c0[j].r + c4[j].r;  ai0 = c0[j].i + c4[j].i;
            ar1 = c1[j].r + c5[j].r;  ai1 = c1[j].i + c5[j].i;
            ar2 = c2[j].r + c6[j].r;  ai2 = c2[j].i + c6[j].i;
            ar3 = c3[j].r + c7[j].r;  ai3 = c3[j].i + c7[j].i;
            ar4 = c0[j].r - c4[j].r;  ai4 = c0[j].i - c4[j].i;
            ar5 = c1[j].r - c5[j].r;  ai5 = c1[j].i - c5[j].i;
            ar6 = c2[j].r - c6[j].r;  ai6 = c2[j].i - c6[j].i;
            ar7 = c3[j].r - c7[j].r;  ai7 = c3[j].i - c7[j].i;

            /* stage 2: b0..b3 combine the sums; b4..b7 combine the
               differences as a4 +/- i a6 and a5 +/- i a7 */
            br0 = ar0 + ar2;  bi0 = ai0 + ai2;
            br1 = ar1 + ar3;  bi1 = ai1 + ai3;
            br2 = ar0 - ar2;  bi2 = ai0 - ai2;
            br3 = ar1 - ar3;  bi3 = ai1 - ai3;
            br4 = ar4 - ai6;  bi4 = ai4 + ar6;
            br5 = ar5 - ai7;  bi5 = ai5 + ar7;
            br6 = ar4 + ai6;  bi6 = ai4 - ar6;
            br7 = ar5 + ai7;  bi7 = ai5 - ar7;

            /* output 0 never needs a twiddle factor */
            c0[j].r = br0 + br1;  c0[j].i = bi0 + bi1;

            if (i > 0)
            {
                /* general case: each output is multiplied by (rk + i sk) */
                c1[j].r = r4 * (br0 - br1) - s4 * (bi0 - bi1);
                c1[j].i = r4 * (bi0 - bi1) + s4 * (br0 - br1);
                c2[j].r = r2 * (br2 - bi3) - s2 * (bi2 + br3);
                c2[j].i = r2 * (bi2 + br3) + s2 * (br2 - bi3);
                c3[j].r = r6 * (br2 + bi3) - s6 * (bi2 - br3);
                c3[j].i = r6 * (bi2 - br3) + s6 * (br2 + bi3);
                
                /* (tr, ti) = b5 * (1 + i) / sqrt(2) */
                tr = ISQRT_2 * (br5 - bi5);
                ti = ISQRT_2 * (br5 + bi5);
                
                c4[j].r = r1 * (br4 + tr) - s1 * (bi4 + ti);
                c4[j].i = r1 * (bi4 + ti) + s1 * (br4 + tr);
                c5[j].r = r5 * (br4 - tr) - s5 * (bi4 - ti);
                c5[j].i = r5 * (bi4 - ti) + s5 * (br4 - tr);
                
                /* (tr, ti) = b7 * (-1 + i) / sqrt(2) */
                tr = -ISQRT_2 * (br7 + bi7);
                ti = ISQRT_2 * (br7 - bi7);
                
                c6[j].r = r3 * (br6 + tr) - s3 * (bi6 + ti);
                c6[j].i = r3 * (bi6 + ti) + s3 * (br6 + tr);
                c7[j].r = r7 * (br6 - tr) - s7 * (bi6 - ti);
                c7[j].i = r7 * (bi6 - ti) + s7 * (br6 - tr);
            }
            else
            {
                /* i == 0: the angle is zero, so every twiddle factor is 1
                   and the multiplications are skipped */
                c1[j].r = br0 - br1;  c1[j].i = bi0 - bi1;
                c2[j].r = br2 - bi3;  c2[j].i = bi2 + br3;
                c3[j].r = br2 + bi3;  c3[j].i = bi2 - br3;
                
                /* (tr, ti) = b5 * (1 + i) / sqrt(2) */
                tr = ISQRT_2 * (br5 - bi5);
                ti = ISQRT_2 * (br5 + bi5);
                
                c4[j].r = br4 + tr;  c4[j].i = bi4 + ti;
                c5[j].r = br4 - tr;  c5[j].i = bi4 - ti;
                
                /* (tr, ti) = b7 * (-1 + i) / sqrt(2) */
                tr = - ISQRT_2 * (br7 + bi7);
                ti = ISQRT_2 * (br7 - bi7);
                
                c6[j].r = br6 + tr;  c6[j].i = bi6 + ti;
                c7[j].r = br6 - tr;  c7[j].i = bi6 - ti;
            }

        } /* end for(j) */
    } /* end for(i) */
    
} /* end of r8tx() */
 
