/*	This file contains demonstration code that supplements the paper
	_Construction of a High-Performance FFT_.

	See the paper and ReadMe.txt for more information.

	The code is formatted for tabs at four-column intervals.
*/


#include <limits.h>

#include "common.h"


// Expanded FFT Kernel
/*	Issues every FFT_Butterflies call for one transform.  The opening call
	is given both vOut and vIn; every later call is given vOut for both
	vector arguments.  The general middle passes are looped, while the
	final two passes are written out with their literal last arguments,
	16 and 4.

	The parameter m is accepted but never read in this kernel.  The shifts
	below (1 << (N-4), 1 << (N-2)) need N to be at least 4; the caller FFT
	rejects smaller N before calling.
*/
static void FFT_Kernel(
	ComplexArray vOut,					// Output vector.
	ComplexArray vIn,					// Input vector.
	int *m,								// m's from mathematics (not read here).
	int *n,								// n's from mathematics.
	int N,								// N from mathematics.
	int P								// P from mathematics.
)
{
	int pass = 1;
	int block, blockCount;

	// Opening pass: first argument 3 when N is odd, 2 when N is even.  The
	// literal is kept at each call site rather than folded into a variable.
	if ((N & 1) == 0)
		FFT_Butterflies(2, vOut, vIn, 0, 1 << N);
	else
		FFT_Butterflies(3, vOut, vIn, 0, 1 << N);

	// General middle passes: passes 1 through P-3, when there are any.
	while (pass < P - 2) {
		blockCount = 1 << n[pass];
		for (block = 0; block < blockCount; ++block) {
			FFT_Butterflies(2, vOut, vOut, block, 1 << (N - n[pass]));
		}
		++pass;
	}

	// Second-to-last pass.  Here pass is the larger of 1 and P-2, so this
	// runs only when P is at least 3; with P == 2 the opening pass already
	// was the second-to-last one.
	if (pass < P - 1) {
		blockCount = 1 << (N - 4);
		for (block = 0; block < blockCount; ++block) {
			FFT_Butterflies(2, vOut, vOut, block, 16);
		}
	}

	// Last pass.
	blockCount = 1 << (N - 2);
	for (block = 0; block < blockCount; ++block) {
		FFT_Butterflies(2, vOut, vOut, block, 4);
	}
}


/*	This routine, FFT, provides the public interface for the FFT.  It
	allocates necessary memory, chooses a structure for the FFT, calls
	the kernel, and provides the bit-reversal permutation for versions
	of the kernel that do not have it.
*/
extern "C" {
/*	Contract for FFT:

	re, im	Arrays of 1<<N floats each.  They supply the input and are
			overwritten with the result.
	N		Base-two logarithm of the vector length.  Must be at least 4
			and small enough that 1<<N is representable in an int.
	d		Direction of transform.  Only 1 is accepted.

	Returns 0 on success.  Returns 1, without touching re or im, when d or
	N is not supported.
*/
int FFT(
	float *re,	// Address of real components.
	float *im,	// Address of imaginary components.
	int N,		// Base-two logarithm of length of vector.
	int d		// Direction of transform.
)
{
	extern void BitReversalPermute(float *re, float *im, int TwoToTheN);

	int P;

	ComplexArray *v;
	int i, length, *m, *n;

	// This FFT does not support the reverse transform.
	if (d != 1)
		return 1;

	// This FFT does not support N < 4.
	if (N < 4)
		return 1;

	// 1<<N must fit in an int:  shifting a one into or past the sign bit
	// is undefined behavior, so reject such N like any other unsupported
	// size.
	if (N > (int) (sizeof(int) * CHAR_BIT) - 2)
		return 1;

	length = 1 << N;

	// Start structuring the FFT by choosing how many passes we will have.
	// N is at least 4 here, so we always have mostly radix-4 passes; the
	// N=0 and N=1 special cases cannot occur.
	P = N/2;

	AllocVectors(&v, &m, &n, N, P);

	// Structure the FFT by choosing the values of m[p].  If N is odd,
	// start with a radix-8 pass; if N is even, start with a radix-4 pass.
	m[0] = (N & 1) ? 3 : 2;

	// Use radix-4 passes for all passes after the first.
	for (i = 1; i < P; ++i)
		m[i] = 2;

	// The n's are determined by the m's:  n[i] is the sum of m[0..i-1].
	n[0] = 0;
	for (i = 1; i <= P; ++i)
		n[i] = n[i-1] + m[i-1];

	// Copy mathematical input h from re and im into v[0].
	for (i = 0; i < length; ++i) {
		v[0].re[i] = re[i];
		v[0].im[i] = im[i];
	}

	// Do the FFT!
	FFT_Kernel(v[N], v[0], m, n, N, P);

	// Copy output from v[N] into re and im.
	// NOTE(review): this previously read v[N][i].re / v[N][i].im, which
	// disagrees with the v[0].re[i] access used for the input copy above.
	// It now matches the input copy; confirm against the definition of
	// ComplexArray in common.h.
	for (i = 0; i < length; ++i) {
		re[i] = v[N].re[i];
		im[i] = v[N].im[i];
	}

	// Do the bit-reversal because it is not in this FFT kernel.
	// Convert bit-reversed v[N] to desired mathematical output H.
	BitReversalPermute(re, im, length);

	FreeVectors(&v, &m, &n, N+1);

	return 0;
}
}
