/*   
-----------------------------------------------------------------------
   Description: C FFT library routines

   Usage:       These routines will typically be called in either the
                "high efficiency" mode or the "easy usage" mode.

		High efficiency:
		  Use this mode when speed is of the essence and you
		  are willing to take care of plan initialization, 
		  allocation, and destruction yourself.
		  
		  Example:
		    plan = (FFT_Plan *) malloc(sizeof(FFT_Plan));
		    fft_init(plan, fftsize, fftflags);
		    while (notdone) {
		      fft_work(ibuf,plan);
		    }  
		    fft_free(plan);
		    free(plan);
		   
		Easy usage:
		  Use this mode when simplicity is the goal and you
		  want the routines to take care of everything for
		  you.

		  Example:
		    fftflags = FFT_FLAGS_INITPLANS | ...
	            nm_fft(ibuf,fftsize,fftflags);
		    while (notdone) {
	              nm_fft(ibuf,fftsize,fftflags);
		    }
		    fftflags = FFT_FLAGS_FREEPLANS | ...     
	            nm_fft(ibuf,fftsize,fftflags);


   Authors:     Too many to mention, but FPS, FFTW, JGS, RDH, SMM and 
                others bear most of the guilt. 

   Revisions:   

   DATE		AUTHOR		DESCRIPTION
   -----------	--------------	---------------------------------------
   01/01/89	FPS		original Fortran decimation in time algorithm
   08/01/95	SMM,JGS		"MIDAS" port and modifications
   04/01/00	RDH		conversion to C and new architecture
   04/04/00	RDH		addition of RADIX-8 pass
   04/22/00	RDH		addition of generic radix pass
   01/04/01	RDH		major overhaul of radix passes to 
                                  incorporate WANG's faster kernels

   NOTE 1: BEWARE TO ALL WHO ENTER HERE... what follows is ugly and 
           repulsive but fast. Change any line at your own risk.

   NOTE 2: Some of the concepts incorporated in these routines are
           borrowed from the FFTW (fastest FFT in the West) folks.
	   Unfortunately, that code (while extremely fast) is very
	   large and complex. The resulting object code consumes
	   over a mega-byte and the source code is machine generated
	   (hence unreadable). This library code was a compromise 
	   between the FFTW and FPS approaches. The result is fast
	   and significantly smaller on most platforms. FFTW is
	   distributed under the GNU General Public License.

  References:
      C. Temperton,  "Self-Sorting Mixed Radix Fast Fourier
      Transforms", Tech. Memo. No. 66,  Research Dept.,  European
      Center for Medium Range Weather Forecasts, Reading, U.K., 1982

      Z. Wang, "A Prime Factor Fast W Transform Algorithm",
      IEEE Transactions on Signal Processing, vol. 40, no. 9,
      September 1992, pp. 2361-2368
-----------------------------------------------------------------------
*/

/* Global Includes */
#include <stdlib.h>
#include <string.h>
#include <math.h> 
#include "libnmfft.h"


/*
-----------------------------------------------------------------------

  FUNCTION: 
    fft_genradix()      

  SYNOPSIS: 
    void fft_genradix(fft_int n, FFT_Plan *plan)                  

  ARGUMENTS:
    fft_int   n    --- FFT size to factor                         
    FFT_Plan *plan --- FFT execution plan

  PURPOSE:
    Computes the Radix pass factorization of a complex FFT size
    with biases for RISC/non-RISC platforms. Any residual is assigned
    to a single-pass generic radix butterfly.

  REVISIONS:
    04/25/00 	rdh	added conditional compile for RISC/non-RISC
    05/01/00 	rdh	changed calling to use FFT_Plan for generic
                          radix support
    08/17/01 	rdh	modified to use "profiles" for various machines

-----------------------------------------------------------------------
*/

/*
 * intmod(): integer remainder with a divide-by-zero guard.
 *
 *   iarg --- dividend
 *   idiv --- divisor
 *
 * Returns iarg % idiv, or -1 when idiv is zero so that the modulo is
 * never evaluated with a zero divisor.
 */
fft_int intmod(fft_int iarg, fft_int idiv)
{
  /* A zero divisor is reported with the sentinel value -1 */
  if (idiv == 0) {
    return (-1);
  }

  return (iarg % idiv);
}

void fft_genradix(fft_int n, FFT_Plan *plan) 
{
 /*
  * Factors the FFT size n into radix passes and stores the result in plan:
  *   plan->radixpassid[i]  = i for every i in 0..FFT_RADIX_MAX
  *   plan->radixpasscnt[i] = number of radix-i passes selected
  *   plan->radixgeneric    = leftover factor that no profiled radix
  *                           absorbed (0 when there is none)
  * For n <= 0 every count and radixgeneric are left at zero.
  */

 /* Local variables */
 fft_int *mrid;
 fft_int *mrcnt;
 fft_int i,ip,nsize;

  /* Shortcut assignments */
     mrid  = plan->radixpassid;
     mrcnt = plan->radixpasscnt;

  /* Initialize results (a non-positive size is treated as 0) */
     nsize = (n > 0) ? (n) : 0;
     plan->radixgeneric = 0;
     for (i=0; i<(FFT_RADIX_MAX+1); i++) {
       mrid[i]  = i;
       mrcnt[i] = 0;
     }  

  /* Factor the FFT size using architecture dependencies. Radices are
     tried from largest to smallest. Only the first 7 rows of
     radix_decomp_profile are scanned -- NOTE(review): presumably the
     table's full row count; confirm against its declaration. */
     for (i=FFT_RADIX_MAX; i>=2; i--) {
       for (ip=0; ip<7; ip++) {
         /* Profile row layout as used here:
            [0] radix, [1] exclusive lower bound on n,
            [2] exclusive upper bound on n, [3] max pass count */
         if ((i == radix_decomp_profile[ip][0]) &&
             (n >  radix_decomp_profile[ip][1]) &&
             (n <  radix_decomp_profile[ip][2])) {
           /* Peel off factors of i until the size is fully factored,
              i no longer divides it, or the pass limit is reached */
           while((nsize>1) && 
                 (intmod(nsize,i) == 0) && 
                 (mrcnt[i] < radix_decomp_profile[ip][3])) {
              mrcnt[i] = mrcnt[i]+1;
              nsize /= i;
           }
         }
       }
     }  

  /* Any leftover must be processed by a radix_generic pass. (For n <= 0
     the leftover is 0, which leaves radixgeneric at 0.) The former
     "return status" reset that followed this step could never execute,
     because the leftover size was always forced to 1 first; it has been
     removed. */
     if (nsize != 1) {
       plan->radixgeneric = nsize;
     }

     return;
}


/*
-----------------------------------------------------------------------

  FUNCTION: 
    fft_checkradix()    

  SYNOPSIS: 
    fft_int fft_checkradix(fft_int n, fft_int flags)                   

  ARGUMENTS:
    fft_int n     --- FFT size to factor                         
    fft_int flags --- FFT parameter flags (real/complex/etc.)    

  RETURN:
    (fft_int)  1 --- FFT size/type can be efficiently computed (factored)
    (fft_int)  0 --- FFT size/type can be computed inefficiently
    (fft_int) -1 --- requested FFT size/type cannot be computed by 
                       this library

  PURPOSE:
    Can be called prior to doing a call to nm_fft() to determine 
    whether a desired FFT size can be efficiently computed by the
    available radix passes. 

  REVISIONS:
    02/15/01 	rdh	baseline

-----------------------------------------------------------------------
*/

fft_int fft_checkradix(fft_int n, fft_int flags)
{
  FFT_Plan scratch;      /* throw-away plan that receives the factorization */
  fft_int  ncomplex;     /* size actually handed to the factorizer          */

  /* Sizes below 1 can never be computed */
     if (n < 1) {
       return (-1);
     }

  /* A real FFT needs an even point count; half that size is factored */
     ncomplex = n;
     if ((flags & FFT_FLAGS_REAL) == FFT_FLAGS_REAL) {
       if (intmod(n,2) != 0) {
         return (-1);
       }
       ncomplex = n/2;
     }

  /* Factor the FFT size into available radix passes */
     scratch.radixgeneric = 0;
     fft_genradix(ncomplex,&scratch);

  /* A leftover factor means a generic pass is needed: a leftover above
     101 is reported as not computable, any other leftover as
     inefficient */
     if (scratch.radixgeneric > 101) {
       return (-1);
     }
     if (scratch.radixgeneric > 0) {
       return (0);
     }

  /* Fully factored by the available radix passes */
  return (1);
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_gen_twiddle()

  SYNOPSIS: 
    void fft_gen_twiddle(fft_real *t, fft_int n, fft_int rcflag)

  ARGUMENTS:
    fft_real  *t     --- complex output vector of twiddle factors
    fft_int   n      --- size of complex input vector
    fft_int   rcflag --- real/complex FFT type flag

  PURPOSE:
    Generate the twiddle factor table for use by the
    RADIX-M and packing functions.

  REVISIONS:
    04/20/00 	rdh	baseline conversion from FPS routine
    04/21/00 	rdh	changed phase argument to double for precision
-----------------------------------------------------------------------
*/

void fft_gen_twiddle(fft_real *t, fft_int n, fft_int rcflag)
{
/*
 * Fills t (viewed as n complex entries) with unit phasors.
 *
 *   rcflag != FFT_FLAGS_REAL:
 *       entry k = cos/sin(2*pi*k/n)          for k = 0 .. n-1
 *   rcflag == FFT_FLAGS_REAL:
 *       entry k = cos/sin(2*(2*pi)*k/n)      for k = 0 .. n/2-1
 *       entry k = cos/sin(2*pi*k/n)          for k = n/2 .. n-1
 *
 * Nothing is written when n < 1.
 *
 * The table is filled by index. The earlier descending pointer walk
 * stepped its pointer to one element before the start of the table,
 * which is undefined pointer arithmetic in C even when the pointer is
 * never dereferenced.
 */

/* Constants */
const fft_dble twopi =
                  ((fft_dble) 6.2831853071795864769252867665590057683943388);

/* Local variables */
fft_int k, nfft, nfft2;
fft_dble radphas;
fft_cmplx *tw;

 /* Initialize FFT size */
    if(n < 1) return;
    nfft  = n;
    nfft2 = (rcflag == FFT_FLAGS_REAL) ? (nfft/2) : 0;
    tw    = (fft_cmplx *) t;

 /* Entries nfft2 .. nfft-1: phase step of 2*pi/nfft (whole table for a
    complex FFT, upper part of the table for a real FFT) */
    for(k=nfft2; k<nfft; k++) {
        radphas = twopi*(((fft_dble) k)/((fft_dble) nfft));
        tw[k].re = cos(radphas);
        tw[k].im = sin(radphas);
    }

 /* Entries 0 .. nfft2-1: doubled phase step; nfft2 is 0 unless a real
    FFT table was requested, so this loop is skipped for complex FFTs */
    for(k=0; k<nfft2; k++) {
        radphas = 2.0*twopi*(((fft_dble) k)/((fft_dble) nfft));
        tw[k].re = cos(radphas);
        tw[k].im = sin(radphas);
    }

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_generic_matrix()

  SYNOPSIS: 
    void fft_generic_matrix(fft_int n, fft_cmplx *w)

  ARGUMENTS:
    fft_int n    --- size of DFT matrix to create
    fft_cmplx *w --- complex DFT matrix elements     

  PURPOSE:
    Creates the DFT kernel matrix for radixpass_generic() to use.
    This matrix is required when no available fast radixpass can
    be found by the planner factorization, fft_genradix().

  REVISIONS:
    05/01/00 	rdh	baseline
-----------------------------------------------------------------------
*/

void fft_generic_matrix(fft_int n, fft_cmplx *w)               
{
 /*
  * Fills w, addressed row-major as w[row*n + col], with
  *     cos(a) - i*sin(a),   a = (2*pi/n) * ((row*col) mod n)
  * for every row and col in 0..n-1. Nothing is written when n < 1.
  */

 /* Constants */
    const fft_dble twopi =
                  ((fft_dble) 6.2831853071795864769252867665590057683943388);

 /* Local variables */
    fft_int r,col;
    fft_dble size,step,rowval,colval,phase;
    fft_cmplx *cell;

 /* No matrix elements exist for a non-positive size */
    if (n < 1) return;

 /* Phase increment between adjacent powers of the n-th root of unity */
    size = (double) n;
    step = twopi/size;

 /* Create the DFT matrix one row at a time */
    for (r=0; r<n; r++) {
      rowval = (double) r;
      for (col=0; col<n; col++) {
        colval = (double) col;
        /* Reduce the exponent modulo n before scaling to radians */
        phase    = step*fmod((rowval*colval),size);
        cell     = &w[(r*n)+col];
        cell->re =  cos(phase);
        cell->im = -1.0*sin(phase);
      }
    }

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    radixpass_M()    

  SYNOPSIS: 
    void radixpass_M(const fft_real *a, fft_real *c, const fft_real *t,
                     fft_int n, fft_int la, fft_int dirflag)

  ARGUMENTS:
    fft_real *a       --- complex input vector treated as real array
    fft_real *c       --- complex output vector treated as real array
    fft_real *t       --- complex twiddle vector treated as real array
    fft_int  n        --- size of complex input/output/twiddle vectors
    fft_int  la       --- partial product of remaining radix passes
    fft_int  dirflag  --- FFT direction (forward/inverse)

  PURPOSE:
    To perform a forward/inverse not-in-place Radix-M complex 
    butterfly pass for use in FFT algorithms.

  DESCRIPTION:
       These routines perform a single forward/inverse, not-in-place 
       Radix-M FFT pass on the complex input vector A.  Forward 
       (negative exponent version) Radix-M complex butterflies are 
       performed on sets of M input points using decimation in time.  
       The algorithm is self-sorting and does not require bit reversing.
       The complex exponential twiddle factor vector used by the routines
       should have been generated by a call to "fft_gen_twiddle". The
       LA calling parameter is the partial product of the radices 
       remaining to be completed after the current pass.  

  REVISIONS:
    04/20/00 	rdh	baseline conversion from FPS routine
    04/21/00 	rdh	changed phase argument to double for precision
    01/04/01    rdh	massive overhaul to use pointers and Wang's
                          kernel operations
-----------------------------------------------------------------------
*/

void radixpass_2(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/*
 * One not-in-place radix-2 butterfly pass.
 *
 * The input is consumed in blocks of 2*la complex points; each block
 * yields la outputs in the lower half of c and la outputs ospan = n/2
 * entries higher. Block k uses twiddle table entry k*la. The forward
 * pass (dirflag == FFT_FLAGS_FORWARD) multiplies by the conjugate of
 * the table entry; any other dirflag runs the inverse pass, which
 * multiplies by the entry itself. When la == n/2 there is a single
 * block and no twiddle multiply is performed in either direction.
 * Returns without writing anything when n < 2 or la < 1.
 */

/* Constants */
const int RADIXSIZE = (int) 2;

/* Local variables */
fft_int blk,j,nblk;
fft_int instride,ispan,ospan;
fft_real tw_re,tw_im;
fft_real top_re,top_im;
fft_real bot_re,bot_im;
fft_real rot_re,rot_im;
const fft_cmplx *in, *tw;
fft_cmplx *out;

 /* Verify parameters */
    if (n < RADIXSIZE || la < 1) return;

 /* Complex views of the real-typed arguments */
    in  = (const fft_cmplx *) a;        /* Input data    */
    out = (fft_cmplx *) c;              /* Output data   */
    tw  = (const fft_cmplx *) t;        /* Twiddle table */

 /* Pass geometry */
    ospan    = n/RADIXSIZE;
    ispan    = la;
    nblk     = ospan/ispan;
    instride = ispan*RADIXSIZE;

 /* Single block: plain add/subtract butterfly, same for both directions */
    if (ispan == ospan) {
        for (j=0; j<ispan; j++) {
            top_re = in[j].re;
            top_im = in[j].im;
            bot_re = in[j+ispan].re;
            bot_im = in[j+ispan].im;
            out[j].re       = top_re + bot_re;
            out[j].im       = top_im + bot_im;
            out[j+ospan].re = top_re - bot_re;
            out[j+ospan].im = top_im - bot_im;
        }
        return;
    }

    if (dirflag == FFT_FLAGS_FORWARD) {                  /* Forward Pass */
        for (blk=0; blk<nblk; blk++) {
            tw_re = tw->re;
            tw_im = tw->im;

            for (j=0; j<ispan; j++) {
                top_re = in[j].re;
                top_im = in[j].im;
                bot_re = in[j+ispan].re;
                bot_im = in[j+ispan].im;
                /* Rotate the second input by conj(twiddle) */
                rot_re = tw_re*bot_re + tw_im*bot_im;
                rot_im = tw_re*bot_im - tw_im*bot_re;
                out[j].re       = top_re + rot_re;
                out[j].im       = top_im + rot_im;
                out[j+ospan].re = top_re - rot_re;
                out[j+ospan].im = top_im - rot_im;
            }
            in  += instride;
            out += ispan;
            tw  += ispan;
        }
    }
    else {                                               /* Inverse Pass */
        for (blk=0; blk<nblk; blk++) {
            tw_re = tw->re;
            tw_im = tw->im;

            for (j=0; j<ispan; j++) {
                top_re = in[j].re;
                top_im = in[j].im;
                bot_re = in[j+ispan].re;
                bot_im = in[j+ispan].im;
                /* Rotate the second input by the twiddle itself */
                rot_re = tw_re*bot_re - tw_im*bot_im;
                rot_im = tw_re*bot_im + tw_im*bot_re;
                out[j].re       = top_re + rot_re;
                out[j].im       = top_im + rot_im;
                out[j+ospan].re = top_re - rot_re;
                out[j+ospan].im = top_im - rot_im;
            }
            in  += instride;
            out += ispan;
            tw  += ispan;
        }
    }

    return;
}

/*-----------------------------------------------------------------------*/

/*
 * radixpass_3(): one not-in-place radix-3 butterfly pass.
 *
 * Input a, output c and twiddle table t are accessed as arrays of
 * fft_cmplx. The input is consumed in blocks of 3*la complex points;
 * each block produces la outputs at each of the offsets 0, ospan and
 * 2*ospan in c, where ospan = n/3. The forward kernels run when dirflag
 * equals FFT_FLAGS_FORWARD; any other value selects the inverse kernels.
 * When la == n/3 a twiddle-free kernel is used. No scaling is applied.
 * The function returns without writing anything when n < 3 or la < 1.
 */
void radixpass_3(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */
const int RADIXSIZE = (int) 3;

/* s1 = -3/2 and s2 = sqrt(3)/2: butterfly coefficients */
const fft_real s1 = 
                  FFT_CONST(-1.500000000000000000000000000000000000000000000);
const fft_real s2 = 
                  FFT_CONST(+0.866025403784438646763723170752936183471402627);

/* Local variables */
fft_int k,l,m;
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re;
fft_real o00_im,o01_im;
fft_real t01_re,t02_re;
fft_real t01_im,t02_im;
fft_real xreg_re,xreg_im;
fft_real x00a_re,x00a_im,x00b_re,x00b_im;
fft_real x01a_re,x01a_im,x01b_re,x01b_im;
fft_real x02a_re,x02a_im;
fft_cmplx *x, *y, *w;

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
   
 /* Define local arrays */
    x = (fft_cmplx *) a;                /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (fft_cmplx *) t;                /* Twiddle table */

 /* Initialize constants */
    ospan = n/RADIXSIZE;                /* distance between output legs   */
    ispan = la;                         /* distance between input legs    */
    m     = (ospan/ispan) - 1;          /* index of the last input block  */
    istep = ispan*(RADIXSIZE-1);        /* extra input skip per block     */
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) { 
          for(k=0; k<=m; k++) {   
            /* w has already advanced by k*ispan, so w[0] is table entry
               k*ispan and w[tspan] is table entry 2*k*ispan */
            tspan  = k*ispan;
            o00_re = w[0].re;        o00_im = w[0].im;
            o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
            
            for(l=1; l<=ispan; l++) {
             /* The fundamental forward pass RADIX 3 kernel */  
                /* Legs 1 and 2 are multiplied by the conjugate twiddles */
                xreg_re = x[ispan*1].re;
                xreg_im = x[ispan*1].im;
                t01_re  = o00_re*xreg_re + o00_im*xreg_im;
                t01_im  = o00_re*xreg_im - o00_im*xreg_re;
                xreg_re = x[ispan*2].re;
                xreg_im = x[ispan*2].im;
                t02_re  = o01_re*xreg_re + o01_im*xreg_im;
                t02_im  = o01_re*xreg_im - o01_im*xreg_re;
                /* x01a = t01 + t02; x02a holds the s2-scaled differences */
                x01a_re = t02_re + t01_re;
                x02a_re = t02_re - t01_re;
                x02a_re *= s2;
                x01a_im = t01_im + t02_im;
                x02a_im = t01_im - t02_im;
                x02a_im *= s2;
                x00a_re = x[0].re;
                x00b_re = x00a_re + x01a_re;
                y[0].re = x00b_re;
                x00a_im = x[0].im;
                x00b_im = x00a_im + x01a_im;
                y[0].im = x00b_im; 
                /* x01b = x00b + s1*x01a, i.e. x[0] - 0.5*(t01 + t02) */
                x01b_im = x00b_im + s1*x01a_im;
                x01b_re = x00b_re + s1*x01a_re;
                y[ospan*1].re = x01b_re + x02a_im;
                y[ospan*1].im = x01b_im + x02a_re;
                y[ospan*2].re = x01b_re - x02a_im;
                y[ospan*2].im = x01b_im - x02a_re;

                x++;
                y++;
            }
            /* Skip the two input legs already consumed by this block */
            x += istep;
            w += ispan;
          }
        }
        else {
          /* Single block: same butterfly without twiddle multiplies */
          for(l=1; l<=ispan; l++) {
             /* The fundamental forward pass RADIX 3 kernel */  
                t01_re  = x[ispan*1].re;
                t01_im  = x[ispan*1].im;
                t02_re  = x[ispan*2].re;
                t02_im  = x[ispan*2].im;
                x01a_re = t02_re + t01_re;
                x02a_re = t02_re - t01_re;
                x02a_re *= s2;
                x01a_im = t01_im + t02_im;
                x02a_im = t01_im - t02_im;
                x02a_im *= s2;
                x00a_re = x[0].re;
                x00b_re = x00a_re + x01a_re;
                y[0].re = x00b_re;
                x00a_im = x[0].im;
                x00b_im = x00a_im + x01a_im;
                y[0].im = x00b_im; 
                x01b_im = x00b_im + s1*x01a_im;
                x01b_re = x00b_re + s1*x01a_re;
                y[ospan*1].re = x01b_re + x02a_im;
                y[ospan*1].im = x01b_im + x02a_re;
                y[ospan*2].re = x01b_re - x02a_im;
                y[ospan*2].im = x01b_im - x02a_re;

                x++;
                y++;
          }
        }
        break;

      default:                                         /* Inverse Pass */
        if (ispan != ospan) { 
          for(k=0; k<=m; k++) {   
            /* Same twiddle selection as the forward pass */
            tspan  = k*ispan;
            o00_re = w[0].re;        o00_im = w[0].im;
            o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;

            for(l=1; l<=ispan; l++) {
             /* The fundamental inverse pass RADIX 3 kernel */  
                /* Legs 1 and 2 are multiplied by the twiddles themselves */
                xreg_re = x[ispan*1].re;
                xreg_im = x[ispan*1].im;
                t01_re  = o00_re*xreg_re - o00_im*xreg_im;
                t01_im  = o00_re*xreg_im + o00_im*xreg_re;
                xreg_re = x[ispan*2].re;
                xreg_im = x[ispan*2].im;
                t02_re  = o01_re*xreg_re - o01_im*xreg_im;
                t02_im  = o01_re*xreg_im + o01_im*xreg_re;
                x01a_re = t02_re + t01_re;
                x02a_re = t02_re - t01_re;
                x02a_re *= s2;
                x01a_im = t01_im + t02_im;
                x02a_im = t01_im - t02_im;
                x02a_im *= s2;
                x00a_re = x[0].re;
                x00b_re = x00a_re + x01a_re;
                y[0].re = x00b_re;
                x00a_im = x[0].im;
                x00b_im = x00a_im + x01a_im;
                y[0].im = x00b_im; 
                x01b_im = x00b_im + s1*x01a_im;
                x01b_re = x00b_re + s1*x01a_re;
                /* Signs of the x02a terms are opposite to the forward pass */
                y[ospan*1].re = x01b_re - x02a_im;
                y[ospan*1].im = x01b_im - x02a_re;
                y[ospan*2].re = x01b_re + x02a_im;
                y[ospan*2].im = x01b_im + x02a_re;

                x++;
                y++;
            }
            x += istep;
            w += ispan;
          }
        }
        else {
          /* Single block: same butterfly without twiddle multiplies */
          for(l=1; l<=ispan; l++) {
             /* The fundamental inverse pass RADIX 3 kernel */  
                t01_re  = x[ispan*1].re;
                t01_im  = x[ispan*1].im;
                t02_re  = x[ispan*2].re;
                t02_im  = x[ispan*2].im;
                x01a_re = t02_re + t01_re;
                x02a_re = t02_re - t01_re;
                x02a_re *= s2;
                x01a_im = t01_im + t02_im;
                x02a_im = t01_im - t02_im;
                x02a_im *= s2;
                x00a_re = x[0].re;
                x00b_re = x00a_re + x01a_re;
                y[0].re = x00b_re;
                x00a_im = x[0].im;
                x00b_im = x00a_im + x01a_im;
                y[0].im = x00b_im; 
                x01b_im = x00b_im + s1*x01a_im;
                x01b_re = x00b_re + s1*x01a_re;
                y[ospan*1].re = x01b_re - x02a_im;
                y[ospan*1].im = x01b_im - x02a_re;
                y[ospan*2].re = x01b_re + x02a_im;
                y[ospan*2].im = x01b_im + x02a_re;

                x++;
                y++;
          }
        }
        break;
    }  

    return;
}

/*-----------------------------------------------------------------------*/

/*
 * radixpass_4(): one not-in-place radix-4 butterfly pass.
 *
 * Input a, output c and twiddle table t are accessed as arrays of
 * fft_cmplx. The input is consumed in blocks of 4*la complex points;
 * each block produces la outputs at each of the offsets 0, ospan,
 * 2*ospan and 3*ospan in c, where ospan = n/4. The forward kernels run
 * when dirflag equals FFT_FLAGS_FORWARD; any other value selects the
 * inverse kernels. When la == n/4 a twiddle-free kernel is used. No
 * scaling is applied. The function returns without writing anything
 * when n < 4 or la < 1.
 */
void radixpass_4(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */    
const int RADIXSIZE = (int) 4;

/* Local variables */
fft_int k,l,m;                     
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re,o02_re;
fft_real o00_im,o01_im,o02_im;
fft_real t00_re,t01_re,t02_re,t03_re;
fft_real t00_im,t01_im,t02_im,t03_im;
fft_real xreg_re,xreg_im;
fft_real x00a_re,x00a_im;
fft_real x01a_re,x01a_im;
fft_real x02a_re,x02a_im;
fft_real x03a_re,x03a_im;
fft_cmplx *x, *y, *w;

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
  
 /* Define local arrays */
    x = (fft_cmplx *) a;                /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (fft_cmplx *) t;                /* Twiddle table */

 /* Initialize pointers */
    ospan = n/RADIXSIZE;                /* distance between output legs   */
    ispan = la;                         /* distance between input legs    */
    m     = (ospan/ispan) - 1;          /* index of the last input block  */
    istep = ispan*(RADIXSIZE-1);        /* extra input skip per block     */
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) { 
            for(k=0; k<=m; k++) { 
                /* w has already advanced by k*ispan, so the three reads
                   below are table entries k*ispan, 2*k*ispan, 3*k*ispan */
                tspan  = k*ispan; 
                o00_re = w[0].re;        o00_im = w[0].im;
                o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
  
                for(l=1; l<=ispan; l++) { 
                /* The fundamental forward pass RADIX 4 kernel */
                   t00_re  = x[0].re;
                   t00_im  = x[0].im;
                   /* Legs 1..3 are multiplied by the conjugate twiddles */
                   xreg_re = x[ispan*1].re;
                   xreg_im = x[ispan*1].im;
                   t01_re  = o00_re*xreg_re + o00_im*xreg_im;
                   t01_im  = o00_re*xreg_im - o00_im*xreg_re;
                   xreg_re = x[ispan*2].re;
                   xreg_im = x[ispan*2].im;
                   t02_re  = o01_re*xreg_re + o01_im*xreg_im;
                   t02_im  = o01_re*xreg_im - o01_im*xreg_re;
                   xreg_re = x[ispan*3].re;
                   xreg_im = x[ispan*3].im;
                   t03_re  = o02_re*xreg_re + o02_im*xreg_im;
                   t03_im  = o02_re*xreg_im - o02_im*xreg_re;
                   /* Sums and differences of legs (0,2) and (1,3) */
                   x00a_re = t00_re + t02_re;
                   x02a_re = t00_re - t02_re;
                   x00a_im = t00_im + t02_im;
                   x02a_im = t00_im - t02_im;
                   x01a_re = t01_re + t03_re;
                   x03a_re = t01_re - t03_re;
                   x01a_im = t01_im + t03_im;
                   x03a_im = t01_im - t03_im;
                   /* Outputs 1 and 3 are x02a -/+ i*x03a */
                   y[0].re       = x00a_re + x01a_re;
                   y[0].im       = x00a_im + x01a_im;
                   y[ospan*1].re = x02a_re + x03a_im;
                   y[ospan*1].im = x02a_im - x03a_re;
                   y[ospan*2].re = x00a_re - x01a_re;
                   y[ospan*2].im = x00a_im - x01a_im;
                   y[ospan*3].re = x02a_re - x03a_im;
                   y[ospan*3].im = x02a_im + x03a_re;

                   x++;   
                   y++; 
                }
                /* Skip the three input legs already consumed by this block */
                x += istep;       
                w += ispan;
            }
	}
	else {
            /* Single block: same butterfly without twiddle multiplies */
            for(l=1; l<=ispan; l++) {
               t00_re  = x[0].re;
               t00_im  = x[0].im;
               t01_re  = x[ispan*1].re;
               t01_im  = x[ispan*1].im;
               t02_re  = x[ispan*2].re;
               t02_im  = x[ispan*2].im;
               t03_re  = x[ispan*3].re;
               t03_im  = x[ispan*3].im;
               x00a_re = t00_re + t02_re;
               x02a_re = t00_re - t02_re;
               x00a_im = t00_im + t02_im;
               x02a_im = t00_im - t02_im;
               x01a_re = t01_re + t03_re;
               x03a_re = t01_re - t03_re;
               x01a_im = t01_im + t03_im;
               x03a_im = t01_im - t03_im;
               y[0].re       = x00a_re + x01a_re;
               y[0].im       = x00a_im + x01a_im;
               y[ospan*1].re = x02a_re + x03a_im;
               y[ospan*1].im = x02a_im - x03a_re;
               y[ospan*2].re = x00a_re - x01a_re;
               y[ospan*2].im = x00a_im - x01a_im;
               y[ospan*3].re = x02a_re - x03a_im;
               y[ospan*3].im = x02a_im + x03a_re;

               x++;  
               y++;   
            }
	}
    	break;

      default:                                         /* Inverse Pass */
        if (ispan != ospan) {
            for(k=0; k<=m; k++) { 
                /* Same twiddle selection as the forward pass */
                tspan  = k*ispan;
                o00_re = w[0].re;        o00_im = w[0].im;
                o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
  
                for(l=1; l<=ispan; l++) {
                /* The fundamental inverse pass RADIX 4 kernel */
                   t00_re  = x[0].re;
                   t00_im  = x[0].im;
                   /* Legs 1..3 are multiplied by the twiddles themselves */
                   xreg_re = x[ispan*1].re;
                   xreg_im = x[ispan*1].im;
                   t01_re  = o00_re*xreg_re - o00_im*xreg_im;
                   t01_im  = o00_re*xreg_im + o00_im*xreg_re;
                   xreg_re = x[ispan*2].re;
                   xreg_im = x[ispan*2].im;
                   t02_re  = o01_re*xreg_re - o01_im*xreg_im;
                   t02_im  = o01_re*xreg_im + o01_im*xreg_re;
                   xreg_re = x[ispan*3].re;
                   xreg_im = x[ispan*3].im;
                   t03_re  = o02_re*xreg_re - o02_im*xreg_im;
                   t03_im  = o02_re*xreg_im + o02_im*xreg_re;
                   x00a_re = t00_re + t02_re;
                   x02a_re = t00_re - t02_re;
                   x00a_im = t00_im + t02_im;
                   x02a_im = t00_im - t02_im;
                   x01a_re = t01_re + t03_re;
                   x03a_re = t01_re - t03_re;
                   x01a_im = t01_im + t03_im;
                   x03a_im = t01_im - t03_im;
                   /* Outputs 1 and 3 are x02a +/- i*x03a (signs opposite
                      to the forward pass) */
                   y[0].re       = x00a_re + x01a_re;
                   y[0].im       = x00a_im + x01a_im;
                   y[ospan*1].re = x02a_re - x03a_im;
                   y[ospan*1].im = x02a_im + x03a_re;
                   y[ospan*2].re = x00a_re - x01a_re;
                   y[ospan*2].im = x00a_im - x01a_im;
                   y[ospan*3].re = x02a_re + x03a_im;
                   y[ospan*3].im = x02a_im - x03a_re;
    
                   x++;   
                   y++; 
                }
                x += istep;       
                w += ispan;
            }
	}
	else {
            /* Single block: same butterfly without twiddle multiplies */
            for(l=1; l<=ispan; l++) {
               t00_re  = x[0].re;
               t00_im  = x[0].im;
               t01_re  = x[ispan*1].re;
               t01_im  = x[ispan*1].im;
               t02_re  = x[ispan*2].re;
               t02_im  = x[ispan*2].im;
               t03_re  = x[ispan*3].re;
               t03_im  = x[ispan*3].im;
               x00a_re = t00_re + t02_re;
               x02a_re = t00_re - t02_re;
               x00a_im = t00_im + t02_im;
               x02a_im = t00_im - t02_im;
               x01a_re = t01_re + t03_re;
               x03a_re = t01_re - t03_re;
               x01a_im = t01_im + t03_im;
               x03a_im = t01_im - t03_im;
               y[0].re       = x00a_re + x01a_re;
               y[0].im       = x00a_im + x01a_im;
               y[ospan*1].re = x02a_re - x03a_im;
               y[ospan*1].im = x02a_im + x03a_re;
               y[ospan*2].re = x00a_re - x01a_re;
               y[ospan*2].im = x00a_im - x01a_im;
               y[ospan*3].re = x02a_re + x03a_im;
               y[ospan*3].im = x02a_im - x03a_re;
    
               x++;  
               y++;   
            }
	}
	break;
    }	

    return;
}

/*-----------------------------------------------------------------------*/

/*
 * radixpass_5 - perform one radix-5 butterfly pass over complex data.
 *
 *   a       - input data; read as an array of fft_cmplx (re,im pairs)
 *   c       - output data; written as an array of fft_cmplx
 *   t       - twiddle table; read as an array of fft_cmplx
 *   n       - number of complex points handled by the pass; the five
 *             outputs of one butterfly are written n/5 elements apart
 *   la      - distance (in complex elements) between the five inputs of
 *             one butterfly
 *             NOTE(review): presumably the product of the radices of
 *             the passes already applied - confirm against the caller.
 *   dirflag - FFT_FLAGS_FORWARD selects the forward kernel; any other
 *             value selects the inverse kernel
 *
 * The routine returns without touching the output when n < 5 or la < 1.
 *
 * Data layout: the pass is made of (n/5)/la groups of la butterflies.
 * Inside a group both pointers advance by one element per butterfly;
 * between groups the input pointer skips a further 4*la elements (so a
 * group consumes 5*la inputs) while the output pointer simply continues.
 *
 * Twiddles: for group k the four factors applied to inputs 1..4 are the
 * table entries k*la, 2*k*la, 3*k*la and 4*k*la (w itself is advanced by
 * la per group and then indexed with 0, k*la, 2*k*la, 3*k*la).  The
 * forward kernel multiplies by the conjugate of the table entry, the
 * inverse kernel by the entry itself.  When la == n/5 there is a single
 * group and the twiddle multiplication is skipped entirely.
 * NOTE(review): this assumes the first table entry is unity - confirm
 * against the routine that builds the table.
 */
void radixpass_5(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */
const int RADIXSIZE = (int) 5;

/* s1 = -5/4: turns (x0 + sum) into (x0 - sum/4) in a single multiply-add */
const fft_real s1 = 
                  FFT_CONST(-1.250000000000000000000000000000000000000000000);
/* s2 = sqrt(5)/4 */
const fft_real s2 = 
                  FFT_CONST(+0.559016994374947424102293417182819058860154590);
/* s3 = sin(2*pi/5) */
const fft_real s3 = 
                  FFT_CONST(+0.951056516295153572116439333379382143405698634);
/* s4 = sin(pi/5)/sin(2*pi/5) = (sqrt(5)-1)/2, given to the same number of
   digits as the other constants so that it rounds correctly for every
   width of fft_real */
const fft_real s4 = 
                  FFT_CONST(+0.618033988749894848204586834365638117720309180);

/* Local variables */
fft_int k,l,m;                     
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re,o02_re,o03_re;
fft_real o00_im,o01_im,o02_im,o03_im;
fft_real t01_re,t02_re,t03_re,t04_re;
fft_real t01_im,t02_im,t03_im,t04_im;
fft_real xreg_re,xreg_im;
fft_real x00a_re,x00a_im;
fft_real x01a_re,x01a_im,x01b_re,x01b_im,x01c_re,x01c_im;
fft_real x02a_re,x02a_im,x02b_re,x02b_im,x02c_re,x02c_im;
fft_real x03a_re,x03a_im,x03b_re,x03b_im,x03c_re,x03c_im;
fft_real x04a_re,x04a_im,x04b_re,x04b_im,x04c_re,x04c_im;
const fft_cmplx *x, *w;             /* read-only: input and twiddles */
fft_cmplx *y;                       /* written: output               */

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
  
 /* Define local arrays */
    x = (const fft_cmplx *) a;          /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (const fft_cmplx *) t;          /* Twiddle table */

 /* Initialize pointers */
    ospan = n/RADIXSIZE;                /* stride between butterfly outputs */
    ispan = la;                         /* stride between butterfly inputs  */
    m     = (ospan/ispan) - 1;          /* index of the last group          */
    istep = ispan*(RADIXSIZE-1);        /* extra input skip between groups  */
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) { 
          for(k=0; k<=m; k++) {   
         /* Fetch the four twiddles of this group (constant over l) */
            tspan  = k*ispan;
            o00_re = w[0].re;        o00_im = w[0].im;
            o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
            o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
            o03_re = w[tspan*3].re;  o03_im = w[tspan*3].im;

            for(l=1; l<=ispan; l++) {
             /* The fundamental forward pass RADIX 5 kernel */  
             /* Twiddle inputs 1 and 4 (conjugate multiply), then form
                their sum and difference */
                xreg_re = x[ispan*1].re;
                xreg_im = x[ispan*1].im;
                t01_re  = o00_re*xreg_re + o00_im*xreg_im;
                t01_im  = o00_re*xreg_im - o00_im*xreg_re;
                xreg_re = x[ispan*4].re;
                xreg_im = x[ispan*4].im;
                t04_re  = o03_re*xreg_re + o03_im*xreg_im;
                t04_im  = o03_re*xreg_im - o03_im*xreg_re;
                x01a_re = t01_re + t04_re;
                x04a_re = t01_re - t04_re;
                x01a_im = t01_im + t04_im;
                x04a_im = t01_im - t04_im;
             /* Same for inputs 2 and 3 */
                xreg_re = x[ispan*2].re;
                xreg_im = x[ispan*2].im;
                t02_re  = o01_re*xreg_re + o01_im*xreg_im;
                t02_im  = o01_re*xreg_im - o01_im*xreg_re;
                xreg_re = x[ispan*3].re;
                xreg_im = x[ispan*3].im;
                t03_re  = o02_re*xreg_re + o02_im*xreg_im;
                t03_im  = o02_re*xreg_im - o02_im*xreg_re;
                x02a_re = t02_re + t03_re;
                x03a_re = t02_re - t03_re;
                x02a_im = t02_im + t03_im;
                x03a_im = t02_im - t03_im;
             /* Output 0 is the plain sum of all five inputs */
                xreg_re = x[0].re;
                x02b_re = x01a_re - x02a_re;
                x01b_re = x01a_re + x02a_re;
                x00a_re = xreg_re + x01b_re;
                y[0].re = x00a_re;
                xreg_im = x[0].im;
                x02b_im = x01a_im - x02a_im;
                x01b_im = x01a_im + x02a_im;
                x00a_im = xreg_im + x01b_im;
                y[0].im = x00a_im;  
             /* Symmetric part shared by output pairs (1,4) and (2,3) */
                x01b_re = s1*x01b_re + x00a_re;
                x02b_re = s2*x02b_re;
                x01c_re = x01b_re + x02b_re;
                x02c_re = x01b_re - x02b_re;
                x01b_im = s1*x01b_im + x00a_im;
                x02b_im = s2*x02b_im;
                x01c_im = x01b_im + x02b_im;
                x02c_im = x01b_im - x02b_im;
             /* Antisymmetric part; note the *_re terms are built from
                imaginary parts and the *_im terms from real parts */
                x03b_re = s3*x03a_im;
                x04b_re = s3*x04a_im;
                x03c_re = s4*x04b_re - x03b_re;
                x04c_re = s4*x03b_re + x04b_re;
                x03b_im = s3*x03a_re;
                x04b_im = s3*x04a_re;
                x03c_im = s4*x04b_im - x03b_im;
                x04c_im = s4*x03b_im + x04b_im;
                y[ospan*1].re = x01c_re + x04c_re; 
                y[ospan*1].im = x01c_im - x04c_im;
                y[ospan*2].re = x02c_re + x03c_re;
                y[ospan*2].im = x02c_im - x03c_im;
                y[ospan*3].re = x02c_re - x03c_re;
                y[ospan*3].im = x02c_im + x03c_im;
                y[ospan*4].re = x01c_re - x04c_re;
                y[ospan*4].im = x01c_im + x04c_im;
         
                x++;
                y++;
            }
         /* Skip the four input rows already consumed by this group */
            x += istep;
            w += ispan;
          } 
        }
        else {
       /* Single group: inputs are used without twiddle multiplication */
          for(l=1; l<=ispan; l++) {
             /* The fundamental forward pass RADIX 5 kernel */  
                t01_re  = x[ispan*1].re;
                t01_im  = x[ispan*1].im;
                t04_re  = x[ispan*4].re;
                t04_im  = x[ispan*4].im;
                x01a_re = t01_re + t04_re;
                x04a_re = t01_re - t04_re;
                x01a_im = t01_im + t04_im;
                x04a_im = t01_im - t04_im;
                t02_re  = x[ispan*2].re;
                t02_im  = x[ispan*2].im;
                t03_re  = x[ispan*3].re;
                t03_im  = x[ispan*3].im;
                x02a_re = t02_re + t03_re;
                x03a_re = t02_re - t03_re;
                x02a_im = t02_im + t03_im;
                x03a_im = t02_im - t03_im;
                xreg_re = x[0].re;
                x02b_re = x01a_re - x02a_re;
                x01b_re = x01a_re + x02a_re;
                x00a_re = xreg_re + x01b_re;
                y[0].re = x00a_re;
                xreg_im = x[0].im;
                x02b_im = x01a_im - x02a_im;
                x01b_im = x01a_im + x02a_im;
                x00a_im = xreg_im + x01b_im;
                y[0].im = x00a_im;  
                x01b_re = s1*x01b_re + x00a_re;
                x02b_re = s2*x02b_re;
                x01c_re = x01b_re + x02b_re;
                x02c_re = x01b_re - x02b_re;
                x01b_im = s1*x01b_im + x00a_im;
                x02b_im = s2*x02b_im;
                x01c_im = x01b_im + x02b_im;
                x02c_im = x01b_im - x02b_im;
                x03b_re = s3*x03a_im;
                x04b_re = s3*x04a_im;
                x03c_re = s4*x04b_re - x03b_re;
                x04c_re = s4*x03b_re + x04b_re;
                x03b_im = s3*x03a_re;
                x04b_im = s3*x04a_re;
                x03c_im = s4*x04b_im - x03b_im;
                x04c_im = s4*x03b_im + x04b_im;
                y[ospan*1].re = x01c_re + x04c_re; 
                y[ospan*1].im = x01c_im - x04c_im;
                y[ospan*2].re = x02c_re + x03c_re;
                y[ospan*2].im = x02c_im - x03c_im;
                y[ospan*3].re = x02c_re - x03c_re;
                y[ospan*3].im = x02c_im + x03c_im;
                y[ospan*4].re = x01c_re - x04c_re;
                y[ospan*4].im = x01c_im + x04c_im;
         
                x++;
                y++;
          }
        }
        break;

      default:                                         /* Inverse Pass */
     /* Identical to the forward pass except that the twiddle multiply is
        not conjugated and the signs of the antisymmetric terms in
        outputs 1..4 are swapped */
        if (ispan != ospan) { 
          for(k=0; k<=m; k++) {   
            tspan  = k*ispan;
            o00_re = w[0].re;        o00_im = w[0].im;
            o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
            o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
            o03_re = w[tspan*3].re;  o03_im = w[tspan*3].im;

            for(l=1; l<=ispan; l++) {
             /* The fundamental inverse pass RADIX 5 kernel */  
                xreg_re = x[ispan*1].re;
                xreg_im = x[ispan*1].im;
                t01_re  = o00_re*xreg_re - o00_im*xreg_im;
                t01_im  = o00_re*xreg_im + o00_im*xreg_re;
                xreg_re = x[ispan*4].re;
                xreg_im = x[ispan*4].im;
                t04_re  = o03_re*xreg_re - o03_im*xreg_im;
                t04_im  = o03_re*xreg_im + o03_im*xreg_re;
                x01a_re = t01_re + t04_re;
                x04a_re = t01_re - t04_re;
                x01a_im = t01_im + t04_im;
                x04a_im = t01_im - t04_im;
                xreg_re = x[ispan*2].re;
                xreg_im = x[ispan*2].im;
                t02_re  = o01_re*xreg_re - o01_im*xreg_im;
                t02_im  = o01_re*xreg_im + o01_im*xreg_re;
                xreg_re = x[ispan*3].re;
                xreg_im = x[ispan*3].im;
                t03_re  = o02_re*xreg_re - o02_im*xreg_im;
                t03_im  = o02_re*xreg_im + o02_im*xreg_re;
                x02a_re = t02_re + t03_re;
                x03a_re = t02_re - t03_re;
                x02a_im = t02_im + t03_im;
                x03a_im = t02_im - t03_im;
                xreg_re = x[0].re;
                x02b_re = x01a_re - x02a_re;
                x01b_re = x01a_re + x02a_re;
                x00a_re = xreg_re + x01b_re;
                y[0].re = x00a_re;
                xreg_im = x[0].im;
                x02b_im = x01a_im - x02a_im;
                x01b_im = x01a_im + x02a_im;
                x00a_im = xreg_im + x01b_im;
                y[0].im = x00a_im;  
                x01b_re = s1*x01b_re + x00a_re;
                x02b_re = s2*x02b_re;
                x01c_re = x01b_re + x02b_re;
                x02c_re = x01b_re - x02b_re;
                x01b_im = s1*x01b_im + x00a_im;
                x02b_im = s2*x02b_im;
                x01c_im = x01b_im + x02b_im;
                x02c_im = x01b_im - x02b_im;
                x03b_re = s3*x03a_im;
                x04b_re = s3*x04a_im;
                x03c_re = s4*x04b_re - x03b_re;
                x04c_re = s4*x03b_re + x04b_re;
                x03b_im = s3*x03a_re;
                x04b_im = s3*x04a_re;
                x03c_im = s4*x04b_im - x03b_im;
                x04c_im = s4*x03b_im + x04b_im;
                y[ospan*1].re = x01c_re - x04c_re; 
                y[ospan*1].im = x01c_im + x04c_im;
                y[ospan*2].re = x02c_re - x03c_re;
                y[ospan*2].im = x02c_im + x03c_im;
                y[ospan*3].re = x02c_re + x03c_re;
                y[ospan*3].im = x02c_im - x03c_im;
                y[ospan*4].re = x01c_re + x04c_re;
                y[ospan*4].im = x01c_im - x04c_im;
         
                x++;
                y++;
            }
            x += istep;
            w += ispan;
          }
        }
        else {
       /* Single group: inputs are used without twiddle multiplication */
          for(l=1; l<=ispan; l++) {
             /* The fundamental inverse pass RADIX 5 kernel */  
                t01_re  = x[ispan*1].re;
                t01_im  = x[ispan*1].im;
                t04_re  = x[ispan*4].re;
                t04_im  = x[ispan*4].im;
                x01a_re = t01_re + t04_re;
                x04a_re = t01_re - t04_re;
                x01a_im = t01_im + t04_im;
                x04a_im = t01_im - t04_im;
                t02_re  = x[ispan*2].re;
                t02_im  = x[ispan*2].im;
                t03_re  = x[ispan*3].re;
                t03_im  = x[ispan*3].im;
                x02a_re = t02_re + t03_re;
                x03a_re = t02_re - t03_re;
                x02a_im = t02_im + t03_im;
                x03a_im = t02_im - t03_im;
                xreg_re = x[0].re;
                x02b_re = x01a_re - x02a_re;
                x01b_re = x01a_re + x02a_re;
                x00a_re = xreg_re + x01b_re;
                y[0].re = x00a_re;
                xreg_im = x[0].im;
                x02b_im = x01a_im - x02a_im;
                x01b_im = x01a_im + x02a_im;
                x00a_im = xreg_im + x01b_im;
                y[0].im = x00a_im;  
                x01b_re = s1*x01b_re + x00a_re;
                x02b_re = s2*x02b_re;
                x01c_re = x01b_re + x02b_re;
                x02c_re = x01b_re - x02b_re;
                x01b_im = s1*x01b_im + x00a_im;
                x02b_im = s2*x02b_im;
                x01c_im = x01b_im + x02b_im;
                x02c_im = x01b_im - x02b_im;
                x03b_re = s3*x03a_im;
                x04b_re = s3*x04a_im;
                x03c_re = s4*x04b_re - x03b_re;
                x04c_re = s4*x03b_re + x04b_re;
                x03b_im = s3*x03a_re;
                x04b_im = s3*x04a_re;
                x03c_im = s4*x04b_im - x03b_im;
                x04c_im = s4*x03b_im + x04b_im;
                y[ospan*1].re = x01c_re - x04c_re; 
                y[ospan*1].im = x01c_im + x04c_im;
                y[ospan*2].re = x02c_re - x03c_re;
                y[ospan*2].im = x02c_im + x03c_im;
                y[ospan*3].re = x02c_re + x03c_re;
                y[ospan*3].im = x02c_im - x03c_im;
                y[ospan*4].re = x01c_re + x04c_re;
                y[ospan*4].im = x01c_im - x04c_im;
         
                x++;
                y++;
          }
        }
        break;
    }
  
    return;
}

/*-----------------------------------------------------------------------*/

/*
 * radixpass_8 - perform one radix-8 butterfly pass over complex data.
 *
 *   a       - input data; read as an array of fft_cmplx (re,im pairs)
 *   c       - output data; written as an array of fft_cmplx
 *   t       - twiddle table; read as an array of fft_cmplx
 *   n       - number of complex points handled by the pass; the eight
 *             outputs of one butterfly are written n/8 elements apart
 *   la      - distance (in complex elements) between the eight inputs of
 *             one butterfly
 *             NOTE(review): presumably the product of the radices of
 *             the passes already applied - confirm against the caller.
 *   dirflag - FFT_FLAGS_FORWARD selects the forward kernel; any other
 *             value selects the inverse kernel
 *
 * The routine returns without touching the output when n < 8 or la < 1.
 *
 * Data layout: the pass is made of (n/8)/la groups of la butterflies.
 * Inside a group both pointers advance by one element per butterfly;
 * between groups the input pointer skips a further 7*la elements (so a
 * group consumes 8*la inputs) while the output pointer simply continues.
 *
 * Twiddles: for group k the seven factors applied to inputs 1..7 are the
 * table entries k*la, 2*k*la, ... 7*k*la (w itself is advanced by la per
 * group and then indexed with 0, k*la, ... 6*k*la).  Input 0 is never
 * twiddled.  The forward kernel multiplies by the conjugate of the table
 * entry, the inverse kernel by the entry itself.  When la == n/8 there
 * is a single group and the twiddle multiplication is skipped entirely.
 * NOTE(review): this assumes the first table entry is unity - confirm
 * against the routine that builds the table.
 */
void radixpass_8(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */
const int RADIXSIZE = (int) 8;

/* s1 = 1/sqrt(2) */
const fft_real s1 = 
                  FFT_CONST(+0.707106781186547524400844362104849039284835938);

/* Local variables */
fft_int k,l,m;                     
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re,o02_re,o03_re,o04_re,o05_re,o06_re;
fft_real o00_im,o01_im,o02_im,o03_im,o04_im,o05_im,o06_im;
fft_real t00_re,t01_re,t02_re,t03_re,t04_re,t05_re,t06_re,t07_re;
fft_real t00_im,t01_im,t02_im,t03_im,t04_im,t05_im,t06_im,t07_im;
fft_real xreg_re,xreg_im;
fft_real x00a_re,x00a_im,x00b_re,x00b_im;
fft_real x01a_re,x01a_im,x01b_re,x01b_im;
fft_real x02a_re,x02a_im,x02b_re,x02b_im;
fft_real x03a_re,x03a_im,x03b_re,x03b_im;
fft_real x04a_re,x04a_im,x04b_re,x04b_im;
fft_real x05a_re,x05a_im,x05b_re,x05b_im,x05c_re,x05c_im,x05d_re,x05d_im;
fft_real x06a_re,x06a_im,x06b_re,x06b_im;
fft_real x07a_re,x07a_im,x07b_re,x07b_im,x07c_re,x07c_im,x07d_re,x07d_im;
fft_cmplx *x, *y, *w;

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
  
 /* Define local arrays */
    x = (fft_cmplx *) a;                /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (fft_cmplx *) t;                /* Twiddle table */

 /* Initialize pointers */
 /* ospan: stride between the outputs of one butterfly
    ispan: stride between the inputs of one butterfly
    m    : index of the last group (groups run k = 0..m)
    istep: extra input skip applied after each group */
    ospan = n/RADIXSIZE;
    ispan = la;
    m     = (ospan/ispan) - 1;
    istep = ispan*(RADIXSIZE-1);
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) {
            for(k=0; k<=m; k++) { 
             /* Fetch the seven twiddles of this group (constant over l) */
                tspan  = k*ispan;
                o00_re = w[0].re;        o00_im = w[0].im;
                o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;  o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;  o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;  o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;  o06_im = w[tspan*6].im;
  
                for(l=1; l<=ispan; l++) {
                /* The fundamental forward pass RADIX 8 kernel */  
                /* Load input 0 as-is; inputs 1..7 are multiplied by the
                   conjugate of their twiddle */
                   t00_re  = x[0].re;
                   t00_im  = x[0].im;
                   xreg_re = x[ispan*1].re;
                   xreg_im = x[ispan*1].im;
                   t01_re  = o00_re*xreg_re + o00_im*xreg_im;
                   t01_im  = o00_re*xreg_im - o00_im*xreg_re;
                   xreg_re = x[ispan*2].re;
                   xreg_im = x[ispan*2].im;
                   t02_re  = o01_re*xreg_re + o01_im*xreg_im;
                   t02_im  = o01_re*xreg_im - o01_im*xreg_re;
                   xreg_re = x[ispan*3].re;
                   xreg_im = x[ispan*3].im;
                   t03_re  = o02_re*xreg_re + o02_im*xreg_im;
                   t03_im  = o02_re*xreg_im - o02_im*xreg_re;
                   xreg_re = x[ispan*4].re;
                   xreg_im = x[ispan*4].im;
                   t04_re  = o03_re*xreg_re + o03_im*xreg_im;
                   t04_im  = o03_re*xreg_im - o03_im*xreg_re;
                   xreg_re = x[ispan*5].re;
                   xreg_im = x[ispan*5].im;
                   t05_re  = o04_re*xreg_re + o04_im*xreg_im;
                   t05_im  = o04_re*xreg_im - o04_im*xreg_re;
                   xreg_re = x[ispan*6].re;
                   xreg_im = x[ispan*6].im;
                   t06_re  = o05_re*xreg_re + o05_im*xreg_im;
                   t06_im  = o05_re*xreg_im - o05_im*xreg_re;
                   xreg_re = x[ispan*7].re;
                   xreg_im = x[ispan*7].im;
                   t07_re  = o06_re*xreg_re + o06_im*xreg_im;
                   t07_im  = o06_re*xreg_im - o06_im*xreg_re;
                /* Sums and differences of inputs four apart (0/4, 2/6),
                   then of those sums */
                   x04a_re = t00_re - t04_re;
                   x00a_re = t00_re + t04_re;
                   x02a_re = t02_re + t06_re;
                   x06a_re = t02_re - t06_re;
                   x00b_re = x00a_re + x02a_re;
                   x02b_re = x00a_re - x02a_re;
                   x04a_im = t00_im - t04_im;
                   x00a_im = t00_im + t04_im;
                   x02a_im = t02_im + t06_im;
                   x06a_im = t02_im - t06_im;
                   x00b_im = x00a_im + x02a_im;
                   x02b_im = x00a_im - x02a_im;
                /* Same for the odd inputs (1/5, 3/7) */
                   x05a_re = t01_re - t05_re;
                   x01a_re = t01_re + t05_re;
                   x03a_re = t03_re + t07_re;
                   x07a_re = t03_re - t07_re;
                   x01b_re = x01a_re + x03a_re;
                   x03b_re = x01a_re - x03a_re;
                   x05a_im = t01_im - t05_im;
                   x01a_im = t01_im + t05_im;
                   x03a_im = t03_im + t07_im;
                   x07a_im = t03_im - t07_im;
                   x01b_im = x01a_im + x03a_im;
                   x03b_im = x01a_im - x03a_im;
                /* Even outputs 0, 4, 2, 6 need no multiplications */
                   y[0].re       = x00b_re + x01b_re;                          
                   y[0].im       = x00b_im + x01b_im;
                   y[ospan*4].re = x00b_re - x01b_re;       
                   y[ospan*4].im = x00b_im - x01b_im;       
                   y[ospan*2].re = x02b_re + x03b_im;      
                   y[ospan*2].im = x02b_im - x03b_re;      
                   y[ospan*6].re = x02b_re - x03b_im;      
                   y[ospan*6].im = x02b_im + x03b_re;      
                /* Odd outputs: combine the differences, scaling the
                   (5,7) terms by s1 */
                   x07b_re = x05a_re + x07a_re;
                   x05b_re = x05a_re - x07a_re;
                   x07b_im = x05a_im + x07a_im;
                   x05b_im = x05a_im - x07a_im;
                   x05c_re = s1*x05b_re;
                   x04b_re = x04a_re + x05c_re;
                   x05d_re = x04a_re - x05c_re;
                   x05c_im = s1*x05b_im;
                   x04b_im = x04a_im + x05c_im;
                   x05d_im = x04a_im - x05c_im;
                /* Note: x07c/x06b/x07d *_re are built from imaginary
                   parts and *_im from real parts */
                   x07c_re = s1*x07b_im;
                   x06b_re = x06a_im + x07c_re;
                   x07d_re = x06a_im - x07c_re;
                   x07c_im = s1*x07b_re;
                   x06b_im = x06a_re + x07c_im;
                   x07d_im = x07c_im - x06a_re;
                   y[ospan*1].re = x04b_re + x06b_re;
                   y[ospan*1].im = x04b_im - x06b_im;
                   y[ospan*7].re = x04b_re - x06b_re;
                   y[ospan*7].im = x04b_im + x06b_im;
                   y[ospan*3].re = x05d_re - x07d_re;
                   y[ospan*3].im = x05d_im - x07d_im;
                   y[ospan*5].re = x05d_re + x07d_re;
                   y[ospan*5].im = x05d_im + x07d_im;
            
                   x++;   
                   y++; 
                }
             /* Skip the seven input rows already consumed by this group */
                x += istep;       
                w += ispan;
            }
	}
	else {
         /* Single group: inputs are used without twiddle multiplication */
            for(l=1; l<=ispan; l++) {
            /* The fundamental forward pass RADIX 8 kernel */  
               t00_re = x[0].re;
               t00_im = x[0].im;
               t01_re = x[ispan*1].re;
               t01_im = x[ispan*1].im;
               t02_re = x[ispan*2].re;
               t02_im = x[ispan*2].im;
               t03_re = x[ispan*3].re;
               t03_im = x[ispan*3].im;
               t04_re = x[ispan*4].re;
               t04_im = x[ispan*4].im;
               t05_re = x[ispan*5].re;
               t05_im = x[ispan*5].im;
               t06_re = x[ispan*6].re;
               t06_im = x[ispan*6].im;
               t07_re = x[ispan*7].re;
               t07_im = x[ispan*7].im;
               x04a_re = t00_re - t04_re;
               x00a_re = t00_re + t04_re;
               x02a_re = t02_re + t06_re;
               x06a_re = t02_re - t06_re;
               x00b_re = x00a_re + x02a_re;
               x02b_re = x00a_re - x02a_re;
               x04a_im = t00_im - t04_im;
               x00a_im = t00_im + t04_im;
               x02a_im = t02_im + t06_im;
               x06a_im = t02_im - t06_im;
               x00b_im = x00a_im + x02a_im;
               x02b_im = x00a_im - x02a_im;
               x05a_re = t01_re - t05_re;
               x01a_re = t01_re + t05_re;
               x03a_re = t03_re + t07_re;
               x07a_re = t03_re - t07_re;
               x01b_re = x01a_re + x03a_re;
               x03b_re = x01a_re - x03a_re;
               x05a_im = t01_im - t05_im;
               x01a_im = t01_im + t05_im;
               x03a_im = t03_im + t07_im;
               x07a_im = t03_im - t07_im;
               x01b_im = x01a_im + x03a_im;
               x03b_im = x01a_im - x03a_im;
               y[0].re       = x00b_re + x01b_re;                          
               y[0].im       = x00b_im + x01b_im;
               y[ospan*4].re = x00b_re - x01b_re;       
               y[ospan*4].im = x00b_im - x01b_im;       
               y[ospan*2].re = x02b_re + x03b_im;      
               y[ospan*2].im = x02b_im - x03b_re;      
               y[ospan*6].re = x02b_re - x03b_im;      
               y[ospan*6].im = x02b_im + x03b_re;      
               x07b_re = x05a_re + x07a_re;
               x05b_re = x05a_re - x07a_re;
               x07b_im = x05a_im + x07a_im;
               x05b_im = x05a_im - x07a_im;
               x05c_re = s1*x05b_re;
               x04b_re = x04a_re + x05c_re;
               x05d_re = x04a_re - x05c_re;
               x05c_im = s1*x05b_im;
               x04b_im = x04a_im + x05c_im;
               x05d_im = x04a_im - x05c_im;
               x07c_re = s1*x07b_im;
               x06b_re = x06a_im + x07c_re;
               x07d_re = x06a_im - x07c_re;
               x07c_im = s1*x07b_re;
               x06b_im = x06a_re + x07c_im;
               x07d_im = x07c_im - x06a_re;
               y[ospan*1].re = x04b_re + x06b_re;
               y[ospan*1].im = x04b_im - x06b_im;
               y[ospan*7].re = x04b_re - x06b_re;
               y[ospan*7].im = x04b_im + x06b_im;
               y[ospan*3].re = x05d_re - x07d_re;
               y[ospan*3].im = x05d_im - x07d_im;
               y[ospan*5].re = x05d_re + x07d_re;
               y[ospan*5].im = x05d_im + x07d_im;
    
               x++;  
               y++;   
            }
	}
	break;

      default:                                         /* Inverse Pass */
     /* Same structure as the forward pass; the twiddle multiply is not
        conjugated and the expressions stored to the output pairs (1,7),
        (2,6) and (3,5) are exchanged */
        if (ispan != ospan) {
            for(k=0; k<=m; k++) { 
                tspan  = k*ispan;
                o00_re = w[0].re;        o00_im = w[0].im;
                o01_re = w[tspan*1].re;  o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;  o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;  o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;  o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;  o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;  o06_im = w[tspan*6].im;
  
                for(l=1; l<=ispan; l++) {
                /* The fundamental inverse pass RADIX 8 kernel */  
                   t00_re  = x[0].re;
                   t00_im  = x[0].im;
                   xreg_re = x[ispan*1].re;
                   xreg_im = x[ispan*1].im;
                   t01_re  = o00_re*xreg_re - o00_im*xreg_im;
                   t01_im  = o00_re*xreg_im + o00_im*xreg_re;
                   xreg_re = x[ispan*2].re;
                   xreg_im = x[ispan*2].im;
                   t02_re  = o01_re*xreg_re - o01_im*xreg_im;
                   t02_im  = o01_re*xreg_im + o01_im*xreg_re;
                   xreg_re = x[ispan*3].re;
                   xreg_im = x[ispan*3].im;
                   t03_re  = o02_re*xreg_re - o02_im*xreg_im;
                   t03_im  = o02_re*xreg_im + o02_im*xreg_re;
                   xreg_re = x[ispan*4].re;
                   xreg_im = x[ispan*4].im;
                   t04_re  = o03_re*xreg_re - o03_im*xreg_im;
                   t04_im  = o03_re*xreg_im + o03_im*xreg_re;
                   xreg_re = x[ispan*5].re;
                   xreg_im = x[ispan*5].im;
                   t05_re  = o04_re*xreg_re - o04_im*xreg_im;
                   t05_im  = o04_re*xreg_im + o04_im*xreg_re;
                   xreg_re = x[ispan*6].re;
                   xreg_im = x[ispan*6].im;
                   t06_re  = o05_re*xreg_re - o05_im*xreg_im;
                   t06_im  = o05_re*xreg_im + o05_im*xreg_re;
                   xreg_re = x[ispan*7].re;
                   xreg_im = x[ispan*7].im;
                   t07_re  = o06_re*xreg_re - o06_im*xreg_im;
                   t07_im  = o06_re*xreg_im + o06_im*xreg_re;
                   x04a_re = t00_re - t04_re;
                   x00a_re = t00_re + t04_re;
                   x02a_re = t02_re + t06_re;
                   x06a_re = t02_re - t06_re;
                   x00b_re = x00a_re + x02a_re;
                   x02b_re = x00a_re - x02a_re;
                   x04a_im = t00_im - t04_im;
                   x00a_im = t00_im + t04_im;
                   x02a_im = t02_im + t06_im;
                   x06a_im = t02_im - t06_im;
                   x00b_im = x00a_im + x02a_im;
                   x02b_im = x00a_im - x02a_im;
                   x05a_re = t01_re - t05_re;
                   x01a_re = t01_re + t05_re;
                   x03a_re = t03_re + t07_re;
                   x07a_re = t03_re - t07_re;
                   x01b_re = x01a_re + x03a_re;
                   x03b_re = x01a_re - x03a_re;
                   x05a_im = t01_im - t05_im;
                   x01a_im = t01_im + t05_im;
                   x03a_im = t03_im + t07_im;
                   x07a_im = t03_im - t07_im;
                   x01b_im = x01a_im + x03a_im;
                   x03b_im = x01a_im - x03a_im;
                   y[0].re       = x00b_re + x01b_re;
                   y[0].im       = x00b_im + x01b_im;
                   y[ospan*4].re = x00b_re - x01b_re;
                   y[ospan*4].im = x00b_im - x01b_im;
                   y[ospan*2].re = x02b_re - x03b_im;
                   y[ospan*2].im = x02b_im + x03b_re;
                   y[ospan*6].re = x02b_re + x03b_im;
                   y[ospan*6].im = x02b_im - x03b_re;
                   x07b_re = x05a_re + x07a_re;
                   x05b_re = x05a_re - x07a_re;
                   x07b_im = x05a_im + x07a_im;
                   x05b_im = x05a_im - x07a_im;
                   x05c_re = s1*x05b_re;
                   x04b_re = x04a_re + x05c_re;
                   x05d_re = x04a_re - x05c_re;
                   x05c_im = s1*x05b_im;
                   x04b_im = x04a_im + x05c_im;
                   x05d_im = x04a_im - x05c_im;
                   x07c_re = s1*x07b_im;
                   x06b_re = x06a_im + x07c_re;
                   x07d_re = x06a_im - x07c_re;
                   x07c_im = s1*x07b_re;
                   x06b_im = x06a_re + x07c_im;
                   x07d_im = x07c_im - x06a_re;
                   y[ospan*1].re = x04b_re - x06b_re;
                   y[ospan*1].im = x04b_im + x06b_im;
                   y[ospan*7].re = x04b_re + x06b_re;
                   y[ospan*7].im = x04b_im - x06b_im;
                   y[ospan*3].re = x05d_re + x07d_re;
                   y[ospan*3].im = x05d_im + x07d_im;
                   y[ospan*5].re = x05d_re - x07d_re;
                   y[ospan*5].im = x05d_im - x07d_im;

                   x++;   
                   y++; 
                }
                x += istep;       
                w += ispan;
            }
	}
	else {
         /* Single group: inputs are used without twiddle multiplication */
            for(l=1; l<=ispan; l++) {
            /* The fundamental inverse pass RADIX 8 kernel */  
               t00_re = x[0].re;
               t00_im = x[0].im;
               t01_re = x[ispan*1].re;
               t01_im = x[ispan*1].im;
               t02_re = x[ispan*2].re;
               t02_im = x[ispan*2].im;
               t03_re = x[ispan*3].re;
               t03_im = x[ispan*3].im;
               t04_re = x[ispan*4].re;
               t04_im = x[ispan*4].im;
               t05_re = x[ispan*5].re;
               t05_im = x[ispan*5].im;
               t06_re = x[ispan*6].re;
               t06_im = x[ispan*6].im;
               t07_re = x[ispan*7].re;
               t07_im = x[ispan*7].im;
               x04a_re = t00_re - t04_re;
               x00a_re = t00_re + t04_re;
               x02a_re = t02_re + t06_re;
               x06a_re = t02_re - t06_re;
               x00b_re = x00a_re + x02a_re;
               x02b_re = x00a_re - x02a_re;
               x04a_im = t00_im - t04_im;
               x00a_im = t00_im + t04_im;
               x02a_im = t02_im + t06_im;
               x06a_im = t02_im - t06_im;
               x00b_im = x00a_im + x02a_im;
               x02b_im = x00a_im - x02a_im;
               x05a_re = t01_re - t05_re;
               x01a_re = t01_re + t05_re;
               x03a_re = t03_re + t07_re;
               x07a_re = t03_re - t07_re;
               x01b_re = x01a_re + x03a_re;
               x03b_re = x01a_re - x03a_re;
               x05a_im = t01_im - t05_im;
               x01a_im = t01_im + t05_im;
               x03a_im = t03_im + t07_im;
               x07a_im = t03_im - t07_im;
               x01b_im = x01a_im + x03a_im;
               x03b_im = x01a_im - x03a_im;
               y[0].re       = x00b_re + x01b_re;       
               y[0].im       = x00b_im + x01b_im;       
               y[ospan*4].re = x00b_re - x01b_re;                          
               y[ospan*4].im = x00b_im - x01b_im;
               y[ospan*2].re = x02b_re - x03b_im;      
               y[ospan*2].im = x02b_im + x03b_re;      
               y[ospan*6].re = x02b_re + x03b_im;      
               y[ospan*6].im = x02b_im - x03b_re;      
               x07b_re = x05a_re + x07a_re;
               x05b_re = x05a_re - x07a_re;
               x07b_im = x05a_im + x07a_im;
               x05b_im = x05a_im - x07a_im;
               x05c_re = s1*x05b_re;
               x04b_re = x04a_re + x05c_re;
               x05d_re = x04a_re - x05c_re;
               x05c_im = s1*x05b_im;
               x04b_im = x04a_im + x05c_im;
               x05d_im = x04a_im - x05c_im;
               x07c_re = s1*x07b_im;
               x06b_re = x06a_im + x07c_re;
               x07d_re = x06a_im - x07c_re;
               x07c_im = s1*x07b_re;
               x06b_im = x06a_re + x07c_im;
               x07d_im = x07c_im - x06a_re;
               y[ospan*1].re = x04b_re - x06b_re;
               y[ospan*1].im = x04b_im + x06b_im;
               y[ospan*7].re = x04b_re + x06b_re;
               y[ospan*7].im = x04b_im - x06b_im;
               y[ospan*3].re = x05d_re + x07d_re;
               y[ospan*3].im = x05d_im + x07d_im;
               y[ospan*5].re = x05d_re - x07d_re;
               y[ospan*5].im = x05d_im - x07d_im;

               x++;  
               y++;   
            }
	}
	break;
    }	

    return;
}

/*-----------------------------------------------------------------------*/

/*
 * radixpass_16 - perform one radix-16 butterfly pass over a complex buffer.
 *
 *   a        input data, accessed as an array of fft_cmplx (re,im) values
 *   c        output data, accessed as an array of fft_cmplx values
 *   t        twiddle table, accessed as an array of fft_cmplx values
 *   n        size from which the output span is derived (ospan = n/16);
 *            presumably the transform length in complex points - confirm
 *            against the caller
 *   la       input span: distance, in complex elements, between the 16
 *            inputs of one butterfly
 *   dirflag  FFT_FLAGS_FORWARD selects the forward pass; every other value
 *            selects the inverse pass
 *
 * Nothing is returned.  The routine silently does nothing when n < 16 or
 * la < 1.
 *
 * Each butterfly reads x[0], x[la], ..., x[15*la] and writes
 * y[0], y[ospan], ..., y[15*ospan].  When la != ospan, inputs 1..15 are
 * first multiplied by twiddle factors taken from t (by the conjugate of the
 * table entry in the forward pass, by the entry itself in the inverse
 * pass).  When la == ospan the twiddle multiply is skipped altogether.
 *
 * The forward and inverse kernels each exist only once, at the bottom of
 * the function.  The loops "call" them with goto and the kernels jump back
 * to one of two return labels, chosen by rtnjump (0 = twiddled loop,
 * 1 = un-twiddled loop).
 */
void radixpass_16(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */
const int RADIXSIZE = (int) 16;

/* s1 = cos(pi/4) = 1/sqrt(2) */
const fft_real s1 = 
                  FFT_CONST(0.707106781186547524400844362104849039284835938689);
/* s2 = sqrt(2) */
const fft_real s2 = 
                  FFT_CONST(1.414213562373095048801688724209698078569671875377);
/* s3 = cos(pi/8) */
const fft_real s3 = 
                  FFT_CONST(0.923879532511286756128183189396788286822416626864);
/* s4 = sin(pi/8) */
const fft_real s4 = 
                  FFT_CONST(0.382683432365089771728459984030398866761344562486);

/* Local variables */
/* rtnjump: which return label the kernel jumps back to (see header)     */
/* k: twiddle group index, l: butterfly index within a group,            */
/* m: index of the last twiddle group                                    */
/* o##_*: twiddle factors for the current group                          */
/* t##_*: the 16 (already twiddled) butterfly inputs                     */
/* x##?_*: kernel intermediates; the letter suffix counts down toward    */
/*         'a' as a value gets closer to an output                       */
fft_int rtnjump;
fft_int k,l,m;                     
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re,o02_re,o03_re,o04_re,o05_re,o06_re,o07_re;
fft_real o00_im,o01_im,o02_im,o03_im,o04_im,o05_im,o06_im,o07_im;
fft_real o08_re,o09_re,o10_re,o11_re,o12_re,o13_re,o14_re;
fft_real o08_im,o09_im,o10_im,o11_im,o12_im,o13_im,o14_im;
fft_real t00_re,t01_re,t02_re,t03_re,t04_re,t05_re,t06_re,t07_re;
fft_real t00_im,t01_im,t02_im,t03_im,t04_im,t05_im,t06_im,t07_im;
fft_real t08_re,t09_re,t10_re,t11_re,t12_re,t13_re,t14_re,t15_re;
fft_real t08_im,t09_im,t10_im,t11_im,t12_im,t13_im,t14_im,t15_im;
fft_real x00a_re,x00a_im,x00b_re,x00b_im,x00c_re,x00c_im;
fft_real x01a_re,x01a_im,x01b_re,x01b_im,x01c_re,x01c_im;
fft_real x02a_re,x02a_im,x02b_re,x02b_im,x02c_re,x02c_im;
fft_real x03a_re,x03a_im,x03b_re,x03b_im,x03c_re,x03c_im;
fft_real x04a_re,x04a_im,x04b_re,x04b_im,x04c_re,x04c_im;
fft_real x05a_re,x05a_im,x05b_re,x05b_im,x05c_re,x05c_im;
fft_real x05d_re,x05d_im,x05e_re,x05e_im;
fft_real x06a_re,x06a_im,x06b_re,x06b_im,x06c_re,x06c_im;
fft_real x07a_re,x07a_im,x07b_re,x07b_im,x07c_re,x07c_im;
fft_real x07d_re,x07d_im,x07e_re,x07e_im;
fft_real x08a_re,x08a_im,x08b_re,x08b_im,x08c_re,x08c_im;
fft_real x09a_re,x09a_im,x09b_re,x09b_im,x09c_re,x09c_im;
fft_real x09d_re,x09d_im,x09e_re,x09e_im,x09f_re,x09f_im;
fft_real x10a_re,x10a_im,x10b_re,x10b_im,x10c_re,x10c_im;
fft_real x10d_re,x10d_im,x10e_re,x10e_im;
fft_real x11a_re,x11a_im,x11b_re,x11b_im,x11c_re,x11c_im;
fft_real x11d_re,x11d_im,x11e_re,x11e_im,x11f_re,x11f_im;
fft_real x12a_re,x12a_im,x12b_re,x12b_im,x12c_re,x12c_im;
fft_real x13a_re,x13a_im,x13b_re,x13b_im,x13c_re,x13c_im;
fft_real x13d_re,x13d_im,x13e_re,x13e_im;
fft_real x14a_re,x14a_im,x14b_re,x14b_im,x14c_re,x14c_im;
fft_real x14d_re,x14d_im,x14e_re,x14e_im;
fft_real x15a_re,x15a_im,x15b_re,x15b_im,x15c_re,x15c_im;
fft_real x15d_re,x15d_im,x15e_re,x15e_im,x15f_re,x15f_im;
fft_cmplx *x, *y, *w;

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
  
 /* Define local arrays */
    x = (fft_cmplx *) a;                /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (fft_cmplx *) t;                /* Twiddle table */

 /* Initialize pointers */
 /*   ospan: stride between the 16 outputs of one butterfly              */
 /*   ispan: stride between the 16 inputs of one butterfly               */
 /*   m:     ospan/ispan twiddle groups are processed, k = 0..m          */
 /*   istep: extra advance of x after a group, so that together with     */
 /*          the ispan single steps x moves on by 16*ispan elements      */
    ospan = n/RADIXSIZE;
    ispan = la;
    m     = (ospan/ispan) - 1;
    istep = ispan*(RADIXSIZE-1);
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) {
         /* Twiddled path: kernel returns to RTN_RADIX16F_KERNEL_0 */
            rtnjump = 0;
            for(k=0; k<=m; k++) { 
             /* w has already been advanced by k*ispan (see the end of  */
             /* this loop), so w[tspan*j] is entry (j+1)*k*ispan of the */
             /* original table: o00 is the 1st power, o14 the 15th.     */
                tspan  = k*ispan;
                o00_re = w[0].re;         o00_im = w[0].im;
                o01_re = w[tspan*1].re;   o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;   o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;   o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;   o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;   o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;   o06_im = w[tspan*6].im;
                o07_re = w[tspan*7].re;   o07_im = w[tspan*7].im;
                o08_re = w[tspan*8].re;   o08_im = w[tspan*8].im;
                o09_re = w[tspan*9].re;   o09_im = w[tspan*9].im;
                o10_re = w[tspan*10].re;  o10_im = w[tspan*10].im;
                o11_re = w[tspan*11].re;  o11_im = w[tspan*11].im;
                o12_re = w[tspan*12].re;  o12_im = w[tspan*12].im;
                o13_re = w[tspan*13].re;  o13_im = w[tspan*13].im;
                o14_re = w[tspan*14].re;  o14_im = w[tspan*14].im;
  
                for(l=1; l<=ispan; l++) {
                /* Forward twiddle: t = conj(o) * x; input 0 is used   */
                /* as is.                                              */
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = o00_re*x[ispan*1].re  + o00_im*x[ispan*1].im;
                   t01_im = o00_re*x[ispan*1].im  - o00_im*x[ispan*1].re;
                   t02_re = o01_re*x[ispan*2].re  + o01_im*x[ispan*2].im;
                   t02_im = o01_re*x[ispan*2].im  - o01_im*x[ispan*2].re;
                   t03_re = o02_re*x[ispan*3].re  + o02_im*x[ispan*3].im;
                   t03_im = o02_re*x[ispan*3].im  - o02_im*x[ispan*3].re;
                   t04_re = o03_re*x[ispan*4].re  + o03_im*x[ispan*4].im;
                   t04_im = o03_re*x[ispan*4].im  - o03_im*x[ispan*4].re;
                   t05_re = o04_re*x[ispan*5].re  + o04_im*x[ispan*5].im;
                   t05_im = o04_re*x[ispan*5].im  - o04_im*x[ispan*5].re;
                   t06_re = o05_re*x[ispan*6].re  + o05_im*x[ispan*6].im;
                   t06_im = o05_re*x[ispan*6].im  - o05_im*x[ispan*6].re;
                   t07_re = o06_re*x[ispan*7].re  + o06_im*x[ispan*7].im;
                   t07_im = o06_re*x[ispan*7].im  - o06_im*x[ispan*7].re;
                   t08_re = o07_re*x[ispan*8].re  + o07_im*x[ispan*8].im;
                   t08_im = o07_re*x[ispan*8].im  - o07_im*x[ispan*8].re;
                   t09_re = o08_re*x[ispan*9].re  + o08_im*x[ispan*9].im;
                   t09_im = o08_re*x[ispan*9].im  - o08_im*x[ispan*9].re;
                   t10_re = o09_re*x[ispan*10].re + o09_im*x[ispan*10].im;
                   t10_im = o09_re*x[ispan*10].im - o09_im*x[ispan*10].re;
                   t11_re = o10_re*x[ispan*11].re + o10_im*x[ispan*11].im;
                   t11_im = o10_re*x[ispan*11].im - o10_im*x[ispan*11].re;
                   t12_re = o11_re*x[ispan*12].re + o11_im*x[ispan*12].im;
                   t12_im = o11_re*x[ispan*12].im - o11_im*x[ispan*12].re;
                   t13_re = o12_re*x[ispan*13].re + o12_im*x[ispan*13].im;
                   t13_im = o12_re*x[ispan*13].im - o12_im*x[ispan*13].re;
                   t14_re = o13_re*x[ispan*14].re + o13_im*x[ispan*14].im;
                   t14_im = o13_re*x[ispan*14].im - o13_im*x[ispan*14].re;
                   t15_re = o14_re*x[ispan*15].re + o14_im*x[ispan*15].im;
                   t15_im = o14_re*x[ispan*15].im - o14_im*x[ispan*15].re;

                /* "Call" the shared forward kernel; it jumps back to  */
                /* the label below because rtnjump == 0.               */
                   goto DO_RADIX16F_KERNEL;
                   RTN_RADIX16F_KERNEL_0:

                   x++;   
                   y++; 
                }
             /* Skip the other 15 input rows of this group and move to */
             /* the next group's base twiddle.                         */
                x += istep;       
                w += ispan;
            }
	}
	else {
         /* Un-twiddled path (ispan == ospan): inputs are loaded       */
         /* unchanged; kernel returns to RTN_RADIX16F_KERNEL_1.        */
            rtnjump = 1;
            for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = x[ispan*1].re;
                   t01_im = x[ispan*1].im;
                   t02_re = x[ispan*2].re;
                   t02_im = x[ispan*2].im;
                   t03_re = x[ispan*3].re;
                   t03_im = x[ispan*3].im;
                   t04_re = x[ispan*4].re;
                   t04_im = x[ispan*4].im;
                   t05_re = x[ispan*5].re;
                   t05_im = x[ispan*5].im;
                   t06_re = x[ispan*6].re;
                   t06_im = x[ispan*6].im;
                   t07_re = x[ispan*7].re;
                   t07_im = x[ispan*7].im;
                   t08_re = x[ispan*8].re;
                   t08_im = x[ispan*8].im;
                   t09_re = x[ispan*9].re;
                   t09_im = x[ispan*9].im;
                   t10_re = x[ispan*10].re;
                   t10_im = x[ispan*10].im;
                   t11_re = x[ispan*11].re;
                   t11_im = x[ispan*11].im;
                   t12_re = x[ispan*12].re;
                   t12_im = x[ispan*12].im;
                   t13_re = x[ispan*13].re;
                   t13_im = x[ispan*13].im;
                   t14_re = x[ispan*14].re;
                   t14_im = x[ispan*14].im;
                   t15_re = x[ispan*15].re;
                   t15_im = x[ispan*15].im;

               goto DO_RADIX16F_KERNEL;
               RTN_RADIX16F_KERNEL_1:

               x++;  
               y++;   
            }
	}

	break;

      default:                                         /* Inverse Pass */
        if (ispan != ospan) {
         /* Twiddled path: kernel returns to RTN_RADIX16I_KERNEL_0 */
            rtnjump = 0;
            for(k=0; k<=m; k++) { 
             /* Same twiddle addressing as in the forward pass */
                tspan  = k*ispan;
                o00_re = w[0].re;         o00_im = w[0].im;
                o01_re = w[tspan*1].re;   o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;   o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;   o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;   o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;   o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;   o06_im = w[tspan*6].im;
                o07_re = w[tspan*7].re;   o07_im = w[tspan*7].im;
                o08_re = w[tspan*8].re;   o08_im = w[tspan*8].im;
                o09_re = w[tspan*9].re;   o09_im = w[tspan*9].im;
                o10_re = w[tspan*10].re;  o10_im = w[tspan*10].im;
                o11_re = w[tspan*11].re;  o11_im = w[tspan*11].im;
                o12_re = w[tspan*12].re;  o12_im = w[tspan*12].im;
                o13_re = w[tspan*13].re;  o13_im = w[tspan*13].im;
                o14_re = w[tspan*14].re;  o14_im = w[tspan*14].im;
  
                for(l=1; l<=ispan; l++) {
                /* Inverse twiddle: t = o * x (no conjugate); input 0  */
                /* is used as is.                                      */
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = o00_re*x[ispan*1].re  - o00_im*x[ispan*1].im;
                   t01_im = o00_re*x[ispan*1].im  + o00_im*x[ispan*1].re;
                   t02_re = o01_re*x[ispan*2].re  - o01_im*x[ispan*2].im;
                   t02_im = o01_re*x[ispan*2].im  + o01_im*x[ispan*2].re;
                   t03_re = o02_re*x[ispan*3].re  - o02_im*x[ispan*3].im;
                   t03_im = o02_re*x[ispan*3].im  + o02_im*x[ispan*3].re;
                   t04_re = o03_re*x[ispan*4].re  - o03_im*x[ispan*4].im;
                   t04_im = o03_re*x[ispan*4].im  + o03_im*x[ispan*4].re;
                   t05_re = o04_re*x[ispan*5].re  - o04_im*x[ispan*5].im;
                   t05_im = o04_re*x[ispan*5].im  + o04_im*x[ispan*5].re;
                   t06_re = o05_re*x[ispan*6].re  - o05_im*x[ispan*6].im;
                   t06_im = o05_re*x[ispan*6].im  + o05_im*x[ispan*6].re;
                   t07_re = o06_re*x[ispan*7].re  - o06_im*x[ispan*7].im;
                   t07_im = o06_re*x[ispan*7].im  + o06_im*x[ispan*7].re;
                   t08_re = o07_re*x[ispan*8].re  - o07_im*x[ispan*8].im;
                   t08_im = o07_re*x[ispan*8].im  + o07_im*x[ispan*8].re;
                   t09_re = o08_re*x[ispan*9].re  - o08_im*x[ispan*9].im;
                   t09_im = o08_re*x[ispan*9].im  + o08_im*x[ispan*9].re;
                   t10_re = o09_re*x[ispan*10].re - o09_im*x[ispan*10].im;
                   t10_im = o09_re*x[ispan*10].im + o09_im*x[ispan*10].re;
                   t11_re = o10_re*x[ispan*11].re - o10_im*x[ispan*11].im;
                   t11_im = o10_re*x[ispan*11].im + o10_im*x[ispan*11].re;
                   t12_re = o11_re*x[ispan*12].re - o11_im*x[ispan*12].im;
                   t12_im = o11_re*x[ispan*12].im + o11_im*x[ispan*12].re;
                   t13_re = o12_re*x[ispan*13].re - o12_im*x[ispan*13].im;
                   t13_im = o12_re*x[ispan*13].im + o12_im*x[ispan*13].re;
                   t14_re = o13_re*x[ispan*14].re - o13_im*x[ispan*14].im;
                   t14_im = o13_re*x[ispan*14].im + o13_im*x[ispan*14].re;
                   t15_re = o14_re*x[ispan*15].re - o14_im*x[ispan*15].im;
                   t15_im = o14_re*x[ispan*15].im + o14_im*x[ispan*15].re;

                /* "Call" the shared inverse kernel; it jumps back to  */
                /* the label below because rtnjump == 0.               */
                   goto DO_RADIX16I_KERNEL;
                   RTN_RADIX16I_KERNEL_0:

                   x++;   
                   y++; 
                }
             /* Skip the other 15 input rows of this group and move to */
             /* the next group's base twiddle.                         */
                x += istep;       
                w += ispan;
            }
	}
	else {
         /* Un-twiddled path (ispan == ospan): inputs are loaded       */
         /* unchanged; kernel returns to RTN_RADIX16I_KERNEL_1.        */
            rtnjump = 1;
            for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = x[ispan*1].re;
                   t01_im = x[ispan*1].im;
                   t02_re = x[ispan*2].re;
                   t02_im = x[ispan*2].im;
                   t03_re = x[ispan*3].re;
                   t03_im = x[ispan*3].im;
                   t04_re = x[ispan*4].re;
                   t04_im = x[ispan*4].im;
                   t05_re = x[ispan*5].re;
                   t05_im = x[ispan*5].im;
                   t06_re = x[ispan*6].re;
                   t06_im = x[ispan*6].im;
                   t07_re = x[ispan*7].re;
                   t07_im = x[ispan*7].im;
                   t08_re = x[ispan*8].re;
                   t08_im = x[ispan*8].im;
                   t09_re = x[ispan*9].re;
                   t09_im = x[ispan*9].im;
                   t10_re = x[ispan*10].re;
                   t10_im = x[ispan*10].im;
                   t11_re = x[ispan*11].re;
                   t11_im = x[ispan*11].im;
                   t12_re = x[ispan*12].re;
                   t12_im = x[ispan*12].im;
                   t13_re = x[ispan*13].re;
                   t13_im = x[ispan*13].im;
                   t14_re = x[ispan*14].re;
                   t14_im = x[ispan*14].im;
                   t15_re = x[ispan*15].re;
                   t15_im = x[ispan*15].im;

               goto DO_RADIX16I_KERNEL;
               RTN_RADIX16I_KERNEL_1:

               x++;  
               y++;   
            }
	}

	break;
    }	

 /* Skip Kernel Code */
 /* The kernels below are only ever entered through their labels. */
    goto RADIX16_RETURN;

    {
     /* The fundamental forward pass RADIX 16 kernel */  
     /* Input:  t00..t15 (re,im).  Output: y[k*ospan], k = 0..15.        */
        DO_RADIX16F_KERNEL:
                /* First stage: sums t[k]+t[k+8] feed the even-numbered */
                /* outputs, differences t[k]-t[k+8] feed the odd ones.  */
                /* Even input indices first.                            */
                   x00c_re = t00_re + t08_re;
                   x08c_re = t00_re - t08_re;
                   x00c_im = t00_im + t08_im;
                   x08c_im = t00_im - t08_im;
                   x02c_re = t02_re + t10_re;
                   x10e_re = t02_re - t10_re;
                   x02c_im = t02_im + t10_im;
                   x10e_im = t02_im - t10_im;
                   x04c_re = t04_re + t12_re;
                   x12c_re = t04_re - t12_re;
                   x04c_im = t04_im + t12_im;
                   x12c_im = t04_im - t12_im;
                   x06c_re = t06_re + t14_re;
                   x14e_re = t06_re - t14_re;
                   x06c_im = t06_im + t14_im;
                   x14e_im = t06_im - t14_im;
                   x00b_re = x00c_re + x04c_re;
                   x04b_re = x00c_re - x04c_re;
                   x02b_re = x02c_re + x06c_re;
                   x06b_re = x02c_re - x06c_re;
                   x00a_re = x00b_re + x02b_re;
                   x02a_re = x00b_re - x02b_re;
                   x00b_im = x00c_im + x04c_im;
                   x04b_im = x00c_im - x04c_im;
                   x02b_im = x02c_im + x06c_im;
                   x06b_im = x02c_im - x06c_im;
                   x00a_im = x00b_im + x02b_im;
                   x02a_im = x00b_im - x02b_im;
                /* First stage, odd input indices */
                   x01c_re = t01_re + t09_re;
                   x09f_re = t01_re - t09_re;
                   x01c_im = t01_im + t09_im;
                   x09f_im = t01_im - t09_im;
                   x03c_re = t03_re + t11_re;
                   x11f_re = t03_re - t11_re;
                   x03c_im = t03_im + t11_im;
                   x11f_im = t03_im - t11_im;
                   x05e_re = t05_re + t13_re;
                   x13e_re = t05_re - t13_re;
                   x05e_im = t05_im + t13_im;
                   x13e_im = t05_im - t13_im;
                   x07e_re = t07_re + t15_re;
                   x15f_re = t07_re - t15_re;
                   x07e_im = t07_im + t15_im;
                   x15f_im = t07_im - t15_im;
                   x01b_re = x01c_re + x05e_re;
                   x05d_re = x01c_re - x05e_re;
                   x03b_re = x03c_re + x07e_re;
                   x07d_re = x03c_re - x07e_re;
                   x01a_re = x01b_re + x03b_re;
                   x03a_re = x01b_re - x03b_re;
                   x01b_im = x01c_im + x05e_im;
                   x05d_im = x01c_im - x05e_im;
                   x03b_im = x03c_im + x07e_im;
                   x07d_im = x03c_im - x07e_im;
                   x01a_im = x01b_im + x03b_im;
                   x03a_im = x01b_im - x03b_im;

                /* Outputs 0 and 8; outputs 4 and 12 are               */
                /* x02a -/+ i*x03a (forward: minus for 4).             */
                   y[0*ospan].re  = x00a_re + x01a_re;
                   y[0*ospan].im  = x00a_im + x01a_im;
                   y[8*ospan].re  = x00a_re - x01a_re;
                   y[8*ospan].im  = x00a_im - x01a_im;
                   y[4*ospan].re  = x02a_re + x03a_im;
                   y[4*ospan].im  = x02a_im - x03a_re;
                   y[12*ospan].re = x02a_re - x03a_im;
                   y[12*ospan].im = x02a_im + x03a_re;

                /* Outputs 2, 6, 10, 14.  Note the deliberate _re/_im  */
                /* swap on x06a and x07a: x06a_im/x07a_im hold real    */
                /* parts and x06a_re/x07a_re hold imaginary parts, so  */
                /* the multiply by +/-i reduces to the sign pattern in */
                /* the stores below.                                   */
                   x07c_re = x05d_re + x07d_re;
                   x05c_re = x05d_re - x07d_re;
                   x05b_re = s1*x05c_re;
                   x04a_re = x04b_re + x05b_re;
                   x05a_re = x04b_re - x05b_re;
                   x07b_re = s1*x07c_re;
                   x06a_im = x06b_re + x07b_re;
                   x07a_im = x06b_re - x07b_re;
                   x07c_im = x05d_im + x07d_im;
                   x05c_im = x05d_im - x07d_im;
                   x05b_im = s1*x05c_im;
                   x04a_im = x04b_im + x05b_im;
                   x05a_im = x04b_im - x05b_im;
                   x07b_im = s1*x07c_im;
                   x06a_re = x06b_im + x07b_im;
                   x07a_re = x06b_im - x07b_im;

                   y[2*ospan].re  = x04a_re + x06a_re;
                   y[2*ospan].im  = x04a_im - x06a_im;
                   y[14*ospan].re = x04a_re - x06a_re;
                   y[14*ospan].im = x04a_im + x06a_im;
                   y[6*ospan].re  = x05a_re - x07a_re;
                   y[6*ospan].im  = x05a_im + x07a_im;
                   y[10*ospan].re = x05a_re + x07a_re;
                   y[10*ospan].im = x05a_im - x07a_im;

                /* Odd outputs.  x13d is intentionally assigned twice  */
                /* (second line updates it in place).  Combining it    */
                /* with s2*x15e gives factors (s2-1) = tan(pi/8) and   */
                /* (s2+1) = cot(pi/8), so that                         */
                /*   s3*x13c = s3*(x11f+x13e) + s4*(x09f+x15f)         */
                /*   s4*x15c = s4*(x11f+x13e) - s3*(x09f+x15f)         */
                /* The x09d/x11d/x09c/x11c group uses the same trick.  */
                   x09e_re = x09f_re - x15f_re;
                   x15e_re = x09f_re + x15f_re;
                   x09e_im = x09f_im - x15f_im;
                   x15e_im = x09f_im + x15f_im;
                   x11e_re = x11f_re - x13e_re;
                   x13d_re = x11f_re + x13e_re;
                   x13d_re = x13d_re - x15e_re;
                   x15d_re = s2*x15e_re;
                   x13c_re = x13d_re + x15d_re;
                   x15c_re = x13d_re - x15d_re;
                   x11e_im = x11f_im - x13e_im;
                   x13d_im = x11f_im + x13e_im;
                   x13d_im = x13d_im - x15e_im;
                   x15d_im = s2*x15e_im;
                   x13c_im = x13d_im + x15d_im;
                   x15c_im = x13d_im - x15d_im;
                   x14d_re = x10e_re + x14e_re;
                   x10d_re = x10e_re - x14e_re;
                   x10c_re = s1*x10d_re;
                   x08b_re = x08c_re + x10c_re;
                   x10b_re = x08c_re - x10c_re;
                   x14c_re = s1*x14d_re;
                   x14d_im = x10e_im + x14e_im;
                   x10d_im = x10e_im - x14e_im;
                   x10c_im = s1*x10d_im;
                   x08b_im = x08c_im + x10c_im;
                   x10b_im = x08c_im - x10c_im;
                   x14c_im = s1*x14d_im;
                /* x12b = -i*(x12c + x14c), x14b = -i*(x12c - x14c) */
                   x12b_im = -x12c_re - x14c_re;
                   x14b_im = -x12c_re + x14c_re;
                   x12b_re = x12c_im + x14c_im;
                   x14b_re = x12c_im - x14c_im;
                   x09d_re = x09e_re - x11e_re;
                   x11d_re = s2*x11e_re;
                   x09c_re = x09d_re + x11d_re;
                   x11c_re = x09d_re - x11d_re;
                   x09b_re = s3*x09c_re;
                   x08a_re = x08b_re + x09b_re;
                   x09a_re = x08b_re - x09b_re;
                   x09d_im = x09e_im - x11e_im;
                   x11d_im = s2*x11e_im;
                   x09c_im = x09d_im + x11d_im;
                   x11c_im = x09d_im - x11d_im;
                   x09b_im = s3*x09c_im;
                   x08a_im = x08b_im + x09b_im;
                   x09a_im = x08b_im - x09b_im;
                /* x12a = x12b - i*x13b, x13a = x12b + i*x13b */
                   x13b_im = s3*x13c_im;
                   x12a_re = x12b_re + x13b_im;
                   x13a_re = x12b_re - x13b_im;
                   x13b_re = s3*x13c_re;
                   x12a_im = x12b_im - x13b_re;
                   x13a_im = x12b_im + x13b_re;

                   y[1*ospan].re  = x08a_re + x12a_re;
                   y[1*ospan].im  = x08a_im + x12a_im;
                   y[15*ospan].re = x08a_re - x12a_re;
                   y[15*ospan].im = x08a_im - x12a_im;
                   y[7*ospan].re  = x09a_re - x13a_re;
                   y[7*ospan].im  = x09a_im - x13a_im;
                   y[9*ospan].re  = x09a_re + x13a_re;
                   y[9*ospan].im  = x09a_im + x13a_im;

                /* x14a = x14b - i*x15b, x15a = x14b + i*x15b */
                   x11b_re = s4*x11c_re;
                   x10a_re = x10b_re + x11b_re;
                   x11a_re = x10b_re - x11b_re;
                   x11b_im = s4*x11c_im;
                   x10a_im = x10b_im + x11b_im;
                   x11a_im = x10b_im - x11b_im;
                   x15b_im = s4*x15c_im;
                   x14a_re = x14b_re + x15b_im;
                   x15a_re = x14b_re - x15b_im;
                   x15b_re = s4*x15c_re;
                   x14a_im = x14b_im - x15b_re;
                   x15a_im = x14b_im + x15b_re;

                   y[3*ospan].re  = x10a_re - x14a_re;
                   y[3*ospan].im  = x10a_im - x14a_im;
                   y[13*ospan].re = x10a_re + x14a_re;
                   y[13*ospan].im = x10a_im + x14a_im;
                   y[5*ospan].re  = x11a_re + x15a_re;
                   y[5*ospan].im  = x11a_im + x15a_im;
                   y[11*ospan].re = x11a_re - x15a_re;
                   y[11*ospan].im = x11a_im - x15a_im;

                /* "Return" to whichever forward loop jumped here */
                   if (rtnjump == 0)
                     goto RTN_RADIX16F_KERNEL_0;
                   else
                     goto RTN_RADIX16F_KERNEL_1;
    }

    {
     /* The fundamental inverse pass RADIX 16 kernel */  
     /* Same intermediates as the forward kernel; only the signs in the  */
     /* output stores differ, which swaps outputs k and 16-k.            */
        DO_RADIX16I_KERNEL:
                /* First stage, even input indices */
                   x00c_re = t00_re + t08_re;
                   x08c_re = t00_re - t08_re;
                   x00c_im = t00_im + t08_im;
                   x08c_im = t00_im - t08_im;
                   x02c_re = t02_re + t10_re;
                   x10e_re = t02_re - t10_re;
                   x02c_im = t02_im + t10_im;
                   x10e_im = t02_im - t10_im;
                   x04c_re = t04_re + t12_re;
                   x12c_re = t04_re - t12_re;
                   x04c_im = t04_im + t12_im;
                   x12c_im = t04_im - t12_im;
                   x06c_re = t06_re + t14_re;
                   x14e_re = t06_re - t14_re;
                   x06c_im = t06_im + t14_im;
                   x14e_im = t06_im - t14_im;
                   x00b_re = x00c_re + x04c_re;
                   x04b_re = x00c_re - x04c_re;
                   x02b_re = x02c_re + x06c_re;
                   x06b_re = x02c_re - x06c_re;
                   x00a_re = x00b_re + x02b_re;
                   x02a_re = x00b_re - x02b_re;
                   x00b_im = x00c_im + x04c_im;
                   x04b_im = x00c_im - x04c_im;
                   x02b_im = x02c_im + x06c_im;
                   x06b_im = x02c_im - x06c_im;
                   x00a_im = x00b_im + x02b_im;
                   x02a_im = x00b_im - x02b_im;
                /* First stage, odd input indices */
                   x01c_re = t01_re + t09_re;
                   x09f_re = t01_re - t09_re;
                   x01c_im = t01_im + t09_im;
                   x09f_im = t01_im - t09_im;
                   x03c_re = t03_re + t11_re;
                   x11f_re = t03_re - t11_re;
                   x03c_im = t03_im + t11_im;
                   x11f_im = t03_im - t11_im;
                   x05e_re = t05_re + t13_re;
                   x13e_re = t05_re - t13_re;
                   x05e_im = t05_im + t13_im;
                   x13e_im = t05_im - t13_im;
                   x07e_re = t07_re + t15_re;
                   x15f_re = t07_re - t15_re;
                   x07e_im = t07_im + t15_im;
                   x15f_im = t07_im - t15_im;
                   x01b_re = x01c_re + x05e_re;
                   x05d_re = x01c_re - x05e_re;
                   x03b_re = x03c_re + x07e_re;
                   x07d_re = x03c_re - x07e_re;
                   x01a_re = x01b_re + x03b_re;
                   x03a_re = x01b_re - x03b_re;
                   x01b_im = x01c_im + x05e_im;
                   x05d_im = x01c_im - x05e_im;
                   x03b_im = x03c_im + x07e_im;
                   x07d_im = x03c_im - x07e_im;
                   x01a_im = x01b_im + x03b_im;
                   x03a_im = x01b_im - x03b_im;

                /* Outputs 0 and 8; outputs 4 and 12 are               */
                /* x02a +/- i*x03a (inverse: plus for 4).              */
                   y[0*ospan].re  = x00a_re + x01a_re;
                   y[0*ospan].im  = x00a_im + x01a_im;
                   y[8*ospan].re  = x00a_re - x01a_re;
                   y[8*ospan].im  = x00a_im - x01a_im;
                   y[4*ospan].re  = x02a_re - x03a_im;
                   y[4*ospan].im  = x02a_im + x03a_re;
                   y[12*ospan].re = x02a_re + x03a_im;
                   y[12*ospan].im = x02a_im - x03a_re;

                /* Outputs 2, 6, 10, 14.  x06a and x07a carry the same */
                /* deliberate _re/_im swap as in the forward kernel.   */
                   x07c_re = x05d_re + x07d_re;
                   x05c_re = x05d_re - x07d_re;
                   x05b_re = s1*x05c_re;
                   x04a_re = x04b_re + x05b_re;
                   x05a_re = x04b_re - x05b_re;
                   x07b_re = s1*x07c_re;
                   x06a_im = x06b_re + x07b_re;
                   x07a_im = x06b_re - x07b_re;
                   x07c_im = x05d_im + x07d_im;
                   x05c_im = x05d_im - x07d_im;
                   x05b_im = s1*x05c_im;
                   x04a_im = x04b_im + x05b_im;
                   x05a_im = x04b_im - x05b_im;
                   x07b_im = s1*x07c_im;
                   x06a_re = x06b_im + x07b_im;
                   x07a_re = x06b_im - x07b_im;

                   y[2*ospan].re  = x04a_re - x06a_re;
                   y[2*ospan].im  = x04a_im + x06a_im;
                   y[14*ospan].re = x04a_re + x06a_re;
                   y[14*ospan].im = x04a_im - x06a_im;
                   y[6*ospan].re  = x05a_re + x07a_re;
                   y[6*ospan].im  = x05a_im - x07a_im;
                   y[10*ospan].re = x05a_re - x07a_re;
                   y[10*ospan].im = x05a_im + x07a_im;

                /* Odd outputs.  As in the forward kernel, x13d is     */
                /* intentionally assigned twice so that s2 -/+ 1       */
                /* supplies the tan(pi/8) / cot(pi/8) factors.         */
                   x09e_re = x09f_re - x15f_re;
                   x15e_re = x09f_re + x15f_re;
                   x09e_im = x09f_im - x15f_im;
                   x15e_im = x09f_im + x15f_im;
                   x11e_re = x11f_re - x13e_re;
                   x13d_re = x11f_re + x13e_re;
                   x13d_re = x13d_re - x15e_re;
                   x15d_re = s2*x15e_re;
                   x13c_re = x13d_re + x15d_re;
                   x15c_re = x13d_re - x15d_re;
                   x11e_im = x11f_im - x13e_im;
                   x13d_im = x11f_im + x13e_im;
                   x13d_im = x13d_im - x15e_im;
                   x15d_im = s2*x15e_im;
                   x13c_im = x13d_im + x15d_im;
                   x15c_im = x13d_im - x15d_im;
                   x14d_re = x10e_re + x14e_re;
                   x10d_re = x10e_re - x14e_re;
                   x10c_re = s1*x10d_re;
                   x08b_re = x08c_re + x10c_re;
                   x10b_re = x08c_re - x10c_re;
                   x14c_re = s1*x14d_re;
                   x14d_im = x10e_im + x14e_im;
                   x10d_im = x10e_im - x14e_im;
                   x10c_im = s1*x10d_im;
                   x08b_im = x08c_im + x10c_im;
                   x10b_im = x08c_im - x10c_im;
                   x14c_im = s1*x14d_im;
                /* x12b = -i*(x12c + x14c), x14b = -i*(x12c - x14c) */
                   x12b_im = -x12c_re - x14c_re;
                   x14b_im = -x12c_re + x14c_re;
                   x12b_re = x12c_im + x14c_im;
                   x14b_re = x12c_im - x14c_im;
                   x09d_re = x09e_re - x11e_re;
                   x11d_re = s2*x11e_re;
                   x09c_re = x09d_re + x11d_re;
                   x11c_re = x09d_re - x11d_re;
                   x09b_re = s3*x09c_re;
                   x08a_re = x08b_re + x09b_re;
                   x09a_re = x08b_re - x09b_re;
                   x09d_im = x09e_im - x11e_im;
                   x11d_im = s2*x11e_im;
                   x09c_im = x09d_im + x11d_im;
                   x11c_im = x09d_im - x11d_im;
                   x09b_im = s3*x09c_im;
                   x08a_im = x08b_im + x09b_im;
                   x09a_im = x08b_im - x09b_im;
                /* x12a = x12b - i*x13b, x13a = x12b + i*x13b */
                   x13b_im = s3*x13c_im;
                   x12a_re = x12b_re + x13b_im;
                   x13a_re = x12b_re - x13b_im;
                   x13b_re = s3*x13c_re;
                   x12a_im = x12b_im - x13b_re;
                   x13a_im = x12b_im + x13b_re;

                   y[1*ospan].re  = x08a_re - x12a_re;
                   y[1*ospan].im  = x08a_im - x12a_im;
                   y[15*ospan].re = x08a_re + x12a_re;
                   y[15*ospan].im = x08a_im + x12a_im;
                   y[7*ospan].re  = x09a_re + x13a_re;
                   y[7*ospan].im  = x09a_im + x13a_im;
                   y[9*ospan].re  = x09a_re - x13a_re;
                   y[9*ospan].im  = x09a_im - x13a_im;

                /* x14a = x14b - i*x15b, x15a = x14b + i*x15b */
                   x11b_re = s4*x11c_re;
                   x10a_re = x10b_re + x11b_re;
                   x11a_re = x10b_re - x11b_re;
                   x11b_im = s4*x11c_im;
                   x10a_im = x10b_im + x11b_im;
                   x11a_im = x10b_im - x11b_im;
                   x15b_im = s4*x15c_im;
                   x14a_re = x14b_re + x15b_im;
                   x15a_re = x14b_re - x15b_im;
                   x15b_re = s4*x15c_re;
                   x14a_im = x14b_im - x15b_re;
                   x15a_im = x14b_im + x15b_re;

                   y[3*ospan].re  = x10a_re + x14a_re;
                   y[3*ospan].im  = x10a_im + x14a_im;
                   y[13*ospan].re = x10a_re - x14a_re;
                   y[13*ospan].im = x10a_im - x14a_im;
                   y[5*ospan].re  = x11a_re - x15a_re;
                   y[5*ospan].im  = x11a_im - x15a_im;
                   y[11*ospan].re = x11a_re + x15a_re;
                   y[11*ospan].im = x11a_im + x15a_im;

                /* "Return" to whichever inverse loop jumped here */
                   if (rtnjump == 0)
                     goto RTN_RADIX16I_KERNEL_0;
                   else
                     goto RTN_RADIX16I_KERNEL_1;
    }

    RADIX16_RETURN:

    return;
}

/*-----------------------------------------------------------------------*/

void radixpass_32(const fft_real *a, fft_real *c, const fft_real *t, 
                   fft_int n, fft_int la, fft_int dirflag)
{
/* Constants */
const int RADIXSIZE = (int) 32;

const fft_real s1 = 
                  FFT_CONST(0.707106781186547524400844362104849039284835938689);
const fft_real s2 = 
                  FFT_CONST(1.414213562373095048801688724209698078569671875377);
const fft_real s3 = 
                  FFT_CONST(0.923879532511286756128183189396788286822416626864);
const fft_real s4 = 
                  FFT_CONST(0.382683432365089771728459984030398866761344562486);
const fft_real s5 = 
                  FFT_CONST(1.847759065022573512256366378793576573644833252727);
const fft_real s6 = 
                  FFT_CONST(0.765366864730179543456919968060797733522689125971);
const fft_real s7 = 
                  FFT_CONST(0.980785280403230449126182236134239036973933731893);
const fft_real s8 = 
                  FFT_CONST(0.195090322016128267848284868477022240927691618752);
const fft_real s9 = 
                  FFT_CONST(0.831469612302545237078788377617905756738560812987);
const fft_real s10 = 
                  FFT_CONST(0.555570233019602224742830813948532874374937191755);

/* Local variables */
fft_int rtnjump;
fft_int k,l,m;                     
fft_int istep,ispan,ospan,tspan;
fft_real o00_re,o01_re,o02_re,o03_re,o04_re,o05_re,o06_re,o07_re;
fft_real o00_im,o01_im,o02_im,o03_im,o04_im,o05_im,o06_im,o07_im;
fft_real o08_re,o09_re,o10_re,o11_re,o12_re,o13_re,o14_re,o15_re;
fft_real o08_im,o09_im,o10_im,o11_im,o12_im,o13_im,o14_im,o15_im;
fft_real o16_re,o17_re,o18_re,o19_re,o20_re,o21_re,o22_re,o23_re;
fft_real o16_im,o17_im,o18_im,o19_im,o20_im,o21_im,o22_im,o23_im;
fft_real o24_re,o25_re,o26_re,o27_re,o28_re,o29_re,o30_re;
fft_real o24_im,o25_im,o26_im,o27_im,o28_im,o29_im,o30_im;
fft_real t00_re,t01_re,t02_re,t03_re,t04_re,t05_re,t06_re,t07_re;
fft_real t00_im,t01_im,t02_im,t03_im,t04_im,t05_im,t06_im,t07_im;
fft_real t08_re,t09_re,t10_re,t11_re,t12_re,t13_re,t14_re,t15_re;
fft_real t08_im,t09_im,t10_im,t11_im,t12_im,t13_im,t14_im,t15_im;
fft_real t16_re,t17_re,t18_re,t19_re,t20_re,t21_re,t22_re,t23_re;
fft_real t16_im,t17_im,t18_im,t19_im,t20_im,t21_im,t22_im,t23_im;
fft_real t24_re,t25_re,t26_re,t27_re,t28_re,t29_re,t30_re,t31_re;
fft_real t24_im,t25_im,t26_im,t27_im,t28_im,t29_im,t30_im,t31_im;
fft_real x00z_re,x00z_im,x00a_re,x00a_im,x00b_re,x00b_im,x00c_re,x00c_im;
fft_real x01z_re,x01z_im,x01a_re,x01a_im,x01b_re,x01b_im,x01c_re,x01c_im;
fft_real x02z_re,x02z_im,x02a_re,x02a_im,x02b_re,x02b_im,x02c_re,x02c_im;
fft_real x03z_re,x03z_im,x03a_re,x03a_im,x03b_re,x03b_im,x03c_re,x03c_im;
fft_real x04z_re,x04z_im,x04a_re,x04a_im,x04b_re,x04b_im,x04c_re,x04c_im;
fft_real x05z_re,x05z_im,x05a_re,x05a_im,x05b_re,x05b_im,x05c_re,x05c_im;
fft_real x05d_re,x05d_im,x05e_re,x05e_im;
fft_real x06z_re,x06z_im,x06a_re,x06a_im,x06b_re,x06b_im,x06c_re,x06c_im;
fft_real x07z_re,x07z_im,x07a_re,x07a_im,x07b_re,x07b_im,x07c_re,x07c_im;
fft_real x07d_re,x07d_im,x07e_re,x07e_im;
fft_real x08z_re,x08z_im,x08a_re,x08a_im,x08b_re,x08b_im,x08c_re,x08c_im;
fft_real x09z_re,x09z_im,x09a_re,x09a_im,x09b_re,x09b_im,x09c_re,x09c_im;
fft_real x09d_re,x09d_im,x09e_re,x09e_im,x09f_re,x09f_im;
fft_real x10z_re,x10z_im,x10a_re,x10a_im,x10b_re,x10b_im,x10c_re,x10c_im;
fft_real x10d_re,x10d_im,x10e_re,x10e_im;
fft_real x11z_re,x11z_im,x11a_re,x11a_im,x11b_re,x11b_im,x11c_re,x11c_im;
fft_real x11d_re,x11d_im,x11e_re,x11e_im,x11f_re,x11f_im;
fft_real x12z_re,x12z_im,x12a_re,x12a_im,x12b_re,x12b_im,x12c_re,x12c_im;
fft_real x13z_re,x13z_im,x13a_re,x13a_im,x13b_re,x13b_im,x13c_re,x13c_im;
fft_real x13d_re,x13d_im,x13e_re,x13e_im;
fft_real x14z_re,x14z_im,x14a_re,x14a_im,x14b_re,x14b_im,x14c_re,x14c_im;
fft_real x14d_re,x14d_im,x14e_re,x14e_im;
fft_real x15z_re,x15z_im,x15a_re,x15a_im,x15b_re,x15b_im,x15c_re,x15c_im;
fft_real x15d_re,x15d_im,x15e_re,x15e_im,x15f_re,x15f_im;
fft_real x16z_re,x16z_im,x16a_re,x16a_im,x16b_re,x16b_im,x16c_re,x16c_im;
fft_real x17z_re,x17z_im,x17a_re,x17a_im,x17b_re,x17b_im,x17c_re,x17c_im;
fft_real x17d_re,x17d_im,x17e_re,x17e_im,x17f_re,x17f_im,x17g_re,x17g_im;
fft_real x18z_re,x18z_im,x18a_re,x18a_im,x18b_re,x18b_im,x18c_re,x18c_im;
fft_real x18d_re,x18d_im,x18e_re,x18e_im,x18f_re,x18f_im;
fft_real x19z_re,x19z_im,x19a_re,x19a_im,x19b_re,x19b_im,x19c_re,x19c_im;
fft_real x19d_re,x19d_im,x19e_re,x19e_im,x19f_re,x19f_im,x19g_re,x19g_im;
fft_real x20z_re,x20z_im,x20a_re,x20a_im,x20b_re,x20b_im,x20c_re,x20c_im;
fft_real x20d_re,x20d_im,x20e_re,x20e_im;
fft_real x21z_re,x21z_im,x21a_re,x21a_im,x21b_re,x21b_im,x21c_re,x21c_im;
fft_real x21d_re,x21d_im,x21e_re,x21e_im,x21f_re,x21f_im,x21g_re,x21g_im;
fft_real x22z_re,x22z_im,x22a_re,x22a_im,x22b_re,x22b_im,x22c_re,x22c_im;
fft_real x22d_re,x22d_im,x22e_re,x22e_im,x22f_re,x22f_im;
fft_real x23z_re,x23z_im,x23a_re,x23a_im,x23b_re,x23b_im,x23c_re,x23c_im;
fft_real x23d_re,x23d_im,x23e_re,x23e_im,x23f_re,x23f_im,x23g_re,x23g_im;
fft_real x24z_re,x24z_im,x24a_re,x24a_im,x24b_re,x24b_im,x24c_re,x24c_im;
fft_real x25z_re,x25z_im,x25a_re,x25a_im,x25b_re,x25b_im,x25c_re,x25c_im;
fft_real x25d_re,x25d_im,x25e_re,x25e_im,x25f_re,x25f_im,x25g_re,x25g_im;
fft_real x26z_re,x26z_im,x26a_re,x26a_im,x26b_re,x26b_im,x26c_re,x26c_im;
fft_real x26d_re,x26d_im,x26e_re,x26e_im,x26f_re,x26f_im;
fft_real x27z_re,x27z_im,x27a_re,x27a_im,x27b_re,x27b_im,x27c_re,x27c_im;
fft_real x27d_re,x27d_im,x27e_re,x27e_im,x27f_re,x27f_im,x27g_re,x27g_im;
fft_real x28z_re,x28z_im,x28a_re,x28a_im,x28b_re,x28b_im,x28c_re,x28c_im;
fft_real x28d_re,x28d_im,x28e_re,x28e_im;
fft_real x29z_re,x29z_im,x29a_re,x29a_im,x29b_re,x29b_im,x29c_re,x29c_im;
fft_real x29d_re,x29d_im,x29e_re,x29e_im,x29f_re,x29f_im,x29g_re,x29g_im;
fft_real x30z_re,x30z_im,x30a_re,x30a_im,x30b_re,x30b_im,x30c_re,x30c_im;
fft_real x30d_re,x30d_im,x30e_re,x30e_im,x30f_re,x30f_im;
fft_real x31z_re,x31z_im,x31a_re,x31a_im,x31b_re,x31b_im,x31c_re,x31c_im;
fft_real x31d_re,x31d_im,x31e_re,x31e_im,x31f_re,x31f_im,x31g_re,x31g_im;
fft_cmplx *x, *y, *w;

 /* Verify parameters */
    if(n < RADIXSIZE || la < 1) return;     
  
 /* Define local arrays */
    x = (fft_cmplx *) a;                /* Input data    */
    y = (fft_cmplx *) c;                /* Output data   */
    w = (fft_cmplx *) t;                /* Twiddle table */

 /* Initialize pointers */
    ospan = n/RADIXSIZE;
    ispan = la;
    m     = (ospan/ispan) - 1;
    istep = ispan*(RADIXSIZE-1);
    tspan = 0;

 /* Process the data */
    switch (dirflag) {
      case FFT_FLAGS_FORWARD:                            /* Forward Pass */
        if (ispan != ospan) {
            rtnjump = 0;
            for(k=0; k<=m; k++) { 
                tspan  = k*ispan;
                o00_re = w[0].re;         o00_im = w[0].im;
                o01_re = w[tspan*1].re;   o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;   o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;   o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;   o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;   o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;   o06_im = w[tspan*6].im;
                o07_re = w[tspan*7].re;   o07_im = w[tspan*7].im;
                o08_re = w[tspan*8].re;   o08_im = w[tspan*8].im;
                o09_re = w[tspan*9].re;   o09_im = w[tspan*9].im;
                o10_re = w[tspan*10].re;  o10_im = w[tspan*10].im;
                o11_re = w[tspan*11].re;  o11_im = w[tspan*11].im;
                o12_re = w[tspan*12].re;  o12_im = w[tspan*12].im;
                o13_re = w[tspan*13].re;  o13_im = w[tspan*13].im;
                o14_re = w[tspan*14].re;  o14_im = w[tspan*14].im;
                o15_re = w[tspan*15].re;  o15_im = w[tspan*15].im;
                o16_re = w[tspan*16].re;  o16_im = w[tspan*16].im;
                o17_re = w[tspan*17].re;  o17_im = w[tspan*17].im;
                o18_re = w[tspan*18].re;  o18_im = w[tspan*18].im;
                o19_re = w[tspan*19].re;  o19_im = w[tspan*19].im;
                o20_re = w[tspan*20].re;  o20_im = w[tspan*20].im;
                o21_re = w[tspan*21].re;  o21_im = w[tspan*21].im;
                o22_re = w[tspan*22].re;  o22_im = w[tspan*22].im;
                o23_re = w[tspan*23].re;  o23_im = w[tspan*23].im;
                o24_re = w[tspan*24].re;  o24_im = w[tspan*24].im;
                o25_re = w[tspan*25].re;  o25_im = w[tspan*25].im;
                o26_re = w[tspan*26].re;  o26_im = w[tspan*26].im;
                o27_re = w[tspan*27].re;  o27_im = w[tspan*27].im;
                o28_re = w[tspan*28].re;  o28_im = w[tspan*28].im;
                o29_re = w[tspan*29].re;  o29_im = w[tspan*29].im;
                o30_re = w[tspan*30].re;  o30_im = w[tspan*30].im;
  
                for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = o00_re*x[ispan*1].re  + o00_im*x[ispan*1].im;
                   t01_im = o00_re*x[ispan*1].im  - o00_im*x[ispan*1].re;
                   t02_re = o01_re*x[ispan*2].re  + o01_im*x[ispan*2].im;
                   t02_im = o01_re*x[ispan*2].im  - o01_im*x[ispan*2].re;
                   t03_re = o02_re*x[ispan*3].re  + o02_im*x[ispan*3].im;
                   t03_im = o02_re*x[ispan*3].im  - o02_im*x[ispan*3].re;
                   t04_re = o03_re*x[ispan*4].re  + o03_im*x[ispan*4].im;
                   t04_im = o03_re*x[ispan*4].im  - o03_im*x[ispan*4].re;
                   t05_re = o04_re*x[ispan*5].re  + o04_im*x[ispan*5].im;
                   t05_im = o04_re*x[ispan*5].im  - o04_im*x[ispan*5].re;
                   t06_re = o05_re*x[ispan*6].re  + o05_im*x[ispan*6].im;
                   t06_im = o05_re*x[ispan*6].im  - o05_im*x[ispan*6].re;
                   t07_re = o06_re*x[ispan*7].re  + o06_im*x[ispan*7].im;
                   t07_im = o06_re*x[ispan*7].im  - o06_im*x[ispan*7].re;
                   t08_re = o07_re*x[ispan*8].re  + o07_im*x[ispan*8].im;
                   t08_im = o07_re*x[ispan*8].im  - o07_im*x[ispan*8].re;
                   t09_re = o08_re*x[ispan*9].re  + o08_im*x[ispan*9].im;
                   t09_im = o08_re*x[ispan*9].im  - o08_im*x[ispan*9].re;
                   t10_re = o09_re*x[ispan*10].re + o09_im*x[ispan*10].im;
                   t10_im = o09_re*x[ispan*10].im - o09_im*x[ispan*10].re;
                   t11_re = o10_re*x[ispan*11].re + o10_im*x[ispan*11].im;
                   t11_im = o10_re*x[ispan*11].im - o10_im*x[ispan*11].re;
                   t12_re = o11_re*x[ispan*12].re + o11_im*x[ispan*12].im;
                   t12_im = o11_re*x[ispan*12].im - o11_im*x[ispan*12].re;
                   t13_re = o12_re*x[ispan*13].re + o12_im*x[ispan*13].im;
                   t13_im = o12_re*x[ispan*13].im - o12_im*x[ispan*13].re;
                   t14_re = o13_re*x[ispan*14].re + o13_im*x[ispan*14].im;
                   t14_im = o13_re*x[ispan*14].im - o13_im*x[ispan*14].re;
                   t15_re = o14_re*x[ispan*15].re + o14_im*x[ispan*15].im;
                   t15_im = o14_re*x[ispan*15].im - o14_im*x[ispan*15].re;
                   t16_re = o15_re*x[ispan*16].re + o15_im*x[ispan*16].im;
                   t16_im = o15_re*x[ispan*16].im - o15_im*x[ispan*16].re;
                   t17_re = o16_re*x[ispan*17].re + o16_im*x[ispan*17].im;
                   t17_im = o16_re*x[ispan*17].im - o16_im*x[ispan*17].re;
                   t18_re = o17_re*x[ispan*18].re + o17_im*x[ispan*18].im;
                   t18_im = o17_re*x[ispan*18].im - o17_im*x[ispan*18].re;
                   t19_re = o18_re*x[ispan*19].re + o18_im*x[ispan*19].im;
                   t19_im = o18_re*x[ispan*19].im - o18_im*x[ispan*19].re;
                   t20_re = o19_re*x[ispan*20].re + o19_im*x[ispan*20].im;
                   t20_im = o19_re*x[ispan*20].im - o19_im*x[ispan*20].re;
                   t21_re = o20_re*x[ispan*21].re + o20_im*x[ispan*21].im;
                   t21_im = o20_re*x[ispan*21].im - o20_im*x[ispan*21].re;
                   t22_re = o21_re*x[ispan*22].re + o21_im*x[ispan*22].im;
                   t22_im = o21_re*x[ispan*22].im - o21_im*x[ispan*22].re;
                   t23_re = o22_re*x[ispan*23].re + o22_im*x[ispan*23].im;
                   t23_im = o22_re*x[ispan*23].im - o22_im*x[ispan*23].re;
                   t24_re = o23_re*x[ispan*24].re + o23_im*x[ispan*24].im;
                   t24_im = o23_re*x[ispan*24].im - o23_im*x[ispan*24].re;
                   t25_re = o24_re*x[ispan*25].re + o24_im*x[ispan*25].im;
                   t25_im = o24_re*x[ispan*25].im - o24_im*x[ispan*25].re;
                   t26_re = o25_re*x[ispan*26].re + o25_im*x[ispan*26].im;
                   t26_im = o25_re*x[ispan*26].im - o25_im*x[ispan*26].re;
                   t27_re = o26_re*x[ispan*27].re + o26_im*x[ispan*27].im;
                   t27_im = o26_re*x[ispan*27].im - o26_im*x[ispan*27].re;
                   t28_re = o27_re*x[ispan*28].re + o27_im*x[ispan*28].im;
                   t28_im = o27_re*x[ispan*28].im - o27_im*x[ispan*28].re;
                   t29_re = o28_re*x[ispan*29].re + o28_im*x[ispan*29].im;
                   t29_im = o28_re*x[ispan*29].im - o28_im*x[ispan*29].re;
                   t30_re = o29_re*x[ispan*30].re + o29_im*x[ispan*30].im;
                   t30_im = o29_re*x[ispan*30].im - o29_im*x[ispan*30].re;
                   t31_re = o30_re*x[ispan*31].re + o30_im*x[ispan*31].im;
                   t31_im = o30_re*x[ispan*31].im - o30_im*x[ispan*31].re;

                   goto DO_RADIX32F_KERNEL;
                   RTN_RADIX32F_KERNEL_0:

                   x++;   
                   y++; 
                }
                x += istep;       
                w += ispan;
            }
	}
	else {
            rtnjump = 1;
            for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = x[ispan*1].re;
                   t01_im = x[ispan*1].im;
                   t02_re = x[ispan*2].re;
                   t02_im = x[ispan*2].im;
                   t03_re = x[ispan*3].re;
                   t03_im = x[ispan*3].im;
                   t04_re = x[ispan*4].re;
                   t04_im = x[ispan*4].im;
                   t05_re = x[ispan*5].re;
                   t05_im = x[ispan*5].im;
                   t06_re = x[ispan*6].re;
                   t06_im = x[ispan*6].im;
                   t07_re = x[ispan*7].re;
                   t07_im = x[ispan*7].im;
                   t08_re = x[ispan*8].re;
                   t08_im = x[ispan*8].im;
                   t09_re = x[ispan*9].re;
                   t09_im = x[ispan*9].im;
                   t10_re = x[ispan*10].re;
                   t10_im = x[ispan*10].im;
                   t11_re = x[ispan*11].re;
                   t11_im = x[ispan*11].im;
                   t12_re = x[ispan*12].re;
                   t12_im = x[ispan*12].im;
                   t13_re = x[ispan*13].re;
                   t13_im = x[ispan*13].im;
                   t14_re = x[ispan*14].re;
                   t14_im = x[ispan*14].im;
                   t15_re = x[ispan*15].re;
                   t15_im = x[ispan*15].im;
                   t16_re = x[ispan*16].re;
                   t16_im = x[ispan*16].im;
                   t17_re = x[ispan*17].re;
                   t17_im = x[ispan*17].im;
                   t18_re = x[ispan*18].re;
                   t18_im = x[ispan*18].im;
                   t19_re = x[ispan*19].re;
                   t19_im = x[ispan*19].im;
                   t20_re = x[ispan*20].re;
                   t20_im = x[ispan*20].im;
                   t21_re = x[ispan*21].re;
                   t21_im = x[ispan*21].im;
                   t22_re = x[ispan*22].re;
                   t22_im = x[ispan*22].im;
                   t23_re = x[ispan*23].re;
                   t23_im = x[ispan*23].im;
                   t24_re = x[ispan*24].re;
                   t24_im = x[ispan*24].im;
                   t25_re = x[ispan*25].re;
                   t25_im = x[ispan*25].im;
                   t26_re = x[ispan*26].re;
                   t26_im = x[ispan*26].im;
                   t27_re = x[ispan*27].re;
                   t27_im = x[ispan*27].im;
                   t28_re = x[ispan*28].re;
                   t28_im = x[ispan*28].im;
                   t29_re = x[ispan*29].re;
                   t29_im = x[ispan*29].im;
                   t30_re = x[ispan*30].re;
                   t30_im = x[ispan*30].im;
                   t31_re = x[ispan*31].re;
                   t31_im = x[ispan*31].im;

               goto DO_RADIX32F_KERNEL;
               RTN_RADIX32F_KERNEL_1:

               x++;  
               y++;   
            }
	}

	break;

      default:                                         /* Inverse Pass */
        if (ispan != ospan) {
            rtnjump = 0;
            for(k=0; k<=m; k++) { 
                tspan  = k*ispan;
                o00_re = w[0].re;         o00_im = w[0].im;
                o01_re = w[tspan*1].re;   o01_im = w[tspan*1].im;
                o02_re = w[tspan*2].re;   o02_im = w[tspan*2].im;
                o03_re = w[tspan*3].re;   o03_im = w[tspan*3].im;
                o04_re = w[tspan*4].re;   o04_im = w[tspan*4].im;
                o05_re = w[tspan*5].re;   o05_im = w[tspan*5].im;
                o06_re = w[tspan*6].re;   o06_im = w[tspan*6].im;
                o07_re = w[tspan*7].re;   o07_im = w[tspan*7].im;
                o08_re = w[tspan*8].re;   o08_im = w[tspan*8].im;
                o09_re = w[tspan*9].re;   o09_im = w[tspan*9].im;
                o10_re = w[tspan*10].re;  o10_im = w[tspan*10].im;
                o11_re = w[tspan*11].re;  o11_im = w[tspan*11].im;
                o12_re = w[tspan*12].re;  o12_im = w[tspan*12].im;
                o13_re = w[tspan*13].re;  o13_im = w[tspan*13].im;
                o14_re = w[tspan*14].re;  o14_im = w[tspan*14].im;
                o15_re = w[tspan*15].re;  o15_im = w[tspan*15].im;
                o16_re = w[tspan*16].re;  o16_im = w[tspan*16].im;
                o17_re = w[tspan*17].re;  o17_im = w[tspan*17].im;
                o18_re = w[tspan*18].re;  o18_im = w[tspan*18].im;
                o19_re = w[tspan*19].re;  o19_im = w[tspan*19].im;
                o20_re = w[tspan*20].re;  o20_im = w[tspan*20].im;
                o21_re = w[tspan*21].re;  o21_im = w[tspan*21].im;
                o22_re = w[tspan*22].re;  o22_im = w[tspan*22].im;
                o23_re = w[tspan*23].re;  o23_im = w[tspan*23].im;
                o24_re = w[tspan*24].re;  o24_im = w[tspan*24].im;
                o25_re = w[tspan*25].re;  o25_im = w[tspan*25].im;
                o26_re = w[tspan*26].re;  o26_im = w[tspan*26].im;
                o27_re = w[tspan*27].re;  o27_im = w[tspan*27].im;
                o28_re = w[tspan*28].re;  o28_im = w[tspan*28].im;
                o29_re = w[tspan*29].re;  o29_im = w[tspan*29].im;
                o30_re = w[tspan*30].re;  o30_im = w[tspan*30].im;
  
                for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = o00_re*x[ispan*1].re  - o00_im*x[ispan*1].im;
                   t01_im = o00_re*x[ispan*1].im  + o00_im*x[ispan*1].re;
                   t02_re = o01_re*x[ispan*2].re  - o01_im*x[ispan*2].im;
                   t02_im = o01_re*x[ispan*2].im  + o01_im*x[ispan*2].re;
                   t03_re = o02_re*x[ispan*3].re  - o02_im*x[ispan*3].im;
                   t03_im = o02_re*x[ispan*3].im  + o02_im*x[ispan*3].re;
                   t04_re = o03_re*x[ispan*4].re  - o03_im*x[ispan*4].im;
                   t04_im = o03_re*x[ispan*4].im  + o03_im*x[ispan*4].re;
                   t05_re = o04_re*x[ispan*5].re  - o04_im*x[ispan*5].im;
                   t05_im = o04_re*x[ispan*5].im  + o04_im*x[ispan*5].re;
                   t06_re = o05_re*x[ispan*6].re  - o05_im*x[ispan*6].im;
                   t06_im = o05_re*x[ispan*6].im  + o05_im*x[ispan*6].re;
                   t07_re = o06_re*x[ispan*7].re  - o06_im*x[ispan*7].im;
                   t07_im = o06_re*x[ispan*7].im  + o06_im*x[ispan*7].re;
                   t08_re = o07_re*x[ispan*8].re  - o07_im*x[ispan*8].im;
                   t08_im = o07_re*x[ispan*8].im  + o07_im*x[ispan*8].re;
                   t09_re = o08_re*x[ispan*9].re  - o08_im*x[ispan*9].im;
                   t09_im = o08_re*x[ispan*9].im  + o08_im*x[ispan*9].re;
                   t10_re = o09_re*x[ispan*10].re - o09_im*x[ispan*10].im;
                   t10_im = o09_re*x[ispan*10].im + o09_im*x[ispan*10].re;
                   t11_re = o10_re*x[ispan*11].re - o10_im*x[ispan*11].im;
                   t11_im = o10_re*x[ispan*11].im + o10_im*x[ispan*11].re;
                   t12_re = o11_re*x[ispan*12].re - o11_im*x[ispan*12].im;
                   t12_im = o11_re*x[ispan*12].im + o11_im*x[ispan*12].re;
                   t13_re = o12_re*x[ispan*13].re - o12_im*x[ispan*13].im;
                   t13_im = o12_re*x[ispan*13].im + o12_im*x[ispan*13].re;
                   t14_re = o13_re*x[ispan*14].re - o13_im*x[ispan*14].im;
                   t14_im = o13_re*x[ispan*14].im + o13_im*x[ispan*14].re;
                   t15_re = o14_re*x[ispan*15].re - o14_im*x[ispan*15].im;
                   t15_im = o14_re*x[ispan*15].im + o14_im*x[ispan*15].re;
                   t16_re = o15_re*x[ispan*16].re - o15_im*x[ispan*16].im;
                   t16_im = o15_re*x[ispan*16].im + o15_im*x[ispan*16].re;
                   t17_re = o16_re*x[ispan*17].re - o16_im*x[ispan*17].im;
                   t17_im = o16_re*x[ispan*17].im + o16_im*x[ispan*17].re;
                   t18_re = o17_re*x[ispan*18].re - o17_im*x[ispan*18].im;
                   t18_im = o17_re*x[ispan*18].im + o17_im*x[ispan*18].re;
                   t19_re = o18_re*x[ispan*19].re - o18_im*x[ispan*19].im;
                   t19_im = o18_re*x[ispan*19].im + o18_im*x[ispan*19].re;
                   t20_re = o19_re*x[ispan*20].re - o19_im*x[ispan*20].im;
                   t20_im = o19_re*x[ispan*20].im + o19_im*x[ispan*20].re;
                   t21_re = o20_re*x[ispan*21].re - o20_im*x[ispan*21].im;
                   t21_im = o20_re*x[ispan*21].im + o20_im*x[ispan*21].re;
                   t22_re = o21_re*x[ispan*22].re - o21_im*x[ispan*22].im;
                   t22_im = o21_re*x[ispan*22].im + o21_im*x[ispan*22].re;
                   t23_re = o22_re*x[ispan*23].re - o22_im*x[ispan*23].im;
                   t23_im = o22_re*x[ispan*23].im + o22_im*x[ispan*23].re;
                   t24_re = o23_re*x[ispan*24].re - o23_im*x[ispan*24].im;
                   t24_im = o23_re*x[ispan*24].im + o23_im*x[ispan*24].re;
                   t25_re = o24_re*x[ispan*25].re - o24_im*x[ispan*25].im;
                   t25_im = o24_re*x[ispan*25].im + o24_im*x[ispan*25].re;
                   t26_re = o25_re*x[ispan*26].re - o25_im*x[ispan*26].im;
                   t26_im = o25_re*x[ispan*26].im + o25_im*x[ispan*26].re;
                   t27_re = o26_re*x[ispan*27].re - o26_im*x[ispan*27].im;
                   t27_im = o26_re*x[ispan*27].im + o26_im*x[ispan*27].re;
                   t28_re = o27_re*x[ispan*28].re - o27_im*x[ispan*28].im;
                   t28_im = o27_re*x[ispan*28].im + o27_im*x[ispan*28].re;
                   t29_re = o28_re*x[ispan*29].re - o28_im*x[ispan*29].im;
                   t29_im = o28_re*x[ispan*29].im + o28_im*x[ispan*29].re;
                   t30_re = o29_re*x[ispan*30].re - o29_im*x[ispan*30].im;
                   t30_im = o29_re*x[ispan*30].im + o29_im*x[ispan*30].re;
                   t31_re = o30_re*x[ispan*31].re - o30_im*x[ispan*31].im;
                   t31_im = o30_re*x[ispan*31].im + o30_im*x[ispan*31].re;

                   goto DO_RADIX32I_KERNEL;
                   RTN_RADIX32I_KERNEL_0:

                   x++;   
                   y++; 
                }
                x += istep;       
                w += ispan;
            }
	}
	else {
            rtnjump = 1;
            for(l=1; l<=ispan; l++) {
                   t00_re = x[0].re;
                   t00_im = x[0].im;
                   t01_re = x[ispan*1].re;
                   t01_im = x[ispan*1].im;
                   t02_re = x[ispan*2].re;
                   t02_im = x[ispan*2].im;
                   t03_re = x[ispan*3].re;
                   t03_im = x[ispan*3].im;
                   t04_re = x[ispan*4].re;
                   t04_im = x[ispan*4].im;
                   t05_re = x[ispan*5].re;
                   t05_im = x[ispan*5].im;
                   t06_re = x[ispan*6].re;
                   t06_im = x[ispan*6].im;
                   t07_re = x[ispan*7].re;
                   t07_im = x[ispan*7].im;
                   t08_re = x[ispan*8].re;
                   t08_im = x[ispan*8].im;
                   t09_re = x[ispan*9].re;
                   t09_im = x[ispan*9].im;
                   t10_re = x[ispan*10].re;
                   t10_im = x[ispan*10].im;
                   t11_re = x[ispan*11].re;
                   t11_im = x[ispan*11].im;
                   t12_re = x[ispan*12].re;
                   t12_im = x[ispan*12].im;
                   t13_re = x[ispan*13].re;
                   t13_im = x[ispan*13].im;
                   t14_re = x[ispan*14].re;
                   t14_im = x[ispan*14].im;
                   t15_re = x[ispan*15].re;
                   t15_im = x[ispan*15].im;
                   t16_re = x[ispan*16].re;
                   t16_im = x[ispan*16].im;
                   t17_re = x[ispan*17].re;
                   t17_im = x[ispan*17].im;
                   t18_re = x[ispan*18].re;
                   t18_im = x[ispan*18].im;
                   t19_re = x[ispan*19].re;
                   t19_im = x[ispan*19].im;
                   t20_re = x[ispan*20].re;
                   t20_im = x[ispan*20].im;
                   t21_re = x[ispan*21].re;
                   t21_im = x[ispan*21].im;
                   t22_re = x[ispan*22].re;
                   t22_im = x[ispan*22].im;
                   t23_re = x[ispan*23].re;
                   t23_im = x[ispan*23].im;
                   t24_re = x[ispan*24].re;
                   t24_im = x[ispan*24].im;
                   t25_re = x[ispan*25].re;
                   t25_im = x[ispan*25].im;
                   t26_re = x[ispan*26].re;
                   t26_im = x[ispan*26].im;
                   t27_re = x[ispan*27].re;
                   t27_im = x[ispan*27].im;
                   t28_re = x[ispan*28].re;
                   t28_im = x[ispan*28].im;
                   t29_re = x[ispan*29].re;
                   t29_im = x[ispan*29].im;
                   t30_re = x[ispan*30].re;
                   t30_im = x[ispan*30].im;
                   t31_re = x[ispan*31].re;
                   t31_im = x[ispan*31].im;

               goto DO_RADIX32I_KERNEL;
               RTN_RADIX32I_KERNEL_1:

               x++;  
               y++;   
            }
	}

	break;
    }	

 /* Skip Kernel Code */
    goto RADIX32_RETURN;

    {
     /* The fundamental forward pass RADIX 32 kernel */  
        DO_RADIX32F_KERNEL:
                   x00z_re = t00_re + t16_re;
                   x16z_re = t00_re - t16_re;
                   x00z_im = t00_im + t16_im;
                   x16z_im = t00_im - t16_im;
                   x01z_re = t01_re + t17_re;
                   x17z_re = t01_re - t17_re;
                   x01z_im = t01_im + t17_im;
                   x17z_im = t01_im - t17_im;
                   x02z_re = t02_re + t18_re;
                   x18z_re = t02_re - t18_re;
                   x02z_im = t02_im + t18_im;
                   x18z_im = t02_im - t18_im;
                   x03z_re = t03_re + t19_re;
                   x19z_re = t03_re - t19_re;
                   x03z_im = t03_im + t19_im;
                   x19z_im = t03_im - t19_im;
                   x04z_re = t04_re + t20_re;
                   x20z_re = t04_re - t20_re;
                   x04z_im = t04_im + t20_im;
                   x20z_im = t04_im - t20_im;
                   x05z_re = t05_re + t21_re;
                   x21z_re = t05_re - t21_re;
                   x05z_im = t05_im + t21_im;
                   x21z_im = t05_im - t21_im;
                   x06z_re = t06_re + t22_re;
                   x22z_re = t06_re - t22_re;
                   x06z_im = t06_im + t22_im;
                   x22z_im = t06_im - t22_im;
                   x07z_re = t07_re + t23_re;
                   x23z_re = t07_re - t23_re;
                   x07z_im = t07_im + t23_im;
                   x23z_im = t07_im - t23_im;
                   x08z_re = t08_re + t24_re;
                   x24z_re = t08_re - t24_re;
                   x08z_im = t08_im + t24_im;
                   x24z_im = t08_im - t24_im;
                   x09z_re = t09_re + t25_re;
                   x25z_re = t09_re - t25_re;
                   x09z_im = t09_im + t25_im;
                   x25z_im = t09_im - t25_im;
                   x10z_re = t10_re + t26_re;
                   x26z_re = t10_re - t26_re;
                   x10z_im = t10_im + t26_im;
                   x26z_im = t10_im - t26_im;
                   x11z_re = t11_re + t27_re;
                   x27z_re = t11_re - t27_re;
                   x11z_im = t11_im + t27_im;
                   x27z_im = t11_im - t27_im;
                   x12z_re = t12_re + t28_re;
                   x28z_re = t12_re - t28_re;
                   x12z_im = t12_im + t28_im;
                   x28z_im = t12_im - t28_im;
                   x13z_re = t13_re + t29_re;
                   x29z_re = t13_re - t29_re;
                   x13z_im = t13_im + t29_im;
                   x29z_im = t13_im - t29_im;
                   x14z_re = t14_re + t30_re;
                   x30z_re = t14_re - t30_re;
                   x14z_im = t14_im + t30_im;
                   x30z_im = t14_im - t30_im;
                   x15z_re = t15_re + t31_re;
                   x31z_re = t15_re - t31_re;
                   x15z_im = t15_im + t31_im;
                   x31z_im = t15_im - t31_im;

                   x00c_re = x00z_re + x08z_re;
                   x08c_re = x00z_re - x08z_re;
                   x00c_im = x00z_im + x08z_im;
                   x08c_im = x00z_im - x08z_im;
                   x02c_re = x02z_re + x10z_re;
                   x10e_re = x02z_re - x10z_re;
                   x02c_im = x02z_im + x10z_im;
                   x10e_im = x02z_im - x10z_im;
                   x04c_re = x04z_re + x12z_re;
                   x12c_re = x04z_re - x12z_re;
                   x04c_im = x04z_im + x12z_im;
                   x12c_im = x04z_im - x12z_im;
                   x06c_re = x06z_re + x14z_re;
                   x14e_re = x06z_re - x14z_re;
                   x06c_im = x06z_im + x14z_im;
                   x14e_im = x06z_im - x14z_im;
                   x00b_re = x00c_re + x04c_re;
                   x04b_re = x00c_re - x04c_re;
                   x02b_re = x02c_re + x06c_re;
                   x06b_re = x02c_re - x06c_re;
                   x00a_re = x00b_re + x02b_re;
                   x02a_re = x00b_re - x02b_re;
                   x00b_im = x00c_im + x04c_im;
                   x04b_im = x00c_im - x04c_im;
                   x02b_im = x02c_im + x06c_im;
                   x06b_im = x02c_im - x06c_im;
                   x00a_im = x00b_im + x02b_im;
                   x02a_im = x00b_im - x02b_im;
                   x01c_re = x01z_re + x09z_re;
                   x09f_re = x01z_re - x09z_re;
                   x01c_im = x01z_im + x09z_im;
                   x09f_im = x01z_im - x09z_im;
                   x03c_re = x03z_re + x11z_re;
                   x11f_re = x03z_re - x11z_re;
                   x03c_im = x03z_im + x11z_im;
                   x11f_im = x03z_im - x11z_im;
                   x05e_re = x05z_re + x13z_re;
                   x13e_re = x05z_re - x13z_re;
                   x05e_im = x05z_im + x13z_im;
                   x13e_im = x05z_im - x13z_im;
                   x07e_re = x07z_re + x15z_re;
                   x15f_re = x07z_re - x15z_re;
                   x07e_im = x07z_im + x15z_im;
                   x15f_im = x07z_im - x15z_im;
                   x01b_re = x01c_re + x05e_re;
                   x05d_re = x01c_re - x05e_re;
                   x03b_re = x03c_re + x07e_re;
                   x07d_re = x03c_re - x07e_re;
                   x01a_re = x01b_re + x03b_re;
                   x03a_re = x01b_re - x03b_re;
                   x01b_im = x01c_im + x05e_im;
                   x05d_im = x01c_im - x05e_im;
                   x03b_im = x03c_im + x07e_im;
                   x07d_im = x03c_im - x07e_im;
                   x01a_im = x01b_im + x03b_im;
                   x03a_im = x01b_im - x03b_im;

                   y[0*ospan].re  = x00a_re + x01a_re;
                   y[0*ospan].im  = x00a_im + x01a_im;
                   y[16*ospan].re = x00a_re - x01a_re;
                   y[16*ospan].im = x00a_im - x01a_im;
                   y[8*ospan].re  = x02a_re + x03a_im;
                   y[8*ospan].im  = x02a_im - x03a_re;
                   y[24*ospan].re = x02a_re - x03a_im;
                   y[24*ospan].im = x02a_im + x03a_re;

                   x07c_re = x05d_re + x07d_re;
                   x05c_re = x05d_re - x07d_re;
                   x05b_re = s1*x05c_re;
                   x04a_re = x04b_re + x05b_re;
                   x05a_re = x04b_re - x05b_re;
                   x07b_re = s1*x07c_re;
                   x06a_im = x06b_re + x07b_re;
                   x07a_im = x06b_re - x07b_re;
                   x07c_im = x05d_im + x07d_im;
                   x05c_im = x05d_im - x07d_im;
                   x05b_im = s1*x05c_im;
                   x04a_im = x04b_im + x05b_im;
                   x05a_im = x04b_im - x05b_im;
                   x07b_im = s1*x07c_im;
                   x06a_re = x06b_im + x07b_im;
                   x07a_re = x06b_im - x07b_im;

                   y[4*ospan].re  = x04a_re + x06a_re;
                   y[4*ospan].im  = x04a_im - x06a_im;
                   y[28*ospan].re = x04a_re - x06a_re;
                   y[28*ospan].im = x04a_im + x06a_im;
                   y[12*ospan].re = x05a_re - x07a_re;
                   y[12*ospan].im = x05a_im + x07a_im;
                   y[20*ospan].re = x05a_re + x07a_re;
                   y[20*ospan].im = x05a_im - x07a_im;

                   x09e_re = x09f_re - x15f_re;
                   x15e_re = x09f_re + x15f_re;
                   x09e_im = x09f_im - x15f_im;
                   x15e_im = x09f_im + x15f_im;
                   x11e_re = x11f_re - x13e_re;
                   x13d_re = x11f_re + x13e_re;
                   x13d_re = x13d_re - x15e_re;
                   x15d_re = s2*x15e_re;
                   x13c_re = x13d_re + x15d_re;
                   x15c_re = x13d_re - x15d_re;
                   x11e_im = x11f_im - x13e_im;
                   x13d_im = x11f_im + x13e_im;
                   x13d_im = x13d_im - x15e_im;
                   x15d_im = s2*x15e_im;
                   x13c_im = x13d_im + x15d_im;
                   x15c_im = x13d_im - x15d_im;
                   x14d_re = x10e_re + x14e_re;
                   x10d_re = x10e_re - x14e_re;
                   x10c_re = s1*x10d_re;
                   x08b_re = x08c_re + x10c_re;
                   x10b_re = x08c_re - x10c_re;
                   x14c_re = s1*x14d_re;
                   x14d_im = x10e_im + x14e_im;
                   x10d_im = x10e_im - x14e_im;
                   x10c_im = s1*x10d_im;
                   x08b_im = x08c_im + x10c_im;
                   x10b_im = x08c_im - x10c_im;
                   x14c_im = s1*x14d_im;
                   x12b_im = -x12c_re - x14c_re;
                   x14b_im = -x12c_re + x14c_re;
                   x12b_re = x12c_im + x14c_im;
                   x14b_re = x12c_im - x14c_im;
                   x09d_re = x09e_re - x11e_re;
                   x11d_re = s2*x11e_re;
                   x09c_re = x09d_re + x11d_re;
                   x11c_re = x09d_re - x11d_re;
                   x09b_re = s3*x09c_re;
                   x08a_re = x08b_re + x09b_re;
                   x09a_re = x08b_re - x09b_re;
                   x09d_im = x09e_im - x11e_im;
                   x11d_im = s2*x11e_im;
                   x09c_im = x09d_im + x11d_im;
                   x11c_im = x09d_im - x11d_im;
                   x09b_im = s3*x09c_im;
                   x08a_im = x08b_im + x09b_im;
                   x09a_im = x08b_im - x09b_im;
                   x13b_im = s3*x13c_im;
                   x12a_re = x12b_re + x13b_im;
                   x13a_re = x12b_re - x13b_im;
                   x13b_re = s3*x13c_re;
                   x12a_im = x12b_im - x13b_re;
                   x13a_im = x12b_im + x13b_re;

                   y[2*ospan].re  = x08a_re + x12a_re;
                   y[2*ospan].im  = x08a_im + x12a_im;
                   y[30*ospan].re = x08a_re - x12a_re;
                   y[30*ospan].im = x08a_im - x12a_im;
                   y[14*ospan].re = x09a_re - x13a_re;
                   y[14*ospan].im = x09a_im - x13a_im;
                   y[18*ospan].re = x09a_re + x13a_re;
                   y[18*ospan].im = x09a_im + x13a_im;

                   x11b_re = s4*x11c_re;
                   x10a_re = x10b_re + x11b_re;
                   x11a_re = x10b_re - x11b_re;
                   x11b_im = s4*x11c_im;
                   x10a_im = x10b_im + x11b_im;
                   x11a_im = x10b_im - x11b_im;
                   x15b_im = s4*x15c_im;
                   x14a_re = x14b_re + x15b_im;
                   x15a_re = x14b_re - x15b_im;
                   x15b_re = s4*x15c_re;
                   x14a_im = x14b_im - x15b_re;
                   x15a_im = x14b_im + x15b_re;

                   y[6*ospan].re  = x10a_re - x14a_re;
                   y[6*ospan].im  = x10a_im - x14a_im;
                   y[26*ospan].re = x10a_re + x14a_re;
                   y[26*ospan].im = x10a_im + x14a_im;
                   y[10*ospan].re = x11a_re + x15a_re;
                   y[10*ospan].im = x11a_im + x15a_im;
                   y[22*ospan].re = x11a_re - x15a_re;
                   y[22*ospan].im = x11a_im - x15a_im;

                   x17g_re = x17z_re - x31z_re;
                   x31g_re = x17z_re + x31z_re;
                   x17g_im = x17z_im - x31z_im;
                   x31g_im = x17z_im + x31z_im;
                   x21g_re = x21z_re - x27z_re;
                   x27g_re = x21z_re + x27z_re;
                   x21g_im = x21z_im - x27z_im;
                   x27g_im = x21z_im + x27z_im;
                   x23g_re = x23z_re - x25z_re;
                   x25g_re = x23z_re + x25z_re;
                   x23g_im = x23z_im - x25z_im;
                   x25g_im = x23z_im + x25z_im;
                   x17f_re = x17g_re - x23g_re;
                   x21f_re = s2*x21g_re;
                   x17e_re = x17f_re + x21f_re;
                   x21e_re = x17f_re - x21f_re;
                   x17f_im = x17g_im - x23g_im;
                   x21f_im = s2*x21g_im;
                   x17e_im = x17f_im + x21f_im;
                   x21e_im = x17f_im - x21f_im;

                   x18f_re = x18z_re - x30z_re;
                   x30f_re = x18z_re + x30z_re;
                   x18f_im = x18z_im - x30z_im;
                   x30f_im = x18z_im + x30z_im;
                   x22f_re = x22z_re - x26z_re;
                   x26f_re = x22z_re + x26z_re; 
                   x22f_im = x22z_im - x26z_im;
                   x26f_im = x22z_im + x26z_im;
                   x18e_re = x18f_re - x22f_re;
                   x22e_re = s2*x22f_re;
                   x18d_re = x18e_re + x22e_re;
                   x22d_re = x18e_re - x22e_re;
                   x18e_im = x18f_im - x22f_im;
                   x22e_im = s2*x22f_im;
                   x18d_im = x18e_im + x22e_im;
                   x22d_im = x18e_im - x22e_im;

                   x19g_im = x19z_im - x29z_im;
                   x29g_im = x19z_im + x29z_im;
                   x19g_re = x19z_re - x29z_re;
                   x29g_re = x19z_re + x29z_re;
                   x19f_re = x19g_re - x21g_re;
                   x23f_re = s2*x23g_re;
                   x19e_re = x19f_re + x23f_re;
                   x23e_re = x19f_re - x23f_re;
                   x19f_im = x19g_im - x21g_im;
                   x23f_im = s2*x23g_im;
                   x19e_im = x19f_im + x23f_im;
                   x23e_im = x19f_im - x23f_im;
                   x17d_re = x17e_re - x19e_re;
                   x19d_re = s5*x19e_re;
                   x17c_re = x17d_re + x19d_re;
                   x19c_re = x17d_re - x19d_re;
                   x17d_im = x17e_im - x19e_im;
                   x19d_im = s5*x19e_im;
                   x17c_im = x17d_im + x19d_im;
                   x19c_im = x17d_im - x19d_im;
                   x21d_re = x21e_re - x23e_re;
                   x23d_re = s6*x23e_re;
                   x21c_re = x21d_re + x23d_re;
                   x23c_re = x21d_re - x23d_re;
                   x21d_im = x21e_im - x23e_im;
                   x23d_im = s6*x23e_im;
                   x21c_im = x21d_im + x23d_im;
                   x23c_im = x21d_im - x23d_im;
                   x25f_re = x25g_re - x31g_re;
                   x29f_re = s2*x29g_re;
                   x25e_re = x25f_re + x29f_re;
                   x29e_re = x25f_re - x29f_re;
                   x25f_im = x25g_im - x31g_im;
                   x29f_im = s2*x29g_im;
                   x25e_im = x25f_im + x29f_im;
                   x29e_im = x25f_im - x29f_im;
                   x26e_re = x26f_re - x30f_re;
                   x30e_re = s2*x30f_re;
                   x26d_re = x26e_re + x30e_re;
                   x30d_re = x26e_re - x30e_re;
                   x26e_im = x26f_im - x30f_im;
                   x30e_im = s2*x30f_im;
                   x26d_im = x26e_im + x30e_im;
                   x30d_im = x26e_im - x30e_im;
                   x27f_re = x27g_re - x29g_re;
                   x31f_re = s2*x31g_re;
                   x27e_re = x27f_re + x31f_re;
                   x31e_re = x27f_re - x31f_re;
                   x27f_im = x27g_im - x29g_im;
                   x31f_im = s2*x31g_im;
                   x27e_im = x27f_im + x31f_im;
                   x31e_im = x27f_im - x31f_im;
                   x25d_re = x25e_re - x27e_re;
                   x27d_re = s5*x27e_re;
                   x27c_re = x25d_re - x27d_re;
                   x25c_re = x25d_re + x27d_re;
                   x25d_im = x25e_im - x27e_im;
                   x27d_im = s5*x27e_im;
                   x27c_im = x25d_im - x27d_im;
                   x25c_im = x25d_im + x27d_im;
                   x29d_re = x29e_re - x31e_re;
                   x31d_re = s6*x31e_re;
                   x29c_re = x29d_re + x31d_re;
                   x31c_re = x29d_re - x31d_re;
                   x29d_im = x29e_im - x31e_im;
                   x31d_im = s6*x31e_im;
                   x29c_im = x29d_im + x31d_im;
                   x31c_im = x29d_im - x31d_im;

                   x20e_re = x20z_re - x28z_re;
                   x28e_re = x20z_re + x28z_re;
                   x20e_im = x20z_im - x28z_im;
                   x28e_im = x20z_im + x28z_im;
                   x28d_im = s1*x28e_im;
                   x24c_re = x24z_im + x28d_im;
                   x28c_re = x24z_im - x28d_im;
                   x26c_im = s3*x26d_im;
                   x24b_re = x24c_re + x26c_im;
                   x26b_re = x24c_re - x26c_im;
                   x25b_im = s7*x25c_im;
                   x24a_re = x24b_re + x25b_im;
                   x25a_re = x24b_re - x25b_im;
                   x28d_re = s1*x28e_re;
                   x24c_im = x28d_re + x24z_re;
                   x28c_im = x28d_re - x24z_re;
                   x26c_re = s3*x26d_re;
                   x24b_im = x26c_re + x24c_im;
                   x26b_im = x26c_re - x24c_im;
                   x25b_re = s7*x25c_re;
                   x24a_im = x25b_re + x24b_im;
                   x25a_im = x25b_re - x24b_im;
                   x20d_re = s1*x20e_re;
                   x20c_re = x16z_re - x20d_re;
                   x16c_re = x16z_re + x20d_re;
                   x18c_re = s3*x18d_re;
                   x16b_re = x16c_re + x18c_re;
                   x18b_re = x16c_re - x18c_re;
                   x17b_re = s7*x17c_re;
                   x16a_re = x16b_re + x17b_re;
                   x17a_re = x16b_re - x17b_re;
                   x20d_im = s1*x20e_im;
                   x20c_im = x16z_im - x20d_im;
                   x16c_im = x16z_im + x20d_im;
                   x18c_im = s3*x18d_im;
                   x16b_im = x16c_im + x18c_im;
                   x18b_im = x16c_im - x18c_im;
                   x17b_im = s7*x17c_im;
                   x16a_im = x16b_im + x17b_im;
                   x17a_im = x16b_im - x17b_im;

                   y[1*ospan].re  = x16a_re + x24a_re;
                   y[1*ospan].im  = x16a_im - x24a_im;
                   y[31*ospan].re = x16a_re - x24a_re;
                   y[31*ospan].im = x16a_im + x24a_im;
                   y[15*ospan].re = x17a_re - x25a_re;
                   y[15*ospan].im = x17a_im - x25a_im;
                   y[17*ospan].re = x17a_re + x25a_re;
                   y[17*ospan].im = x17a_im + x25a_im;
               
                   x27b_im = s8*x27c_im;
                   x26a_re = x26b_re + x27b_im;
                   x27a_re = x26b_re - x27b_im;
                   x27b_re = s8*x27c_re;
                   x26a_im = x26b_im - x27b_re;
                   x27a_im = x26b_im + x27b_re;
                   x19b_re = s8*x19c_re;
                   x18a_re = x18b_re + x19b_re;
                   x19a_re = x18b_re - x19b_re;
                   x19b_im = s8*x19c_im;
                   x18a_im = x18b_im + x19b_im;
                   x19a_im = x18b_im - x19b_im;
               
                   y[7*ospan].re  = x18a_re - x26a_re;
                   y[7*ospan].im  = x18a_im - x26a_im;
                   y[25*ospan].re = x18a_re + x26a_re;
                   y[25*ospan].im = x18a_im + x26a_im;
                   y[9*ospan].re  = x19a_re + x27a_re;
                   y[9*ospan].im  = x19a_im + x27a_im;
                   y[23*ospan].re = x19a_re - x27a_re;
                   y[23*ospan].im = x19a_im - x27a_im;

                   x30c_im = s4*x30d_im;
                   x28b_re = x28c_re + x30c_im;
                   x30b_re = x28c_re - x30c_im;
                   x29b_im = s9*x29c_im;
                   x28a_re = x28b_re + x29b_im;
                   x29a_re = x28b_re - x29b_im;
                   x30c_re = s4*x30d_re;
                   x28b_im = x28c_im - x30c_re;
                   x30b_im = x28c_im + x30c_re;
                   x29b_re = s9*x29c_re;
                   x28a_im = x28b_im - x29b_re;
                   x29a_im = x28b_im + x29b_re;
                   x22c_re = s4*x22d_re;
                   x20b_re = x20c_re + x22c_re;
                   x22b_re = x20c_re - x22c_re;
                   x21b_re = s9*x21c_re;
                   x20a_re = x20b_re + x21b_re;
                   x21a_re = x20b_re - x21b_re;
                   x22c_im = s4*x22d_im;
                   x20b_im = x20c_im + x22c_im;
                   x22b_im = x20c_im - x22c_im;
                   x21b_im = s9*x21c_im;
                   x20a_im = x20b_im + x21b_im;
                   x21a_im = x20b_im - x21b_im;
               
                   y[3*ospan].re  = x20a_re - x28a_re;
                   y[3*ospan].im  = x20a_im - x28a_im;
                   y[29*ospan].re = x20a_re + x28a_re;
                   y[29*ospan].im = x20a_im + x28a_im;
                   y[13*ospan].re = x21a_re + x29a_re;
                   y[13*ospan].im = x21a_im + x29a_im;
                   y[19*ospan].re = x21a_re - x29a_re;
                   y[19*ospan].im = x21a_im - x29a_im;

                   x23b_re = s10*x23c_re;
                   x22a_re = x22b_re + x23b_re;
                   x23a_re = x22b_re - x23b_re;
                   x23b_im = s10*x23c_im;
                   x22a_im = x22b_im + x23b_im;
                   x23a_im = x22b_im - x23b_im;
                   x31b_im = s10*x31c_im;
                   x30a_re = x30b_re + x31b_im;
                   x31a_re = x30b_re - x31b_im;
                   x31b_re = s10*x31c_re;
                   x31a_im = x30b_im + x31b_re;
                   x30a_im = x30b_im - x31b_re;

                   y[5*ospan].re  = x22a_re + x30a_re;
                   y[5*ospan].im  = x22a_im + x30a_im;
                   y[27*ospan].re = x22a_re - x30a_re;
                   y[27*ospan].im = x22a_im - x30a_im;
                   y[11*ospan].re = x23a_re - x31a_re;
                   y[11*ospan].im = x23a_im - x31a_im;
                   y[21*ospan].re = x23a_re + x31a_re;
                   y[21*ospan].im = x23a_im + x31a_im;

                   if (rtnjump == 0)
                     goto RTN_RADIX32F_KERNEL_0;
                   else
                     goto RTN_RADIX32F_KERNEL_1;
    }

    {
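     /* The fundamental inverse pass RADIX 32 kernel.
      * Control only arrives here through the DO_RADIX32I_KERNEL label: the
      * preceding block always leaves via a goto, so nothing falls through.
      * Reads the 32 complex inputs t00..t31 (_re/_im pairs) and stores the
      * results into y[k*ospan].
      * NOTE(review): the intermediate butterflies appear to mirror the
      * forward kernel, differing only in the signs used when terms are
      * combined into the outputs -- confirm before changing either one. */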
        DO_RADIX32I_KERNEL:
                   x00z_re = t00_re + t16_re;
                   x16z_re = t00_re - t16_re;
                   x00z_im = t00_im + t16_im;
                   x16z_im = t00_im - t16_im;
                   x01z_re = t01_re + t17_re;
                   x17z_re = t01_re - t17_re;
                   x01z_im = t01_im + t17_im;
                   x17z_im = t01_im - t17_im;
                   x02z_re = t02_re + t18_re;
                   x18z_re = t02_re - t18_re;
                   x02z_im = t02_im + t18_im;
                   x18z_im = t02_im - t18_im;
                   x03z_re = t03_re + t19_re;
                   x19z_re = t03_re - t19_re;
                   x03z_im = t03_im + t19_im;
                   x19z_im = t03_im - t19_im;
                   x04z_re = t04_re + t20_re;
                   x20z_re = t04_re - t20_re;
                   x04z_im = t04_im + t20_im;
                   x20z_im = t04_im - t20_im;
                   x05z_re = t05_re + t21_re;
                   x21z_re = t05_re - t21_re;
                   x05z_im = t05_im + t21_im;
                   x21z_im = t05_im - t21_im;
                   x06z_re = t06_re + t22_re;
                   x22z_re = t06_re - t22_re;
                   x06z_im = t06_im + t22_im;
                   x22z_im = t06_im - t22_im;
                   x07z_re = t07_re + t23_re;
                   x23z_re = t07_re - t23_re;
                   x07z_im = t07_im + t23_im;
                   x23z_im = t07_im - t23_im;
                   x08z_re = t08_re + t24_re;
                   x24z_re = t08_re - t24_re;
                   x08z_im = t08_im + t24_im;
                   x24z_im = t08_im - t24_im;
                   x09z_re = t09_re + t25_re;
                   x25z_re = t09_re - t25_re;
                   x09z_im = t09_im + t25_im;
                   x25z_im = t09_im - t25_im;
                   x10z_re = t10_re + t26_re;
                   x26z_re = t10_re - t26_re;
                   x10z_im = t10_im + t26_im;
                   x26z_im = t10_im - t26_im;
                   x11z_re = t11_re + t27_re;
                   x27z_re = t11_re - t27_re;
                   x11z_im = t11_im + t27_im;
                   x27z_im = t11_im - t27_im;
                   x12z_re = t12_re + t28_re;
                   x28z_re = t12_re - t28_re;
                   x12z_im = t12_im + t28_im;
                   x28z_im = t12_im - t28_im;
                   x13z_re = t13_re + t29_re;
                   x29z_re = t13_re - t29_re;
                   x13z_im = t13_im + t29_im;
                   x29z_im = t13_im - t29_im;
                   x14z_re = t14_re + t30_re;
                   x30z_re = t14_re - t30_re;
                   x14z_im = t14_im + t30_im;
                   x30z_im = t14_im - t30_im;
                   x15z_re = t15_re + t31_re;
                   x31z_re = t15_re - t31_re;
                   x15z_im = t15_im + t31_im;
                   x31z_im = t15_im - t31_im;

                   x00c_re = x00z_re + x08z_re;
                   x08c_re = x00z_re - x08z_re;
                   x00c_im = x00z_im + x08z_im;
                   x08c_im = x00z_im - x08z_im;
                   x02c_re = x02z_re + x10z_re;
                   x10e_re = x02z_re - x10z_re;
                   x02c_im = x02z_im + x10z_im;
                   x10e_im = x02z_im - x10z_im;
                   x04c_re = x04z_re + x12z_re;
                   x12c_re = x04z_re - x12z_re;
                   x04c_im = x04z_im + x12z_im;
                   x12c_im = x04z_im - x12z_im;
                   x06c_re = x06z_re + x14z_re;
                   x14e_re = x06z_re - x14z_re;
                   x06c_im = x06z_im + x14z_im;
                   x14e_im = x06z_im - x14z_im;
                   x00b_re = x00c_re + x04c_re;
                   x04b_re = x00c_re - x04c_re;
                   x02b_re = x02c_re + x06c_re;
                   x06b_re = x02c_re - x06c_re;
                   x00a_re = x00b_re + x02b_re;
                   x02a_re = x00b_re - x02b_re;
                   x00b_im = x00c_im + x04c_im;
                   x04b_im = x00c_im - x04c_im;
                   x02b_im = x02c_im + x06c_im;
                   x06b_im = x02c_im - x06c_im;
                   x00a_im = x00b_im + x02b_im;
                   x02a_im = x00b_im - x02b_im;
                   x01c_re = x01z_re + x09z_re;
                   x09f_re = x01z_re - x09z_re;
                   x01c_im = x01z_im + x09z_im;
                   x09f_im = x01z_im - x09z_im;
                   x03c_re = x03z_re + x11z_re;
                   x11f_re = x03z_re - x11z_re;
                   x03c_im = x03z_im + x11z_im;
                   x11f_im = x03z_im - x11z_im;
                   x05e_re = x05z_re + x13z_re;
                   x13e_re = x05z_re - x13z_re;
                   x05e_im = x05z_im + x13z_im;
                   x13e_im = x05z_im - x13z_im;
                   x07e_re = x07z_re + x15z_re;
                   x15f_re = x07z_re - x15z_re;
                   x07e_im = x07z_im + x15z_im;
                   x15f_im = x07z_im - x15z_im;
                   x01b_re = x01c_re + x05e_re;
                   x05d_re = x01c_re - x05e_re;
                   x03b_re = x03c_re + x07e_re;
                   x07d_re = x03c_re - x07e_re;
                   x01a_re = x01b_re + x03b_re;
                   x03a_re = x01b_re - x03b_re;
                   x01b_im = x01c_im + x05e_im;
                   x05d_im = x01c_im - x05e_im;
                   x03b_im = x03c_im + x07e_im;
                   x07d_im = x03c_im - x07e_im;
                   x01a_im = x01b_im + x03b_im;
                   x03a_im = x01b_im - x03b_im;

                   y[0*ospan].re  = x00a_re + x01a_re;
                   y[0*ospan].im  = x00a_im + x01a_im;
                   y[16*ospan].re = x00a_re - x01a_re;
                   y[16*ospan].im = x00a_im - x01a_im;
                   y[8*ospan].re  = x02a_re - x03a_im;
                   y[8*ospan].im  = x02a_im + x03a_re;
                   y[24*ospan].re = x02a_re + x03a_im;
                   y[24*ospan].im = x02a_im - x03a_re;

                   x07c_re = x05d_re + x07d_re;
                   x05c_re = x05d_re - x07d_re;
                   x05b_re = s1*x05c_re;
                   x04a_re = x04b_re + x05b_re;
                   x05a_re = x04b_re - x05b_re;
                   x07b_re = s1*x07c_re;
                   x06a_im = x06b_re + x07b_re;
                   x07a_im = x06b_re - x07b_re;
                   x07c_im = x05d_im + x07d_im;
                   x05c_im = x05d_im - x07d_im;
                   x05b_im = s1*x05c_im;
                   x04a_im = x04b_im + x05b_im;
                   x05a_im = x04b_im - x05b_im;
                   x07b_im = s1*x07c_im;
                   x06a_re = x06b_im + x07b_im;
                   x07a_re = x06b_im - x07b_im;

                   y[4*ospan].re  = x04a_re - x06a_re;
                   y[4*ospan].im  = x04a_im + x06a_im;
                   y[28*ospan].re = x04a_re + x06a_re;
                   y[28*ospan].im = x04a_im - x06a_im;
                   y[12*ospan].re = x05a_re + x07a_re;
                   y[12*ospan].im = x05a_im - x07a_im;
                   y[20*ospan].re = x05a_re - x07a_re;
                   y[20*ospan].im = x05a_im + x07a_im;

                   x09e_re = x09f_re - x15f_re;
                   x15e_re = x09f_re + x15f_re;
                   x09e_im = x09f_im - x15f_im;
                   x15e_im = x09f_im + x15f_im;
                   x11e_re = x11f_re - x13e_re;
                   x13d_re = x11f_re + x13e_re;
                   x13d_re = x13d_re - x15e_re;
                   x15d_re = s2*x15e_re;
                   x13c_re = x13d_re + x15d_re;
                   x15c_re = x13d_re - x15d_re;
                   x11e_im = x11f_im - x13e_im;
                   x13d_im = x11f_im + x13e_im;
                   x13d_im = x13d_im - x15e_im;
                   x15d_im = s2*x15e_im;
                   x13c_im = x13d_im + x15d_im;
                   x15c_im = x13d_im - x15d_im;
                   x14d_re = x10e_re + x14e_re;
                   x10d_re = x10e_re - x14e_re;
                   x10c_re = s1*x10d_re;
                   x08b_re = x08c_re + x10c_re;
                   x10b_re = x08c_re - x10c_re;
                   x14c_re = s1*x14d_re;
                   x14d_im = x10e_im + x14e_im;
                   x10d_im = x10e_im - x14e_im;
                   x10c_im = s1*x10d_im;
                   x08b_im = x08c_im + x10c_im;
                   x10b_im = x08c_im - x10c_im;
                   x14c_im = s1*x14d_im;
                   x12b_im = -x12c_re - x14c_re;
                   x14b_im = -x12c_re + x14c_re;
                   x12b_re = x12c_im + x14c_im;
                   x14b_re = x12c_im - x14c_im;
                   x09d_re = x09e_re - x11e_re;
                   x11d_re = s2*x11e_re;
                   x09c_re = x09d_re + x11d_re;
                   x11c_re = x09d_re - x11d_re;
                   x09b_re = s3*x09c_re;
                   x08a_re = x08b_re + x09b_re;
                   x09a_re = x08b_re - x09b_re;
                   x09d_im = x09e_im - x11e_im;
                   x11d_im = s2*x11e_im;
                   x09c_im = x09d_im + x11d_im;
                   x11c_im = x09d_im - x11d_im;
                   x09b_im = s3*x09c_im;
                   x08a_im = x08b_im + x09b_im;
                   x09a_im = x08b_im - x09b_im;
                   x13b_im = s3*x13c_im;
                   x12a_re = x12b_re + x13b_im;
                   x13a_re = x12b_re - x13b_im;
                   x13b_re = s3*x13c_re;
                   x12a_im = x12b_im - x13b_re;
                   x13a_im = x12b_im + x13b_re;

                   y[2*ospan].re  = x08a_re - x12a_re;
                   y[2*ospan].im  = x08a_im - x12a_im;
                   y[30*ospan].re = x08a_re + x12a_re;
                   y[30*ospan].im = x08a_im + x12a_im;
                   y[14*ospan].re = x09a_re + x13a_re;
                   y[14*ospan].im = x09a_im + x13a_im;
                   y[18*ospan].re = x09a_re - x13a_re;
                   y[18*ospan].im = x09a_im - x13a_im;

                   x11b_re = s4*x11c_re;
                   x10a_re = x10b_re + x11b_re;
                   x11a_re = x10b_re - x11b_re;
                   x11b_im = s4*x11c_im;
                   x10a_im = x10b_im + x11b_im;
                   x11a_im = x10b_im - x11b_im;
                   x15b_im = s4*x15c_im;
                   x14a_re = x14b_re + x15b_im;
                   x15a_re = x14b_re - x15b_im;
                   x15b_re = s4*x15c_re;
                   x14a_im = x14b_im - x15b_re;
                   x15a_im = x14b_im + x15b_re;

                   y[6*ospan].re  = x10a_re + x14a_re;
                   y[6*ospan].im  = x10a_im + x14a_im;
                   y[26*ospan].re = x10a_re - x14a_re;
                   y[26*ospan].im = x10a_im - x14a_im;
                   y[10*ospan].re = x11a_re - x15a_re;
                   y[10*ospan].im = x11a_im - x15a_im;
                   y[22*ospan].re = x11a_re + x15a_re;
                   y[22*ospan].im = x11a_im + x15a_im;

                   x17g_re = x17z_re - x31z_re;
                   x31g_re = x17z_re + x31z_re;
                   x17g_im = x17z_im - x31z_im;
                   x31g_im = x17z_im + x31z_im;
                   x21g_re = x21z_re - x27z_re;
                   x27g_re = x21z_re + x27z_re;
                   x21g_im = x21z_im - x27z_im;
                   x27g_im = x21z_im + x27z_im;
                   x23g_re = x23z_re - x25z_re;
                   x25g_re = x23z_re + x25z_re;
                   x23g_im = x23z_im - x25z_im;
                   x25g_im = x23z_im + x25z_im;
                   x17f_re = x17g_re - x23g_re;
                   x21f_re = s2*x21g_re;
                   x17e_re = x17f_re + x21f_re;
                   x21e_re = x17f_re - x21f_re;
                   x17f_im = x17g_im - x23g_im;
                   x21f_im = s2*x21g_im;
                   x17e_im = x17f_im + x21f_im;
                   x21e_im = x17f_im - x21f_im;

                   x18f_re = x18z_re - x30z_re;
                   x30f_re = x18z_re + x30z_re;
                   x18f_im = x18z_im - x30z_im;
                   x30f_im = x18z_im + x30z_im;
                   x22f_re = x22z_re - x26z_re;
                   x26f_re = x22z_re + x26z_re; 
                   x22f_im = x22z_im - x26z_im;
                   x26f_im = x22z_im + x26z_im;
                   x18e_re = x18f_re - x22f_re;
                   x22e_re = s2*x22f_re;
                   x18d_re = x18e_re + x22e_re;
                   x22d_re = x18e_re - x22e_re;
                   x18e_im = x18f_im - x22f_im;
                   x22e_im = s2*x22f_im;
                   x18d_im = x18e_im + x22e_im;
                   x22d_im = x18e_im - x22e_im;

                   x19g_im = x19z_im - x29z_im;
                   x29g_im = x19z_im + x29z_im;
                   x19g_re = x19z_re - x29z_re;
                   x29g_re = x19z_re + x29z_re;
                   x19f_re = x19g_re - x21g_re;
                   x23f_re = s2*x23g_re;
                   x19e_re = x19f_re + x23f_re;
                   x23e_re = x19f_re - x23f_re;
                   x19f_im = x19g_im - x21g_im;
                   x23f_im = s2*x23g_im;
                   x19e_im = x19f_im + x23f_im;
                   x23e_im = x19f_im - x23f_im;
                   x17d_re = x17e_re - x19e_re;
                   x19d_re = s5*x19e_re;
                   x17c_re = x17d_re + x19d_re;
                   x19c_re = x17d_re - x19d_re;
                   x17d_im = x17e_im - x19e_im;
                   x19d_im = s5*x19e_im;
                   x17c_im = x17d_im + x19d_im;
                   x19c_im = x17d_im - x19d_im;
                   x21d_re = x21e_re - x23e_re;
                   x23d_re = s6*x23e_re;
                   x21c_re = x21d_re + x23d_re;
                   x23c_re = x21d_re - x23d_re;
                   x21d_im = x21e_im - x23e_im;
                   x23d_im = s6*x23e_im;
                   x21c_im = x21d_im + x23d_im;
                   x23c_im = x21d_im - x23d_im;
                   x25f_re = x25g_re - x31g_re;
                   x29f_re = s2*x29g_re;
                   x25e_re = x25f_re + x29f_re;
                   x29e_re = x25f_re - x29f_re;
                   x25f_im = x25g_im - x31g_im;
                   x29f_im = s2*x29g_im;
                   x25e_im = x25f_im + x29f_im;
                   x29e_im = x25f_im - x29f_im;
                   x26e_re = x26f_re - x30f_re;
                   x30e_re = s2*x30f_re;
                   x26d_re = x26e_re + x30e_re;
                   x30d_re = x26e_re - x30e_re;
                   x26e_im = x26f_im - x30f_im;
                   x30e_im = s2*x30f_im;
                   x26d_im = x26e_im + x30e_im;
                   x30d_im = x26e_im - x30e_im;
                   x27f_re = x27g_re - x29g_re;
                   x31f_re = s2*x31g_re;
                   x27e_re = x27f_re + x31f_re;
                   x31e_re = x27f_re - x31f_re;
                   x27f_im = x27g_im - x29g_im;
                   x31f_im = s2*x31g_im;
                   x27e_im = x27f_im + x31f_im;
                   x31e_im = x27f_im - x31f_im;
                   x25d_re = x25e_re - x27e_re;
                   x27d_re = s5*x27e_re;
                   x27c_re = x25d_re - x27d_re;
                   x25c_re = x25d_re + x27d_re;
                   x25d_im = x25e_im - x27e_im;
                   x27d_im = s5*x27e_im;
                   x27c_im = x25d_im - x27d_im;
                   x25c_im = x25d_im + x27d_im;
                   x29d_re = x29e_re - x31e_re;
                   x31d_re = s6*x31e_re;
                   x29c_re = x29d_re + x31d_re;
                   x31c_re = x29d_re - x31d_re;
                   x29d_im = x29e_im - x31e_im;
                   x31d_im = s6*x31e_im;
                   x29c_im = x29d_im + x31d_im;
                   x31c_im = x29d_im - x31d_im;

                   x20e_re = x20z_re - x28z_re;
                   x28e_re = x20z_re + x28z_re;
                   x20e_im = x20z_im - x28z_im;
                   x28e_im = x20z_im + x28z_im;
                   x28d_im = s1*x28e_im;
                   x24c_re = x24z_im + x28d_im;
                   x28c_re = x24z_im - x28d_im;
                   x26c_im = s3*x26d_im;
                   x24b_re = x24c_re + x26c_im;
                   x26b_re = x24c_re - x26c_im;
                   x25b_im = s7*x25c_im;
                   x24a_re = x24b_re + x25b_im;
                   x25a_re = x24b_re - x25b_im;
                   x28d_re = s1*x28e_re;
                   x24c_im = x28d_re + x24z_re;
                   x28c_im = x28d_re - x24z_re;
                   x26c_re = s3*x26d_re;
                   x24b_im = x26c_re + x24c_im;
                   x26b_im = x26c_re - x24c_im;
                   x25b_re = s7*x25c_re;
                   x24a_im = x25b_re + x24b_im;
                   x25a_im = x25b_re - x24b_im;
                   x20d_re = s1*x20e_re;
                   x20c_re = x16z_re - x20d_re;
                   x16c_re = x16z_re + x20d_re;
                   x18c_re = s3*x18d_re;
                   x16b_re = x16c_re + x18c_re;
                   x18b_re = x16c_re - x18c_re;
                   x17b_re = s7*x17c_re;
                   x16a_re = x16b_re + x17b_re;
                   x17a_re = x16b_re - x17b_re;
                   x20d_im = s1*x20e_im;
                   x20c_im = x16z_im - x20d_im;
                   x16c_im = x16z_im + x20d_im;
                   x18c_im = s3*x18d_im;
                   x16b_im = x16c_im + x18c_im;
                   x18b_im = x16c_im - x18c_im;
                   x17b_im = s7*x17c_im;
                   x16a_im = x16b_im + x17b_im;
                   x17a_im = x16b_im - x17b_im;

                   y[1*ospan].re  = x16a_re - x24a_re;
                   y[1*ospan].im  = x16a_im + x24a_im;
                   y[31*ospan].re = x16a_re + x24a_re;
                   y[31*ospan].im = x16a_im - x24a_im;
                   y[17*ospan].re = x17a_re - x25a_re;
                   y[17*ospan].im = x17a_im - x25a_im;
                   y[15*ospan].re = x17a_re + x25a_re;
                   y[15*ospan].im = x17a_im + x25a_im;
               
                   x27b_im = s8*x27c_im;
                   x26a_re = x26b_re + x27b_im;
                   x27a_re = x26b_re - x27b_im;
                   x27b_re = s8*x27c_re;
                   x26a_im = x26b_im - x27b_re;
                   x27a_im = x26b_im + x27b_re;
                   x19b_re = s8*x19c_re;
                   x18a_re = x18b_re + x19b_re;
                   x19a_re = x18b_re - x19b_re;
                   x19b_im = s8*x19c_im;
                   x18a_im = x18b_im + x19b_im;
                   x19a_im = x18b_im - x19b_im;
               
                   y[7*ospan].re  = x18a_re + x26a_re;
                   y[7*ospan].im  = x18a_im + x26a_im;
                   y[25*ospan].re = x18a_re - x26a_re;
                   y[25*ospan].im = x18a_im - x26a_im;
                   y[9*ospan].re  = x19a_re - x27a_re;
                   y[9*ospan].im  = x19a_im - x27a_im;
                   y[23*ospan].re = x19a_re + x27a_re;
                   y[23*ospan].im = x19a_im + x27a_im;

                   x30c_im = s4*x30d_im;
                   x28b_re = x28c_re + x30c_im;
                   x30b_re = x28c_re - x30c_im;
                   x29b_im = s9*x29c_im;
                   x28a_re = x28b_re + x29b_im;
                   x29a_re = x28b_re - x29b_im;
                   x30c_re = s4*x30d_re;
                   x28b_im = x28c_im - x30c_re;
                   x30b_im = x28c_im + x30c_re;
                   x29b_re = s9*x29c_re;
                   x28a_im = x28b_im - x29b_re;
                   x29a_im = x28b_im + x29b_re;
                   x22c_re = s4*x22d_re;
                   x20b_re = x20c_re + x22c_re;
                   x22b_re = x20c_re - x22c_re;
                   x21b_re = s9*x21c_re;
                   x20a_re = x20b_re + x21b_re;
                   x21a_re = x20b_re - x21b_re;
                   x22c_im = s4*x22d_im;
                   x20b_im = x20c_im + x22c_im;
                   x22b_im = x20c_im - x22c_im;
                   x21b_im = s9*x21c_im;
                   x20a_im = x20b_im + x21b_im;
                   x21a_im = x20b_im - x21b_im;
               
                   y[3*ospan].re  = x20a_re + x28a_re;
                   y[3*ospan].im  = x20a_im + x28a_im;
                   y[29*ospan].re = x20a_re - x28a_re;
                   y[29*ospan].im = x20a_im - x28a_im;
                   y[13*ospan].re = x21a_re - x29a_re;
                   y[13*ospan].im = x21a_im - x29a_im;
                   y[19*ospan].re = x21a_re + x29a_re;
                   y[19*ospan].im = x21a_im + x29a_im;

                   x23b_re = s10*x23c_re;
                   x22a_re = x22b_re + x23b_re;
                   x23a_re = x22b_re - x23b_re;
                   x23b_im = s10*x23c_im;
                   x22a_im = x22b_im + x23b_im;
                   x23a_im = x22b_im - x23b_im;
                   x31b_im = s10*x31c_im;
                   x30a_re = x30b_re + x31b_im;
                   x31a_re = x30b_re - x31b_im;
                   x31b_re = s10*x31c_re;
                   x31a_im = x30b_im + x31b_re;
                   x30a_im = x30b_im - x31b_re;

                   y[5*ospan].re  = x22a_re - x30a_re;
                   y[5*ospan].im  = x22a_im - x30a_im;
                   y[27*ospan].re = x22a_re + x30a_re;
                   y[27*ospan].im = x22a_im + x30a_im;
                   y[11*ospan].re = x23a_re + x31a_re;
                   y[11*ospan].im = x23a_im + x31a_im;
                   y[21*ospan].re = x23a_re - x31a_re;
                   y[21*ospan].im = x23a_im - x31a_im;

                   if (rtnjump == 0)
                     goto RTN_RADIX32I_KERNEL_0;
                   else
                     goto RTN_RADIX32I_KERNEL_1;
    }

    RADIX32_RETURN:

    return;
}

/*-----------------------------------------------------------------------*/

void radixpass_generic(const fft_real *a, fft_real *c, const fft_real *s, 
                       fft_int n, fft_int la, fft_int dirflag,
		       fft_int radixsize, fft_cmplx *t, const fft_cmplx *wn)
{
 /*
  * Generic (arbitrary radix) pass.
  *
  *   a, c      - input and output buffers, viewed as arrays of fft_cmplx
  *   s         - twiddle table, viewed as an array of fft_cmplx
  *   n         - number of complex points handled by the pass
  *   la        - input span between the radixsize legs of one butterfly
  *   dirflag   - FFT_FLAGS_FORWARD selects the forward arithmetic, any
  *               other value selects the inverse arithmetic
  *   radixsize - radix of this pass
  *   t         - scratch vector holding radixsize complex values
  *   wn        - radixsize x radixsize matrix, stored row by row
  *
  * For every butterfly the legs are first multiplied by their twiddle
  * factors into t[], then each output is formed as the dot product of one
  * row of wn with t[].
  */
    const fft_cmplx *in   = (const fft_cmplx *) a;   /* Input data    */
    fft_cmplx       *out  = (fft_cmplx *) c;         /* Output data   */
    const fft_cmplx *twid = (const fft_cmplx *) s;   /* Twiddle table */
    const fft_cmplx *dftrow;
    fft_int ispan, ospan, twstep, ngroups;
    fft_int group, col, row, j;
    fft_real wr, wi, vr, vi;
    fft_real sum_re, sum_im;

 /* Nothing to do for degenerate parameters */
    if (n < radixsize || la < 1) return;

    ospan   = n/radixsize;              /* distance between output legs  */
    ispan   = la;                       /* distance between input legs   */
    ngroups = ospan/ispan;              /* number of butterfly groups    */

    if (dirflag == FFT_FLAGS_FORWARD) {                  /* Forward Pass */
        for (group = 0; group < ngroups; group++) {
            twstep = group*ispan;
            for (col = 0; col < ispan; col++, in++, out++) {
             /* Leg 0 carries no twiddle; leg "row" uses twid[(row-1)*twstep] */
                t[0].re = in[0].re;
                t[0].im = in[0].im;
                for (row = 1; row < radixsize; row++) {
                    wr = twid[(row-1)*twstep].re;
                    wi = twid[(row-1)*twstep].im;
                    vr = in[row*ispan].re;
                    vi = in[row*ispan].im;
                    t[row].re = wr*vr + wi*vi;
                    t[row].im = wr*vi - wi*vr;
                }
             /* All inputs are now in t[], so the outputs may be stored */
                for (row = 0; row < radixsize; row++) {
                    dftrow = wn + row*radixsize;
                    sum_re = 0.0;
                    sum_im = 0.0;
                    for (j = 0; j < radixsize; j++) {
                        wr = dftrow[j].re;  wi = dftrow[j].im;
                        vr = t[j].re;       vi = t[j].im;
                        sum_re += wr*vr - wi*vi;
                        sum_im += wr*vi + wi*vr;
                    }
                    out[ospan*row].re = sum_re;
                    out[ospan*row].im = sum_im;
                }
            }
         /* Skip the remaining legs already consumed by this group */
            in += ispan*(radixsize-1);
        }
    }
    else {                                               /* Inverse Pass */
        for (group = 0; group < ngroups; group++) {
            twstep = group*ispan;
            for (col = 0; col < ispan; col++, in++, out++) {
                t[0].re = in[0].re;
                t[0].im = in[0].im;
                for (row = 1; row < radixsize; row++) {
                    wr = twid[(row-1)*twstep].re;
                    wi = twid[(row-1)*twstep].im;
                    vr = in[row*ispan].re;
                    vi = in[row*ispan].im;
                    t[row].re = wr*vr - wi*vi;
                    t[row].im = wr*vi + wi*vr;
                }
                for (row = 0; row < radixsize; row++) {
                    dftrow = wn + row*radixsize;
                    sum_re = 0.0;
                    sum_im = 0.0;
                    for (j = 0; j < radixsize; j++) {
                        wr = dftrow[j].re;  wi = dftrow[j].im;
                        vr = t[j].re;       vi = t[j].im;
                        sum_re += wr*vr + wi*vi;
                        sum_im += wr*vi - wi*vr;
                    }
                    out[ospan*row].re = sum_re;
                    out[ospan*row].im = sum_im;
                }
            }
            in += ispan*(radixsize-1);
        }
    }

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_init()          

  SYNOPSIS: 
    void fft_init (FFT_Plan *plan, fft_int fftsize, fft_int fftflags)

  ARGUMENTS:
    FFT_Plan *plan   --- output execution plan (structure) for fft_work()  
    fft_int fftsize  --- desired real/complex FFT size
    fft_int fftflags --- flag options to incorporate in plan

  PURPOSE:
    Creates and validates an FFT plan for subsequent execution
    by the function fft_work().

  REVISIONS:
    05/05/00 	rdh 	baseline
-----------------------------------------------------------------------
*/

void fft_init (FFT_Plan *plan, fft_int fftsize, fft_int fftflags)
{
 /*
  * Builds an execution plan for fft_work().
  *
  * On return plan->validflag is one of
  *   FFT_FLAGS_VALIDPLAN - plan is ready for fft_work()
  *   FFT_FLAGS_NULL      - fftsize could not be factored
  *   FFT_FLAGS_OUTOFMEM  - a buffer allocation failed
  * In the two failure cases the plan owns no memory and all of its buffer
  * pointers are NULL, so calling fft_free() afterwards is always safe.
  */

 /* Local variables */
    fft_int i,ii;
    fft_int nsize;
    fft_int mradixid[FFT_RADIX_MAX+1];
    fft_int mradixcnt[FFT_RADIX_MAX+1];

    if (plan == NULL) return;

 /* Initialize plan */
    plan->flags     = FFT_FLAGS_NULL;
    plan->validflag = FFT_FLAGS_NULL;
    plan->scale     = FFT_FLAGS_NULL;
    plan->size      = (fftsize > 1) ? (fftsize) : 1;
    plan->type      = FFT_FLAGS_COMPLEX;
    plan->direction = FFT_FLAGS_FORWARD;
    plan->packing   = FFT_FLAGS_PACKED;
    plan->radixevenodd = 0;
    plan->radixgeneric = 0;
    plan->radixcount   = 0;
    plan->rdxgen_t     = NULL;
    plan->rdxgen_w     = NULL;
    plan->twidtable    = NULL;
    plan->workbuf      = NULL;
    for (i=0; i<(FFT_RADIX_MAX+1); i++) {
      plan->radixpassid[i]  = 0;
      plan->radixpasscnt[i] = 0;
    }

 /* Create plan based on flags (when both flags of a pair are present,
    the one tested later wins) */
    plan->flags = fftflags;
    if ((fftflags & FFT_FLAGS_REAL) == FFT_FLAGS_REAL)
      plan->type = FFT_FLAGS_REAL;

    if ((fftflags & FFT_FLAGS_COMPLEX) == FFT_FLAGS_COMPLEX)
      plan->type = FFT_FLAGS_COMPLEX;

    if ((fftflags & FFT_FLAGS_FORWARD) == FFT_FLAGS_FORWARD)
      plan->direction = FFT_FLAGS_FORWARD;

    if ((fftflags & FFT_FLAGS_INVERSE) == FFT_FLAGS_INVERSE)
      plan->direction = FFT_FLAGS_INVERSE;

    if ((fftflags & FFT_FLAGS_PACKED) == FFT_FLAGS_PACKED)
      plan->packing = FFT_FLAGS_PACKED;

    if ((fftflags & FFT_FLAGS_UNPACKED) == FFT_FLAGS_UNPACKED)
      plan->packing = FFT_FLAGS_UNPACKED;

    if ((fftflags & FFT_FLAGS_NONYQUIST) == FFT_FLAGS_NONYQUIST)
      plan->packing = FFT_FLAGS_NONYQUIST;

    if ((fftflags & FFT_FLAGS_SCALE) == FFT_FLAGS_SCALE)
      plan->scale = FFT_FLAGS_SCALE;

 /* Packing options only apply to real transforms */
    if (plan->type == FFT_FLAGS_COMPLEX) plan->packing = FFT_FLAGS_PACKED;

 /* Attempt to factor the plan into available radix routines.  A real FFT
    is planned as a complex FFT of half the size.  The size is clamped to
    be non-negative so a negative request can never become the plan size. */
    nsize = (fftsize > 0) ? (fftsize) : 0;
    if (fft_checkradix(nsize,plan->type) >= 0) {
      if (plan->type == FFT_FLAGS_REAL) nsize /= 2;
      plan->size = nsize;
      fft_genradix(nsize,plan);
      plan->validflag = FFT_FLAGS_VALIDPLAN;
    }
    else {
      nsize = 0;
      plan->size = nsize;
      plan->validflag = FFT_FLAGS_NULL;
    }

    if (plan->validflag == FFT_FLAGS_VALIDPLAN) {
   /* Compact the radix tables so that only passes with a non-zero
      count remain, packed at the front of the arrays */
      for (i=0; i<(FFT_RADIX_MAX+1); i++){
        mradixid[i]  = plan->radixpassid[i];
        mradixcnt[i] = plan->radixpasscnt[i];
      }
      for (i=0; i<(FFT_RADIX_MAX+1); i++){
        plan->radixpassid[i]  = 0;
        plan->radixpasscnt[i] = 0;
      }
      ii = 0;
      plan->radixcount = 0;
      for (i=0; i<(FFT_RADIX_MAX+1); i++){
        if (mradixcnt[i] != 0) {
          plan->radixcount       = plan->radixcount + 1;
          plan->radixpassid[ii]  = mradixid[i];
          plan->radixpasscnt[ii] = mradixcnt[i];
          plan->radixevenodd     += mradixcnt[i];
          ii ++;
        }
      }
   /* radixevenodd ends up as the parity of the total number of passes
      (the generic pass counts as one) */
      if (plan->radixgeneric != 0) plan->radixevenodd += 1;
      plan->radixevenodd = intmod(plan->radixevenodd,2);
    }
    else {
      plan->radixgeneric = 0;
      plan->radixcount   = 0;
      for (i=0; i<(FFT_RADIX_MAX+1); i++){
        plan->radixpassid[i]  = 0;
        plan->radixpasscnt[i] = 0;
      }
    }

 /* Allocate twiddle factor table and working buffer */
    if (plan->validflag == FFT_FLAGS_VALIDPLAN) {
      nsize = (plan->type == FFT_FLAGS_COMPLEX) ? plan->size : 2*(plan->size);
      plan->twidtable = (fft_real *) malloc((nsize+4)*sizeof(fft_cmplx));
      plan->workbuf   = (fft_real *) malloc((nsize+4)*sizeof(fft_cmplx));
      if ((plan->twidtable == NULL)||(plan->workbuf == NULL))
        goto FFT_INIT_NOMEM;
      fft_gen_twiddle(plan->twidtable,nsize,plan->type);
    }

 /* Allocate DFT matrix and t-vector buffers if a generic pass is required */
    if ((plan->validflag == FFT_FLAGS_VALIDPLAN)&&(plan->radixgeneric > 0)) {
      nsize = plan->radixgeneric;
   /* Widen before multiplying so nsize*nsize cannot overflow fft_int */
      plan->rdxgen_t = (fft_cmplx *) malloc((size_t)nsize*sizeof(fft_cmplx));
      plan->rdxgen_w = (fft_cmplx *) malloc((size_t)nsize*(size_t)nsize*
                                            sizeof(fft_cmplx));
      if ((plan->rdxgen_t == NULL)||(plan->rdxgen_w == NULL))
        goto FFT_INIT_NOMEM;
      fft_generic_matrix(nsize,plan->rdxgen_w);
    }

    return;

 /* Single failure exit: release whatever was obtained and reset the
    pointers so nothing is left dangling (free(NULL) is a no-op) */
    FFT_INIT_NOMEM:
    free(plan->rdxgen_t);
    free(plan->rdxgen_w);
    free(plan->twidtable);
    free(plan->workbuf);
    plan->rdxgen_t  = NULL;
    plan->rdxgen_w  = NULL;
    plan->twidtable = NULL;
    plan->workbuf   = NULL;
    plan->validflag    = FFT_FLAGS_OUTOFMEM;
    plan->radixgeneric = 0;
    plan->radixcount   = 0;
    for (i=0; i<(FFT_RADIX_MAX+1); i++){
      plan->radixpassid[i]  = 0;
      plan->radixpasscnt[i] = 0;
    }

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_free()          

  SYNOPSIS: 
    void fft_free (FFT_Plan *plan)

  ARGUMENTS:
    FFT_Plan *plan --- input FFT plan for execution by fft_work()

  PURPOSE:
    Destroys contents of a plan created by fft_init() and frees all
    memory allocated for twiddle tables and working buffers.

  REVISIONS:
    05/05/00 	rdh	baseline
-----------------------------------------------------------------------
*/

void fft_free (FFT_Plan *plan)
{
    fft_int slot;

 /* Give back every buffer the plan may own; free() ignores NULL, so
    buffers that were never allocated need no special handling */
    free(plan->workbuf);
    free(plan->twidtable);
    free(plan->rdxgen_w);
    free(plan->rdxgen_t);
    plan->workbuf   = NULL;
    plan->twidtable = NULL;
    plan->rdxgen_w  = NULL;
    plan->rdxgen_t  = NULL;

 /* Clear the radix schedule */
    for (slot = FFT_RADIX_MAX; slot >= 0; slot--) {
      plan->radixpasscnt[slot] = 0;
      plan->radixpassid[slot]  = 0;
    }
    plan->radixcount   = 0;
    plan->radixgeneric = 0;

 /* Return the remaining fields to their defaults and mark the plan
    as invalid */
    plan->packing   = FFT_FLAGS_PACKED;
    plan->direction = FFT_FLAGS_FORWARD;
    plan->type      = FFT_FLAGS_COMPLEX;
    plan->size      = 0;
    plan->scale     = FFT_FLAGS_NULL;
    plan->validflag = FFT_FLAGS_NULL;
    plan->flags     = FFT_FLAGS_NULL;

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_func_scale()  

  SYNOPSIS: 
    void fft_func_scale (fft_cmplx *cin, fft_int n, fft_int dirflag,
                         fft_int typeflag, fft_int packflag)

  ARGUMENTS:
    fft_cmplx *cin   --- complex input/output vector treated as real array
    fft_int n        --- FFT size (in real samples)    
    fft_int dirflag  --- forward/inverse FFT flag 
    fft_int typeflag --- COMPLEX/REAL FFT type    
    fft_int packflag --- type of real FFT bin packing/unpacking

  PURPOSE:
    Scales FFT/IFFT data to produce a numerically correct result.
    Without a call to this function, some FFT/IFFT options will
    produce a result which is incorrect by a constant scale factor.

  REVISIONS:
    02/12/00 	rdh	baseline
-----------------------------------------------------------------------
*/

void fft_func_scale (fft_cmplx *cin, fft_int n, fft_int dirflag,   
                     fft_int typeflag, fft_int packflag)
{
 /* NOTE(review): this body is empty -- every argument is ignored and the
    data in cin is left untouched, so no scaling is actually applied.
    Confirm whether the scaling code was removed on purpose. */
    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_pack()        

  SYNOPSIS: 
    void fft_pack (fft_cmplx *ain, const fft_real *t, fft_int n, 
                   fft_int dirflag, fft_int packflag)

  ARGUMENTS:
    fft_cmplx *ain   --- complex input/output vector treated as real array
    fft_real  *t     --- complex twiddle factor vector treated as real array
    fft_int n        --- FFT size (in real samples)    
    fft_int dirflag  --- forward/inverse FFT flag 
    fft_int packflag --- type of real FFT bin packing/unpacking

  PURPOSE:
    Packs and unpacks input data to allow fft_work() to perform a
    complex FFT/IFFT on real input/output data

  REVISIONS:
    05/01/00 	rdh	baseline
-----------------------------------------------------------------------
*/

void fft_pack (fft_cmplx *ain, const fft_real *t, fft_int n, 
               fft_int dirflag, fft_int packflag)
{
 /*
  * Pre/post-processing step that lets a complex FFT of n points serve a
  * real transform.  ain is modified in place; t is read as an array of
  * fft_cmplx twiddle factors.  Element pairs (i, n-i) are combined for
  * i = 1 .. n/2.  When FFT_FLAGS_UNPACKED is selected, ain[n] is accessed
  * as well, so the buffer must hold n+1 complex elements in that mode.
  */
    const fft_cmplx *twid = (const fft_cmplx *) t;
    const fft_cmplx *tw;
    fft_cmplx *lo, *hi;
    fft_int i, half;
    fft_real wr, wi, ar, ai, br, bi, pr, pi;
    fft_real re_sum, re_dif, im_sum, im_dif;

    if (n < 1) return;

    half = (fft_int) n/2;

    if (dirflag == FFT_FLAGS_FORWARD) {
     /* Element 0 is handled on its own and carries a factor of two */
        pr = ain[0].re;
        pi = ain[0].im;
        ar = pr + pi;
        ai = pr - pi;
        ain[0].re = ((fft_real) 2.0)*ar;
        ain[0].im = ((fft_real) 2.0)*ai;

     /* Twiddles are read backwards starting at index 2n-1 */
        for (i = 1; i <= half; i++) {
            lo = &ain[i];
            hi = &ain[n-i];
            tw = &twid[2*n-i];

            ar = lo->re;      br = hi->re;
            re_sum = ar + br;
            re_dif = ar - br;
            ai = lo->im;      bi = hi->im;
            im_sum = ai + bi;
            im_dif = ai - bi;
            wr = tw->re;      wi = tw->im;
            pr = wr*im_sum + wi*re_dif;
            pi = wi*im_sum - wr*re_dif;
            ar = re_sum + pr;
            br = re_sum - pr;
            ai = pi + im_dif;
            bi = pi - im_dif;
         /* lo and hi coincide when i == n-i; hi is stored last */
            lo->re = ar;
            lo->im = ai;
            hi->re = br;
            hi->im = bi;
        }

     /* Unpack the spectral data */
        if (packflag == FFT_FLAGS_UNPACKED) {
            ain[n].re = ain[0].im;
            ain[n].im = 0.0;
            ain[0].im = 0.0;
        }
        else if (packflag == FFT_FLAGS_NONYQUIST) {
            ain[0].im = 0.0;
        }
    }
    else if (dirflag == FFT_FLAGS_INVERSE) {
     /* Data to be inverse FFT'd must be re-packed */
        if (packflag == FFT_FLAGS_UNPACKED) {
            ain[0].im = ain[n].re;
            ain[n].re = 0.0;
            ain[n].im = 0.0;
        }

        pr = ain[0].re;
        pi = ain[0].im;
        ain[0].re = pr + pi;
        ain[0].im = pr - pi;

     /* Twiddles are read forwards starting at index n+1 */
        for (i = 1; i <= half; i++) {
            lo = &ain[i];
            hi = &ain[n-i];
            tw = &twid[n+i];

            ar = lo->re;      br = hi->re;
            re_sum = ar + br;
            re_dif = ar - br;
            ai = lo->im;      bi = hi->im;
            im_sum = ai + bi;
            im_dif = ai - bi;
            wr = tw->re;      wi = tw->im;
            pr = wr*im_sum + wi*re_dif;
            pi = wi*im_sum - wr*re_dif;
            ar = re_sum + pr;
            br = re_sum - pr;
            ai = pi + im_dif;
            bi = pi - im_dif;
            lo->re = ar;
            lo->im = ai;
            hi->re = br;
            hi->im = bi;
        }
    }

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    fft_work()          

  SYNOPSIS: 
    void fft_work(fft_cmplx *a, FFT_Plan *plan)

  ARGUMENTS:
    fft_cmplx *a   --- input/output vector of data to perform FFT/IFFT on
    FFT_Plan *plan --- validated FFT plan to execute

  PURPOSE:
    Executes an FFT plan to perform either a real or complex FFT on the
    specified data buffer

  REVISIONS:
    05/05/00 	rdh	baseline
    09/12/01 	rdh	modified to use "radixevenodd" to eliminate "memcpy"
-----------------------------------------------------------------------
*/

void fft_work(fft_cmplx *a, FFT_Plan *plan)
{
 /*
  * Executes a plan built by fft_init() on the data in a (in place).
  * The call returns without touching a when the plan is missing, smaller
  * than two points, not marked FFT_FLAGS_VALIDPLAN, or refers to a radix
  * for which no pass routine exists.
  */

 /* Signature shared by all fixed-radix pass routines */
    typedef void (*fft_passfunc)(const fft_real *a,
                     fft_real *c, const fft_real *t,
                     fft_int n, fft_int la,
                     fft_int dirflag);

 /* "radixpass_M" dispatch table, indexed by radix; built once instead of
    on every call.  Unlisted entries are NULL. */
    static const fft_passfunc pasfunc[FFT_RADIX_MAX+1] =
      { NULL,        NULL,         radixpass_2, radixpass_3, radixpass_4,
        radixpass_5, NULL,         NULL,        radixpass_8, NULL,
        NULL,        NULL,         NULL,        NULL,        NULL,
        NULL,        radixpass_16, NULL,        NULL,        NULL,
        NULL,        NULL,         NULL,        NULL,        NULL,
        NULL,        NULL,         NULL,        NULL,        NULL,
        NULL,        NULL,         radixpass_32 };

 /* Local variables */
    fft_int i, ii;
    fft_int la, nsize, bufflag, dirflag, typeflag;
    fft_int iradix, iradixcnt;
    fft_real *indat[3];
    fft_real *outdat[3];

 /* Check arguments and input size */
    if ((a == NULL)||(plan == NULL)) return;
    if (plan->size < 2) return;

 /* A plan that failed in fft_init() owns no usable buffers */
    if (plan->validflag != FFT_FLAGS_VALIDPLAN) return;

 /* Make sure every scheduled pass has a routine before any data is
    modified, so a bad plan can never call through a NULL pointer or
    leave the buffer half transformed */
    for (i=(plan->radixcount-1); i>=0; i--) {
      if (plan->radixpasscnt[i] <= 0) continue;
      iradix = plan->radixpassid[i];
      if ((iradix < 0)||(iradix > FFT_RADIX_MAX)) return;
      if (pasfunc[iradix] == NULL) return;
    }

 /* Assign temporary variables */
    nsize    = plan->size;
    dirflag  = plan->direction;
    typeflag = plan->type;

 /* Assign unswapped and swapped versions of input arrays.  Entries 0 and 1
    ping-pong between a and the work buffer; entry 2 runs a pass in place
    and is used first when the number of passes is odd, so that the final
    result always lands in a. */
    bufflag = (plan->radixevenodd == 0) ? 0 : 2;
    indat[0] = (fft_real *) a;   outdat[0] = plan->workbuf;
    indat[1] = plan->workbuf;    outdat[1] = (fft_real *) a;
    indat[2] = (fft_real *) a;   outdat[2] = (fft_real *) a;

 /* Re-pack the spectrum before the passes of an inverse real FFT */
    if ((typeflag == FFT_FLAGS_REAL)&&(dirflag == FFT_FLAGS_INVERSE))
      fft_pack(a,plan->twidtable,nsize,dirflag,plan->packing);

 /* Execute the Radix-M passes */
    la = plan->size;
    if (plan->radixgeneric != 0) {
      la /= plan->radixgeneric;
      radixpass_generic(indat[bufflag], outdat[bufflag],
                        plan->twidtable, nsize, la, dirflag,
                        plan->radixgeneric, plan->rdxgen_t, plan->rdxgen_w);
      bufflag = (bufflag == 0) ? 1 : 0;
    }
    for (i=(plan->radixcount-1); i>=0; i--) {
      iradix    = plan->radixpassid[i];
      iradixcnt = plan->radixpasscnt[i];
      for (ii=iradixcnt; ii > 0; ii--) {
        la /= iradix;
        pasfunc[iradix](indat[bufflag], outdat[bufflag],
                   plan->twidtable, nsize, la, dirflag);
        bufflag = (bufflag == 0) ? 1 : 0;
      }
    }

 /* Unpack the spectrum after the passes of a forward real FFT */
    if ((typeflag == FFT_FLAGS_REAL)&&(dirflag == FFT_FLAGS_FORWARD))
      fft_pack(a,plan->twidtable,nsize,dirflag,plan->packing);

 /* Perform any required operations on the output data buffer */
    if (plan->scale == FFT_FLAGS_SCALE)
        fft_func_scale(a,nsize,dirflag,typeflag,plan->packing);

    return;
}


/*
-----------------------------------------------------------------------
  FUNCTION: 
    nm_fft()           

  SYNOPSIS: 
    fft_int nm_fft (fft_cmplx *cbuf, fft_int fftsize, fft_int fftflags) 

  ARGUMENTS:
    fft_cmplx *cbuf    --- complex/real input/output array of FFT/IFFT data
    fft_int   fftsize  --- FFT size: real samples for a real FFT, complex samples otherwise
    fft_int   fftflags --- processing flags (see fftlib.h) 

  RETURN:
    (fft_int) -2 --- twiddle table/scratch buffer allocation failed
    (fft_int) -1 --- computation of FFT failed
    (fft_int)  0 --- computation of FFT succeeded

  PURPOSE:
    Core NextMidas (TM) FFT engine. Performs plan allocation and memory
    management. Calls fft_init(), fft_work(), and fft_free() to do the
    real work.

  REVISIONS:
    05/05/00 	rdh	baseline
    05/04/01 	rdh	changed to auto-initialization algorithm
    07/07/01 	rdh	fixed direction bug in plan-search
    07/11/01 	rdh	modified for return status (type now fft_int)
    02/20/02 	rdh	modified for return status of -2 (out of memory)             
-----------------------------------------------------------------------
*/

fft_int nm_fft (fft_cmplx *cbuf, fft_int fftsize, fft_int fftflags)
{
 /*
  * "Easy usage" entry point: keeps a small cache of plans and creates,
  * re-uses and frees them on behalf of the caller.
  *
  * Returns  0 on success (also for the INITPLANS/FREEPLANS requests),
  *         -1 if the FFT could not be computed,
  *         -2 if a memory allocation failed.
  *
  * Plans that fail to build are not cached, so a later call with the same
  * arguments tries again.
  */

 /* Active plan list.  The magic value guards the first-use reset below. */
    static fft_int init_magic = 0x00000000;
    static FFT_Plan *plan_ptr[FFT_MAX_PLANS];

 /* Local variables */
    FFT_Plan *plan;
    fft_int i,tempsize;
    fft_int lcltype,lcldir;
    fft_int irtn;

 /* Decode the transform type and direction requested by the flags */
    if ((fftflags & FFT_FLAGS_REAL) == FFT_FLAGS_REAL)
      lcltype = FFT_FLAGS_REAL;
    else
      lcltype = FFT_FLAGS_COMPLEX;
    if ((fftflags & FFT_FLAGS_FORWARD) == FFT_FLAGS_FORWARD)
      lcldir = FFT_FLAGS_FORWARD;
    else
      lcldir = FFT_FLAGS_INVERSE;

 /* On first use, initialize the pointer array */
    if (init_magic != 0xE4D3C2B1) {
      init_magic = 0xE4D3C2B1;
      for (i=0; i<FFT_MAX_PLANS; i++) plan_ptr[i] = NULL;
    }

 /* Both housekeeping requests leave the cache empty.  INITPLANS releases
    any plan that is still active as well, so it cannot leak memory when
    it is issued more than once. */
    if (((fftflags & FFT_FLAGS_FREEPLANS) == FFT_FLAGS_FREEPLANS)||
        ((fftflags & FFT_FLAGS_INITPLANS) == FFT_FLAGS_INITPLANS)) {
      for (i=0; i<FFT_MAX_PLANS; i++) {
        if (plan_ptr[i] != NULL) {
          fft_free(plan_ptr[i]);
          free(plan_ptr[i]);
          plan_ptr[i] = NULL;
        }
      }
      return (0);
    }

    if (cbuf == NULL) return (-1);

 /* Look for a cached plan with the requested size and options.  Active
    plans are stored contiguously, so the first empty slot ends the search.
    The complete flag word must match: packing and scaling are fixed when a
    plan is created, so a plan made with different options cannot be
    re-used. */
    plan = NULL;
    for (i=0; i<FFT_MAX_PLANS; i++) {
      if (plan_ptr[i] == NULL) break;
      tempsize = (lcltype == FFT_FLAGS_REAL) ?
                    (2 * plan_ptr[i]->size) : (plan_ptr[i]->size);
      if ((tempsize == fftsize)&&
          (plan_ptr[i]->type == lcltype)&&
          (plan_ptr[i]->direction == lcldir)&&
          (plan_ptr[i]->flags == fftflags)) {
        plan = plan_ptr[i];
        break;
      }
    }

    if (plan == NULL) {
      if (i == FFT_MAX_PLANS) {
       /* If we've exhausted all the plans, recycle the last one */
          i = FFT_MAX_PLANS-1;
          fft_free(plan_ptr[i]);
      }
      else {
       /* Create new plan in the first empty slot */
          plan_ptr[i] = malloc(sizeof(FFT_Plan));
          if (plan_ptr[i] == NULL) return (-2);
      }
      fft_init(plan_ptr[i],fftsize,fftflags);
      plan = plan_ptr[i];
    }

 /* Execute the plan */
    if (plan->validflag == FFT_FLAGS_VALIDPLAN) {
      fft_work(cbuf, plan);
      return (0);
    }

 /* The plan could not be built.  fft_init() has already released any
    buffers it obtained, so only the plan structure itself remains to be
    freed (fft_free() is deliberately not called here).  Slot i is either
    the first empty slot or the last slot, so the active plans stay
    contiguous. */
    irtn = (plan->validflag == FFT_FLAGS_OUTOFMEM) ? -2 : -1;
    free(plan);
    plan_ptr[i] = NULL;

    return (irtn);
}


/*
-----------------------------------------------------------------------

  FUNCTION: 
    nm_fftsize()    

  SYNOPSIS: 
    fft_int nm_fftsize(fft_int n, fft_int flags, fft_int searchdir)

  ARGUMENTS:
    fft_int n     --- desired FFT size                   
    fft_int flags --- FFT parameter flags (only real/complex matters)    
    fft_int searchdir --- search direction 
                           Negative -> best size <= n
                           Positive -> best size >= n
                           Zero     -> best size closest to n

  RETURN:
    (fft_int)  m --- nearest efficient FFT size that factors
    (fft_int)  0 --- no FFT size could be found that factored (WHAT???)

  PURPOSE:
    Can be called prior to doing a call to nm_fft() to determine 
    whether a desired FFT size can be efficiently computed by the
    available radix passes. If the size cannot be computed efficiently,
    the nearest efficient size is returned.

  REVISIONS:
    02/15/01 	rdh	baseline

-----------------------------------------------------------------------
*/

fft_int nm_fftsize(fft_int n, fft_int flags, fft_int searchdir)
{
  /*
   * Single-step search for the nearest size accepted by fft_checkradix().
   *
   *   searchdir > 0  : candidates n, n+1, ... below 2147483647
   *   searchdir < 0  : candidates n, n-1, ... down to 2
   *   searchdir == 0 : both directions in lock step; when two candidates
   *                    are equally far from n the smaller one is returned
   *
   * Returns the size found, or 0 when every candidate has been rejected.
   */
  const fft_int size_max = 2147483647;
  fft_int nfft_hi, nfft_lo, nfft;
  int search_hi, search_lo;

  /* Initialize.  Sizes below 1 are never useful, so the upward search
     starts at 1 for such requests. */
     nfft_hi   = (n < 1) ? 1 : n;
     nfft_lo   = n;
     search_hi = (searchdir >= 0);
     search_lo = (searchdir <= 0);

  /* Each direction is dropped as soon as it runs out of candidates, which
     guarantees that the loop terminates */
     while (search_hi || search_lo) {
       nfft = 0;

       /* Check high */
       if (search_hi) {
         if (nfft_hi >= size_max)
           search_hi = 0;
         else if (fft_checkradix(nfft_hi,flags) >= 0)
           nfft = nfft_hi;
         else
           nfft_hi ++;
       }

       /* Check low (evaluated last, so it wins a tie with the high side).
          Every candidate gets its own fft_checkradix() result; a size is
          never accepted on the strength of the other direction's check. */
       if (search_lo) {
         if (nfft_lo <= 1)
           search_lo = 0;
         else if (fft_checkradix(nfft_lo,flags) >= 0)
           nfft = nfft_lo;
         else
           nfft_lo --;
       }

       if (nfft != 0) return (nfft);
     }

  return (0);
}
