/*
 * Decompiled with CFR 0.152.
 */
package nxm.sys.libm;

import nxm.sys.inc.MidasReference;
import nxm.sys.lib.Convert;
import nxm.sys.lib.Data;
import nxm.sys.lib.DataFile;
import nxm.sys.lib.MidasException;
import nxm.sys.lib.Native;
import nxm.sys.lib.Shell;
import nxm.sys.libm.Operator;

public class Fft
extends Operator {
    // Not referenced by any code visible in this part of the file.
    private static final boolean USE_NATIVE = true;

    // Public flag bits that callers OR together into the fftFlags argument.
    /** Flag bit 1: selects plan type 1 (real) in init_internal unless COMPLEX is also set. */
    public static final int REAL = 1;
    /** Flag bit 2: selects plan type 2 (complex); takes precedence over REAL. */
    public static final int COMPLEX = 2;
    /** Flag bit 8: forward direction (also the default when INVERSE is not set). */
    public static final int FORWARD = 8;
    /** Flag bit 16: inverse direction; init() sets dir to -1 when this bit is present. */
    public static final int INVERSE = 16;
    /** Flag bit 32: recorded as the plan packing mode (default; forced for complex plans). */
    public static final int PACKED = 32;
    /** Flag bit 64: recorded as the plan packing mode for real plans. */
    public static final int UNPACKED = 64;
    /** Flag bit 128: recorded as the plan packing mode for real plans (wins over UNPACKED). */
    public static final int NONYQUIST = 128;
    /** Flag bit 256: when set, fft_work calls fft_func_scale after the transform. */
    public static final int SCALE = 256;
    // The next two are not interpreted by the Java code visible here
    // (presumably consumed by the native library - TODO confirm).
    public static final int INITPLANS = 0x10000000;
    public static final int FREEPLANS = 0x20000000;
    /** Same value that init_internal stores in planValidflag for a usable plan. */
    public static final int VALIDPLAN = 0x40000000;

    // Private mirrors of the public flag values. The decompiled method bodies use the
    // numeric literals directly, so these names are not referenced in the visible code.
    private static final int FFT_RADIX_MIN = 2;
    private static final int FFT_RADIX_MAX = 32;
    private static final int FFT_FLAGS_NULL = 0;
    private static final int FFT_FLAGS_REAL = 1;
    private static final int FFT_FLAGS_COMPLEX = 2;
    private static final int FFT_FLAGS_FORWARD = 8;
    private static final int FFT_FLAGS_INVERSE = 16;
    private static final int FFT_FLAGS_PACKED = 32;
    private static final int FFT_FLAGS_UNPACKED = 64;
    private static final int FFT_FLAGS_NONYQUIST = 128;
    private static final int FFT_FLAGS_SCALE = 256;
    private static final int FFT_FLAGS_INITPLANS = 0x10000000;
    private static final int FFT_FLAGS_FREEPLANS = 0x20000000;
    private static final int FFT_FLAGS_VALIDPLAN = 0x40000000;
    private static final int FFT_FLAGS_OUTOFMEM = Integer.MIN_VALUE;

    // Factorisation rules read by fft_genradix. Each row is
    //   { radix, size must be greater than this, size must be less than this, max pass count }.
    // With these values radix 16 is used at most once and only while the remaining size
    // exceeds 4000, and radix 32 can never match (its upper bound is 0).
    private static final int[][] radix_decomp_profile = new int[][]{{2, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}, {3, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}, {4, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}, {5, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}, {8, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}, {16, 4000, Integer.MAX_VALUE, 1}, {32, 0, 0, 0}};
    // 2*pi; the visible methods spell out Math.PI * 2 instead of using this constant.
    private static final double twopi = Math.PI * 2;
    // Result of loading the native library; combined with Native.useMath to choose the native path.
    private static boolean loaded = Shell.loadLibrary("sys", "libm", "Fft");

    // Plan handle: the value returned by initPlan/initPlanCU on the native path, or the
    // 1/0 success indicator returned by init_internal on the Java path. 0 means "no plan".
    private long planp;
    // Values captured by init(): raw flags, requested size, direction sign (+1 / -1) and
    // the element count used when sizing debug output (size for complex, size/2 otherwise).
    private int fflags;
    private int size;
    private int dir;
    private int nc;
    // The following five fields are not referenced in the visible part of the file.
    private int logn;
    private double[] twd;
    private double[] rtwd;
    private float[] twf;
    private float[] rtwf;
    // Optional debug capture files for transform input / output (see setDebugFileIn/Out).
    private DataFile hcbi;
    private DataFile hcbo;
    // Execution path selected in init().
    private boolean useNative;
    private boolean useCuda;

    // State of the pure-Java plan, populated by init_internal().
    private int planFlags;
    private int planValidflag;
    private int planScale;
    private int planSize;
    private int planType;
    private int planDirection;
    private int planPacking;
    // Parity (0/1) of the total number of passes; decides the first destination buffer in fft_work.
    private int planRadixevenodd;
    // Leftover factor that the profiled radices could not remove (0 when none).
    private int planRadixgeneric;
    // Number of used entries in planRadixpassid / planRadixpasscnt after compaction.
    private int planRadixcount;
    private int[] planRadixpassid;
    private int[] planRadixpasscnt;
    // Scratch vector and DFT matrix for the generic-radix pass (allocated only when needed).
    private float[] planRdxgen_t;
    private float[] planRdxgen_w;
    // Twiddle-factor table and ping-pong work buffer for the radix passes.
    private float[] planTwidtable;
    private float[] planWorkbuf;

    /**
     * Creates an unconfigured instance; no plan exists until {@link #init(int, int)} is called.
     */
    public Fft() {
    }

    /**
     * Creates an instance and immediately builds a plan by delegating to {@link #init(int, int)}.
     *
     * @param size     transform size, passed straight to init
     * @param fftFlags OR-ed flag bits (REAL, COMPLEX, FORWARD, ...), passed straight to init
     * @throws MidasException if init cannot build a plan for the given size/flags
     */
    public Fft(int size, int fftFlags) {
        this.init(size, fftFlags);
    }

    /**
     * Maps an element count to the resulting count for the supported modes.
     * Mode 33 (numerically REAL|PACKED) halves {@code n}; mode 34 (numerically
     * COMPLEX|PACKED) leaves it unchanged; every other mode is rejected.
     *
     * @param type data type character (not used by this implementation)
     * @param n    element count
     * @param mode operating mode
     * @return the adjusted count, or -1 when the mode is not supported
     */
    @Override
    public int validate(char type, int n, int mode) {
        if (mode == 33) {
            return n / 2;
        }
        if (mode == 34) {
            return n;
        }
        return -1;
    }

    /**
     * One-shot transform of {@code buf} in place.
     * Uses the native implementation when native math is enabled and the library
     * loaded; otherwise builds a temporary Java plan and runs it once.
     *
     * @param buf  data buffer handed to the native routine or to {@link #work(Object)}
     * @param size transform size
     * @param flag OR-ed flag bits describing the transform
     */
    public static void process(Object buf, int size, int flag) {
        if (Native.useMath && loaded) {
            Fft.processNative(buf, size, flag);
            return;
        }
        Fft oneShot = new Fft(size, flag);
        oneShot.work(buf);
    }

    // Native entry points. Their behaviour is defined by the native library, not by this
    // file; the notes below only describe how the Java code in this class uses them.

    // One-shot transform used by process(buf, size, flag).
    private static native void processNative(Object var0, int var1, int var2);

    // Called from init() with (size, fftFlags); the returned value is stored in planp and
    // a result of 0 is treated as failure.
    private static native long initPlan(int var0, int var1);

    // Called from work(...) with (planp, buffer).
    private static native void workPlan(long var0, Object var2);

    // Called from free() with planp; the return value replaces planp
    // (presumably 0 once released - TODO confirm against the native source).
    private static native long freePlan(long var0);

    // Counterparts of the three methods above, used instead when useCuda is true.
    private static native long initPlanCU(int var0, int var1);

    private static native void workPlanCU(long var0, Object var2);

    private static native long freePlanCU(long var0);

    /**
     * Copies {@code size} floats from {@code a} into {@code b} (unless they are the
     * same array) and then transforms {@code b} in place via {@link #process}.
     *
     * @param a    input array
     * @param b    output array; receives the transform result
     * @param size number of floats copied, also passed to process as the size
     * @param mode flag bits passed to process
     * @return always {@code true}
     */
    public boolean XX(float[] a, float[] b, int size, int mode) {
        boolean sameArray = a == b;
        if (!sameArray) {
            System.arraycopy(a, 0, b, 0, size);
        }
        Fft.process(b, size, mode);
        return true;
    }

    /**
     * Finds the smallest size at or above {@code size} for which a plan can be built
     * with the given flags.
     *
     * <p>Each candidate is probed by constructing a plan. The probe is released via
     * {@link #free()} before returning, so a successful probe does not leave a
     * (possibly native) plan allocated.
     *
     * @param size  first size to try
     * @param flags OR-ed flag bits describing the transform
     * @return the first size, counting upward from {@code size}, that initialises successfully
     */
    public static int validSize(int size, int flags) {
        while (true) {
            Fft probe;
            try {
                probe = new Fft(size, flags);
            }
            catch (MidasException me) {
                // This size cannot be planned; try the next one.
                ++size;
                continue;
            }
            // The plan was only needed as a feasibility check; release it.
            probe.free();
            return size;
        }
    }

    /**
     * Records the transform parameters and builds the plan, choosing between the
     * CUDA native, plain native and pure-Java implementations.
     *
     * @param size     transform size
     * @param fftFlags OR-ed flag bits (bit 2 = complex, bit 16 = inverse, ...)
     * @throws MidasException if the selected implementation reports no plan (handle 0)
     */
    public void init(int size, int fftFlags) {
        this.fflags = fftFlags;
        this.size = size;

        boolean complexData = (fftFlags & 2) != 0;
        boolean inverse = (fftFlags & 0x10) != 0;
        this.nc = complexData ? size : size / 2;
        this.dir = inverse ? -1 : 1;

        this.useNative = Native.useMath && loaded;
        this.useCuda = this.useNative && Native.useCuda;

        if (!this.useNative) {
            this.planp = this.init_internal();
        } else if (this.useCuda) {
            this.planp = Fft.initPlanCU(size, fftFlags);
        } else {
            this.planp = Fft.initPlan(size, fftFlags);
        }

        if (this.planp == 0L) {
            throw new MidasException("Problem initializing FFT plan: size=" + size + " flags=" + fftFlags);
        }
    }

    /**
     * Opens a type-2000 "SF" file that {@code work(...)} will write each input buffer to.
     * The file's sub-size is the transform size.
     *
     * @param ref      a MidasReference, or an object convertible through Convert.ref2Midas
     * @param filename name of the debug file
     */
    public void setDebugFileIn(Object ref, String filename) {
        final DataFile capture = new DataFile();
        this.hcbi = capture;
        // Both call shapes are kept separate so overload selection matches the argument's static type.
        if (!(ref instanceof MidasReference)) {
            capture.init(Convert.ref2Midas(ref), (Object)filename, "2000", "SF", 0);
        } else {
            capture.init((MidasReference)ref, (Object)filename, "2000", "SF", 0);
        }
        capture.setSubSize(this.size);
        capture.setSize(1.0);
        capture.open(2);
    }

    /**
     * Opens a type-2000 "CF" file that {@code work(...)} will write each output buffer to.
     * The file's sub-size is {@code nc}, the element count computed in init().
     *
     * @param ref      a MidasReference, or an object convertible through Convert.ref2Midas
     * @param filename name of the debug file
     */
    public void setDebugFileOut(Object ref, String filename) {
        final DataFile capture = new DataFile();
        this.hcbo = capture;
        // Both call shapes are kept separate so overload selection matches the argument's static type.
        if (!(ref instanceof MidasReference)) {
            capture.init(Convert.ref2Midas(ref), (Object)filename, "2000", "CF", 0);
        } else {
            capture.init((MidasReference)ref, (Object)filename, "2000", "CF", 0);
        }
        capture.setSubSize(this.nc);
        capture.setSize(1.0);
        capture.open(2);
    }

    /**
     * Runs the planned transform on {@code buf} in place.
     *
     * <p>On the native path the buffer is handed to the native plan unchanged. On the
     * Java path {@code float[]} and {@code double[]} are transformed directly, a
     * {@code byte[]} is converted to floats, transformed and converted back, and any
     * other buffer type is left untouched. When debug files are open, the buffer is
     * written to them before and after the transform.
     *
     * @param buf data buffer to transform
     */
    public void work(Object buf) {
        if (this.hcbi != null) {
            this.hcbi.write(Convert.o2ba(buf), 0, this.size * 4);
        }

        if (!this.useNative) {
            if (buf instanceof float[]) {
                this.fft_work((float[])buf);
            } else if (buf instanceof double[]) {
                this.fft_work((double[])buf);
            } else if (buf instanceof byte[]) {
                // Round-trip the raw bytes through a float array (4 bytes per value).
                byte[] raw = (byte[])buf;
                float[] samples = new float[raw.length / 4];
                Convert.bb2ja(raw, 0, (byte)70, samples, 0, (byte)70, samples.length);
                this.fft_work(samples);
                Convert.ja2bb(samples, 0, (byte)70, raw, 0, (byte)70, samples.length);
            }
        } else if (this.useCuda) {
            Fft.workPlanCU(this.planp, buf);
        } else {
            Fft.workPlan(this.planp, buf);
        }

        if (this.hcbo != null) {
            this.hcbo.write(Convert.o2ba(buf), 0, this.nc * 8);
        }
    }

    /**
     * Runs the planned transform on the buffer held by {@code data}.
     *
     * <p>On the native path the raw {@code data.buf} is passed to the native plan and
     * the data is then cast to floats and uncast again. On the Java path the float
     * view is transformed between the cast and the uncast. When debug files are open,
     * {@code data.buf} is written to them before and after the transform.
     *
     * @param data container whose buffer is transformed
     */
    public void work(Data data) {
        if (this.hcbi != null) {
            this.hcbi.write(data.buf, 0, this.size * 4);
        }

        final boolean nativePath = this.useNative;
        if (nativePath) {
            if (this.useCuda) {
                Fft.workPlanCU(this.planp, data.buf);
            } else {
                Fft.workPlan(this.planp, data.buf);
            }
        }

        // The cast/uncast pair is performed on both paths; only the Java path
        // transforms the float view in between.
        float[] view = data.castF(true);
        if (!nativePath) {
            this.fft_work(view);
        }
        data.uncast(view, true);

        if (this.hcbo != null) {
            this.hcbo.write(data.buf, 0, this.nc * 8);
        }
    }

    /**
     * Rotates the float view of {@code data}: {@link #rotateSF} when {@code data.spa}
     * is 1, {@link #rotateCF} when it is 2, and no reordering for any other value.
     *
     * @param data container whose buffer is reordered
     */
    public void rotate(Data data) {
        float[] view = data.castF(true);
        if (data.spa == 1) {
            this.rotateSF(view);
        } else if (data.spa == 2) {
            this.rotateCF(view);
        }
        data.uncast(view, true);
    }

    /**
     * Shifts the float view of {@code data}: {@link #shiftLeftSF} when {@code data.spa}
     * is 1, {@link #shiftLeftCF} when it is 2, and no reordering for any other value.
     *
     * @param data container whose buffer is reordered
     * @param nrot shift amount forwarded to the shift routine
     */
    public void unrotate(Data data, int nrot) {
        float[] view = data.castF(true);
        if (data.spa == 1) {
            this.shiftLeftSF(view, nrot);
        } else if (data.spa == 2) {
            this.shiftLeftCF(view, nrot);
        }
        data.uncast(view, true);
    }

    /**
     * Reorders the first {@code size} scalar values of {@code buf} in place so that the
     * values from index {@code size/2 + 1} onward come first, followed by the values
     * that were at the front.
     *
     * @param buf scalar float buffer holding at least {@code size} values
     */
    public void rotateSF(float[] buf) {
        // The first two values are overwritten early, so hold on to them.
        float saved0 = buf[0];
        float saved1 = buf[1];
        int write = 0;
        int read = 2;
        for (int upper = this.size / 2 + 1; upper < this.size; ++upper) {
            buf[write++] = buf[upper];
            buf[upper] = buf[read++];
        }
        buf[write] = saved0;
        buf[write + 1] = saved1;
    }

    /**
     * Complex counterpart of {@link #rotateSF}: reorders {@code size} interleaved
     * (re, im) pairs of {@code buf} in place, moving the pairs from float index
     * {@code size + 2} onward to the front.
     *
     * @param buf interleaved complex float buffer holding at least {@code 2 * size} floats
     */
    public void rotateCF(float[] buf) {
        // The first two pairs are overwritten early, so hold on to them.
        float saved0 = buf[0];
        float saved1 = buf[1];
        float saved2 = buf[2];
        float saved3 = buf[3];
        int write = 0;
        int read = 4;
        for (int upper = this.size + 2; upper < this.size * 2; upper += 2) {
            buf[write] = buf[upper];
            buf[write + 1] = buf[upper + 1];
            write += 2;
            buf[upper] = buf[read];
            buf[upper + 1] = buf[read + 1];
            read += 2;
        }
        buf[write] = saved0;
        buf[write + 1] = saved1;
        buf[write + 2] = saved2;
        buf[write + 3] = saved3;
    }

    /**
     * Reorders the first {@code size} scalar values of {@code buf} in place, working
     * from the end of the buffer: the leading {@code amount} values are moved to the
     * tail while values from the tail are moved into their place.
     *
     * @param buf    scalar float buffer holding at least {@code size} values
     * @param amount number of leading values to move
     */
    public void shiftLeftSF(float[] buf, int amount) {
        final int last = this.size - 1;
        // The last two values are overwritten early, so hold on to them.
        float savedLast = buf[last];
        float savedPrev = buf[last - 1];
        int write = last;
        int read = last - 2;
        for (int lower = amount - 1; lower >= 0; --lower) {
            buf[write--] = buf[lower];
            buf[lower] = buf[read--];
        }
        buf[write] = savedLast;
        buf[write - 1] = savedPrev;
    }

    /**
     * Complex counterpart of {@link #shiftLeftSF}: reorders {@code size} interleaved
     * pairs of {@code buf} in place, working backwards from float index
     * {@code 2 * size - 1} and moving the leading {@code amount} pairs to the tail.
     *
     * @param buf    interleaved complex float buffer holding at least {@code 2 * size} floats
     * @param amount number of leading complex pairs to move
     */
    public void shiftLeftCF(float[] buf, int amount) {
        final int last = this.size * 2 - 1;
        // The last four floats are overwritten early, so hold on to them.
        float saved0 = buf[last];
        float saved1 = buf[last - 1];
        float saved2 = buf[last - 2];
        float saved3 = buf[last - 3];
        int write = last;
        int read = last - 4;
        for (int lower = amount * 2 - 1; lower >= 0; lower -= 2) {
            buf[write] = buf[lower];
            buf[write - 1] = buf[lower - 1];
            write -= 2;
            buf[lower] = buf[read];
            buf[lower - 1] = buf[read - 1];
            read -= 2;
        }
        buf[write] = saved0;
        buf[write - 1] = saved1;
        buf[write - 2] = saved2;
        buf[write - 3] = saved3;
    }

    /**
     * Closes any open debug files and, on the native path, releases the native plan.
     * The value returned by the native free routine becomes the new plan handle.
     */
    public void free() {
        if (this.hcbi != null) {
            this.hcbi.close();
        }
        if (this.hcbo != null) {
            this.hcbo.close();
        }
        if (!this.useNative || this.planp == 0L) {
            // The Java path owns no native resources, and a zero handle means nothing to free.
            return;
        }
        if (this.useCuda) {
            this.planp = Fft.freePlanCU(this.planp);
        } else {
            this.planp = Fft.freePlan(this.planp);
        }
    }

    /**
     * Reports the number of inputs of this operator.
     *
     * @return always 1
     */
    @Override
    public int getInputs() {
        return 1;
    }

    /**
     * Reports the number of outputs of this operator.
     *
     * @return always 1
     */
    @Override
    public int getOutputs() {
        return 1;
    }

    /**
     * Builds the pure-Java plan from {@code size} and {@code fflags}.
     *
     * <p>Steps: reset all plan state, decode the flag bits, check that the size can be
     * factorised, compact the per-radix pass counts into the front of the pass tables,
     * then allocate the twiddle table, the work buffer and (if a generic radix is
     * left over) the generic-radix matrix.
     *
     * @return 1 when a valid plan was built, 0 otherwise
     */
    private long init_internal() {
        final int slots = 33;
        final int validMark = 0x40000000;
        final int flags = this.fflags;

        // --- reset plan state -------------------------------------------------
        this.planFlags = flags;
        this.planValidflag = 0;
        this.planSize = this.size;
        this.planRadixevenodd = 0;
        this.planRadixgeneric = 0;
        this.planRadixcount = 0;
        this.planRdxgen_t = null;
        this.planRdxgen_w = null;
        this.planTwidtable = null;
        this.planWorkbuf = null;
        this.planRadixpassid = new int[slots];
        this.planRadixpasscnt = new int[slots];

        // --- decode flag bits -------------------------------------------------
        // Real (1) only when the real bit is set and the complex bit is not.
        final boolean realPlan = (flags & 1) != 0 && (flags & 2) == 0;
        this.planType = realPlan ? 1 : 2;
        this.planDirection = (flags & 0x10) != 0 ? 16 : 8;
        this.planScale = (flags & 0x100) != 0 ? 256 : 0;
        if (!realPlan) {
            // Complex plans always use packing mode 32.
            this.planPacking = 32;
        } else if ((flags & 0x80) != 0) {
            this.planPacking = 128;
        } else if ((flags & 0x40) != 0) {
            this.planPacking = 64;
        } else {
            this.planPacking = 32;
        }

        // --- validate the size ------------------------------------------------
        final int requested = this.size > 0 ? this.size : 0;
        if (this.fft_checkradix(requested, realPlan ? 1 : 2) < 0) {
            // Unusable size: leave an empty, invalid plan behind.
            this.planSize = 0;
            this.planValidflag = 0;
            this.planRadixgeneric = 0;
            this.planRadixcount = 0;
            for (int slot = 0; slot < slots; ++slot) {
                this.planRadixpassid[slot] = 0;
                this.planRadixpasscnt[slot] = 0;
            }
            return 0L;
        }

        // A real plan runs a complex transform of half the requested length.
        this.planSize = realPlan ? requested / 2 : this.size;
        this.fft_genradix(this.planSize);
        this.planValidflag = validMark;

        // --- compact the pass tables -------------------------------------------
        // Move every radix with a non-zero pass count to the front, preserving order.
        // The write position never overtakes the read position, so this is safe in place.
        int used = 0;
        int totalPasses = 0;
        for (int slot = 0; slot < slots; ++slot) {
            final int radixId = this.planRadixpassid[slot];
            final int passCount = this.planRadixpasscnt[slot];
            this.planRadixpassid[slot] = 0;
            this.planRadixpasscnt[slot] = 0;
            if (passCount != 0) {
                this.planRadixpassid[used] = radixId;
                this.planRadixpasscnt[used] = passCount;
                totalPasses += passCount;
                ++used;
            }
        }
        this.planRadixcount = used;
        if (this.planRadixgeneric != 0) {
            // The generic-radix pass counts as one more pass.
            ++totalPasses;
        }
        this.planRadixevenodd = this.intmod(totalPasses, 2);

        // --- allocate tables ----------------------------------------------------
        final int tableSize = this.planType == 2 ? this.planSize : 2 * this.planSize;
        this.planTwidtable = new float[tableSize * 2 + 8];
        this.planWorkbuf = new float[tableSize * 2 + 8];
        this.fft_gen_twiddle(this.planTwidtable, tableSize, this.planType);

        if (this.planRadixgeneric > 0) {
            final int order = this.planRadixgeneric;
            this.planRdxgen_t = new float[order * 2];
            this.planRdxgen_w = new float[order * order * 2];
            this.fft_generic_matrix(order, this.planRdxgen_w);
        }
        return 1L;
    }

    /**
     * Executes the Java plan on {@code a} in place.
     *
     * <p>Passes alternate between {@code a} and the plan's work buffer. The first
     * destination is chosen from the parity of the pass count: when the parity is
     * non-zero, the first pass reads and writes {@code a} itself. An optional
     * generic-radix pass runs first, then the tabulated radix passes from the last
     * table entry to the first. Real plans are packed before an inverse transform
     * and after a forward one; scaling is applied last when requested.
     *
     * @param a interleaved float buffer to transform
     */
    void fft_work(float[] a) {
        if (this.planSize < 2) {
            return;
        }

        boolean readFromCaller = true;
        boolean writeToCaller = this.planRadixevenodd != 0;

        if (this.planType == 1 && this.planDirection == 16) {
            this.fft_pack(a, this.planTwidtable, this.planSize, this.planDirection, this.planPacking);
        }

        int remaining = this.planSize;

        if (this.planRadixgeneric != 0) {
            remaining /= this.planRadixgeneric;
            this.radixpass_generic(readFromCaller ? a : this.planWorkbuf, writeToCaller ? a : this.planWorkbuf, this.planTwidtable, this.planSize, remaining, this.planDirection, this.planRadixgeneric, this.planRdxgen_t, this.planRdxgen_w);
            // The buffer just written becomes the next source; the other one the next destination.
            readFromCaller = writeToCaller;
            writeToCaller = !writeToCaller;
        }

        for (int entry = this.planRadixcount - 1; entry >= 0; --entry) {
            final int radix = this.planRadixpassid[entry] > 0 ? this.planRadixpassid[entry] : 1;
            for (int rep = this.planRadixpasscnt[entry]; rep > 0; --rep) {
                remaining /= radix;
                final float[] src = readFromCaller ? a : this.planWorkbuf;
                final float[] dst = writeToCaller ? a : this.planWorkbuf;
                switch (radix) {
                    case 2: {
                        this.radixpass_2(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 3: {
                        this.radixpass_3(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 4: {
                        this.radixpass_4(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 5: {
                        this.radixpass_5(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 8: {
                        this.radixpass_8(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 16: {
                        this.radixpass_16(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    case 32: {
                        this.radixpass_32(src, dst, this.planTwidtable, this.planSize, remaining, this.planDirection);
                        break;
                    }
                    default: {
                        // No pass exists for this radix: nothing was written, so keep the buffer roles.
                        continue;
                    }
                }
                readFromCaller = writeToCaller;
                writeToCaller = !writeToCaller;
            }
        }

        if (this.planType == 1 && this.planDirection == 8) {
            this.fft_pack(a, this.planTwidtable, this.planSize, this.planDirection, this.planPacking);
        }
        if (this.planScale == 256) {
            this.fft_func_scale(a, this.planSize, this.planDirection, this.planType, this.planPacking);
        }
    }

    /**
     * Remainder that tolerates a zero divisor.
     *
     * @param iarg dividend
     * @param idiv divisor
     * @return {@code iarg % idiv} (sign follows the dividend), or -1 when {@code idiv} is 0
     */
    int intmod(int iarg, int idiv) {
        if (idiv == 0) {
            return -1;
        }
        return iarg % idiv;
    }

    /**
     * Factorises the current plan size into passes of the radices listed in
     * {@code radix_decomp_profile} and stores the result in the plan:
     * {@code planRadixpasscnt[r]} receives the number of radix-{@code r} passes,
     * {@code planRadixpassid[r]} is set to {@code r}, and any factor left over is
     * stored in {@code planRadixgeneric} (0 when the size factorises completely).
     *
     * <p>Radices are tried from largest to smallest. A profile row applies only while
     * the remaining size lies strictly between the row's lower and upper bounds, and
     * contributes at most the row's maximum number of passes.
     *
     * @param nsize ignored; the size is always taken from {@code planSize} (values
     *              below 1 are treated as 0). The parameter is kept so existing
     *              callers continue to compile.
     */
    void fft_genradix(int nsize) {
        int remaining = this.planSize > 0 ? this.planSize : 0;
        this.planRadixgeneric = 0;
        for (int slot = 0; slot < 33; ++slot) {
            this.planRadixpassid[slot] = slot;
            this.planRadixpasscnt[slot] = 0;
        }
        for (int radix = 32; radix >= 2; --radix) {
            for (int row = 0; row < 7; ++row) {
                final int[] rule = radix_decomp_profile[row];
                if (radix != rule[0] || remaining <= rule[1] || remaining >= rule[2]) {
                    continue;
                }
                while (remaining > 1 && this.intmod(remaining, radix) == 0 && this.planRadixpasscnt[radix] < rule[3]) {
                    this.planRadixpasscnt[radix] = this.planRadixpasscnt[radix] + 1;
                    remaining /= radix;
                }
            }
        }
        // Whatever could not be divided out is handled by the generic-radix pass.
        // (A remaining size of 0 stores 0, i.e. "no generic radix".)
        if (remaining != 1) {
            this.planRadixgeneric = remaining;
        }
    }

    /**
     * Checks whether a transform of {@code n} points can be planned.
     * As a side effect this resets {@code planRadixgeneric} and, for a usable point
     * count, runs {@link #fft_genradix}.
     *
     * @param n        requested number of points
     * @param fftFlags flag bits; when bit 1 (real) is set, {@code n} must be even and
     *                 half of it is used as the point count
     * @return 1 when the size factorises fully into the profiled radices, 0 when a
     *         generic radix of at most 101 is left over, -1 when the size is unusable
     *         (n &lt; 1, odd real size, or generic radix above 101)
     */
    int fft_checkradix(int n, int fftFlags) {
        this.planRadixgeneric = 0;

        int points = n;
        if ((fftFlags & 1) == 1) {
            points = this.intmod(n, 2) == 0 ? n / 2 : 0;
        }

        int status;
        if (points > 0) {
            this.fft_genradix(points);
            status = 1;
        } else {
            status = -1;
        }

        if (this.planRadixgeneric > 0) {
            status = this.planRadixgeneric > 101 ? -1 : 0;
        }
        if (n < 1) {
            status = -1;
        }
        return status;
    }

    /**
     * Fills {@code twids} with {@code n} interleaved (cos, sin) pairs.
     *
     * <p>Entry {@code k} holds the cosine and sine of {@code 2*pi*k/n}. When
     * {@code rcflag} is 1, the first {@code n/2} entries instead use the doubled angle
     * {@code 4*pi*k/n}. Nothing is written when {@code n < 1}.
     *
     * @param twids  destination, at least {@code 2 * n} floats
     * @param n      number of pairs to generate
     * @param rcflag 1 selects the doubled angle for the lower half of the table
     */
    void fft_gen_twiddle(float[] twids, int n, int rcflag) {
        if (n < 1) {
            return;
        }
        final boolean splitTable = rcflag == 1;
        final int doubledBelow = splitTable ? n / 2 : 0;
        final double inverseN = 1.0 / (double)n;
        // Filled from the highest index downward.
        for (int k = n - 1; k >= 0; --k) {
            final double angle;
            if (splitTable && k < doubledBelow) {
                angle = Math.PI * 4 * (double)k * inverseN;
            } else {
                angle = Math.PI * 2 * (double)k * inverseN;
            }
            twids[k * 2] = (float)Math.cos(angle);
            twids[k * 2 + 1] = (float)Math.sin(angle);
        }
    }

    /**
     * Fills {@code matrix} with a {@code size x size} table of interleaved complex values
     * in row-major order: entry (row, col) holds {@code cos(t)} and {@code -sin(t)} with
     * {@code t = 2*pi*((row*col) mod size)/size}.
     *
     * @param size   matrix order; nothing is written when it is not positive
     * @param matrix destination, at least {@code 2 * size * size} floats
     */
    void fft_generic_matrix(int size, float[] matrix) {
        final double order = size;
        final double step = size > 0 ? Math.PI * 2 / order : 0.0;
        for (int row = 0; row < size; ++row) {
            final int rowBase = row * size;
            for (int col = 0; col < size; ++col) {
                final double angle = step * (double)this.intmod(row * col, size);
                final int slot = (rowBase + col) * 2;
                matrix[slot] = (float)Math.cos(angle);
                matrix[slot + 1] = -((float)Math.sin(angle));
            }
        }
    }

    /**
     * One radix-2 butterfly pass over interleaved complex data.
     *
     * <p>When {@code la} equals half the output span, the butterflies need no twiddle
     * factor and both directions do the same thing. Otherwise the second input of each
     * butterfly is multiplied by a twiddle factor from {@code twids}: by its conjugate
     * for {@code dirflag == 8}, and by the factor itself for any other value.
     *
     * @param in      source buffer
     * @param out     destination buffer
     * @param twids   interleaved (re, im) twiddle table
     * @param size    transform length in complex points; fewer than 2 is a no-op
     * @param la      input span for this pass; less than 1 is a no-op
     * @param dirflag 8 for the conjugate-twiddle form, anything else for the plain form
     */
    void radixpass_2(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        if (size < 2 || la < 1) {
            return;
        }
        final int ospan = 2 * size / 2;
        final int ispan = la;
        final int groups = ospan / ispan;
        final int width = ispan * 2;

        if (ispan == ospan / 2) {
            // Untwiddled pass: plain sum/difference of the two halves.
            for (int ll = 0; ll < width; ll += 2) {
                final float aRe = in[ll];
                final float aIm = in[ll + 1];
                final float bRe = in[ispan * 2 + ll];
                final float bIm = in[ispan * 2 + ll + 1];
                out[ll] = aRe + bRe;
                out[ll + 1] = aIm + bIm;
                out[ospan + ll] = aRe - bRe;
                out[ospan + ll + 1] = aIm - bIm;
            }
            return;
        }

        final boolean conjugate = dirflag == 8;
        for (int kk = 0; kk < groups; kk += 2) {
            final int outBase = kk * ispan;
            final float wRe = twids[outBase];
            final float wIm = twids[outBase + 1];
            final int istep = kk * 2;
            for (int ll = 0; ll < width; ll += 2) {
                final float aRe = in[ispan * (istep + 0) + ll];
                final float aIm = in[ispan * (istep + 0) + ll + 1];
                final float xRe = in[ispan * (istep + 2) + ll];
                final float xIm = in[ispan * (istep + 2) + ll + 1];
                final float bRe;
                final float bIm;
                if (conjugate) {
                    bRe = wRe * xRe + wIm * xIm;
                    bIm = wRe * xIm - wIm * xRe;
                } else {
                    bRe = wRe * xRe - wIm * xIm;
                    bIm = wRe * xIm + wIm * xRe;
                }
                out[outBase + ll] = aRe + bRe;
                out[outBase + ll + 1] = aIm + bIm;
                out[outBase + ospan + ll] = aRe - bRe;
                out[outBase + ospan + ll + 1] = aIm - bIm;
            }
        }
    }

    void radixpass_3(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        int RADIXSIZE = 3;
        float s1 = -1.5f;
        float s2 = 0.8660254f;
        if (size < 3 || la < 1) {
            return;
        }
        int ospan = 2 * size / 3;
        int ispan = la;
        int mm = ospan / ispan;
        switch (dirflag) {
            case 8: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[kk * ispan];
                        float o00_im = twids[kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        int istep = kk * 3;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float x00b_im;
                            float x00b_re;
                            float xreg_re = in[ispan * (istep + 2) + ll];
                            float xreg_im = in[ispan * (istep + 2) + ll + 1];
                            float t01_re = o00_re * xreg_re + o00_im * xreg_im;
                            float t01_im = o00_re * xreg_im - o00_im * xreg_re;
                            xreg_re = in[ispan * (istep + 4) + ll];
                            xreg_im = in[ispan * (istep + 4) + ll + 1];
                            float t02_re = o01_re * xreg_re + o01_im * xreg_im;
                            float t02_im = o01_re * xreg_im - o01_im * xreg_re;
                            float x01a_re = t02_re + t01_re;
                            float x02a_re = t02_re - t01_re;
                            x02a_re *= 0.8660254f;
                            float x01a_im = t01_im + t02_im;
                            float x02a_im = t01_im - t02_im;
                            x02a_im *= 0.8660254f;
                            float x00a_re = in[ispan * (istep + 0) + ll];
                            out[kk * ispan + ll] = x00b_re = x00a_re + x01a_re;
                            float x00a_im = in[ispan * (istep + 0) + ll + 1];
                            out[kk * ispan + ll + 1] = x00b_im = x00a_im + x01a_im;
                            float x01b_im = x00b_im + -1.5f * x01a_im;
                            float x01b_re = x00b_re + -1.5f * x01a_re;
                            out[kk * ispan + ospan + ll] = x01b_re + x02a_im;
                            out[kk * ispan + ospan + ll + 1] = x01b_im + x02a_re;
                            out[kk * ispan + ospan * 2 + ll] = x01b_re - x02a_im;
                            out[kk * ispan + ospan * 2 + ll + 1] = x01b_im - x02a_re;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float x00b_im;
                        float x00b_re;
                        float t01_re = in[ispan * 2 + ll];
                        float t01_im = in[ispan * 2 + ll + 1];
                        float t02_re = in[ispan * 4 + ll];
                        float t02_im = in[ispan * 4 + ll + 1];
                        float x01a_re = t02_re + t01_re;
                        float x02a_re = t02_re - t01_re;
                        x02a_re *= 0.8660254f;
                        float x01a_im = t01_im + t02_im;
                        float x02a_im = t01_im - t02_im;
                        x02a_im *= 0.8660254f;
                        float x00a_re = in[ll];
                        out[ll] = x00b_re = x00a_re + x01a_re;
                        float x00a_im = in[ll + 1];
                        out[ll + 1] = x00b_im = x00a_im + x01a_im;
                        float x01b_im = x00b_im + -1.5f * x01a_im;
                        float x01b_re = x00b_re + -1.5f * x01a_re;
                        out[ospan + ll] = x01b_re + x02a_im;
                        out[ospan + ll + 1] = x01b_im + x02a_re;
                        out[ospan * 2 + ll] = x01b_re - x02a_im;
                        out[ospan * 2 + ll + 1] = x01b_im - x02a_re;
                    }
                }
                break;
            }
            default: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[kk * ispan];
                        float o00_im = twids[kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        int istep = kk * 3;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float x00b_im;
                            float x00b_re;
                            float xreg_re = in[ispan * (istep + 2) + ll];
                            float xreg_im = in[ispan * (istep + 2) + ll + 1];
                            float t01_re = o00_re * xreg_re - o00_im * xreg_im;
                            float t01_im = o00_re * xreg_im + o00_im * xreg_re;
                            xreg_re = in[ispan * (istep + 4) + ll];
                            xreg_im = in[ispan * (istep + 4) + ll + 1];
                            float t02_re = o01_re * xreg_re - o01_im * xreg_im;
                            float t02_im = o01_re * xreg_im + o01_im * xreg_re;
                            float x01a_re = t02_re + t01_re;
                            float x02a_re = t02_re - t01_re;
                            x02a_re *= 0.8660254f;
                            float x01a_im = t01_im + t02_im;
                            float x02a_im = t01_im - t02_im;
                            x02a_im *= 0.8660254f;
                            float x00a_re = in[ispan * (istep + 0) + ll];
                            out[kk * ispan + ll] = x00b_re = x00a_re + x01a_re;
                            float x00a_im = in[ispan * (istep + 0) + ll + 1];
                            out[kk * ispan + ll + 1] = x00b_im = x00a_im + x01a_im;
                            float x01b_im = x00b_im + -1.5f * x01a_im;
                            float x01b_re = x00b_re + -1.5f * x01a_re;
                            out[kk * ispan + ospan + ll] = x01b_re - x02a_im;
                            out[kk * ispan + ospan + ll + 1] = x01b_im - x02a_re;
                            out[kk * ispan + 2 * ospan + ll] = x01b_re + x02a_im;
                            out[kk * ispan + 2 * ospan + ll + 1] = x01b_im + x02a_re;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float x00b_im;
                        float x00b_re;
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float x01a_re = t02_re + t01_re;
                        float x02a_re = t02_re - t01_re;
                        x02a_re *= 0.8660254f;
                        float x01a_im = t01_im + t02_im;
                        float x02a_im = t01_im - t02_im;
                        x02a_im *= 0.8660254f;
                        float x00a_re = in[ll];
                        out[ll] = x00b_re = x00a_re + x01a_re;
                        float x00a_im = in[ll + 1];
                        out[ll + 1] = x00b_im = x00a_im + x01a_im;
                        float x01b_im = x00b_im + -1.5f * x01a_im;
                        float x01b_re = x00b_re + -1.5f * x01a_re;
                        out[ospan + ll] = x01b_re - x02a_im;
                        out[ospan + ll + 1] = x01b_im - x02a_re;
                        out[ospan * 2 + ll] = x01b_re + x02a_im;
                        out[ospan * 2 + ll + 1] = x01b_im + x02a_re;
                    }
                }
                break;
            }
        }
    }

    /**
     * Runs a single radix-4 butterfly pass, reading from {@code in} and writing to {@code out}.
     *
     * <p>Both arrays are indexed as interleaved (re, im) float pairs. The pass is a no-op when
     * {@code size < 4} or {@code la < 1}. When {@code la} equals {@code (2 * size / 4) / 2} there
     * is only one butterfly group and {@code twids} is never read; otherwise every second value
     * of the group counter selects three twiddle factors that are applied to inputs 1..3 before
     * the butterfly.
     *
     * @param in      source samples
     * @param out     destination samples
     * @param twids   interleaved (re, im) twiddle factors; only read on the multi-group path
     * @param size    transform length used to derive the output span ({@code 2 * size / 4})
     * @param la      input span, counted in complex elements
     * @param dirflag {@code 8} ({@code FORWARD}) selects the forward butterfly; every other
     *                value takes the inverse branch
     */
    void radixpass_4(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        if (size < 4 || la < 1) {
            return;
        }
        final int ospan = 2 * size / 4;
        final int ispan = la;
        final int stride = ispan * 2;
        final boolean forward = dirflag == 8;

        if (ispan == ospan / 2) {
            // Single group: the four inputs feed the butterfly without any twiddle multiply.
            for (int ll = 0; ll < stride; ll += 2) {
                final float a_re = in[ll];
                final float a_im = in[ll + 1];
                final float b_re = in[stride + ll];
                final float b_im = in[stride + ll + 1];
                final float c_re = in[2 * stride + ll];
                final float c_im = in[2 * stride + ll + 1];
                final float d_re = in[3 * stride + ll];
                final float d_im = in[3 * stride + ll + 1];
                radix4Butterfly(out, ll, ospan, forward,
                        a_re, a_im, b_re, b_im, c_re, c_im, d_re, d_im);
            }
            return;
        }

        final int groups = ospan / ispan;
        for (int kk = 0; kk < groups; kk += 2) {
            final int twBase = kk * ispan;
            final float w1_re = twids[twBase];
            float w1_im = twids[twBase + 1];
            final float w2_re = twids[2 * twBase];
            float w2_im = twids[2 * twBase + 1];
            final float w3_re = twids[3 * twBase];
            float w3_im = twids[3 * twBase + 1];
            if (forward) {
                // The forward pass multiplies by the conjugate twiddle. Negating the imaginary
                // part once here yields exactly the same float results as flipping the signs
                // inside every product below.
                w1_im = -w1_im;
                w2_im = -w2_im;
                w3_im = -w3_im;
            }
            final int srcBase = 4 * twBase;
            for (int ll = 0; ll < stride; ll += 2) {
                final int src = srcBase + ll;
                final float a_re = in[src];
                final float a_im = in[src + 1];

                float x_re = in[src + stride];
                float x_im = in[src + stride + 1];
                final float b_re = w1_re * x_re - w1_im * x_im;
                final float b_im = w1_re * x_im + w1_im * x_re;

                x_re = in[src + 2 * stride];
                x_im = in[src + 2 * stride + 1];
                final float c_re = w2_re * x_re - w2_im * x_im;
                final float c_im = w2_re * x_im + w2_im * x_re;

                x_re = in[src + 3 * stride];
                x_im = in[src + 3 * stride + 1];
                final float d_re = w3_re * x_re - w3_im * x_im;
                final float d_im = w3_re * x_im + w3_im * x_re;

                radix4Butterfly(out, twBase + ll, ospan, forward,
                        a_re, a_im, b_re, b_im, c_re, c_im, d_re, d_im);
            }
        }
    }

    /**
     * Combines four (already twiddled) complex inputs into four outputs stored at
     * {@code base}, {@code base + ospan}, {@code base + 2 * ospan} and {@code base + 3 * ospan}.
     * The forward and inverse variants differ only in which of outputs 1 and 3 receives
     * which rotation of the odd difference term.
     */
    private static void radix4Butterfly(float[] out, int base, int ospan, boolean forward,
            float t0_re, float t0_im, float t1_re, float t1_im,
            float t2_re, float t2_im, float t3_re, float t3_im) {
        final float evenSumRe = t0_re + t2_re;
        final float evenDifRe = t0_re - t2_re;
        final float evenSumIm = t0_im + t2_im;
        final float evenDifIm = t0_im - t2_im;
        final float oddSumRe = t1_re + t3_re;
        final float oddDifRe = t1_re - t3_re;
        final float oddSumIm = t1_im + t3_im;
        final float oddDifIm = t1_im - t3_im;

        // evenDif - i * oddDif
        final float cwRe = evenDifRe + oddDifIm;
        final float cwIm = evenDifIm - oddDifRe;
        // evenDif + i * oddDif
        final float ccwRe = evenDifRe - oddDifIm;
        final float ccwIm = evenDifIm + oddDifRe;

        out[base] = evenSumRe + oddSumRe;
        out[base + 1] = evenSumIm + oddSumIm;
        out[base + ospan] = forward ? cwRe : ccwRe;
        out[base + ospan + 1] = forward ? cwIm : ccwIm;
        out[base + 2 * ospan] = evenSumRe - oddSumRe;
        out[base + 2 * ospan + 1] = evenSumIm - oddSumIm;
        out[base + 3 * ospan] = forward ? ccwRe : cwRe;
        out[base + 3 * ospan + 1] = forward ? ccwIm : cwIm;
    }

    /**
     * Runs a single radix-5 butterfly pass, reading from {@code in} and writing to {@code out}.
     *
     * <p>Both arrays are indexed as interleaved (re, im) float pairs. The pass is a no-op when
     * {@code size < 5} or {@code la < 1}. When {@code la} equals {@code (2 * size / 5) / 2} there
     * is only one butterfly group and {@code twids} is never read; otherwise every second value
     * of the group counter selects four twiddle factors that are applied to inputs 1..4 before
     * the butterfly.
     *
     * @param in      source samples
     * @param out     destination samples
     * @param twids   interleaved (re, im) twiddle factors; only read on the multi-group path
     * @param size    transform length used to derive the output span ({@code 2 * size / 5})
     * @param la      input span, counted in complex elements
     * @param dirflag {@code 8} ({@code FORWARD}) selects the forward butterfly; every other
     *                value takes the inverse branch
     */
    void radixpass_5(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        if (size < 5 || la < 1) {
            return;
        }
        final int ospan = 2 * size / 5;
        final int ispan = la;
        final int stride = ispan * 2;
        final boolean forward = dirflag == 8;

        if (ispan == ospan / 2) {
            // Single group: the five inputs feed the butterfly without any twiddle multiply.
            // Inputs are fetched in the order 1, 4, 2, 3, 0.
            for (int ll = 0; ll < stride; ll += 2) {
                final float p1_re = in[stride + ll];
                final float p1_im = in[stride + ll + 1];
                final float p4_re = in[4 * stride + ll];
                final float p4_im = in[4 * stride + ll + 1];
                final float p2_re = in[2 * stride + ll];
                final float p2_im = in[2 * stride + ll + 1];
                final float p3_re = in[3 * stride + ll];
                final float p3_im = in[3 * stride + ll + 1];
                final float p0_re = in[ll];
                final float p0_im = in[ll + 1];
                radix5Butterfly(out, ll, ospan, forward, p0_re, p0_im,
                        p1_re, p1_im, p2_re, p2_im, p3_re, p3_im, p4_re, p4_im);
            }
            return;
        }

        final int groups = ospan / ispan;
        for (int kk = 0; kk < groups; kk += 2) {
            final int twBase = kk * ispan;
            final float w1_re = twids[twBase];
            float w1_im = twids[twBase + 1];
            final float w2_re = twids[2 * twBase];
            float w2_im = twids[2 * twBase + 1];
            final float w3_re = twids[3 * twBase];
            float w3_im = twids[3 * twBase + 1];
            final float w4_re = twids[4 * twBase];
            float w4_im = twids[4 * twBase + 1];
            if (forward) {
                // The forward pass multiplies by the conjugate twiddle. Negating the imaginary
                // part once here yields exactly the same float results as flipping the signs
                // inside every product below.
                w1_im = -w1_im;
                w2_im = -w2_im;
                w3_im = -w3_im;
                w4_im = -w4_im;
            }
            final int srcBase = 5 * twBase;
            for (int ll = 0; ll < stride; ll += 2) {
                final int src = srcBase + ll;

                float x_re = in[src + stride];
                float x_im = in[src + stride + 1];
                final float p1_re = w1_re * x_re - w1_im * x_im;
                final float p1_im = w1_re * x_im + w1_im * x_re;

                x_re = in[src + 4 * stride];
                x_im = in[src + 4 * stride + 1];
                final float p4_re = w4_re * x_re - w4_im * x_im;
                final float p4_im = w4_re * x_im + w4_im * x_re;

                x_re = in[src + 2 * stride];
                x_im = in[src + 2 * stride + 1];
                final float p2_re = w2_re * x_re - w2_im * x_im;
                final float p2_im = w2_re * x_im + w2_im * x_re;

                x_re = in[src + 3 * stride];
                x_im = in[src + 3 * stride + 1];
                final float p3_re = w3_re * x_re - w3_im * x_im;
                final float p3_im = w3_re * x_im + w3_im * x_re;

                final float p0_re = in[src];
                final float p0_im = in[src + 1];

                radix5Butterfly(out, twBase + ll, ospan, forward, p0_re, p0_im,
                        p1_re, p1_im, p2_re, p2_im, p3_re, p3_im, p4_re, p4_im);
            }
        }
    }

    /**
     * Combines five (already twiddled) complex inputs into five outputs stored at
     * {@code base + j * ospan} for {@code j = 0..4}. The forward and inverse variants compute
     * the same four non-DC values and differ only in where they are stored: output 1 swaps
     * with output 4 and output 2 swaps with output 3.
     */
    private static void radix5Butterfly(float[] out, int base, int ospan, boolean forward,
            float t0_re, float t0_im, float t1_re, float t1_im, float t2_re, float t2_im,
            float t3_re, float t3_im, float t4_re, float t4_im) {
        final float s1 = -1.25f;
        final float s2 = 0.559017f;     // ~ sqrt(5) / 4
        final float s3 = 0.95105654f;   // ~ sin(2*pi/5)
        final float s4 = 0.618034f;     // ~ sin(4*pi/5) / sin(2*pi/5)

        final float sum14_re = t1_re + t4_re;
        final float dif14_re = t1_re - t4_re;
        final float sum14_im = t1_im + t4_im;
        final float dif14_im = t1_im - t4_im;
        final float sum23_re = t2_re + t3_re;
        final float dif23_re = t2_re - t3_re;
        final float sum23_im = t2_im + t3_im;
        final float dif23_im = t2_im - t3_im;

        // Output 0 is the plain sum of all five inputs.
        final float spread_re = sum14_re - sum23_re;
        final float total_re = sum14_re + sum23_re;
        final float dc_re = t0_re + total_re;
        out[base] = dc_re;
        final float spread_im = sum14_im - sum23_im;
        final float total_im = sum14_im + sum23_im;
        final float dc_im = t0_im + total_im;
        out[base + 1] = dc_im;

        // Symmetric (cosine) contributions shared by the output pairs (1, 4) and (2, 3).
        final float mid_re = s1 * total_re + dc_re;
        final float skew_re = s2 * spread_re;
        final float cosA_re = mid_re + skew_re;
        final float cosB_re = mid_re - skew_re;
        final float mid_im = s1 * total_im + dc_im;
        final float skew_im = s2 * spread_im;
        final float cosA_im = mid_im + skew_im;
        final float cosB_im = mid_im - skew_im;

        // Antisymmetric (sine) contributions; real parts are built from imaginary differences
        // and vice versa.
        final float u23_re = s3 * dif23_im;
        final float u14_re = s3 * dif14_im;
        final float sinB_re = s4 * u14_re - u23_re;
        final float sinA_re = s4 * u23_re + u14_re;
        final float u23_im = s3 * dif23_re;
        final float u14_im = s3 * dif14_re;
        final float sinB_im = s4 * u14_im - u23_im;
        final float sinA_im = s4 * u23_im + u14_im;

        if (forward) {
            out[base + ospan] = cosA_re + sinA_re;
            out[base + ospan + 1] = cosA_im - sinA_im;
            out[base + ospan * 2] = cosB_re + sinB_re;
            out[base + ospan * 2 + 1] = cosB_im - sinB_im;
            out[base + ospan * 3] = cosB_re - sinB_re;
            out[base + ospan * 3 + 1] = cosB_im + sinB_im;
            out[base + ospan * 4] = cosA_re - sinA_re;
            out[base + ospan * 4 + 1] = cosA_im + sinA_im;
        } else {
            out[base + ospan] = cosA_re - sinA_re;
            out[base + ospan + 1] = cosA_im + sinA_im;
            out[base + ospan * 2] = cosB_re - sinB_re;
            out[base + ospan * 2 + 1] = cosB_im + sinB_im;
            out[base + ospan * 3] = cosB_re + sinB_re;
            out[base + ospan * 3 + 1] = cosB_im - sinB_im;
            out[base + ospan * 4] = cosA_re + sinA_re;
            out[base + ospan * 4 + 1] = cosA_im - sinA_im;
        }
    }

    void radixpass_8(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        int RADIXSIZE = 8;
        float s1 = 0.70710677f;
        if (size < 8 || la < 1) {
            return;
        }
        int ospan = 2 * size / 8;
        int ispan = la;
        int mm = ospan / ispan;
        switch (dirflag) {
            case 8: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[kk * ispan];
                        float o00_im = twids[kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        int istep = kk * 8;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float xreg_re = in[ispan * (istep + 2) + ll];
                            float xreg_im = in[ispan * (istep + 2) + ll + 1];
                            float t01_re = o00_re * xreg_re + o00_im * xreg_im;
                            float t01_im = o00_re * xreg_im - o00_im * xreg_re;
                            xreg_re = in[ispan * (istep + 4) + ll];
                            xreg_im = in[ispan * (istep + 4) + ll + 1];
                            float t02_re = o01_re * xreg_re + o01_im * xreg_im;
                            float t02_im = o01_re * xreg_im - o01_im * xreg_re;
                            xreg_re = in[ispan * (istep + 6) + ll];
                            xreg_im = in[ispan * (istep + 6) + ll + 1];
                            float t03_re = o02_re * xreg_re + o02_im * xreg_im;
                            float t03_im = o02_re * xreg_im - o02_im * xreg_re;
                            xreg_re = in[ispan * (istep + 8) + ll];
                            xreg_im = in[ispan * (istep + 8) + ll + 1];
                            float t04_re = o03_re * xreg_re + o03_im * xreg_im;
                            float t04_im = o03_re * xreg_im - o03_im * xreg_re;
                            xreg_re = in[ispan * (istep + 10) + ll];
                            xreg_im = in[ispan * (istep + 10) + ll + 1];
                            float t05_re = o04_re * xreg_re + o04_im * xreg_im;
                            float t05_im = o04_re * xreg_im - o04_im * xreg_re;
                            xreg_re = in[ispan * (istep + 12) + ll];
                            xreg_im = in[ispan * (istep + 12) + ll + 1];
                            float t06_re = o05_re * xreg_re + o05_im * xreg_im;
                            float t06_im = o05_re * xreg_im - o05_im * xreg_re;
                            xreg_re = in[ispan * (istep + 14) + ll];
                            xreg_im = in[ispan * (istep + 14) + ll + 1];
                            float t07_re = o06_re * xreg_re + o06_im * xreg_im;
                            float t07_im = o06_re * xreg_im - o06_im * xreg_re;
                            float x04a_re = t00_re - t04_re;
                            float x00a_re = t00_re + t04_re;
                            float x02a_re = t02_re + t06_re;
                            float x06a_re = t02_re - t06_re;
                            float x00b_re = x00a_re + x02a_re;
                            float x02b_re = x00a_re - x02a_re;
                            float x04a_im = t00_im - t04_im;
                            float x00a_im = t00_im + t04_im;
                            float x02a_im = t02_im + t06_im;
                            float x06a_im = t02_im - t06_im;
                            float x00b_im = x00a_im + x02a_im;
                            float x02b_im = x00a_im - x02a_im;
                            float x05a_re = t01_re - t05_re;
                            float x01a_re = t01_re + t05_re;
                            float x03a_re = t03_re + t07_re;
                            float x07a_re = t03_re - t07_re;
                            float x01b_re = x01a_re + x03a_re;
                            float x03b_re = x01a_re - x03a_re;
                            float x05a_im = t01_im - t05_im;
                            float x01a_im = t01_im + t05_im;
                            float x03a_im = t03_im + t07_im;
                            float x07a_im = t03_im - t07_im;
                            float x01b_im = x01a_im + x03a_im;
                            float x03b_im = x01a_im - x03a_im;
                            out[kk * ispan + 0 * ospan + ll] = x00b_re + x01b_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00b_im + x01b_im;
                            out[kk * ispan + 4 * ospan + ll] = x00b_re - x01b_re;
                            out[kk * ispan + 4 * ospan + ll + 1] = x00b_im - x01b_im;
                            out[kk * ispan + 2 * ospan + ll] = x02b_re + x03b_im;
                            out[kk * ispan + 2 * ospan + ll + 1] = x02b_im - x03b_re;
                            out[kk * ispan + 6 * ospan + ll] = x02b_re - x03b_im;
                            out[kk * ispan + 6 * ospan + ll + 1] = x02b_im + x03b_re;
                            float x07b_re = x05a_re + x07a_re;
                            float x05b_re = x05a_re - x07a_re;
                            float x07b_im = x05a_im + x07a_im;
                            float x05b_im = x05a_im - x07a_im;
                            float x05c_re = 0.70710677f * x05b_re;
                            float x04b_re = x04a_re + x05c_re;
                            float x05d_re = x04a_re - x05c_re;
                            float x05c_im = 0.70710677f * x05b_im;
                            float x04b_im = x04a_im + x05c_im;
                            float x05d_im = x04a_im - x05c_im;
                            float x07c_re = 0.70710677f * x07b_im;
                            float x06b_re = x06a_im + x07c_re;
                            float x07d_re = x06a_im - x07c_re;
                            float x07c_im = 0.70710677f * x07b_re;
                            float x06b_im = x06a_re + x07c_im;
                            float x07d_im = x07c_im - x06a_re;
                            out[kk * ispan + 1 * ospan + ll] = x04b_re + x06b_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x04b_im - x06b_im;
                            out[kk * ispan + 7 * ospan + ll] = x04b_re - x06b_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x04b_im + x06b_im;
                            out[kk * ispan + 3 * ospan + ll] = x05d_re - x07d_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x05d_im - x07d_im;
                            out[kk * ispan + 5 * ospan + ll] = x05d_re + x07d_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x05d_im + x07d_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float x04a_re = t00_re - t04_re;
                        float x00a_re = t00_re + t04_re;
                        float x02a_re = t02_re + t06_re;
                        float x06a_re = t02_re - t06_re;
                        float x00b_re = x00a_re + x02a_re;
                        float x02b_re = x00a_re - x02a_re;
                        float x04a_im = t00_im - t04_im;
                        float x00a_im = t00_im + t04_im;
                        float x02a_im = t02_im + t06_im;
                        float x06a_im = t02_im - t06_im;
                        float x00b_im = x00a_im + x02a_im;
                        float x02b_im = x00a_im - x02a_im;
                        float x05a_re = t01_re - t05_re;
                        float x01a_re = t01_re + t05_re;
                        float x03a_re = t03_re + t07_re;
                        float x07a_re = t03_re - t07_re;
                        float x01b_re = x01a_re + x03a_re;
                        float x03b_re = x01a_re - x03a_re;
                        float x05a_im = t01_im - t05_im;
                        float x01a_im = t01_im + t05_im;
                        float x03a_im = t03_im + t07_im;
                        float x07a_im = t03_im - t07_im;
                        float x01b_im = x01a_im + x03a_im;
                        float x03b_im = x01a_im - x03a_im;
                        out[0 * ospan + ll] = x00b_re + x01b_re;
                        out[0 * ospan + ll + 1] = x00b_im + x01b_im;
                        out[4 * ospan + ll] = x00b_re - x01b_re;
                        out[4 * ospan + ll + 1] = x00b_im - x01b_im;
                        out[2 * ospan + ll] = x02b_re + x03b_im;
                        out[2 * ospan + ll + 1] = x02b_im - x03b_re;
                        out[6 * ospan + ll] = x02b_re - x03b_im;
                        out[6 * ospan + ll + 1] = x02b_im + x03b_re;
                        float x07b_re = x05a_re + x07a_re;
                        float x05b_re = x05a_re - x07a_re;
                        float x07b_im = x05a_im + x07a_im;
                        float x05b_im = x05a_im - x07a_im;
                        float x05c_re = 0.70710677f * x05b_re;
                        float x04b_re = x04a_re + x05c_re;
                        float x05d_re = x04a_re - x05c_re;
                        float x05c_im = 0.70710677f * x05b_im;
                        float x04b_im = x04a_im + x05c_im;
                        float x05d_im = x04a_im - x05c_im;
                        float x07c_re = 0.70710677f * x07b_im;
                        float x06b_re = x06a_im + x07c_re;
                        float x07d_re = x06a_im - x07c_re;
                        float x07c_im = 0.70710677f * x07b_re;
                        float x06b_im = x06a_re + x07c_im;
                        float x07d_im = x07c_im - x06a_re;
                        out[1 * ospan + ll] = x04b_re + x06b_re;
                        out[1 * ospan + ll + 1] = x04b_im - x06b_im;
                        out[7 * ospan + ll] = x04b_re - x06b_re;
                        out[7 * ospan + ll + 1] = x04b_im + x06b_im;
                        out[3 * ospan + ll] = x05d_re - x07d_re;
                        out[3 * ospan + ll + 1] = x05d_im - x07d_im;
                        out[5 * ospan + ll] = x05d_re + x07d_re;
                        out[5 * ospan + ll + 1] = x05d_im + x07d_im;
                    }
                }
                break;
            }
            default: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[kk * ispan];
                        float o00_im = twids[kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        int istep = kk * 8;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float xreg_re = in[ispan * (istep + 2) + ll];
                            float xreg_im = in[ispan * (istep + 2) + ll + 1];
                            float t01_re = o00_re * xreg_re - o00_im * xreg_im;
                            float t01_im = o00_re * xreg_im + o00_im * xreg_re;
                            xreg_re = in[ispan * (istep + 4) + ll];
                            xreg_im = in[ispan * (istep + 4) + ll + 1];
                            float t02_re = o01_re * xreg_re - o01_im * xreg_im;
                            float t02_im = o01_re * xreg_im + o01_im * xreg_re;
                            xreg_re = in[ispan * (istep + 6) + ll];
                            xreg_im = in[ispan * (istep + 6) + ll + 1];
                            float t03_re = o02_re * xreg_re - o02_im * xreg_im;
                            float t03_im = o02_re * xreg_im + o02_im * xreg_re;
                            xreg_re = in[ispan * (istep + 8) + ll];
                            xreg_im = in[ispan * (istep + 8) + ll + 1];
                            float t04_re = o03_re * xreg_re - o03_im * xreg_im;
                            float t04_im = o03_re * xreg_im + o03_im * xreg_re;
                            xreg_re = in[ispan * (istep + 10) + ll];
                            xreg_im = in[ispan * (istep + 10) + ll + 1];
                            float t05_re = o04_re * xreg_re - o04_im * xreg_im;
                            float t05_im = o04_re * xreg_im + o04_im * xreg_re;
                            xreg_re = in[ispan * (istep + 12) + ll];
                            xreg_im = in[ispan * (istep + 12) + ll + 1];
                            float t06_re = o05_re * xreg_re - o05_im * xreg_im;
                            float t06_im = o05_re * xreg_im + o05_im * xreg_re;
                            xreg_re = in[ispan * (istep + 14) + ll];
                            xreg_im = in[ispan * (istep + 14) + ll + 1];
                            float t07_re = o06_re * xreg_re - o06_im * xreg_im;
                            float t07_im = o06_re * xreg_im + o06_im * xreg_re;
                            float x04a_re = t00_re - t04_re;
                            float x00a_re = t00_re + t04_re;
                            float x02a_re = t02_re + t06_re;
                            float x06a_re = t02_re - t06_re;
                            float x00b_re = x00a_re + x02a_re;
                            float x02b_re = x00a_re - x02a_re;
                            float x04a_im = t00_im - t04_im;
                            float x00a_im = t00_im + t04_im;
                            float x02a_im = t02_im + t06_im;
                            float x06a_im = t02_im - t06_im;
                            float x00b_im = x00a_im + x02a_im;
                            float x02b_im = x00a_im - x02a_im;
                            float x05a_re = t01_re - t05_re;
                            float x01a_re = t01_re + t05_re;
                            float x03a_re = t03_re + t07_re;
                            float x07a_re = t03_re - t07_re;
                            float x01b_re = x01a_re + x03a_re;
                            float x03b_re = x01a_re - x03a_re;
                            float x05a_im = t01_im - t05_im;
                            float x01a_im = t01_im + t05_im;
                            float x03a_im = t03_im + t07_im;
                            float x07a_im = t03_im - t07_im;
                            float x01b_im = x01a_im + x03a_im;
                            float x03b_im = x01a_im - x03a_im;
                            out[kk * ispan + 0 * ospan + ll] = x00b_re + x01b_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00b_im + x01b_im;
                            out[kk * ispan + 4 * ospan + ll] = x00b_re - x01b_re;
                            out[kk * ispan + 4 * ospan + ll + 1] = x00b_im - x01b_im;
                            out[kk * ispan + 2 * ospan + ll] = x02b_re - x03b_im;
                            out[kk * ispan + 2 * ospan + ll + 1] = x02b_im + x03b_re;
                            out[kk * ispan + 6 * ospan + ll] = x02b_re + x03b_im;
                            out[kk * ispan + 6 * ospan + ll + 1] = x02b_im - x03b_re;
                            float x07b_re = x05a_re + x07a_re;
                            float x05b_re = x05a_re - x07a_re;
                            float x07b_im = x05a_im + x07a_im;
                            float x05b_im = x05a_im - x07a_im;
                            float x05c_re = 0.70710677f * x05b_re;
                            float x04b_re = x04a_re + x05c_re;
                            float x05d_re = x04a_re - x05c_re;
                            float x05c_im = 0.70710677f * x05b_im;
                            float x04b_im = x04a_im + x05c_im;
                            float x05d_im = x04a_im - x05c_im;
                            float x07c_re = 0.70710677f * x07b_im;
                            float x06b_re = x06a_im + x07c_re;
                            float x07d_re = x06a_im - x07c_re;
                            float x07c_im = 0.70710677f * x07b_re;
                            float x06b_im = x06a_re + x07c_im;
                            float x07d_im = x07c_im - x06a_re;
                            out[kk * ispan + 1 * ospan + ll] = x04b_re - x06b_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x04b_im + x06b_im;
                            out[kk * ispan + 7 * ospan + ll] = x04b_re + x06b_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x04b_im - x06b_im;
                            out[kk * ispan + 3 * ospan + ll] = x05d_re + x07d_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x05d_im + x07d_im;
                            out[kk * ispan + 5 * ospan + ll] = x05d_re - x07d_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x05d_im - x07d_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float x04a_re = t00_re - t04_re;
                        float x00a_re = t00_re + t04_re;
                        float x02a_re = t02_re + t06_re;
                        float x06a_re = t02_re - t06_re;
                        float x00b_re = x00a_re + x02a_re;
                        float x02b_re = x00a_re - x02a_re;
                        float x04a_im = t00_im - t04_im;
                        float x00a_im = t00_im + t04_im;
                        float x02a_im = t02_im + t06_im;
                        float x06a_im = t02_im - t06_im;
                        float x00b_im = x00a_im + x02a_im;
                        float x02b_im = x00a_im - x02a_im;
                        float x05a_re = t01_re - t05_re;
                        float x01a_re = t01_re + t05_re;
                        float x03a_re = t03_re + t07_re;
                        float x07a_re = t03_re - t07_re;
                        float x01b_re = x01a_re + x03a_re;
                        float x03b_re = x01a_re - x03a_re;
                        float x05a_im = t01_im - t05_im;
                        float x01a_im = t01_im + t05_im;
                        float x03a_im = t03_im + t07_im;
                        float x07a_im = t03_im - t07_im;
                        float x01b_im = x01a_im + x03a_im;
                        float x03b_im = x01a_im - x03a_im;
                        out[0 * ospan + ll] = x00b_re + x01b_re;
                        out[0 * ospan + ll + 1] = x00b_im + x01b_im;
                        out[4 * ospan + ll] = x00b_re - x01b_re;
                        out[4 * ospan + ll + 1] = x00b_im - x01b_im;
                        out[2 * ospan + ll] = x02b_re - x03b_im;
                        out[2 * ospan + ll + 1] = x02b_im + x03b_re;
                        out[6 * ospan + ll] = x02b_re + x03b_im;
                        out[6 * ospan + ll + 1] = x02b_im - x03b_re;
                        float x07b_re = x05a_re + x07a_re;
                        float x05b_re = x05a_re - x07a_re;
                        float x07b_im = x05a_im + x07a_im;
                        float x05b_im = x05a_im - x07a_im;
                        float x05c_re = 0.70710677f * x05b_re;
                        float x04b_re = x04a_re + x05c_re;
                        float x05d_re = x04a_re - x05c_re;
                        float x05c_im = 0.70710677f * x05b_im;
                        float x04b_im = x04a_im + x05c_im;
                        float x05d_im = x04a_im - x05c_im;
                        float x07c_re = 0.70710677f * x07b_im;
                        float x06b_re = x06a_im + x07c_re;
                        float x07d_re = x06a_im - x07c_re;
                        float x07c_im = 0.70710677f * x07b_re;
                        float x06b_im = x06a_re + x07c_im;
                        float x07d_im = x07c_im - x06a_re;
                        out[1 * ospan + ll] = x04b_re - x06b_re;
                        out[1 * ospan + ll + 1] = x04b_im + x06b_im;
                        out[7 * ospan + ll] = x04b_re + x06b_re;
                        out[7 * ospan + ll + 1] = x04b_im - x06b_im;
                        out[3 * ospan + ll] = x05d_re + x07d_re;
                        out[3 * ospan + ll + 1] = x05d_im + x07d_im;
                        out[5 * ospan + ll] = x05d_re - x07d_re;
                        out[5 * ospan + ll + 1] = x05d_im - x07d_im;
                    }
                }
                break;
            }
        }
    }

    void radixpass_16(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        int RADIXSIZE = 16;
        float s1 = 0.70710677f;
        float s2 = 1.4142135f;
        float s3 = 0.9238795f;
        float s4 = 0.38268343f;
        if (size < 16 || la < 1) {
            return;
        }
        int ospan = 2 * size / 16;
        int ispan = la;
        int mm = ospan / ispan;
        switch (dirflag) {
            case 8: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        // First twiddle factor for this kk. As with every other twiddle pair
                        // loaded below, the imaginary part is read from the slot immediately
                        // after the real part (index + 1); reading the same slot twice would
                        // make o00_im a copy of o00_re and corrupt t01_re/t01_im.
                        float o00_re = twids[1 * kk * ispan];
                        float o00_im = twids[1 * kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        float o07_re = twids[8 * kk * ispan];
                        float o07_im = twids[8 * kk * ispan + 1];
                        float o08_re = twids[9 * kk * ispan];
                        float o08_im = twids[9 * kk * ispan + 1];
                        float o09_re = twids[10 * kk * ispan];
                        float o09_im = twids[10 * kk * ispan + 1];
                        float o10_re = twids[11 * kk * ispan];
                        float o10_im = twids[11 * kk * ispan + 1];
                        float o11_re = twids[12 * kk * ispan];
                        float o11_im = twids[12 * kk * ispan + 1];
                        float o12_re = twids[13 * kk * ispan];
                        float o12_im = twids[13 * kk * ispan + 1];
                        float o13_re = twids[14 * kk * ispan];
                        float o13_im = twids[14 * kk * ispan + 1];
                        float o14_re = twids[15 * kk * ispan];
                        float o14_im = twids[15 * kk * ispan + 1];
                        int istep = kk * 16;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float t01_re = o00_re * in[ispan * (istep + 2) + ll] + o00_im * in[ispan * (istep + 2) + ll + 1];
                            float t01_im = o00_re * in[ispan * (istep + 2) + ll + 1] - o00_im * in[ispan * (istep + 2) + ll];
                            float t02_re = o01_re * in[ispan * (istep + 4) + ll] + o01_im * in[ispan * (istep + 4) + ll + 1];
                            float t02_im = o01_re * in[ispan * (istep + 4) + ll + 1] - o01_im * in[ispan * (istep + 4) + ll];
                            float t03_re = o02_re * in[ispan * (istep + 6) + ll] + o02_im * in[ispan * (istep + 6) + ll + 1];
                            float t03_im = o02_re * in[ispan * (istep + 6) + ll + 1] - o02_im * in[ispan * (istep + 6) + ll];
                            float t04_re = o03_re * in[ispan * (istep + 8) + ll] + o03_im * in[ispan * (istep + 8) + ll + 1];
                            float t04_im = o03_re * in[ispan * (istep + 8) + ll + 1] - o03_im * in[ispan * (istep + 8) + ll];
                            float t05_re = o04_re * in[ispan * (istep + 10) + ll] + o04_im * in[ispan * (istep + 10) + ll + 1];
                            float t05_im = o04_re * in[ispan * (istep + 10) + ll + 1] - o04_im * in[ispan * (istep + 10) + ll];
                            float t06_re = o05_re * in[ispan * (istep + 12) + ll] + o05_im * in[ispan * (istep + 12) + ll + 1];
                            float t06_im = o05_re * in[ispan * (istep + 12) + ll + 1] - o05_im * in[ispan * (istep + 12) + ll];
                            float t07_re = o06_re * in[ispan * (istep + 14) + ll] + o06_im * in[ispan * (istep + 14) + ll + 1];
                            float t07_im = o06_re * in[ispan * (istep + 14) + ll + 1] - o06_im * in[ispan * (istep + 14) + ll];
                            float t08_re = o07_re * in[ispan * (istep + 16) + ll] + o07_im * in[ispan * (istep + 16) + ll + 1];
                            float t08_im = o07_re * in[ispan * (istep + 16) + ll + 1] - o07_im * in[ispan * (istep + 16) + ll];
                            float t09_re = o08_re * in[ispan * (istep + 18) + ll] + o08_im * in[ispan * (istep + 18) + ll + 1];
                            float t09_im = o08_re * in[ispan * (istep + 18) + ll + 1] - o08_im * in[ispan * (istep + 18) + ll];
                            float t10_re = o09_re * in[ispan * (istep + 20) + ll] + o09_im * in[ispan * (istep + 20) + ll + 1];
                            float t10_im = o09_re * in[ispan * (istep + 20) + ll + 1] - o09_im * in[ispan * (istep + 20) + ll];
                            float t11_re = o10_re * in[ispan * (istep + 22) + ll] + o10_im * in[ispan * (istep + 22) + ll + 1];
                            float t11_im = o10_re * in[ispan * (istep + 22) + ll + 1] - o10_im * in[ispan * (istep + 22) + ll];
                            float t12_re = o11_re * in[ispan * (istep + 24) + ll] + o11_im * in[ispan * (istep + 24) + ll + 1];
                            float t12_im = o11_re * in[ispan * (istep + 24) + ll + 1] - o11_im * in[ispan * (istep + 24) + ll];
                            float t13_re = o12_re * in[ispan * (istep + 26) + ll] + o12_im * in[ispan * (istep + 26) + ll + 1];
                            float t13_im = o12_re * in[ispan * (istep + 26) + ll + 1] - o12_im * in[ispan * (istep + 26) + ll];
                            float t14_re = o13_re * in[ispan * (istep + 28) + ll] + o13_im * in[ispan * (istep + 28) + ll + 1];
                            float t14_im = o13_re * in[ispan * (istep + 28) + ll + 1] - o13_im * in[ispan * (istep + 28) + ll];
                            float t15_re = o14_re * in[ispan * (istep + 30) + ll] + o14_im * in[ispan * (istep + 30) + ll + 1];
                            float t15_im = o14_re * in[ispan * (istep + 30) + ll + 1] - o14_im * in[ispan * (istep + 30) + ll];
                            float x00c_re = t00_re + t08_re;
                            float x08c_re = t00_re - t08_re;
                            float x00c_im = t00_im + t08_im;
                            float x08c_im = t00_im - t08_im;
                            float x02c_re = t02_re + t10_re;
                            float x10e_re = t02_re - t10_re;
                            float x02c_im = t02_im + t10_im;
                            float x10e_im = t02_im - t10_im;
                            float x04c_re = t04_re + t12_re;
                            float x12c_re = t04_re - t12_re;
                            float x04c_im = t04_im + t12_im;
                            float x12c_im = t04_im - t12_im;
                            float x06c_re = t06_re + t14_re;
                            float x14e_re = t06_re - t14_re;
                            float x06c_im = t06_im + t14_im;
                            float x14e_im = t06_im - t14_im;
                            float x00b_re = x00c_re + x04c_re;
                            float x04b_re = x00c_re - x04c_re;
                            float x02b_re = x02c_re + x06c_re;
                            float x06b_re = x02c_re - x06c_re;
                            float x00a_re = x00b_re + x02b_re;
                            float x02a_re = x00b_re - x02b_re;
                            float x00b_im = x00c_im + x04c_im;
                            float x04b_im = x00c_im - x04c_im;
                            float x02b_im = x02c_im + x06c_im;
                            float x06b_im = x02c_im - x06c_im;
                            float x00a_im = x00b_im + x02b_im;
                            float x02a_im = x00b_im - x02b_im;
                            float x01c_re = t01_re + t09_re;
                            float x09f_re = t01_re - t09_re;
                            float x01c_im = t01_im + t09_im;
                            float x09f_im = t01_im - t09_im;
                            float x03c_re = t03_re + t11_re;
                            float x11f_re = t03_re - t11_re;
                            float x03c_im = t03_im + t11_im;
                            float x11f_im = t03_im - t11_im;
                            float x05e_re = t05_re + t13_re;
                            float x13e_re = t05_re - t13_re;
                            float x05e_im = t05_im + t13_im;
                            float x13e_im = t05_im - t13_im;
                            float x07e_re = t07_re + t15_re;
                            float x15f_re = t07_re - t15_re;
                            float x07e_im = t07_im + t15_im;
                            float x15f_im = t07_im - t15_im;
                            float x01b_re = x01c_re + x05e_re;
                            float x05d_re = x01c_re - x05e_re;
                            float x03b_re = x03c_re + x07e_re;
                            float x07d_re = x03c_re - x07e_re;
                            float x01a_re = x01b_re + x03b_re;
                            float x03a_re = x01b_re - x03b_re;
                            float x01b_im = x01c_im + x05e_im;
                            float x05d_im = x01c_im - x05e_im;
                            float x03b_im = x03c_im + x07e_im;
                            float x07d_im = x03c_im - x07e_im;
                            float x01a_im = x01b_im + x03b_im;
                            float x03a_im = x01b_im - x03b_im;
                            out[kk * ispan + 0 * ospan + ll] = x00a_re + x01a_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00a_im + x01a_im;
                            out[kk * ispan + 8 * ospan + ll] = x00a_re - x01a_re;
                            out[kk * ispan + 8 * ospan + ll + 1] = x00a_im - x01a_im;
                            out[kk * ispan + 4 * ospan + ll] = x02a_re + x03a_im;
                            out[kk * ispan + 4 * ospan + ll + 1] = x02a_im - x03a_re;
                            out[kk * ispan + 12 * ospan + ll] = x02a_re - x03a_im;
                            out[kk * ispan + 12 * ospan + ll + 1] = x02a_im + x03a_re;
                            float x07c_re = x05d_re + x07d_re;
                            float x05c_re = x05d_re - x07d_re;
                            float x05b_re = 0.70710677f * x05c_re;
                            float x04a_re = x04b_re + x05b_re;
                            float x05a_re = x04b_re - x05b_re;
                            float x07b_re = 0.70710677f * x07c_re;
                            float x06a_im = x06b_re + x07b_re;
                            float x07a_im = x06b_re - x07b_re;
                            float x07c_im = x05d_im + x07d_im;
                            float x05c_im = x05d_im - x07d_im;
                            float x05b_im = 0.70710677f * x05c_im;
                            float x04a_im = x04b_im + x05b_im;
                            float x05a_im = x04b_im - x05b_im;
                            float x07b_im = 0.70710677f * x07c_im;
                            float x06a_re = x06b_im + x07b_im;
                            float x07a_re = x06b_im - x07b_im;
                            out[kk * ispan + 2 * ospan + ll] = x04a_re + x06a_re;
                            out[kk * ispan + 2 * ospan + ll + 1] = x04a_im - x06a_im;
                            out[kk * ispan + 14 * ospan + ll] = x04a_re - x06a_re;
                            out[kk * ispan + 14 * ospan + ll + 1] = x04a_im + x06a_im;
                            out[kk * ispan + 6 * ospan + ll] = x05a_re - x07a_re;
                            out[kk * ispan + 6 * ospan + ll + 1] = x05a_im + x07a_im;
                            out[kk * ispan + 10 * ospan + ll] = x05a_re + x07a_re;
                            out[kk * ispan + 10 * ospan + ll + 1] = x05a_im - x07a_im;
                            float x09e_re = x09f_re - x15f_re;
                            float x15e_re = x09f_re + x15f_re;
                            float x09e_im = x09f_im - x15f_im;
                            float x15e_im = x09f_im + x15f_im;
                            float x11e_re = x11f_re - x13e_re;
                            float x13d_re = x11f_re + x13e_re;
                            float x15d_re = 1.4142135f * x15e_re;
                            float x13c_re = (x13d_re -= x15e_re) + x15d_re;
                            float x15c_re = x13d_re - x15d_re;
                            float x11e_im = x11f_im - x13e_im;
                            float x13d_im = x11f_im + x13e_im;
                            float x15d_im = 1.4142135f * x15e_im;
                            float x13c_im = (x13d_im -= x15e_im) + x15d_im;
                            float x15c_im = x13d_im - x15d_im;
                            float x14d_re = x10e_re + x14e_re;
                            float x10d_re = x10e_re - x14e_re;
                            float x10c_re = 0.70710677f * x10d_re;
                            float x08b_re = x08c_re + x10c_re;
                            float x10b_re = x08c_re - x10c_re;
                            float x14c_re = 0.70710677f * x14d_re;
                            float x14d_im = x10e_im + x14e_im;
                            float x10d_im = x10e_im - x14e_im;
                            float x10c_im = 0.70710677f * x10d_im;
                            float x08b_im = x08c_im + x10c_im;
                            float x10b_im = x08c_im - x10c_im;
                            float x14c_im = 0.70710677f * x14d_im;
                            float x12b_im = -x12c_re - x14c_re;
                            float x14b_im = -x12c_re + x14c_re;
                            float x12b_re = x12c_im + x14c_im;
                            float x14b_re = x12c_im - x14c_im;
                            float x09d_re = x09e_re - x11e_re;
                            float x11d_re = 1.4142135f * x11e_re;
                            float x09c_re = x09d_re + x11d_re;
                            float x11c_re = x09d_re - x11d_re;
                            float x09b_re = 0.9238795f * x09c_re;
                            float x08a_re = x08b_re + x09b_re;
                            float x09a_re = x08b_re - x09b_re;
                            float x09d_im = x09e_im - x11e_im;
                            float x11d_im = 1.4142135f * x11e_im;
                            float x09c_im = x09d_im + x11d_im;
                            float x11c_im = x09d_im - x11d_im;
                            float x09b_im = 0.9238795f * x09c_im;
                            float x08a_im = x08b_im + x09b_im;
                            float x09a_im = x08b_im - x09b_im;
                            float x13b_im = 0.9238795f * x13c_im;
                            float x12a_re = x12b_re + x13b_im;
                            float x13a_re = x12b_re - x13b_im;
                            float x13b_re = 0.9238795f * x13c_re;
                            float x12a_im = x12b_im - x13b_re;
                            float x13a_im = x12b_im + x13b_re;
                            out[kk * ispan + 1 * ospan + ll] = x08a_re + x12a_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x08a_im + x12a_im;
                            out[kk * ispan + 15 * ospan + ll] = x08a_re - x12a_re;
                            out[kk * ispan + 15 * ospan + ll + 1] = x08a_im - x12a_im;
                            out[kk * ispan + 7 * ospan + ll] = x09a_re - x13a_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x09a_im - x13a_im;
                            out[kk * ispan + 9 * ospan + ll] = x09a_re + x13a_re;
                            out[kk * ispan + 9 * ospan + ll + 1] = x09a_im + x13a_im;
                            float x11b_re = 0.38268343f * x11c_re;
                            float x10a_re = x10b_re + x11b_re;
                            float x11a_re = x10b_re - x11b_re;
                            float x11b_im = 0.38268343f * x11c_im;
                            float x10a_im = x10b_im + x11b_im;
                            float x11a_im = x10b_im - x11b_im;
                            float x15b_im = 0.38268343f * x15c_im;
                            float x14a_re = x14b_re + x15b_im;
                            float x15a_re = x14b_re - x15b_im;
                            float x15b_re = 0.38268343f * x15c_re;
                            float x14a_im = x14b_im - x15b_re;
                            float x15a_im = x14b_im + x15b_re;
                            out[kk * ispan + 3 * ospan + ll] = x10a_re - x14a_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x10a_im - x14a_im;
                            out[kk * ispan + 13 * ospan + ll] = x10a_re + x14a_re;
                            out[kk * ispan + 13 * ospan + ll + 1] = x10a_im + x14a_im;
                            out[kk * ispan + 5 * ospan + ll] = x11a_re + x15a_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x11a_im + x15a_im;
                            out[kk * ispan + 11 * ospan + ll] = x11a_re - x15a_re;
                            out[kk * ispan + 11 * ospan + ll + 1] = x11a_im - x15a_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float t08_re = in[16 * ispan + ll];
                        float t08_im = in[16 * ispan + ll + 1];
                        float t09_re = in[18 * ispan + ll];
                        float t09_im = in[18 * ispan + ll + 1];
                        float t10_re = in[20 * ispan + ll];
                        float t10_im = in[20 * ispan + ll + 1];
                        float t11_re = in[22 * ispan + ll];
                        float t11_im = in[22 * ispan + ll + 1];
                        float t12_re = in[24 * ispan + ll];
                        float t12_im = in[24 * ispan + ll + 1];
                        float t13_re = in[26 * ispan + ll];
                        float t13_im = in[26 * ispan + ll + 1];
                        float t14_re = in[28 * ispan + ll];
                        float t14_im = in[28 * ispan + ll + 1];
                        float t15_re = in[30 * ispan + ll];
                        float t15_im = in[30 * ispan + ll + 1];
                        float x00c_re = t00_re + t08_re;
                        float x08c_re = t00_re - t08_re;
                        float x00c_im = t00_im + t08_im;
                        float x08c_im = t00_im - t08_im;
                        float x02c_re = t02_re + t10_re;
                        float x10e_re = t02_re - t10_re;
                        float x02c_im = t02_im + t10_im;
                        float x10e_im = t02_im - t10_im;
                        float x04c_re = t04_re + t12_re;
                        float x12c_re = t04_re - t12_re;
                        float x04c_im = t04_im + t12_im;
                        float x12c_im = t04_im - t12_im;
                        float x06c_re = t06_re + t14_re;
                        float x14e_re = t06_re - t14_re;
                        float x06c_im = t06_im + t14_im;
                        float x14e_im = t06_im - t14_im;
                        float x00b_re = x00c_re + x04c_re;
                        float x04b_re = x00c_re - x04c_re;
                        float x02b_re = x02c_re + x06c_re;
                        float x06b_re = x02c_re - x06c_re;
                        float x00a_re = x00b_re + x02b_re;
                        float x02a_re = x00b_re - x02b_re;
                        float x00b_im = x00c_im + x04c_im;
                        float x04b_im = x00c_im - x04c_im;
                        float x02b_im = x02c_im + x06c_im;
                        float x06b_im = x02c_im - x06c_im;
                        float x00a_im = x00b_im + x02b_im;
                        float x02a_im = x00b_im - x02b_im;
                        float x01c_re = t01_re + t09_re;
                        float x09f_re = t01_re - t09_re;
                        float x01c_im = t01_im + t09_im;
                        float x09f_im = t01_im - t09_im;
                        float x03c_re = t03_re + t11_re;
                        float x11f_re = t03_re - t11_re;
                        float x03c_im = t03_im + t11_im;
                        float x11f_im = t03_im - t11_im;
                        float x05e_re = t05_re + t13_re;
                        float x13e_re = t05_re - t13_re;
                        float x05e_im = t05_im + t13_im;
                        float x13e_im = t05_im - t13_im;
                        float x07e_re = t07_re + t15_re;
                        float x15f_re = t07_re - t15_re;
                        float x07e_im = t07_im + t15_im;
                        float x15f_im = t07_im - t15_im;
                        float x01b_re = x01c_re + x05e_re;
                        float x05d_re = x01c_re - x05e_re;
                        float x03b_re = x03c_re + x07e_re;
                        float x07d_re = x03c_re - x07e_re;
                        float x01a_re = x01b_re + x03b_re;
                        float x03a_re = x01b_re - x03b_re;
                        float x01b_im = x01c_im + x05e_im;
                        float x05d_im = x01c_im - x05e_im;
                        float x03b_im = x03c_im + x07e_im;
                        float x07d_im = x03c_im - x07e_im;
                        float x01a_im = x01b_im + x03b_im;
                        float x03a_im = x01b_im - x03b_im;
                        out[0 * ospan + ll] = x00a_re + x01a_re;
                        out[0 * ospan + ll + 1] = x00a_im + x01a_im;
                        out[8 * ospan + ll] = x00a_re - x01a_re;
                        out[8 * ospan + ll + 1] = x00a_im - x01a_im;
                        out[4 * ospan + ll] = x02a_re + x03a_im;
                        out[4 * ospan + ll + 1] = x02a_im - x03a_re;
                        out[12 * ospan + ll] = x02a_re - x03a_im;
                        out[12 * ospan + ll + 1] = x02a_im + x03a_re;
                        float x07c_re = x05d_re + x07d_re;
                        float x05c_re = x05d_re - x07d_re;
                        float x05b_re = 0.70710677f * x05c_re;
                        float x04a_re = x04b_re + x05b_re;
                        float x05a_re = x04b_re - x05b_re;
                        float x07b_re = 0.70710677f * x07c_re;
                        float x06a_im = x06b_re + x07b_re;
                        float x07a_im = x06b_re - x07b_re;
                        float x07c_im = x05d_im + x07d_im;
                        float x05c_im = x05d_im - x07d_im;
                        float x05b_im = 0.70710677f * x05c_im;
                        float x04a_im = x04b_im + x05b_im;
                        float x05a_im = x04b_im - x05b_im;
                        float x07b_im = 0.70710677f * x07c_im;
                        float x06a_re = x06b_im + x07b_im;
                        float x07a_re = x06b_im - x07b_im;
                        out[2 * ospan + ll] = x04a_re + x06a_re;
                        out[2 * ospan + ll + 1] = x04a_im - x06a_im;
                        out[14 * ospan + ll] = x04a_re - x06a_re;
                        out[14 * ospan + ll + 1] = x04a_im + x06a_im;
                        out[6 * ospan + ll] = x05a_re - x07a_re;
                        out[6 * ospan + ll + 1] = x05a_im + x07a_im;
                        out[10 * ospan + ll] = x05a_re + x07a_re;
                        out[10 * ospan + ll + 1] = x05a_im - x07a_im;
                        float x09e_re = x09f_re - x15f_re;
                        float x15e_re = x09f_re + x15f_re;
                        float x09e_im = x09f_im - x15f_im;
                        float x15e_im = x09f_im + x15f_im;
                        float x11e_re = x11f_re - x13e_re;
                        float x13d_re = x11f_re + x13e_re;
                        float x15d_re = 1.4142135f * x15e_re;
                        float x13c_re = (x13d_re -= x15e_re) + x15d_re;
                        float x15c_re = x13d_re - x15d_re;
                        float x11e_im = x11f_im - x13e_im;
                        float x13d_im = x11f_im + x13e_im;
                        float x15d_im = 1.4142135f * x15e_im;
                        float x13c_im = (x13d_im -= x15e_im) + x15d_im;
                        float x15c_im = x13d_im - x15d_im;
                        float x14d_re = x10e_re + x14e_re;
                        float x10d_re = x10e_re - x14e_re;
                        float x10c_re = 0.70710677f * x10d_re;
                        float x08b_re = x08c_re + x10c_re;
                        float x10b_re = x08c_re - x10c_re;
                        float x14c_re = 0.70710677f * x14d_re;
                        float x14d_im = x10e_im + x14e_im;
                        float x10d_im = x10e_im - x14e_im;
                        float x10c_im = 0.70710677f * x10d_im;
                        float x08b_im = x08c_im + x10c_im;
                        float x10b_im = x08c_im - x10c_im;
                        float x14c_im = 0.70710677f * x14d_im;
                        float x12b_im = -x12c_re - x14c_re;
                        float x14b_im = -x12c_re + x14c_re;
                        float x12b_re = x12c_im + x14c_im;
                        float x14b_re = x12c_im - x14c_im;
                        float x09d_re = x09e_re - x11e_re;
                        float x11d_re = 1.4142135f * x11e_re;
                        float x09c_re = x09d_re + x11d_re;
                        float x11c_re = x09d_re - x11d_re;
                        float x09b_re = 0.9238795f * x09c_re;
                        float x08a_re = x08b_re + x09b_re;
                        float x09a_re = x08b_re - x09b_re;
                        float x09d_im = x09e_im - x11e_im;
                        float x11d_im = 1.4142135f * x11e_im;
                        float x09c_im = x09d_im + x11d_im;
                        float x11c_im = x09d_im - x11d_im;
                        float x09b_im = 0.9238795f * x09c_im;
                        float x08a_im = x08b_im + x09b_im;
                        float x09a_im = x08b_im - x09b_im;
                        float x13b_im = 0.9238795f * x13c_im;
                        float x12a_re = x12b_re + x13b_im;
                        float x13a_re = x12b_re - x13b_im;
                        float x13b_re = 0.9238795f * x13c_re;
                        float x12a_im = x12b_im - x13b_re;
                        float x13a_im = x12b_im + x13b_re;
                        out[1 * ospan + ll] = x08a_re + x12a_re;
                        out[1 * ospan + ll + 1] = x08a_im + x12a_im;
                        out[15 * ospan + ll] = x08a_re - x12a_re;
                        out[15 * ospan + ll + 1] = x08a_im - x12a_im;
                        out[7 * ospan + ll] = x09a_re - x13a_re;
                        out[7 * ospan + ll + 1] = x09a_im - x13a_im;
                        out[9 * ospan + ll] = x09a_re + x13a_re;
                        out[9 * ospan + ll + 1] = x09a_im + x13a_im;
                        float x11b_re = 0.38268343f * x11c_re;
                        float x10a_re = x10b_re + x11b_re;
                        float x11a_re = x10b_re - x11b_re;
                        float x11b_im = 0.38268343f * x11c_im;
                        float x10a_im = x10b_im + x11b_im;
                        float x11a_im = x10b_im - x11b_im;
                        float x15b_im = 0.38268343f * x15c_im;
                        float x14a_re = x14b_re + x15b_im;
                        float x15a_re = x14b_re - x15b_im;
                        float x15b_re = 0.38268343f * x15c_re;
                        float x14a_im = x14b_im - x15b_re;
                        float x15a_im = x14b_im + x15b_re;
                        out[3 * ospan + ll] = x10a_re - x14a_re;
                        out[3 * ospan + ll + 1] = x10a_im - x14a_im;
                        out[13 * ospan + ll] = x10a_re + x14a_re;
                        out[13 * ospan + ll + 1] = x10a_im + x14a_im;
                        out[5 * ospan + ll] = x11a_re + x15a_re;
                        out[5 * ospan + ll + 1] = x11a_im + x15a_im;
                        out[11 * ospan + ll] = x11a_re - x15a_re;
                        out[11 * ospan + ll + 1] = x11a_im - x15a_im;
                    }
                }
                break;
            }
            default: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[1 * kk * ispan];
                        float o00_im = twids[1 * kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        float o07_re = twids[8 * kk * ispan];
                        float o07_im = twids[8 * kk * ispan + 1];
                        float o08_re = twids[9 * kk * ispan];
                        float o08_im = twids[9 * kk * ispan + 1];
                        float o09_re = twids[10 * kk * ispan];
                        float o09_im = twids[10 * kk * ispan + 1];
                        float o10_re = twids[11 * kk * ispan];
                        float o10_im = twids[11 * kk * ispan + 1];
                        float o11_re = twids[12 * kk * ispan];
                        float o11_im = twids[12 * kk * ispan + 1];
                        float o12_re = twids[13 * kk * ispan];
                        float o12_im = twids[13 * kk * ispan + 1];
                        float o13_re = twids[14 * kk * ispan];
                        float o13_im = twids[14 * kk * ispan + 1];
                        float o14_re = twids[15 * kk * ispan];
                        float o14_im = twids[15 * kk * ispan + 1];
                        int istep = kk * 16;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float t01_re = o00_re * in[ispan * (istep + 2) + ll] - o00_im * in[ispan * (istep + 2) + ll + 1];
                            float t01_im = o00_re * in[ispan * (istep + 2) + ll + 1] + o00_im * in[ispan * (istep + 2) + ll];
                            float t02_re = o01_re * in[ispan * (istep + 4) + ll] - o01_im * in[ispan * (istep + 4) + ll + 1];
                            float t02_im = o01_re * in[ispan * (istep + 4) + ll + 1] + o01_im * in[ispan * (istep + 4) + ll];
                            float t03_re = o02_re * in[ispan * (istep + 6) + ll] - o02_im * in[ispan * (istep + 6) + ll + 1];
                            float t03_im = o02_re * in[ispan * (istep + 6) + ll + 1] + o02_im * in[ispan * (istep + 6) + ll];
                            float t04_re = o03_re * in[ispan * (istep + 8) + ll] - o03_im * in[ispan * (istep + 8) + ll + 1];
                            float t04_im = o03_re * in[ispan * (istep + 8) + ll + 1] + o03_im * in[ispan * (istep + 8) + ll];
                            float t05_re = o04_re * in[ispan * (istep + 10) + ll] - o04_im * in[ispan * (istep + 10) + ll + 1];
                            float t05_im = o04_re * in[ispan * (istep + 10) + ll + 1] + o04_im * in[ispan * (istep + 10) + ll];
                            float t06_re = o05_re * in[ispan * (istep + 12) + ll] - o05_im * in[ispan * (istep + 12) + ll + 1];
                            float t06_im = o05_re * in[ispan * (istep + 12) + ll + 1] + o05_im * in[ispan * (istep + 12) + ll];
                            float t07_re = o06_re * in[ispan * (istep + 14) + ll] - o06_im * in[ispan * (istep + 14) + ll + 1];
                            float t07_im = o06_re * in[ispan * (istep + 14) + ll + 1] + o06_im * in[ispan * (istep + 14) + ll];
                            float t08_re = o07_re * in[ispan * (istep + 16) + ll] - o07_im * in[ispan * (istep + 16) + ll + 1];
                            float t08_im = o07_re * in[ispan * (istep + 16) + ll + 1] + o07_im * in[ispan * (istep + 16) + ll];
                            float t09_re = o08_re * in[ispan * (istep + 18) + ll] - o08_im * in[ispan * (istep + 18) + ll + 1];
                            float t09_im = o08_re * in[ispan * (istep + 18) + ll + 1] + o08_im * in[ispan * (istep + 18) + ll];
                            float t10_re = o09_re * in[ispan * (istep + 20) + ll] - o09_im * in[ispan * (istep + 20) + ll + 1];
                            float t10_im = o09_re * in[ispan * (istep + 20) + ll + 1] + o09_im * in[ispan * (istep + 20) + ll];
                            float t11_re = o10_re * in[ispan * (istep + 22) + ll] - o10_im * in[ispan * (istep + 22) + ll + 1];
                            float t11_im = o10_re * in[ispan * (istep + 22) + ll + 1] + o10_im * in[ispan * (istep + 22) + ll];
                            float t12_re = o11_re * in[ispan * (istep + 24) + ll] - o11_im * in[ispan * (istep + 24) + ll + 1];
                            float t12_im = o11_re * in[ispan * (istep + 24) + ll + 1] + o11_im * in[ispan * (istep + 24) + ll];
                            float t13_re = o12_re * in[ispan * (istep + 26) + ll] - o12_im * in[ispan * (istep + 26) + ll + 1];
                            float t13_im = o12_re * in[ispan * (istep + 26) + ll + 1] + o12_im * in[ispan * (istep + 26) + ll];
                            float t14_re = o13_re * in[ispan * (istep + 28) + ll] - o13_im * in[ispan * (istep + 28) + ll + 1];
                            float t14_im = o13_re * in[ispan * (istep + 28) + ll + 1] + o13_im * in[ispan * (istep + 28) + ll];
                            float t15_re = o14_re * in[ispan * (istep + 30) + ll] - o14_im * in[ispan * (istep + 30) + ll + 1];
                            float t15_im = o14_re * in[ispan * (istep + 30) + ll + 1] + o14_im * in[ispan * (istep + 30) + ll];
                            float x00c_re = t00_re + t08_re;
                            float x08c_re = t00_re - t08_re;
                            float x00c_im = t00_im + t08_im;
                            float x08c_im = t00_im - t08_im;
                            float x02c_re = t02_re + t10_re;
                            float x10e_re = t02_re - t10_re;
                            float x02c_im = t02_im + t10_im;
                            float x10e_im = t02_im - t10_im;
                            float x04c_re = t04_re + t12_re;
                            float x12c_re = t04_re - t12_re;
                            float x04c_im = t04_im + t12_im;
                            float x12c_im = t04_im - t12_im;
                            float x06c_re = t06_re + t14_re;
                            float x14e_re = t06_re - t14_re;
                            float x06c_im = t06_im + t14_im;
                            float x14e_im = t06_im - t14_im;
                            float x00b_re = x00c_re + x04c_re;
                            float x04b_re = x00c_re - x04c_re;
                            float x02b_re = x02c_re + x06c_re;
                            float x06b_re = x02c_re - x06c_re;
                            float x00a_re = x00b_re + x02b_re;
                            float x02a_re = x00b_re - x02b_re;
                            float x00b_im = x00c_im + x04c_im;
                            float x04b_im = x00c_im - x04c_im;
                            float x02b_im = x02c_im + x06c_im;
                            float x06b_im = x02c_im - x06c_im;
                            float x00a_im = x00b_im + x02b_im;
                            float x02a_im = x00b_im - x02b_im;
                            float x01c_re = t01_re + t09_re;
                            float x09f_re = t01_re - t09_re;
                            float x01c_im = t01_im + t09_im;
                            float x09f_im = t01_im - t09_im;
                            float x03c_re = t03_re + t11_re;
                            float x11f_re = t03_re - t11_re;
                            float x03c_im = t03_im + t11_im;
                            float x11f_im = t03_im - t11_im;
                            float x05e_re = t05_re + t13_re;
                            float x13e_re = t05_re - t13_re;
                            float x05e_im = t05_im + t13_im;
                            float x13e_im = t05_im - t13_im;
                            float x07e_re = t07_re + t15_re;
                            float x15f_re = t07_re - t15_re;
                            float x07e_im = t07_im + t15_im;
                            float x15f_im = t07_im - t15_im;
                            float x01b_re = x01c_re + x05e_re;
                            float x05d_re = x01c_re - x05e_re;
                            float x03b_re = x03c_re + x07e_re;
                            float x07d_re = x03c_re - x07e_re;
                            float x01a_re = x01b_re + x03b_re;
                            float x03a_re = x01b_re - x03b_re;
                            float x01b_im = x01c_im + x05e_im;
                            float x05d_im = x01c_im - x05e_im;
                            float x03b_im = x03c_im + x07e_im;
                            float x07d_im = x03c_im - x07e_im;
                            float x01a_im = x01b_im + x03b_im;
                            float x03a_im = x01b_im - x03b_im;
                            out[kk * ispan + 0 * ospan + ll] = x00a_re + x01a_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00a_im + x01a_im;
                            out[kk * ispan + 8 * ospan + ll] = x00a_re - x01a_re;
                            out[kk * ispan + 8 * ospan + ll + 1] = x00a_im - x01a_im;
                            out[kk * ispan + 4 * ospan + ll] = x02a_re - x03a_im;
                            out[kk * ispan + 4 * ospan + ll + 1] = x02a_im + x03a_re;
                            out[kk * ispan + 12 * ospan + ll] = x02a_re + x03a_im;
                            out[kk * ispan + 12 * ospan + ll + 1] = x02a_im - x03a_re;
                            float x07c_re = x05d_re + x07d_re;
                            float x05c_re = x05d_re - x07d_re;
                            float x05b_re = 0.70710677f * x05c_re;
                            float x04a_re = x04b_re + x05b_re;
                            float x05a_re = x04b_re - x05b_re;
                            float x07b_re = 0.70710677f * x07c_re;
                            float x06a_im = x06b_re + x07b_re;
                            float x07a_im = x06b_re - x07b_re;
                            float x07c_im = x05d_im + x07d_im;
                            float x05c_im = x05d_im - x07d_im;
                            float x05b_im = 0.70710677f * x05c_im;
                            float x04a_im = x04b_im + x05b_im;
                            float x05a_im = x04b_im - x05b_im;
                            float x07b_im = 0.70710677f * x07c_im;
                            float x06a_re = x06b_im + x07b_im;
                            float x07a_re = x06b_im - x07b_im;
                            out[kk * ispan + 2 * ospan + ll] = x04a_re - x06a_re;
                            out[kk * ispan + 2 * ospan + ll + 1] = x04a_im + x06a_im;
                            out[kk * ispan + 14 * ospan + ll] = x04a_re + x06a_re;
                            out[kk * ispan + 14 * ospan + ll + 1] = x04a_im - x06a_im;
                            out[kk * ispan + 6 * ospan + ll] = x05a_re + x07a_re;
                            out[kk * ispan + 6 * ospan + ll + 1] = x05a_im - x07a_im;
                            out[kk * ispan + 10 * ospan + ll] = x05a_re - x07a_re;
                            out[kk * ispan + 10 * ospan + ll + 1] = x05a_im + x07a_im;
                            float x09e_re = x09f_re - x15f_re;
                            float x15e_re = x09f_re + x15f_re;
                            float x09e_im = x09f_im - x15f_im;
                            float x15e_im = x09f_im + x15f_im;
                            float x11e_re = x11f_re - x13e_re;
                            float x13d_re = x11f_re + x13e_re;
                            float x15d_re = 1.4142135f * x15e_re;
                            float x13c_re = (x13d_re -= x15e_re) + x15d_re;
                            float x15c_re = x13d_re - x15d_re;
                            float x11e_im = x11f_im - x13e_im;
                            float x13d_im = x11f_im + x13e_im;
                            float x15d_im = 1.4142135f * x15e_im;
                            float x13c_im = (x13d_im -= x15e_im) + x15d_im;
                            float x15c_im = x13d_im - x15d_im;
                            float x14d_re = x10e_re + x14e_re;
                            float x10d_re = x10e_re - x14e_re;
                            float x10c_re = 0.70710677f * x10d_re;
                            float x08b_re = x08c_re + x10c_re;
                            float x10b_re = x08c_re - x10c_re;
                            float x14c_re = 0.70710677f * x14d_re;
                            float x14d_im = x10e_im + x14e_im;
                            float x10d_im = x10e_im - x14e_im;
                            float x10c_im = 0.70710677f * x10d_im;
                            float x08b_im = x08c_im + x10c_im;
                            float x10b_im = x08c_im - x10c_im;
                            float x14c_im = 0.70710677f * x14d_im;
                            float x12b_im = -x12c_re - x14c_re;
                            float x14b_im = -x12c_re + x14c_re;
                            float x12b_re = x12c_im + x14c_im;
                            float x14b_re = x12c_im - x14c_im;
                            float x09d_re = x09e_re - x11e_re;
                            float x11d_re = 1.4142135f * x11e_re;
                            float x09c_re = x09d_re + x11d_re;
                            float x11c_re = x09d_re - x11d_re;
                            float x09b_re = 0.9238795f * x09c_re;
                            float x08a_re = x08b_re + x09b_re;
                            float x09a_re = x08b_re - x09b_re;
                            float x09d_im = x09e_im - x11e_im;
                            float x11d_im = 1.4142135f * x11e_im;
                            float x09c_im = x09d_im + x11d_im;
                            float x11c_im = x09d_im - x11d_im;
                            float x09b_im = 0.9238795f * x09c_im;
                            float x08a_im = x08b_im + x09b_im;
                            float x09a_im = x08b_im - x09b_im;
                            float x13b_im = 0.9238795f * x13c_im;
                            float x12a_re = x12b_re + x13b_im;
                            float x13a_re = x12b_re - x13b_im;
                            float x13b_re = 0.9238795f * x13c_re;
                            float x12a_im = x12b_im - x13b_re;
                            float x13a_im = x12b_im + x13b_re;
                            out[kk * ispan + 1 * ospan + ll] = x08a_re - x12a_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x08a_im - x12a_im;
                            out[kk * ispan + 15 * ospan + ll] = x08a_re + x12a_re;
                            out[kk * ispan + 15 * ospan + ll + 1] = x08a_im + x12a_im;
                            out[kk * ispan + 7 * ospan + ll] = x09a_re + x13a_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x09a_im + x13a_im;
                            out[kk * ispan + 9 * ospan + ll] = x09a_re - x13a_re;
                            out[kk * ispan + 9 * ospan + ll + 1] = x09a_im - x13a_im;
                            float x11b_re = 0.38268343f * x11c_re;
                            float x10a_re = x10b_re + x11b_re;
                            float x11a_re = x10b_re - x11b_re;
                            float x11b_im = 0.38268343f * x11c_im;
                            float x10a_im = x10b_im + x11b_im;
                            float x11a_im = x10b_im - x11b_im;
                            float x15b_im = 0.38268343f * x15c_im;
                            float x14a_re = x14b_re + x15b_im;
                            float x15a_re = x14b_re - x15b_im;
                            float x15b_re = 0.38268343f * x15c_re;
                            float x14a_im = x14b_im - x15b_re;
                            float x15a_im = x14b_im + x15b_re;
                            out[kk * ispan + 3 * ospan + ll] = x10a_re + x14a_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x10a_im + x14a_im;
                            out[kk * ispan + 13 * ospan + ll] = x10a_re - x14a_re;
                            out[kk * ispan + 13 * ospan + ll + 1] = x10a_im - x14a_im;
                            out[kk * ispan + 5 * ospan + ll] = x11a_re - x15a_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x11a_im - x15a_im;
                            out[kk * ispan + 11 * ospan + ll] = x11a_re + x15a_re;
                            out[kk * ispan + 11 * ospan + ll + 1] = x11a_im + x15a_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float t08_re = in[16 * ispan + ll];
                        float t08_im = in[16 * ispan + ll + 1];
                        float t09_re = in[18 * ispan + ll];
                        float t09_im = in[18 * ispan + ll + 1];
                        float t10_re = in[20 * ispan + ll];
                        float t10_im = in[20 * ispan + ll + 1];
                        float t11_re = in[22 * ispan + ll];
                        float t11_im = in[22 * ispan + ll + 1];
                        float t12_re = in[24 * ispan + ll];
                        float t12_im = in[24 * ispan + ll + 1];
                        float t13_re = in[26 * ispan + ll];
                        float t13_im = in[26 * ispan + ll + 1];
                        float t14_re = in[28 * ispan + ll];
                        float t14_im = in[28 * ispan + ll + 1];
                        float t15_re = in[30 * ispan + ll];
                        float t15_im = in[30 * ispan + ll + 1];
                        float x00c_re = t00_re + t08_re;
                        float x08c_re = t00_re - t08_re;
                        float x00c_im = t00_im + t08_im;
                        float x08c_im = t00_im - t08_im;
                        float x02c_re = t02_re + t10_re;
                        float x10e_re = t02_re - t10_re;
                        float x02c_im = t02_im + t10_im;
                        float x10e_im = t02_im - t10_im;
                        float x04c_re = t04_re + t12_re;
                        float x12c_re = t04_re - t12_re;
                        float x04c_im = t04_im + t12_im;
                        float x12c_im = t04_im - t12_im;
                        float x06c_re = t06_re + t14_re;
                        float x14e_re = t06_re - t14_re;
                        float x06c_im = t06_im + t14_im;
                        float x14e_im = t06_im - t14_im;
                        float x00b_re = x00c_re + x04c_re;
                        float x04b_re = x00c_re - x04c_re;
                        float x02b_re = x02c_re + x06c_re;
                        float x06b_re = x02c_re - x06c_re;
                        float x00a_re = x00b_re + x02b_re;
                        float x02a_re = x00b_re - x02b_re;
                        float x00b_im = x00c_im + x04c_im;
                        float x04b_im = x00c_im - x04c_im;
                        float x02b_im = x02c_im + x06c_im;
                        float x06b_im = x02c_im - x06c_im;
                        float x00a_im = x00b_im + x02b_im;
                        float x02a_im = x00b_im - x02b_im;
                        float x01c_re = t01_re + t09_re;
                        float x09f_re = t01_re - t09_re;
                        float x01c_im = t01_im + t09_im;
                        float x09f_im = t01_im - t09_im;
                        float x03c_re = t03_re + t11_re;
                        float x11f_re = t03_re - t11_re;
                        float x03c_im = t03_im + t11_im;
                        float x11f_im = t03_im - t11_im;
                        float x05e_re = t05_re + t13_re;
                        float x13e_re = t05_re - t13_re;
                        float x05e_im = t05_im + t13_im;
                        float x13e_im = t05_im - t13_im;
                        float x07e_re = t07_re + t15_re;
                        float x15f_re = t07_re - t15_re;
                        float x07e_im = t07_im + t15_im;
                        float x15f_im = t07_im - t15_im;
                        float x01b_re = x01c_re + x05e_re;
                        float x05d_re = x01c_re - x05e_re;
                        float x03b_re = x03c_re + x07e_re;
                        float x07d_re = x03c_re - x07e_re;
                        float x01a_re = x01b_re + x03b_re;
                        float x03a_re = x01b_re - x03b_re;
                        float x01b_im = x01c_im + x05e_im;
                        float x05d_im = x01c_im - x05e_im;
                        float x03b_im = x03c_im + x07e_im;
                        float x07d_im = x03c_im - x07e_im;
                        float x01a_im = x01b_im + x03b_im;
                        float x03a_im = x01b_im - x03b_im;
                        out[0 * ospan + ll] = x00a_re + x01a_re;
                        out[0 * ospan + ll + 1] = x00a_im + x01a_im;
                        out[8 * ospan + ll] = x00a_re - x01a_re;
                        out[8 * ospan + ll + 1] = x00a_im - x01a_im;
                        out[4 * ospan + ll] = x02a_re - x03a_im;
                        out[4 * ospan + ll + 1] = x02a_im + x03a_re;
                        out[12 * ospan + ll] = x02a_re + x03a_im;
                        out[12 * ospan + ll + 1] = x02a_im - x03a_re;
                        float x07c_re = x05d_re + x07d_re;
                        float x05c_re = x05d_re - x07d_re;
                        float x05b_re = 0.70710677f * x05c_re;
                        float x04a_re = x04b_re + x05b_re;
                        float x05a_re = x04b_re - x05b_re;
                        float x07b_re = 0.70710677f * x07c_re;
                        float x06a_im = x06b_re + x07b_re;
                        float x07a_im = x06b_re - x07b_re;
                        float x07c_im = x05d_im + x07d_im;
                        float x05c_im = x05d_im - x07d_im;
                        float x05b_im = 0.70710677f * x05c_im;
                        float x04a_im = x04b_im + x05b_im;
                        float x05a_im = x04b_im - x05b_im;
                        float x07b_im = 0.70710677f * x07c_im;
                        float x06a_re = x06b_im + x07b_im;
                        float x07a_re = x06b_im - x07b_im;
                        out[2 * ospan + ll] = x04a_re - x06a_re;
                        out[2 * ospan + ll + 1] = x04a_im + x06a_im;
                        out[14 * ospan + ll] = x04a_re + x06a_re;
                        out[14 * ospan + ll + 1] = x04a_im - x06a_im;
                        out[6 * ospan + ll] = x05a_re + x07a_re;
                        out[6 * ospan + ll + 1] = x05a_im - x07a_im;
                        out[10 * ospan + ll] = x05a_re - x07a_re;
                        out[10 * ospan + ll + 1] = x05a_im + x07a_im;
                        float x09e_re = x09f_re - x15f_re;
                        float x15e_re = x09f_re + x15f_re;
                        float x09e_im = x09f_im - x15f_im;
                        float x15e_im = x09f_im + x15f_im;
                        float x11e_re = x11f_re - x13e_re;
                        float x13d_re = x11f_re + x13e_re;
                        float x15d_re = 1.4142135f * x15e_re;
                        float x13c_re = (x13d_re -= x15e_re) + x15d_re;
                        float x15c_re = x13d_re - x15d_re;
                        float x11e_im = x11f_im - x13e_im;
                        float x13d_im = x11f_im + x13e_im;
                        float x15d_im = 1.4142135f * x15e_im;
                        float x13c_im = (x13d_im -= x15e_im) + x15d_im;
                        float x15c_im = x13d_im - x15d_im;
                        float x14d_re = x10e_re + x14e_re;
                        float x10d_re = x10e_re - x14e_re;
                        float x10c_re = 0.70710677f * x10d_re;
                        float x08b_re = x08c_re + x10c_re;
                        float x10b_re = x08c_re - x10c_re;
                        float x14c_re = 0.70710677f * x14d_re;
                        float x14d_im = x10e_im + x14e_im;
                        float x10d_im = x10e_im - x14e_im;
                        float x10c_im = 0.70710677f * x10d_im;
                        float x08b_im = x08c_im + x10c_im;
                        float x10b_im = x08c_im - x10c_im;
                        float x14c_im = 0.70710677f * x14d_im;
                        float x12b_im = -x12c_re - x14c_re;
                        float x14b_im = -x12c_re + x14c_re;
                        float x12b_re = x12c_im + x14c_im;
                        float x14b_re = x12c_im - x14c_im;
                        float x09d_re = x09e_re - x11e_re;
                        float x11d_re = 1.4142135f * x11e_re;
                        float x09c_re = x09d_re + x11d_re;
                        float x11c_re = x09d_re - x11d_re;
                        float x09b_re = 0.9238795f * x09c_re;
                        float x08a_re = x08b_re + x09b_re;
                        float x09a_re = x08b_re - x09b_re;
                        float x09d_im = x09e_im - x11e_im;
                        float x11d_im = 1.4142135f * x11e_im;
                        float x09c_im = x09d_im + x11d_im;
                        float x11c_im = x09d_im - x11d_im;
                        float x09b_im = 0.9238795f * x09c_im;
                        float x08a_im = x08b_im + x09b_im;
                        float x09a_im = x08b_im - x09b_im;
                        float x13b_im = 0.9238795f * x13c_im;
                        float x12a_re = x12b_re + x13b_im;
                        float x13a_re = x12b_re - x13b_im;
                        float x13b_re = 0.9238795f * x13c_re;
                        float x12a_im = x12b_im - x13b_re;
                        float x13a_im = x12b_im + x13b_re;
                        out[1 * ospan + ll] = x08a_re - x12a_re;
                        out[1 * ospan + ll + 1] = x08a_im - x12a_im;
                        out[15 * ospan + ll] = x08a_re + x12a_re;
                        out[15 * ospan + ll + 1] = x08a_im + x12a_im;
                        out[7 * ospan + ll] = x09a_re + x13a_re;
                        out[7 * ospan + ll + 1] = x09a_im + x13a_im;
                        out[9 * ospan + ll] = x09a_re - x13a_re;
                        out[9 * ospan + ll + 1] = x09a_im - x13a_im;
                        float x11b_re = 0.38268343f * x11c_re;
                        float x10a_re = x10b_re + x11b_re;
                        float x11a_re = x10b_re - x11b_re;
                        float x11b_im = 0.38268343f * x11c_im;
                        float x10a_im = x10b_im + x11b_im;
                        float x11a_im = x10b_im - x11b_im;
                        float x15b_im = 0.38268343f * x15c_im;
                        float x14a_re = x14b_re + x15b_im;
                        float x15a_re = x14b_re - x15b_im;
                        float x15b_re = 0.38268343f * x15c_re;
                        float x14a_im = x14b_im - x15b_re;
                        float x15a_im = x14b_im + x15b_re;
                        out[3 * ospan + ll] = x10a_re + x14a_re;
                        out[3 * ospan + ll + 1] = x10a_im + x14a_im;
                        out[13 * ospan + ll] = x10a_re - x14a_re;
                        out[13 * ospan + ll + 1] = x10a_im - x14a_im;
                        out[5 * ospan + ll] = x11a_re - x15a_re;
                        out[5 * ospan + ll + 1] = x11a_im - x15a_im;
                        out[11 * ospan + ll] = x11a_re + x15a_re;
                        out[11 * ospan + ll + 1] = x11a_im + x15a_im;
                    }
                }
                break;
            }
        }
    }

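    /*
     * CFR decompiler note: an opcode count of 15257 triggered aggressive code reduction.
     * Override with --aggressivesizethreshold.
     * WARNING - void declaration: the locals declared with type `void` in this method are
     * decompiler output and are not valid Java as written.
     */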
    void radixpass_32(float[] in, float[] out, float[] twids, int size, int la, int dirflag) {
        if (size < 32 || la < 1) {
            return;
        }
        int ospan = 2 * size / 32;
        int ispan = la;
        int mm = ospan / ispan;
        switch (dirflag) {
            case 8: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[1 * kk * ispan];
                        float o00_im = twids[1 * kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        float o07_re = twids[8 * kk * ispan];
                        float o07_im = twids[8 * kk * ispan + 1];
                        float o08_re = twids[9 * kk * ispan];
                        float o08_im = twids[9 * kk * ispan + 1];
                        float o09_re = twids[10 * kk * ispan];
                        float o09_im = twids[10 * kk * ispan + 1];
                        float o10_re = twids[11 * kk * ispan];
                        float o10_im = twids[11 * kk * ispan + 1];
                        float o11_re = twids[12 * kk * ispan];
                        float o11_im = twids[12 * kk * ispan + 1];
                        float o12_re = twids[13 * kk * ispan];
                        float o12_im = twids[13 * kk * ispan + 1];
                        float o13_re = twids[14 * kk * ispan];
                        float o13_im = twids[14 * kk * ispan + 1];
                        float o14_re = twids[15 * kk * ispan];
                        float o14_im = twids[15 * kk * ispan + 1];
                        float o15_re = twids[16 * kk * ispan];
                        float o15_im = twids[16 * kk * ispan + 1];
                        float o16_re = twids[17 * kk * ispan];
                        float o16_im = twids[17 * kk * ispan + 1];
                        float o17_re = twids[18 * kk * ispan];
                        float o17_im = twids[18 * kk * ispan + 1];
                        float o18_re = twids[19 * kk * ispan];
                        float o18_im = twids[19 * kk * ispan + 1];
                        float o19_re = twids[20 * kk * ispan];
                        float o19_im = twids[20 * kk * ispan + 1];
                        float o20_re = twids[21 * kk * ispan];
                        float o20_im = twids[21 * kk * ispan + 1];
                        float o21_re = twids[22 * kk * ispan];
                        float o21_im = twids[22 * kk * ispan + 1];
                        float o22_re = twids[23 * kk * ispan];
                        float o22_im = twids[23 * kk * ispan + 1];
                        float o23_re = twids[24 * kk * ispan];
                        float o23_im = twids[24 * kk * ispan + 1];
                        float o24_re = twids[25 * kk * ispan];
                        float o24_im = twids[25 * kk * ispan + 1];
                        float o25_re = twids[26 * kk * ispan];
                        float o25_im = twids[26 * kk * ispan + 1];
                        float o26_re = twids[27 * kk * ispan];
                        float o26_im = twids[27 * kk * ispan + 1];
                        float o27_re = twids[28 * kk * ispan];
                        float o27_im = twids[28 * kk * ispan + 1];
                        float o28_re = twids[29 * kk * ispan];
                        float o28_im = twids[29 * kk * ispan + 1];
                        float o29_re = twids[30 * kk * ispan];
                        float o29_im = twids[30 * kk * ispan + 1];
                        float o30_re = twids[31 * kk * ispan];
                        float o30_im = twids[31 * kk * ispan + 1];
                        int istep = kk * 32;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            void x31a_im;
                            void x23a_im;
                            void x31a_re;
                            void x23a_re;
                            void x30a_im;
                            void x22a_im;
                            void x30a_re;
                            void x22a_re;
                            void x31b_re;
                            void x30b_im;
                            void x31c_re;
                            void x31b_im;
                            void x30b_re;
                            void x31c_im;
                            void x23b_im;
                            void x22b_im;
                            void x23c_im;
                            void x23b_re;
                            void x22b_re;
                            void x23c_re;
                            void x29a_im;
                            void x21a_im;
                            void x29a_re;
                            void x21a_re;
                            void x28a_im;
                            void x20a_im;
                            void x28a_re;
                            void x20a_re;
                            void x21b_im;
                            void x20b_im;
                            void x21c_im;
                            void x22c_im;
                            void x20c_im;
                            void x22d_im;
                            void x21b_re;
                            void x20b_re;
                            void x21c_re;
                            void x22c_re;
                            void x20c_re;
                            void x22d_re;
                            void x29b_re;
                            void x28b_im;
                            void x29c_re;
                            void x30c_re;
                            void x28c_im;
                            void x30d_re;
                            void x29b_im;
                            void x28b_re;
                            void x29c_im;
                            void x30c_im;
                            void x28c_re;
                            void x30d_im;
                            void x27a_im;
                            void x19a_im;
                            void x27a_re;
                            void x19a_re;
                            void x26a_im;
                            void x18a_im;
                            void x26a_re;
                            void x18a_re;
                            void x19b_im;
                            void x18b_im;
                            void x19c_im;
                            void x19b_re;
                            void x18b_re;
                            void x19c_re;
                            void x27b_re;
                            void x26b_im;
                            void x27c_re;
                            void x27b_im;
                            void x26b_re;
                            void x27c_im;
                            void x25a_im;
                            void x17a_im;
                            void x25a_re;
                            void x17a_re;
                            void x24a_im;
                            void x16a_im;
                            void x24a_re;
                            void x16a_re;
                            void x17b_im;
                            void x16b_im;
                            void x17c_im;
                            void x18c_im;
                            void x16c_im;
                            void x18d_im;
                            void x20d_im;
                            void x16z_im;
                            void x20e_im;
                            void x17b_re;
                            void x16b_re;
                            void x17c_re;
                            void x18c_re;
                            void x16c_re;
                            void x18d_re;
                            void x20d_re;
                            void x16z_re;
                            void x20e_re;
                            void x24b_im;
                            void x25b_re;
                            void x25c_re;
                            void x24c_im;
                            void x26c_re;
                            void x26d_re;
                            void x24z_re;
                            void x28d_re;
                            void x28e_re;
                            void x25b_im;
                            void x24b_re;
                            void x25c_im;
                            void x26c_im;
                            void x24c_re;
                            void x26d_im;
                            void x28d_im;
                            void x24z_im;
                            void x28e_im;
                            void x28z_im;
                            void x20z_im;
                            void x28z_re;
                            void x20z_re;
                            void x31d_im;
                            void x29d_im;
                            void x31e_im;
                            void x29e_im;
                            void x31d_re;
                            void x29d_re;
                            void x31e_re;
                            void x29e_re;
                            void x27d_im;
                            void x25d_im;
                            void x27e_im;
                            void x25e_im;
                            void x27d_re;
                            void x25d_re;
                            void x27e_re;
                            void x25e_re;
                            void x31f_im;
                            void x27f_im;
                            void x27g_im;
                            void x31f_re;
                            void x27f_re;
                            void x27g_re;
                            void x30e_im;
                            void x26e_im;
                            void x30f_im;
                            void x26f_im;
                            void x30e_re;
                            void x26e_re;
                            void x30f_re;
                            void x26f_re;
                            void x29f_im;
                            void x25f_im;
                            void x29g_im;
                            void x31g_im;
                            void x25g_im;
                            void x29f_re;
                            void x25f_re;
                            void x29g_re;
                            void x31g_re;
                            void x25g_re;
                            void x23d_im;
                            void x21d_im;
                            void x23e_im;
                            void x21e_im;
                            void x23d_re;
                            void x21d_re;
                            void x23e_re;
                            void x21e_re;
                            void x19d_im;
                            void x17d_im;
                            void x19e_im;
                            void x17e_im;
                            void x19d_re;
                            void x17d_re;
                            void x19e_re;
                            void x17e_re;
                            void x23f_im;
                            void x19f_im;
                            void x19g_im;
                            void x23f_re;
                            void x19f_re;
                            void x19g_re;
                            void x29z_re;
                            void x19z_re;
                            void x29z_im;
                            void x19z_im;
                            void x22e_im;
                            void x18e_im;
                            void x22f_im;
                            void x18f_im;
                            void x22e_re;
                            void x18e_re;
                            void x22f_re;
                            void x18f_re;
                            void x26z_im;
                            void x22z_im;
                            void x26z_re;
                            void x22z_re;
                            void x30z_im;
                            void x18z_im;
                            void x30z_re;
                            void x18z_re;
                            void x21f_im;
                            void x17f_im;
                            void x21g_im;
                            void x23g_im;
                            void x17g_im;
                            void x21f_re;
                            void x17f_re;
                            void x21g_re;
                            void x23g_re;
                            void x17g_re;
                            void x25z_im;
                            void x23z_im;
                            void x25z_re;
                            void x23z_re;
                            void x27z_im;
                            void x21z_im;
                            void x27z_re;
                            void x21z_re;
                            void x31z_im;
                            void x17z_im;
                            void x31z_re;
                            void x17z_re;
                            void x15a_im;
                            void x11a_im;
                            void x15a_re;
                            void x11a_re;
                            void x14a_im;
                            void x14a_re;
                            void x15b_re;
                            void x14b_im;
                            void x15c_re;
                            void x15b_im;
                            void x14b_re;
                            void x15c_im;
                            void x11b_im;
                            void x11c_im;
                            void x11b_re;
                            void x11c_re;
                            void x13a_im;
                            void x13a_re;
                            void x12a_im;
                            void x12a_re;
                            void x13b_re;
                            void x12b_im;
                            void x13c_re;
                            void x13b_im;
                            void x12b_re;
                            void x13c_im;
                            void x11d_im;
                            void x11e_im;
                            void x11d_re;
                            void x11e_re;
                            void x14c_im;
                            void x12c_im;
                            void x14c_re;
                            void x12c_re;
                            void x14d_im;
                            void x14e_im;
                            void x14d_re;
                            void x14e_re;
                            void x15d_im;
                            void x15e_im;
                            void x13e_im;
                            void x11f_im;
                            void x15d_re;
                            void x15e_re;
                            void x13e_re;
                            void x11f_re;
                            void x15f_im;
                            void x15f_re;
                            void x15z_im;
                            void x15z_re;
                            void x13z_im;
                            void x13z_re;
                            void x11z_im;
                            void x11z_re;
                            void x14z_im;
                            void x14z_re;
                            void x12z_im;
                            void x12z_re;
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float t01_re = o00_re * in[ispan * (istep + 2) + ll] + o00_im * in[ispan * (istep + 2) + ll + 1];
                            float t01_im = o00_re * in[ispan * (istep + 2) + ll + 1] - o00_im * in[ispan * (istep + 2) + ll];
                            float t02_re = o01_re * in[ispan * (istep + 4) + ll] + o01_im * in[ispan * (istep + 4) + ll + 1];
                            float t02_im = o01_re * in[ispan * (istep + 4) + ll + 1] - o01_im * in[ispan * (istep + 4) + ll];
                            float t03_re = o02_re * in[ispan * (istep + 6) + ll] + o02_im * in[ispan * (istep + 6) + ll + 1];
                            float t03_im = o02_re * in[ispan * (istep + 6) + ll + 1] - o02_im * in[ispan * (istep + 6) + ll];
                            float t04_re = o03_re * in[ispan * (istep + 8) + ll] + o03_im * in[ispan * (istep + 8) + ll + 1];
                            float t04_im = o03_re * in[ispan * (istep + 8) + ll + 1] - o03_im * in[ispan * (istep + 8) + ll];
                            float t05_re = o04_re * in[ispan * (istep + 10) + ll] + o04_im * in[ispan * (istep + 10) + ll + 1];
                            float t05_im = o04_re * in[ispan * (istep + 10) + ll + 1] - o04_im * in[ispan * (istep + 10) + ll];
                            float t06_re = o05_re * in[ispan * (istep + 12) + ll] + o05_im * in[ispan * (istep + 12) + ll + 1];
                            float t06_im = o05_re * in[ispan * (istep + 12) + ll + 1] - o05_im * in[ispan * (istep + 12) + ll];
                            float t07_re = o06_re * in[ispan * (istep + 14) + ll] + o06_im * in[ispan * (istep + 14) + ll + 1];
                            float t07_im = o06_re * in[ispan * (istep + 14) + ll + 1] - o06_im * in[ispan * (istep + 14) + ll];
                            float t08_re = o07_re * in[ispan * (istep + 16) + ll] + o07_im * in[ispan * (istep + 16) + ll + 1];
                            float t08_im = o07_re * in[ispan * (istep + 16) + ll + 1] - o07_im * in[ispan * (istep + 16) + ll];
                            float t09_re = o08_re * in[ispan * (istep + 18) + ll] + o08_im * in[ispan * (istep + 18) + ll + 1];
                            float t09_im = o08_re * in[ispan * (istep + 18) + ll + 1] - o08_im * in[ispan * (istep + 18) + ll];
                            float t10_re = o09_re * in[ispan * (istep + 20) + ll] + o09_im * in[ispan * (istep + 20) + ll + 1];
                            float t10_im = o09_re * in[ispan * (istep + 20) + ll + 1] - o09_im * in[ispan * (istep + 20) + ll];
                            float t11_re = o10_re * in[ispan * (istep + 22) + ll] + o10_im * in[ispan * (istep + 22) + ll + 1];
                            float t11_im = o10_re * in[ispan * (istep + 22) + ll + 1] - o10_im * in[ispan * (istep + 22) + ll];
                            float t12_re = o11_re * in[ispan * (istep + 24) + ll] + o11_im * in[ispan * (istep + 24) + ll + 1];
                            float t12_im = o11_re * in[ispan * (istep + 24) + ll + 1] - o11_im * in[ispan * (istep + 24) + ll];
                            float t13_re = o12_re * in[ispan * (istep + 26) + ll] + o12_im * in[ispan * (istep + 26) + ll + 1];
                            float t13_im = o12_re * in[ispan * (istep + 26) + ll + 1] - o12_im * in[ispan * (istep + 26) + ll];
                            float t14_re = o13_re * in[ispan * (istep + 28) + ll] + o13_im * in[ispan * (istep + 28) + ll + 1];
                            float t14_im = o13_re * in[ispan * (istep + 28) + ll + 1] - o13_im * in[ispan * (istep + 28) + ll];
                            float t15_re = o14_re * in[ispan * (istep + 30) + ll] + o14_im * in[ispan * (istep + 30) + ll + 1];
                            float t15_im = o14_re * in[ispan * (istep + 30) + ll + 1] - o14_im * in[ispan * (istep + 30) + ll];
                            float t16_re = o15_re * in[ispan * (istep + 32) + ll] + o15_im * in[ispan * (istep + 32) + ll + 1];
                            float t16_im = o15_re * in[ispan * (istep + 32) + ll + 1] - o15_im * in[ispan * (istep + 32) + ll];
                            float t17_re = o16_re * in[ispan * (istep + 34) + ll] + o16_im * in[ispan * (istep + 34) + ll + 1];
                            float t17_im = o16_re * in[ispan * (istep + 34) + ll + 1] - o16_im * in[ispan * (istep + 34) + ll];
                            float t18_re = o17_re * in[ispan * (istep + 36) + ll] + o17_im * in[ispan * (istep + 36) + ll + 1];
                            float t18_im = o17_re * in[ispan * (istep + 36) + ll + 1] - o17_im * in[ispan * (istep + 36) + ll];
                            float t19_re = o18_re * in[ispan * (istep + 38) + ll] + o18_im * in[ispan * (istep + 38) + ll + 1];
                            float t19_im = o18_re * in[ispan * (istep + 38) + ll + 1] - o18_im * in[ispan * (istep + 38) + ll];
                            float t20_re = o19_re * in[ispan * (istep + 40) + ll] + o19_im * in[ispan * (istep + 40) + ll + 1];
                            float t20_im = o19_re * in[ispan * (istep + 40) + ll + 1] - o19_im * in[ispan * (istep + 40) + ll];
                            float t21_re = o20_re * in[ispan * (istep + 42) + ll] + o20_im * in[ispan * (istep + 42) + ll + 1];
                            float t21_im = o20_re * in[ispan * (istep + 42) + ll + 1] - o20_im * in[ispan * (istep + 42) + ll];
                            float t22_re = o21_re * in[ispan * (istep + 44) + ll] + o21_im * in[ispan * (istep + 44) + ll + 1];
                            float t22_im = o21_re * in[ispan * (istep + 44) + ll + 1] - o21_im * in[ispan * (istep + 44) + ll];
                            float t23_re = o22_re * in[ispan * (istep + 46) + ll] + o22_im * in[ispan * (istep + 46) + ll + 1];
                            float t23_im = o22_re * in[ispan * (istep + 46) + ll + 1] - o22_im * in[ispan * (istep + 46) + ll];
                            float t24_re = o23_re * in[ispan * (istep + 48) + ll] + o23_im * in[ispan * (istep + 48) + ll + 1];
                            float t24_im = o23_re * in[ispan * (istep + 48) + ll + 1] - o23_im * in[ispan * (istep + 48) + ll];
                            float t25_re = o24_re * in[ispan * (istep + 50) + ll] + o24_im * in[ispan * (istep + 50) + ll + 1];
                            float t25_im = o24_re * in[ispan * (istep + 50) + ll + 1] - o24_im * in[ispan * (istep + 50) + ll];
                            float t26_re = o25_re * in[ispan * (istep + 52) + ll] + o25_im * in[ispan * (istep + 52) + ll + 1];
                            float t26_im = o25_re * in[ispan * (istep + 52) + ll + 1] - o25_im * in[ispan * (istep + 52) + ll];
                            float t27_re = o26_re * in[ispan * (istep + 54) + ll] + o26_im * in[ispan * (istep + 54) + ll + 1];
                            float t27_im = o26_re * in[ispan * (istep + 54) + ll + 1] - o26_im * in[ispan * (istep + 54) + ll];
                            float t28_re = o27_re * in[ispan * (istep + 56) + ll] + o27_im * in[ispan * (istep + 56) + ll + 1];
                            float t28_im = o27_re * in[ispan * (istep + 56) + ll + 1] - o27_im * in[ispan * (istep + 56) + ll];
                            float t29_re = o28_re * in[ispan * (istep + 58) + ll] + o28_im * in[ispan * (istep + 58) + ll + 1];
                            float t29_im = o28_re * in[ispan * (istep + 58) + ll + 1] - o28_im * in[ispan * (istep + 58) + ll];
                            float t30_re = o29_re * in[ispan * (istep + 60) + ll] + o29_im * in[ispan * (istep + 60) + ll + 1];
                            float t30_im = o29_re * in[ispan * (istep + 60) + ll + 1] - o29_im * in[ispan * (istep + 60) + ll];
                            float t31_re = o30_re * in[ispan * (istep + 62) + ll] + o30_im * in[ispan * (istep + 62) + ll + 1];
                            float t31_im = o30_re * in[ispan * (istep + 62) + ll + 1] - o30_im * in[ispan * (istep + 62) + ll];
                            float x00z_re = t00_re + t16_re;
                            float f = t00_re - t16_re;
                            float x00z_im = t00_im + t16_im;
                            float f2 = t00_im - t16_im;
                            float x01z_re = t01_re + t17_re;
                            float f3 = t01_re - t17_re;
                            float x01z_im = t01_im + t17_im;
                            float f4 = t01_im - t17_im;
                            float x02z_re = t02_re + t18_re;
                            float f5 = t02_re - t18_re;
                            float x02z_im = t02_im + t18_im;
                            float f6 = t02_im - t18_im;
                            float x03z_re = t03_re + t19_re;
                            float f7 = t03_re - t19_re;
                            float x03z_im = t03_im + t19_im;
                            float f8 = t03_im - t19_im;
                            float x04z_re = t04_re + t20_re;
                            float f9 = t04_re - t20_re;
                            float x04z_im = t04_im + t20_im;
                            float f10 = t04_im - t20_im;
                            float x05z_re = t05_re + t21_re;
                            float f11 = t05_re - t21_re;
                            float x05z_im = t05_im + t21_im;
                            float f12 = t05_im - t21_im;
                            float x06z_re = t06_re + t22_re;
                            float f13 = t06_re - t22_re;
                            float x06z_im = t06_im + t22_im;
                            float f14 = t06_im - t22_im;
                            float x07z_re = t07_re + t23_re;
                            float f15 = t07_re - t23_re;
                            float x07z_im = t07_im + t23_im;
                            float f16 = t07_im - t23_im;
                            float x08z_re = t08_re + t24_re;
                            float f17 = t08_re - t24_re;
                            float x08z_im = t08_im + t24_im;
                            float f18 = t08_im - t24_im;
                            float x09z_re = t09_re + t25_re;
                            float f19 = t09_re - t25_re;
                            float x09z_im = t09_im + t25_im;
                            float f20 = t09_im - t25_im;
                            float x10z_re = t10_re + t26_re;
                            float f21 = t10_re - t26_re;
                            float x10z_im = t10_im + t26_im;
                            float f22 = t10_im - t26_im;
                            float f23 = t11_re + t27_re;
                            float f24 = t11_re - t27_re;
                            float f25 = t11_im + t27_im;
                            float f26 = t11_im - t27_im;
                            float f27 = t12_re + t28_re;
                            float f28 = t12_re - t28_re;
                            float f29 = t12_im + t28_im;
                            float f30 = t12_im - t28_im;
                            float f31 = t13_re + t29_re;
                            float f32 = t13_re - t29_re;
                            float f33 = t13_im + t29_im;
                            float f34 = t13_im - t29_im;
                            float f35 = t14_re + t30_re;
                            float f36 = t14_re - t30_re;
                            float f37 = t14_im + t30_im;
                            float f38 = t14_im - t30_im;
                            float f39 = t15_re + t31_re;
                            float f40 = t15_re - t31_re;
                            float f41 = t15_im + t31_im;
                            float f42 = t15_im - t31_im;
                            float x00c_re = x00z_re + x08z_re;
                            float x08c_re = x00z_re - x08z_re;
                            float x00c_im = x00z_im + x08z_im;
                            float x08c_im = x00z_im - x08z_im;
                            float x02c_re = x02z_re + x10z_re;
                            float x10e_re = x02z_re - x10z_re;
                            float x02c_im = x02z_im + x10z_im;
                            float x10e_im = x02z_im - x10z_im;
                            float x04c_re = x04z_re + x12z_re;
                            float f43 = x04z_re - x12z_re;
                            float x04c_im = x04z_im + x12z_im;
                            float f44 = x04z_im - x12z_im;
                            float x06c_re = x06z_re + x14z_re;
                            float f45 = x06z_re - x14z_re;
                            float x06c_im = x06z_im + x14z_im;
                            float f46 = x06z_im - x14z_im;
                            float x00b_re = x00c_re + x04c_re;
                            float x04b_re = x00c_re - x04c_re;
                            float x02b_re = x02c_re + x06c_re;
                            float x06b_re = x02c_re - x06c_re;
                            float x00a_re = x00b_re + x02b_re;
                            float x02a_re = x00b_re - x02b_re;
                            float x00b_im = x00c_im + x04c_im;
                            float x04b_im = x00c_im - x04c_im;
                            float x02b_im = x02c_im + x06c_im;
                            float x06b_im = x02c_im - x06c_im;
                            float x00a_im = x00b_im + x02b_im;
                            float x02a_im = x00b_im - x02b_im;
                            float x01c_re = x01z_re + x09z_re;
                            float x09f_re = x01z_re - x09z_re;
                            float x01c_im = x01z_im + x09z_im;
                            float x09f_im = x01z_im - x09z_im;
                            float x03c_re = x03z_re + x11z_re;
                            float f47 = x03z_re - x11z_re;
                            float x03c_im = x03z_im + x11z_im;
                            float f48 = x03z_im - x11z_im;
                            float x05e_re = x05z_re + x13z_re;
                            float f49 = x05z_re - x13z_re;
                            float x05e_im = x05z_im + x13z_im;
                            float f50 = x05z_im - x13z_im;
                            float x07e_re = x07z_re + x15z_re;
                            float f51 = x07z_re - x15z_re;
                            float x07e_im = x07z_im + x15z_im;
                            float f52 = x07z_im - x15z_im;
                            float x01b_re = x01c_re + x05e_re;
                            float x05d_re = x01c_re - x05e_re;
                            float x03b_re = x03c_re + x07e_re;
                            float x07d_re = x03c_re - x07e_re;
                            float x01a_re = x01b_re + x03b_re;
                            float x03a_re = x01b_re - x03b_re;
                            float x01b_im = x01c_im + x05e_im;
                            float x05d_im = x01c_im - x05e_im;
                            float x03b_im = x03c_im + x07e_im;
                            float x07d_im = x03c_im - x07e_im;
                            float x01a_im = x01b_im + x03b_im;
                            float x03a_im = x01b_im - x03b_im;
                            out[kk * ispan + 0 * ospan + ll] = x00a_re + x01a_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00a_im + x01a_im;
                            out[kk * ispan + 16 * ospan + ll] = x00a_re - x01a_re;
                            out[kk * ispan + 16 * ospan + ll + 1] = x00a_im - x01a_im;
                            out[kk * ispan + 8 * ospan + ll] = x02a_re + x03a_im;
                            out[kk * ispan + 8 * ospan + ll + 1] = x02a_im - x03a_re;
                            out[kk * ispan + 24 * ospan + ll] = x02a_re - x03a_im;
                            out[kk * ispan + 24 * ospan + ll + 1] = x02a_im + x03a_re;
                            float x07c_re = x05d_re + x07d_re;
                            float x05c_re = x05d_re - x07d_re;
                            float x05b_re = 0.70710677f * x05c_re;
                            float x04a_re = x04b_re + x05b_re;
                            float x05a_re = x04b_re - x05b_re;
                            float x07b_re = 0.70710677f * x07c_re;
                            float x06a_im = x06b_re + x07b_re;
                            float x07a_im = x06b_re - x07b_re;
                            float x07c_im = x05d_im + x07d_im;
                            float x05c_im = x05d_im - x07d_im;
                            float x05b_im = 0.70710677f * x05c_im;
                            float x04a_im = x04b_im + x05b_im;
                            float x05a_im = x04b_im - x05b_im;
                            float x07b_im = 0.70710677f * x07c_im;
                            float x06a_re = x06b_im + x07b_im;
                            float x07a_re = x06b_im - x07b_im;
                            out[kk * ispan + 4 * ospan + ll] = x04a_re + x06a_re;
                            out[kk * ispan + 4 * ospan + ll + 1] = x04a_im - x06a_im;
                            out[kk * ispan + 28 * ospan + ll] = x04a_re - x06a_re;
                            out[kk * ispan + 28 * ospan + ll + 1] = x04a_im + x06a_im;
                            out[kk * ispan + 12 * ospan + ll] = x05a_re - x07a_re;
                            out[kk * ispan + 12 * ospan + ll + 1] = x05a_im + x07a_im;
                            out[kk * ispan + 20 * ospan + ll] = x05a_re + x07a_re;
                            out[kk * ispan + 20 * ospan + ll + 1] = x05a_im - x07a_im;
                            float x09e_re = x09f_re - x15f_re;
                            float f53 = x09f_re + x15f_re;
                            float x09e_im = x09f_im - x15f_im;
                            float f54 = x09f_im + x15f_im;
                            void var266_862 = x11f_re - x13e_re;
                            void x13d_re = x11f_re + x13e_re - x15e_re;
                            float f55 = 1.4142135f * x15e_re;
                            void var284_934 = x13d_re + x15d_re;
                            void var308_1030 = x13d_re - x15d_re;
                            void var267_866 = x11f_im - x13e_im;
                            void x13d_im = x11f_im + x13e_im - x15e_im;
                            float f56 = 1.4142135f * x15e_im;
                            void var285_938 = x13d_im + x15d_im;
                            void var309_1034 = x13d_im - x15d_im;
                            float f57 = x10e_re + x14e_re;
                            float x10d_re = x10e_re - x14e_re;
                            float x10c_re = 0.70710677f * x10d_re;
                            float x08b_re = x08c_re + x10c_re;
                            float x10b_re = x08c_re - x10c_re;
                            float f58 = 0.70710677f * x14d_re;
                            float f59 = x10e_im + x14e_im;
                            float x10d_im = x10e_im - x14e_im;
                            float x10c_im = 0.70710677f * x10d_im;
                            float x08b_im = x08c_im + x10c_im;
                            float x10b_im = x08c_im - x10c_im;
                            float f60 = 0.70710677f * x14d_im;
                            void var275_898 = -x12c_re - x14c_re;
                            void var295_978 = -x12c_re + x14c_re;
                            void var274_894 = x12c_im + x14c_im;
                            void var294_974 = x12c_im - x14c_im;
                            float x09d_re = x09e_re - x11e_re;
                            float f61 = 1.4142135f * x11e_re;
                            float x09c_re = x09d_re + x11d_re;
                            float f62 = x09d_re - x11d_re;
                            float x09b_re = 0.9238795f * x09c_re;
                            float x08a_re = x08b_re + x09b_re;
                            float x09a_re = x08b_re - x09b_re;
                            float x09d_im = x09e_im - x11e_im;
                            float f63 = 1.4142135f * x11e_im;
                            float x09c_im = x09d_im + x11d_im;
                            float f64 = x09d_im - x11d_im;
                            float x09b_im = 0.9238795f * x09c_im;
                            float x08a_im = x08b_im + x09b_im;
                            float x09a_im = x08b_im - x09b_im;
                            float f65 = 0.9238795f * x13c_im;
                            void var272_886 = x12b_re + x13b_im;
                            void var280_918 = x12b_re - x13b_im;
                            float f66 = 0.9238795f * x13c_re;
                            void var273_890 = x12b_im - x13b_re;
                            void var281_922 = x12b_im + x13b_re;
                            out[kk * ispan + 2 * ospan + ll] = x08a_re + x12a_re;
                            out[kk * ispan + 2 * ospan + ll + 1] = x08a_im + x12a_im;
                            out[kk * ispan + 30 * ospan + ll] = x08a_re - x12a_re;
                            out[kk * ispan + 30 * ospan + ll + 1] = x08a_im - x12a_im;
                            out[kk * ispan + 14 * ospan + ll] = x09a_re - x13a_re;
                            out[kk * ispan + 14 * ospan + ll + 1] = x09a_im - x13a_im;
                            out[kk * ispan + 18 * ospan + ll] = x09a_re + x13a_re;
                            out[kk * ispan + 18 * ospan + ll + 1] = x09a_im + x13a_im;
                            float f67 = 0.38268343f * x11c_re;
                            float x10a_re = x10b_re + x11b_re;
                            float f68 = x10b_re - x11b_re;
                            float f69 = 0.38268343f * x11c_im;
                            float x10a_im = x10b_im + x11b_im;
                            float f70 = x10b_im - x11b_im;
                            float f71 = 0.38268343f * x15c_im;
                            void var292_966 = x14b_re + x15b_im;
                            void var304_1014 = x14b_re - x15b_im;
                            float f72 = 0.38268343f * x15c_re;
                            void var293_970 = x14b_im - x15b_re;
                            void var305_1018 = x14b_im + x15b_re;
                            out[kk * ispan + 6 * ospan + ll] = x10a_re - x14a_re;
                            out[kk * ispan + 6 * ospan + ll + 1] = x10a_im - x14a_im;
                            out[kk * ispan + 26 * ospan + ll] = x10a_re + x14a_re;
                            out[kk * ispan + 26 * ospan + ll + 1] = x10a_im + x14a_im;
                            out[kk * ispan + 10 * ospan + ll] = x11a_re + x15a_re;
                            out[kk * ispan + 10 * ospan + ll + 1] = x11a_im + x15a_im;
                            out[kk * ispan + 22 * ospan + ll] = x11a_re - x15a_re;
                            out[kk * ispan + 22 * ospan + ll + 1] = x11a_im - x15a_im;
                            void var338_1150 = x17z_re - x31z_re;
                            void var538_1950 = x17z_re + x31z_re;
                            void var339_1154 = x17z_im - x31z_im;
                            void var539_1954 = x17z_im + x31z_im;
                            void var396_1382 = x21z_re - x27z_re;
                            void var480_1718 = x21z_re + x27z_re;
                            void var397_1386 = x21z_im - x27z_im;
                            void var481_1722 = x21z_im + x27z_im;
                            void var426_1502 = x23z_re - x25z_re;
                            void var450_1598 = x23z_re + x25z_re;
                            void var427_1506 = x23z_im - x25z_im;
                            void var451_1602 = x23z_im + x25z_im;
                            void var336_1142 = x17g_re - x23g_re;
                            float f73 = 1.4142135f * x21g_re;
                            void var334_1134 = x17f_re + x21f_re;
                            void var392_1366 = x17f_re - x21f_re;
                            void var337_1146 = x17g_im - x23g_im;
                            float f74 = 1.4142135f * x21g_im;
                            void var335_1138 = x17f_im + x21f_im;
                            void var393_1370 = x17f_im - x21f_im;
                            void var352_1206 = x18z_re - x30z_re;
                            void var522_1886 = x18z_re + x30z_re;
                            void var353_1210 = x18z_im - x30z_im;
                            void var523_1890 = x18z_im + x30z_im;
                            void var410_1438 = x22z_re - x26z_re;
                            void var464_1654 = x22z_re + x26z_re;
                            void var411_1442 = x22z_im - x26z_im;
                            void var465_1658 = x22z_im + x26z_im;
                            void var350_1198 = x18f_re - x22f_re;
                            float f75 = 1.4142135f * x22f_re;
                            void var348_1190 = x18e_re + x22e_re;
                            void var406_1422 = x18e_re - x22e_re;
                            void var351_1202 = x18f_im - x22f_im;
                            float f76 = 1.4142135f * x22f_im;
                            void var349_1194 = x18e_im + x22e_im;
                            void var407_1426 = x18e_im - x22e_im;
                            void var369_1274 = x19z_im - x29z_im;
                            void var509_1834 = x19z_im + x29z_im;
                            void var368_1270 = x19z_re - x29z_re;
                            void var508_1830 = x19z_re + x29z_re;
                            void var366_1262 = x19g_re - x21g_re;
                            float f77 = 1.4142135f * x23g_re;
                            void var364_1254 = x19f_re + x23f_re;
                            void var422_1486 = x19f_re - x23f_re;
                            void var367_1266 = x19g_im - x21g_im;
                            float f78 = 1.4142135f * x23g_im;
                            void var365_1258 = x19f_im + x23f_im;
                            void var423_1490 = x19f_im - x23f_im;
                            void var332_1126 = x17e_re - x19e_re;
                            float f79 = 1.847759f * x19e_re;
                            void var330_1118 = x17d_re + x19d_re;
                            void var360_1238 = x17d_re - x19d_re;
                            void var333_1130 = x17e_im - x19e_im;
                            float f80 = 1.847759f * x19e_im;
                            void var331_1122 = x17d_im + x19d_im;
                            void var361_1242 = x17d_im - x19d_im;
                            void var390_1358 = x21e_re - x23e_re;
                            float f81 = 0.76536685f * x23e_re;
                            void var388_1350 = x21d_re + x23d_re;
                            void var418_1470 = x21d_re - x23d_re;
                            void var391_1362 = x21e_im - x23e_im;
                            float f82 = 0.76536685f * x23e_im;
                            void var389_1354 = x21d_im + x23d_im;
                            void var419_1474 = x21d_im - x23d_im;
                            void var448_1590 = x25g_re - x31g_re;
                            float f83 = 1.4142135f * x29g_re;
                            void var446_1582 = x25f_re + x29f_re;
                            void var504_1814 = x25f_re - x29f_re;
                            void var449_1594 = x25g_im - x31g_im;
                            float f84 = 1.4142135f * x29g_im;
                            void var447_1586 = x25f_im + x29f_im;
                            void var505_1818 = x25f_im - x29f_im;
                            void var462_1646 = x26f_re - x30f_re;
                            float f85 = 1.4142135f * x30f_re;
                            void var460_1638 = x26e_re + x30e_re;
                            void var518_1870 = x26e_re - x30e_re;
                            void var463_1650 = x26f_im - x30f_im;
                            float f86 = 1.4142135f * x30f_im;
                            void var461_1642 = x26e_im + x30e_im;
                            void var519_1874 = x26e_im - x30e_im;
                            void var478_1710 = x27g_re - x29g_re;
                            float f87 = 1.4142135f * x31g_re;
                            void var476_1702 = x27f_re + x31f_re;
                            void var534_1934 = x27f_re - x31f_re;
                            void var479_1714 = x27g_im - x29g_im;
                            float f88 = 1.4142135f * x31g_im;
                            void var477_1706 = x27f_im + x31f_im;
                            void var535_1938 = x27f_im - x31f_im;
                            void var444_1574 = x25e_re - x27e_re;
                            float f89 = 1.847759f * x27e_re;
                            void var472_1686 = x25d_re - x27d_re;
                            void var442_1566 = x25d_re + x27d_re;
                            void var445_1578 = x25e_im - x27e_im;
                            float f90 = 1.847759f * x27e_im;
                            void var473_1690 = x25d_im - x27d_im;
                            void var443_1570 = x25d_im + x27d_im;
                            void var502_1806 = x29e_re - x31e_re;
                            float f91 = 0.76536685f * x31e_re;
                            void var500_1798 = x29d_re + x31d_re;
                            void var530_1918 = x29d_re - x31d_re;
                            void var503_1810 = x29e_im - x31e_im;
                            float f92 = 0.76536685f * x31e_im;
                            void var501_1802 = x29d_im + x31d_im;
                            void var531_1922 = x29d_im - x31d_im;
                            void var380_1318 = x20z_re - x28z_re;
                            void var492_1766 = x20z_re + x28z_re;
                            void var381_1322 = x20z_im - x28z_im;
                            void var493_1770 = x20z_im + x28z_im;
                            float f93 = 0.70710677f * x28e_im;
                            void var434_1534 = x24z_im + x28d_im;
                            void var488_1750 = x24z_im - x28d_im;
                            float f94 = 0.9238795f * x26d_im;
                            void var432_1526 = x24c_re + x26c_im;
                            void var456_1622 = x24c_re - x26c_im;
                            float f95 = 0.98078525f * x25c_im;
                            void var430_1518 = x24b_re + x25b_im;
                            void var438_1550 = x24b_re - x25b_im;
                            float f96 = 0.70710677f * x28e_re;
                            void var435_1538 = x28d_re + x24z_re;
                            void var489_1754 = x28d_re - x24z_re;
                            float f97 = 0.9238795f * x26d_re;
                            void var433_1530 = x26c_re + x24c_im;
                            void var457_1626 = x26c_re - x24c_im;
                            float f98 = 0.98078525f * x25c_re;
                            void var431_1522 = x25b_re + x24b_im;
                            void var439_1554 = x25b_re - x24b_im;
                            float f99 = 0.70710677f * x20e_re;
                            void var376_1302 = x16z_re - x20d_re;
                            void var322_1086 = x16z_re + x20d_re;
                            float f100 = 0.9238795f * x18d_re;
                            void var320_1078 = x16c_re + x18c_re;
                            void var344_1174 = x16c_re - x18c_re;
                            float f101 = 0.98078525f * x17c_re;
                            void var318_1070 = x16b_re + x17b_re;
                            void var326_1102 = x16b_re - x17b_re;
                            float f102 = 0.70710677f * x20e_im;
                            void var377_1306 = x16z_im - x20d_im;
                            void var323_1090 = x16z_im + x20d_im;
                            float f103 = 0.9238795f * x18d_im;
                            void var321_1082 = x16c_im + x18c_im;
                            void var345_1178 = x16c_im - x18c_im;
                            float f104 = 0.98078525f * x17c_im;
                            void var319_1074 = x16b_im + x17b_im;
                            void var327_1106 = x16b_im - x17b_im;
                            out[kk * ispan + 1 * ospan + ll] = x16a_re + x24a_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x16a_im - x24a_im;
                            out[kk * ispan + 31 * ospan + ll] = x16a_re - x24a_re;
                            out[kk * ispan + 31 * ospan + ll + 1] = x16a_im + x24a_im;
                            out[kk * ispan + 15 * ospan + ll] = x17a_re - x25a_re;
                            out[kk * ispan + 15 * ospan + ll + 1] = x17a_im - x25a_im;
                            out[kk * ispan + 17 * ospan + ll] = x17a_re + x25a_re;
                            out[kk * ispan + 17 * ospan + ll + 1] = x17a_im + x25a_im;
                            float f105 = 0.19509032f * x27c_im;
                            void var454_1614 = x26b_re + x27b_im;
                            void var468_1670 = x26b_re - x27b_im;
                            float f106 = 0.19509032f * x27c_re;
                            void var455_1618 = x26b_im - x27b_re;
                            void var469_1674 = x26b_im + x27b_re;
                            float f107 = 0.19509032f * x19c_re;
                            void var342_1166 = x18b_re + x19b_re;
                            void var356_1222 = x18b_re - x19b_re;
                            float f108 = 0.19509032f * x19c_im;
                            void var343_1170 = x18b_im + x19b_im;
                            void var357_1226 = x18b_im - x19b_im;
                            out[kk * ispan + 7 * ospan + ll] = x18a_re - x26a_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x18a_im - x26a_im;
                            out[kk * ispan + 25 * ospan + ll] = x18a_re + x26a_re;
                            out[kk * ispan + 25 * ospan + ll + 1] = x18a_im + x26a_im;
                            out[kk * ispan + 9 * ospan + ll] = x19a_re + x27a_re;
                            out[kk * ispan + 9 * ospan + ll + 1] = x19a_im + x27a_im;
                            out[kk * ispan + 23 * ospan + ll] = x19a_re - x27a_re;
                            out[kk * ispan + 23 * ospan + ll + 1] = x19a_im - x27a_im;
                            float f109 = 0.38268343f * x30d_im;
                            void var486_1742 = x28c_re + x30c_im;
                            void var514_1854 = x28c_re - x30c_im;
                            float f110 = 0.8314696f * x29c_im;
                            void var484_1734 = x28b_re + x29b_im;
                            void var496_1782 = x28b_re - x29b_im;
                            float f111 = 0.38268343f * x30d_re;
                            void var487_1746 = x28c_im - x30c_re;
                            void var515_1858 = x28c_im + x30c_re;
                            float f112 = 0.8314696f * x29c_re;
                            void var485_1738 = x28b_im - x29b_re;
                            void var497_1786 = x28b_im + x29b_re;
                            float f113 = 0.38268343f * x22d_re;
                            void var374_1294 = x20c_re + x22c_re;
                            void var402_1406 = x20c_re - x22c_re;
                            float f114 = 0.8314696f * x21c_re;
                            void var372_1286 = x20b_re + x21b_re;
                            void var384_1334 = x20b_re - x21b_re;
                            float f115 = 0.38268343f * x22d_im;
                            void var375_1298 = x20c_im + x22c_im;
                            void var403_1410 = x20c_im - x22c_im;
                            float f116 = 0.8314696f * x21c_im;
                            void var373_1290 = x20b_im + x21b_im;
                            void var385_1338 = x20b_im - x21b_im;
                            out[kk * ispan + 3 * ospan + ll] = x20a_re - x28a_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x20a_im - x28a_im;
                            out[kk * ispan + 29 * ospan + ll] = x20a_re + x28a_re;
                            out[kk * ispan + 29 * ospan + ll + 1] = x20a_im + x28a_im;
                            out[kk * ispan + 13 * ospan + ll] = x21a_re + x29a_re;
                            out[kk * ispan + 13 * ospan + ll + 1] = x21a_im + x29a_im;
                            out[kk * ispan + 19 * ospan + ll] = x21a_re - x29a_re;
                            out[kk * ispan + 19 * ospan + ll + 1] = x21a_im - x29a_im;
                            float f117 = 0.55557024f * x23c_re;
                            void var400_1398 = x22b_re + x23b_re;
                            void var414_1454 = x22b_re - x23b_re;
                            float f118 = 0.55557024f * x23c_im;
                            void var401_1402 = x22b_im + x23b_im;
                            void var415_1458 = x22b_im - x23b_im;
                            float f119 = 0.55557024f * x31c_im;
                            void var512_1846 = x30b_re + x31b_im;
                            void var526_1902 = x30b_re - x31b_im;
                            float f120 = 0.55557024f * x31c_re;
                            void var527_1906 = x30b_im + x31b_re;
                            void var513_1850 = x30b_im - x31b_re;
                            out[kk * ispan + 5 * ospan + ll] = x22a_re + x30a_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x22a_im + x30a_im;
                            out[kk * ispan + 27 * ospan + ll] = x22a_re - x30a_re;
                            out[kk * ispan + 27 * ospan + ll + 1] = x22a_im - x30a_im;
                            out[kk * ispan + 11 * ospan + ll] = x23a_re - x31a_re;
                            out[kk * ispan + 11 * ospan + ll + 1] = x23a_im - x31a_im;
                            out[kk * ispan + 21 * ospan + ll] = x23a_re + x31a_re;
                            out[kk * ispan + 21 * ospan + ll + 1] = x23a_im + x31a_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float t08_re = in[16 * ispan + ll];
                        float t08_im = in[16 * ispan + ll + 1];
                        float t09_re = in[18 * ispan + ll];
                        float t09_im = in[18 * ispan + ll + 1];
                        float t10_re = in[20 * ispan + ll];
                        float t10_im = in[20 * ispan + ll + 1];
                        float t11_re = in[22 * ispan + ll];
                        float t11_im = in[22 * ispan + ll + 1];
                        float t12_re = in[24 * ispan + ll];
                        float t12_im = in[24 * ispan + ll + 1];
                        float t13_re = in[26 * ispan + ll];
                        float t13_im = in[26 * ispan + ll + 1];
                        float t14_re = in[28 * ispan + ll];
                        float t14_im = in[28 * ispan + ll + 1];
                        float t15_re = in[30 * ispan + ll];
                        float t15_im = in[30 * ispan + ll + 1];
                        float t16_re = in[32 * ispan + ll];
                        float t16_im = in[32 * ispan + ll + 1];
                        float t17_re = in[34 * ispan + ll];
                        float t17_im = in[34 * ispan + ll + 1];
                        float t18_re = in[36 * ispan + ll];
                        float t18_im = in[36 * ispan + ll + 1];
                        float t19_re = in[38 * ispan + ll];
                        float t19_im = in[38 * ispan + ll + 1];
                        float t20_re = in[40 * ispan + ll];
                        float t20_im = in[40 * ispan + ll + 1];
                        float t21_re = in[42 * ispan + ll];
                        float t21_im = in[42 * ispan + ll + 1];
                        float t22_re = in[44 * ispan + ll];
                        float t22_im = in[44 * ispan + ll + 1];
                        float t23_re = in[46 * ispan + ll];
                        float t23_im = in[46 * ispan + ll + 1];
                        float t24_re = in[48 * ispan + ll];
                        float t24_im = in[48 * ispan + ll + 1];
                        float t25_re = in[50 * ispan + ll];
                        float t25_im = in[50 * ispan + ll + 1];
                        float t26_re = in[52 * ispan + ll];
                        float t26_im = in[52 * ispan + ll + 1];
                        float t27_re = in[54 * ispan + ll];
                        float t27_im = in[54 * ispan + ll + 1];
                        float t28_re = in[56 * ispan + ll];
                        float t28_im = in[56 * ispan + ll + 1];
                        float t29_re = in[58 * ispan + ll];
                        float t29_im = in[58 * ispan + ll + 1];
                        float t30_re = in[60 * ispan + ll];
                        float t30_im = in[60 * ispan + ll + 1];
                        float t31_re = in[62 * ispan + ll];
                        float t31_im = in[62 * ispan + ll + 1];
                        float x00z_re = t00_re + t16_re;
                        float x16z_re = t00_re - t16_re;
                        float x00z_im = t00_im + t16_im;
                        float x16z_im = t00_im - t16_im;
                        float x01z_re = t01_re + t17_re;
                        float x17z_re = t01_re - t17_re;
                        float x01z_im = t01_im + t17_im;
                        float x17z_im = t01_im - t17_im;
                        float x02z_re = t02_re + t18_re;
                        float x18z_re = t02_re - t18_re;
                        float x02z_im = t02_im + t18_im;
                        float x18z_im = t02_im - t18_im;
                        float x03z_re = t03_re + t19_re;
                        float x19z_re = t03_re - t19_re;
                        float x03z_im = t03_im + t19_im;
                        float x19z_im = t03_im - t19_im;
                        float x04z_re = t04_re + t20_re;
                        float x20z_re = t04_re - t20_re;
                        float x04z_im = t04_im + t20_im;
                        float x20z_im = t04_im - t20_im;
                        float x05z_re = t05_re + t21_re;
                        float x21z_re = t05_re - t21_re;
                        float x05z_im = t05_im + t21_im;
                        float x21z_im = t05_im - t21_im;
                        float x06z_re = t06_re + t22_re;
                        float x22z_re = t06_re - t22_re;
                        float x06z_im = t06_im + t22_im;
                        float x22z_im = t06_im - t22_im;
                        float x07z_re = t07_re + t23_re;
                        float x23z_re = t07_re - t23_re;
                        float x07z_im = t07_im + t23_im;
                        float x23z_im = t07_im - t23_im;
                        float x08z_re = t08_re + t24_re;
                        float x24z_re = t08_re - t24_re;
                        float x08z_im = t08_im + t24_im;
                        float x24z_im = t08_im - t24_im;
                        float x09z_re = t09_re + t25_re;
                        float x25z_re = t09_re - t25_re;
                        float x09z_im = t09_im + t25_im;
                        float x25z_im = t09_im - t25_im;
                        float x10z_re = t10_re + t26_re;
                        float x26z_re = t10_re - t26_re;
                        float x10z_im = t10_im + t26_im;
                        float x26z_im = t10_im - t26_im;
                        float x11z_re = t11_re + t27_re;
                        float x27z_re = t11_re - t27_re;
                        float x11z_im = t11_im + t27_im;
                        float x27z_im = t11_im - t27_im;
                        float x12z_re = t12_re + t28_re;
                        float x28z_re = t12_re - t28_re;
                        float x12z_im = t12_im + t28_im;
                        float x28z_im = t12_im - t28_im;
                        float x13z_re = t13_re + t29_re;
                        float x29z_re = t13_re - t29_re;
                        float x13z_im = t13_im + t29_im;
                        float x29z_im = t13_im - t29_im;
                        float x14z_re = t14_re + t30_re;
                        float x30z_re = t14_re - t30_re;
                        float x14z_im = t14_im + t30_im;
                        float x30z_im = t14_im - t30_im;
                        float x15z_re = t15_re + t31_re;
                        float x31z_re = t15_re - t31_re;
                        float x15z_im = t15_im + t31_im;
                        float x31z_im = t15_im - t31_im;
                        float x00c_re = x00z_re + x08z_re;
                        float x08c_re = x00z_re - x08z_re;
                        float x00c_im = x00z_im + x08z_im;
                        float x08c_im = x00z_im - x08z_im;
                        float x02c_re = x02z_re + x10z_re;
                        float x10e_re = x02z_re - x10z_re;
                        float x02c_im = x02z_im + x10z_im;
                        float x10e_im = x02z_im - x10z_im;
                        float x04c_re = x04z_re + x12z_re;
                        float x12c_re = x04z_re - x12z_re;
                        float x04c_im = x04z_im + x12z_im;
                        float x12c_im = x04z_im - x12z_im;
                        float x06c_re = x06z_re + x14z_re;
                        float x14e_re = x06z_re - x14z_re;
                        float x06c_im = x06z_im + x14z_im;
                        float x14e_im = x06z_im - x14z_im;
                        float x00b_re = x00c_re + x04c_re;
                        float x04b_re = x00c_re - x04c_re;
                        float x02b_re = x02c_re + x06c_re;
                        float x06b_re = x02c_re - x06c_re;
                        float x00a_re = x00b_re + x02b_re;
                        float x02a_re = x00b_re - x02b_re;
                        float x00b_im = x00c_im + x04c_im;
                        float x04b_im = x00c_im - x04c_im;
                        float x02b_im = x02c_im + x06c_im;
                        float x06b_im = x02c_im - x06c_im;
                        float x00a_im = x00b_im + x02b_im;
                        float x02a_im = x00b_im - x02b_im;
                        float x01c_re = x01z_re + x09z_re;
                        float x09f_re = x01z_re - x09z_re;
                        float x01c_im = x01z_im + x09z_im;
                        float x09f_im = x01z_im - x09z_im;
                        float x03c_re = x03z_re + x11z_re;
                        float x11f_re = x03z_re - x11z_re;
                        float x03c_im = x03z_im + x11z_im;
                        float x11f_im = x03z_im - x11z_im;
                        float x05e_re = x05z_re + x13z_re;
                        float x13e_re = x05z_re - x13z_re;
                        float x05e_im = x05z_im + x13z_im;
                        float x13e_im = x05z_im - x13z_im;
                        float x07e_re = x07z_re + x15z_re;
                        float x15f_re = x07z_re - x15z_re;
                        float x07e_im = x07z_im + x15z_im;
                        float x15f_im = x07z_im - x15z_im;
                        float x01b_re = x01c_re + x05e_re;
                        float x05d_re = x01c_re - x05e_re;
                        float x03b_re = x03c_re + x07e_re;
                        float x07d_re = x03c_re - x07e_re;
                        float x01a_re = x01b_re + x03b_re;
                        float x03a_re = x01b_re - x03b_re;
                        float x01b_im = x01c_im + x05e_im;
                        float x05d_im = x01c_im - x05e_im;
                        float x03b_im = x03c_im + x07e_im;
                        float x07d_im = x03c_im - x07e_im;
                        float x01a_im = x01b_im + x03b_im;
                        float x03a_im = x01b_im - x03b_im;
                        out[0 * ospan + ll] = x00a_re + x01a_re;
                        out[0 * ospan + ll + 1] = x00a_im + x01a_im;
                        out[16 * ospan + ll] = x00a_re - x01a_re;
                        out[16 * ospan + ll + 1] = x00a_im - x01a_im;
                        out[8 * ospan + ll] = x02a_re + x03a_im;
                        out[8 * ospan + ll + 1] = x02a_im - x03a_re;
                        out[24 * ospan + ll] = x02a_re - x03a_im;
                        out[24 * ospan + ll + 1] = x02a_im + x03a_re;
                        float x07c_re = x05d_re + x07d_re;
                        float x05c_re = x05d_re - x07d_re;
                        float x05b_re = 0.70710677f * x05c_re;
                        float x04a_re = x04b_re + x05b_re;
                        float x05a_re = x04b_re - x05b_re;
                        float x07b_re = 0.70710677f * x07c_re;
                        float x06a_im = x06b_re + x07b_re;
                        float x07a_im = x06b_re - x07b_re;
                        float x07c_im = x05d_im + x07d_im;
                        float x05c_im = x05d_im - x07d_im;
                        float x05b_im = 0.70710677f * x05c_im;
                        float x04a_im = x04b_im + x05b_im;
                        float x05a_im = x04b_im - x05b_im;
                        float x07b_im = 0.70710677f * x07c_im;
                        float x06a_re = x06b_im + x07b_im;
                        float x07a_re = x06b_im - x07b_im;
                        out[4 * ospan + ll] = x04a_re + x06a_re;
                        out[4 * ospan + ll + 1] = x04a_im - x06a_im;
                        out[28 * ospan + ll] = x04a_re - x06a_re;
                        out[28 * ospan + ll + 1] = x04a_im + x06a_im;
                        out[12 * ospan + ll] = x05a_re - x07a_re;
                        out[12 * ospan + ll + 1] = x05a_im + x07a_im;
                        out[20 * ospan + ll] = x05a_re + x07a_re;
                        out[20 * ospan + ll + 1] = x05a_im - x07a_im;
                        float x09e_re = x09f_re - x15f_re;
                        float x15e_re = x09f_re + x15f_re;
                        float x09e_im = x09f_im - x15f_im;
                        float x15e_im = x09f_im + x15f_im;
                        float x11e_re = x11f_re - x13e_re;
                        float x13d_re = x11f_re + x13e_re - x15e_re;
                        float x15d_re = 1.4142135f * x15e_re;
                        float x13c_re = x13d_re + x15d_re;
                        float x15c_re = x13d_re - x15d_re;
                        float x11e_im = x11f_im - x13e_im;
                        float x13d_im = x11f_im + x13e_im - x15e_im;
                        float x15d_im = 1.4142135f * x15e_im;
                        float x13c_im = x13d_im + x15d_im;
                        float x15c_im = x13d_im - x15d_im;
                        float x14d_re = x10e_re + x14e_re;
                        float x10d_re = x10e_re - x14e_re;
                        float x10c_re = 0.70710677f * x10d_re;
                        float x08b_re = x08c_re + x10c_re;
                        float x10b_re = x08c_re - x10c_re;
                        float x14c_re = 0.70710677f * x14d_re;
                        float x14d_im = x10e_im + x14e_im;
                        float x10d_im = x10e_im - x14e_im;
                        float x10c_im = 0.70710677f * x10d_im;
                        float x08b_im = x08c_im + x10c_im;
                        float x10b_im = x08c_im - x10c_im;
                        float x14c_im = 0.70710677f * x14d_im;
                        float x12b_im = -x12c_re - x14c_re;
                        float x14b_im = -x12c_re + x14c_re;
                        float x12b_re = x12c_im + x14c_im;
                        float x14b_re = x12c_im - x14c_im;
                        float x09d_re = x09e_re - x11e_re;
                        float x11d_re = 1.4142135f * x11e_re;
                        float x09c_re = x09d_re + x11d_re;
                        float x11c_re = x09d_re - x11d_re;
                        float x09b_re = 0.9238795f * x09c_re;
                        float x08a_re = x08b_re + x09b_re;
                        float x09a_re = x08b_re - x09b_re;
                        float x09d_im = x09e_im - x11e_im;
                        float x11d_im = 1.4142135f * x11e_im;
                        float x09c_im = x09d_im + x11d_im;
                        float x11c_im = x09d_im - x11d_im;
                        float x09b_im = 0.9238795f * x09c_im;
                        float x08a_im = x08b_im + x09b_im;
                        float x09a_im = x08b_im - x09b_im;
                        float x13b_im = 0.9238795f * x13c_im;
                        float x12a_re = x12b_re + x13b_im;
                        float x13a_re = x12b_re - x13b_im;
                        float x13b_re = 0.9238795f * x13c_re;
                        float x12a_im = x12b_im - x13b_re;
                        float x13a_im = x12b_im + x13b_re;
                        out[2 * ospan + ll] = x08a_re + x12a_re;
                        out[2 * ospan + ll + 1] = x08a_im + x12a_im;
                        out[30 * ospan + ll] = x08a_re - x12a_re;
                        out[30 * ospan + ll + 1] = x08a_im - x12a_im;
                        out[14 * ospan + ll] = x09a_re - x13a_re;
                        out[14 * ospan + ll + 1] = x09a_im - x13a_im;
                        out[18 * ospan + ll] = x09a_re + x13a_re;
                        out[18 * ospan + ll + 1] = x09a_im + x13a_im;
                        float x11b_re = 0.38268343f * x11c_re;
                        float x10a_re = x10b_re + x11b_re;
                        float x11a_re = x10b_re - x11b_re;
                        float x11b_im = 0.38268343f * x11c_im;
                        float x10a_im = x10b_im + x11b_im;
                        float x11a_im = x10b_im - x11b_im;
                        float x15b_im = 0.38268343f * x15c_im;
                        float x14a_re = x14b_re + x15b_im;
                        float x15a_re = x14b_re - x15b_im;
                        float x15b_re = 0.38268343f * x15c_re;
                        float x14a_im = x14b_im - x15b_re;
                        float x15a_im = x14b_im + x15b_re;
                        out[6 * ospan + ll] = x10a_re - x14a_re;
                        out[6 * ospan + ll + 1] = x10a_im - x14a_im;
                        out[26 * ospan + ll] = x10a_re + x14a_re;
                        out[26 * ospan + ll + 1] = x10a_im + x14a_im;
                        out[10 * ospan + ll] = x11a_re + x15a_re;
                        out[10 * ospan + ll + 1] = x11a_im + x15a_im;
                        out[22 * ospan + ll] = x11a_re - x15a_re;
                        out[22 * ospan + ll + 1] = x11a_im - x15a_im;
                        float x17g_re = x17z_re - x31z_re;
                        float x31g_re = x17z_re + x31z_re;
                        float x17g_im = x17z_im - x31z_im;
                        float x31g_im = x17z_im + x31z_im;
                        float x21g_re = x21z_re - x27z_re;
                        float x27g_re = x21z_re + x27z_re;
                        float x21g_im = x21z_im - x27z_im;
                        float x27g_im = x21z_im + x27z_im;
                        float x23g_re = x23z_re - x25z_re;
                        float x25g_re = x23z_re + x25z_re;
                        float x23g_im = x23z_im - x25z_im;
                        float x25g_im = x23z_im + x25z_im;
                        float x17f_re = x17g_re - x23g_re;
                        float x21f_re = 1.4142135f * x21g_re;
                        float x17e_re = x17f_re + x21f_re;
                        float x21e_re = x17f_re - x21f_re;
                        float x17f_im = x17g_im - x23g_im;
                        float x21f_im = 1.4142135f * x21g_im;
                        float x17e_im = x17f_im + x21f_im;
                        float x21e_im = x17f_im - x21f_im;
                        float x18f_re = x18z_re - x30z_re;
                        float x30f_re = x18z_re + x30z_re;
                        float x18f_im = x18z_im - x30z_im;
                        float x30f_im = x18z_im + x30z_im;
                        float x22f_re = x22z_re - x26z_re;
                        float x26f_re = x22z_re + x26z_re;
                        float x22f_im = x22z_im - x26z_im;
                        float x26f_im = x22z_im + x26z_im;
                        float x18e_re = x18f_re - x22f_re;
                        float x22e_re = 1.4142135f * x22f_re;
                        float x18d_re = x18e_re + x22e_re;
                        float x22d_re = x18e_re - x22e_re;
                        float x18e_im = x18f_im - x22f_im;
                        float x22e_im = 1.4142135f * x22f_im;
                        float x18d_im = x18e_im + x22e_im;
                        float x22d_im = x18e_im - x22e_im;
                        float x19g_im = x19z_im - x29z_im;
                        float x29g_im = x19z_im + x29z_im;
                        float x19g_re = x19z_re - x29z_re;
                        float x29g_re = x19z_re + x29z_re;
                        float x19f_re = x19g_re - x21g_re;
                        float x23f_re = 1.4142135f * x23g_re;
                        float x19e_re = x19f_re + x23f_re;
                        float x23e_re = x19f_re - x23f_re;
                        float x19f_im = x19g_im - x21g_im;
                        float x23f_im = 1.4142135f * x23g_im;
                        float x19e_im = x19f_im + x23f_im;
                        float x23e_im = x19f_im - x23f_im;
                        float x17d_re = x17e_re - x19e_re;
                        float x19d_re = 1.847759f * x19e_re;
                        float x17c_re = x17d_re + x19d_re;
                        float x19c_re = x17d_re - x19d_re;
                        float x17d_im = x17e_im - x19e_im;
                        float x19d_im = 1.847759f * x19e_im;
                        float x17c_im = x17d_im + x19d_im;
                        float x19c_im = x17d_im - x19d_im;
                        float x21d_re = x21e_re - x23e_re;
                        float x23d_re = 0.76536685f * x23e_re;
                        float x21c_re = x21d_re + x23d_re;
                        float x23c_re = x21d_re - x23d_re;
                        float x21d_im = x21e_im - x23e_im;
                        float x23d_im = 0.76536685f * x23e_im;
                        float x21c_im = x21d_im + x23d_im;
                        float x23c_im = x21d_im - x23d_im;
                        float x25f_re = x25g_re - x31g_re;
                        float x29f_re = 1.4142135f * x29g_re;
                        float x25e_re = x25f_re + x29f_re;
                        float x29e_re = x25f_re - x29f_re;
                        float x25f_im = x25g_im - x31g_im;
                        float x29f_im = 1.4142135f * x29g_im;
                        float x25e_im = x25f_im + x29f_im;
                        float x29e_im = x25f_im - x29f_im;
                        float x26e_re = x26f_re - x30f_re;
                        float x30e_re = 1.4142135f * x30f_re;
                        float x26d_re = x26e_re + x30e_re;
                        float x30d_re = x26e_re - x30e_re;
                        float x26e_im = x26f_im - x30f_im;
                        float x30e_im = 1.4142135f * x30f_im;
                        float x26d_im = x26e_im + x30e_im;
                        float x30d_im = x26e_im - x30e_im;
                        float x27f_re = x27g_re - x29g_re;
                        float x31f_re = 1.4142135f * x31g_re;
                        float x27e_re = x27f_re + x31f_re;
                        float x31e_re = x27f_re - x31f_re;
                        float x27f_im = x27g_im - x29g_im;
                        float x31f_im = 1.4142135f * x31g_im;
                        float x27e_im = x27f_im + x31f_im;
                        float x31e_im = x27f_im - x31f_im;
                        float x25d_re = x25e_re - x27e_re;
                        float x27d_re = 1.847759f * x27e_re;
                        float x27c_re = x25d_re - x27d_re;
                        float x25c_re = x25d_re + x27d_re;
                        float x25d_im = x25e_im - x27e_im;
                        float x27d_im = 1.847759f * x27e_im;
                        float x27c_im = x25d_im - x27d_im;
                        float x25c_im = x25d_im + x27d_im;
                        float x29d_re = x29e_re - x31e_re;
                        float x31d_re = 0.76536685f * x31e_re;
                        float x29c_re = x29d_re + x31d_re;
                        float x31c_re = x29d_re - x31d_re;
                        float x29d_im = x29e_im - x31e_im;
                        float x31d_im = 0.76536685f * x31e_im;
                        float x29c_im = x29d_im + x31d_im;
                        float x31c_im = x29d_im - x31d_im;
                        float x20e_re = x20z_re - x28z_re;
                        float x28e_re = x20z_re + x28z_re;
                        float x20e_im = x20z_im - x28z_im;
                        float x28e_im = x20z_im + x28z_im;
                        float x28d_im = 0.70710677f * x28e_im;
                        float x24c_re = x24z_im + x28d_im;
                        float x28c_re = x24z_im - x28d_im;
                        float x26c_im = 0.9238795f * x26d_im;
                        float x24b_re = x24c_re + x26c_im;
                        float x26b_re = x24c_re - x26c_im;
                        float x25b_im = 0.98078525f * x25c_im;
                        float x24a_re = x24b_re + x25b_im;
                        float x25a_re = x24b_re - x25b_im;
                        float x28d_re = 0.70710677f * x28e_re;
                        float x24c_im = x28d_re + x24z_re;
                        float x28c_im = x28d_re - x24z_re;
                        float x26c_re = 0.9238795f * x26d_re;
                        float x24b_im = x26c_re + x24c_im;
                        float x26b_im = x26c_re - x24c_im;
                        float x25b_re = 0.98078525f * x25c_re;
                        float x24a_im = x25b_re + x24b_im;
                        float x25a_im = x25b_re - x24b_im;
                        float x20d_re = 0.70710677f * x20e_re;
                        float x20c_re = x16z_re - x20d_re;
                        float x16c_re = x16z_re + x20d_re;
                        float x18c_re = 0.9238795f * x18d_re;
                        float x16b_re = x16c_re + x18c_re;
                        float x18b_re = x16c_re - x18c_re;
                        float x17b_re = 0.98078525f * x17c_re;
                        float x16a_re = x16b_re + x17b_re;
                        float x17a_re = x16b_re - x17b_re;
                        float x20d_im = 0.70710677f * x20e_im;
                        float x20c_im = x16z_im - x20d_im;
                        float x16c_im = x16z_im + x20d_im;
                        float x18c_im = 0.9238795f * x18d_im;
                        float x16b_im = x16c_im + x18c_im;
                        float x18b_im = x16c_im - x18c_im;
                        float x17b_im = 0.98078525f * x17c_im;
                        float x16a_im = x16b_im + x17b_im;
                        float x17a_im = x16b_im - x17b_im;
                        out[1 * ospan + ll] = x16a_re + x24a_re;
                        out[1 * ospan + ll + 1] = x16a_im - x24a_im;
                        out[31 * ospan + ll] = x16a_re - x24a_re;
                        out[31 * ospan + ll + 1] = x16a_im + x24a_im;
                        out[15 * ospan + ll] = x17a_re - x25a_re;
                        out[15 * ospan + ll + 1] = x17a_im - x25a_im;
                        out[17 * ospan + ll] = x17a_re + x25a_re;
                        out[17 * ospan + ll + 1] = x17a_im + x25a_im;
                        float x27b_im = 0.19509032f * x27c_im;
                        float x26a_re = x26b_re + x27b_im;
                        float x27a_re = x26b_re - x27b_im;
                        float x27b_re = 0.19509032f * x27c_re;
                        float x26a_im = x26b_im - x27b_re;
                        float x27a_im = x26b_im + x27b_re;
                        float x19b_re = 0.19509032f * x19c_re;
                        float x18a_re = x18b_re + x19b_re;
                        float x19a_re = x18b_re - x19b_re;
                        float x19b_im = 0.19509032f * x19c_im;
                        float x18a_im = x18b_im + x19b_im;
                        float x19a_im = x18b_im - x19b_im;
                        out[7 * ospan + ll] = x18a_re - x26a_re;
                        out[7 * ospan + ll + 1] = x18a_im - x26a_im;
                        out[25 * ospan + ll] = x18a_re + x26a_re;
                        out[25 * ospan + ll + 1] = x18a_im + x26a_im;
                        out[9 * ospan + ll] = x19a_re + x27a_re;
                        out[9 * ospan + ll + 1] = x19a_im + x27a_im;
                        out[23 * ospan + ll] = x19a_re - x27a_re;
                        out[23 * ospan + ll + 1] = x19a_im - x27a_im;
                        float x30c_im = 0.38268343f * x30d_im;
                        float x28b_re = x28c_re + x30c_im;
                        float x30b_re = x28c_re - x30c_im;
                        float x29b_im = 0.8314696f * x29c_im;
                        float x28a_re = x28b_re + x29b_im;
                        float x29a_re = x28b_re - x29b_im;
                        float x30c_re = 0.38268343f * x30d_re;
                        float x28b_im = x28c_im - x30c_re;
                        float x30b_im = x28c_im + x30c_re;
                        float x29b_re = 0.8314696f * x29c_re;
                        float x28a_im = x28b_im - x29b_re;
                        float x29a_im = x28b_im + x29b_re;
                        float x22c_re = 0.38268343f * x22d_re;
                        float x20b_re = x20c_re + x22c_re;
                        float x22b_re = x20c_re - x22c_re;
                        float x21b_re = 0.8314696f * x21c_re;
                        float x20a_re = x20b_re + x21b_re;
                        float x21a_re = x20b_re - x21b_re;
                        float x22c_im = 0.38268343f * x22d_im;
                        float x20b_im = x20c_im + x22c_im;
                        float x22b_im = x20c_im - x22c_im;
                        float x21b_im = 0.8314696f * x21c_im;
                        float x20a_im = x20b_im + x21b_im;
                        float x21a_im = x20b_im - x21b_im;
                        out[3 * ospan + ll] = x20a_re - x28a_re;
                        out[3 * ospan + ll + 1] = x20a_im - x28a_im;
                        out[29 * ospan + ll] = x20a_re + x28a_re;
                        out[29 * ospan + ll + 1] = x20a_im + x28a_im;
                        out[13 * ospan + ll] = x21a_re + x29a_re;
                        out[13 * ospan + ll + 1] = x21a_im + x29a_im;
                        out[19 * ospan + ll] = x21a_re - x29a_re;
                        out[19 * ospan + ll + 1] = x21a_im - x29a_im;
                        float x23b_re = 0.55557024f * x23c_re;
                        float x22a_re = x22b_re + x23b_re;
                        float x23a_re = x22b_re - x23b_re;
                        float x23b_im = 0.55557024f * x23c_im;
                        float x22a_im = x22b_im + x23b_im;
                        float x23a_im = x22b_im - x23b_im;
                        float x31b_im = 0.55557024f * x31c_im;
                        float x30a_re = x30b_re + x31b_im;
                        float x31a_re = x30b_re - x31b_im;
                        float x31b_re = 0.55557024f * x31c_re;
                        float x31a_im = x30b_im + x31b_re;
                        float x30a_im = x30b_im - x31b_re;
                        out[5 * ospan + ll] = x22a_re + x30a_re;
                        out[5 * ospan + ll + 1] = x22a_im + x30a_im;
                        out[27 * ospan + ll] = x22a_re - x30a_re;
                        out[27 * ospan + ll + 1] = x22a_im - x30a_im;
                        out[11 * ospan + ll] = x23a_re - x31a_re;
                        out[11 * ospan + ll + 1] = x23a_im - x31a_im;
                        out[21 * ospan + ll] = x23a_re + x31a_re;
                        out[21 * ospan + ll + 1] = x23a_im + x31a_im;
                    }
                }
                break;
            }
            default: {
                if (ispan != ospan / 2) {
                    for (int kk = 0; kk < mm; kk += 2) {
                        float o00_re = twids[1 * kk * ispan];
                        float o00_im = twids[1 * kk * ispan + 1];
                        float o01_re = twids[2 * kk * ispan];
                        float o01_im = twids[2 * kk * ispan + 1];
                        float o02_re = twids[3 * kk * ispan];
                        float o02_im = twids[3 * kk * ispan + 1];
                        float o03_re = twids[4 * kk * ispan];
                        float o03_im = twids[4 * kk * ispan + 1];
                        float o04_re = twids[5 * kk * ispan];
                        float o04_im = twids[5 * kk * ispan + 1];
                        float o05_re = twids[6 * kk * ispan];
                        float o05_im = twids[6 * kk * ispan + 1];
                        float o06_re = twids[7 * kk * ispan];
                        float o06_im = twids[7 * kk * ispan + 1];
                        float o07_re = twids[8 * kk * ispan];
                        float o07_im = twids[8 * kk * ispan + 1];
                        float o08_re = twids[9 * kk * ispan];
                        float o08_im = twids[9 * kk * ispan + 1];
                        float o09_re = twids[10 * kk * ispan];
                        float o09_im = twids[10 * kk * ispan + 1];
                        float o10_re = twids[11 * kk * ispan];
                        float o10_im = twids[11 * kk * ispan + 1];
                        float o11_re = twids[12 * kk * ispan];
                        float o11_im = twids[12 * kk * ispan + 1];
                        float o12_re = twids[13 * kk * ispan];
                        float o12_im = twids[13 * kk * ispan + 1];
                        float o13_re = twids[14 * kk * ispan];
                        float o13_im = twids[14 * kk * ispan + 1];
                        float o14_re = twids[15 * kk * ispan];
                        float o14_im = twids[15 * kk * ispan + 1];
                        float o15_re = twids[16 * kk * ispan];
                        float o15_im = twids[16 * kk * ispan + 1];
                        float o16_re = twids[17 * kk * ispan];
                        float o16_im = twids[17 * kk * ispan + 1];
                        float o17_re = twids[18 * kk * ispan];
                        float o17_im = twids[18 * kk * ispan + 1];
                        float o18_re = twids[19 * kk * ispan];
                        float o18_im = twids[19 * kk * ispan + 1];
                        float o19_re = twids[20 * kk * ispan];
                        float o19_im = twids[20 * kk * ispan + 1];
                        float o20_re = twids[21 * kk * ispan];
                        float o20_im = twids[21 * kk * ispan + 1];
                        float o21_re = twids[22 * kk * ispan];
                        float o21_im = twids[22 * kk * ispan + 1];
                        float o22_re = twids[23 * kk * ispan];
                        float o22_im = twids[23 * kk * ispan + 1];
                        float o23_re = twids[24 * kk * ispan];
                        float o23_im = twids[24 * kk * ispan + 1];
                        float o24_re = twids[25 * kk * ispan];
                        float o24_im = twids[25 * kk * ispan + 1];
                        float o25_re = twids[26 * kk * ispan];
                        float o25_im = twids[26 * kk * ispan + 1];
                        float o26_re = twids[27 * kk * ispan];
                        float o26_im = twids[27 * kk * ispan + 1];
                        float o27_re = twids[28 * kk * ispan];
                        float o27_im = twids[28 * kk * ispan + 1];
                        float o28_re = twids[29 * kk * ispan];
                        float o28_im = twids[29 * kk * ispan + 1];
                        float o29_re = twids[30 * kk * ispan];
                        float o29_im = twids[30 * kk * ispan + 1];
                        float o30_re = twids[31 * kk * ispan];
                        float o30_im = twids[31 * kk * ispan + 1];
                        int istep = kk * 32;
                        for (int ll = 0; ll < ispan * 2; ll += 2) {
                            float t00_re = in[ispan * (istep + 0) + ll];
                            float t00_im = in[ispan * (istep + 0) + ll + 1];
                            float t01_re = o00_re * in[ispan * (istep + 2) + ll] - o00_im * in[ispan * (istep + 2) + ll + 1];
                            float t01_im = o00_re * in[ispan * (istep + 2) + ll + 1] + o00_im * in[ispan * (istep + 2) + ll];
                            float t02_re = o01_re * in[ispan * (istep + 4) + ll] - o01_im * in[ispan * (istep + 4) + ll + 1];
                            float t02_im = o01_re * in[ispan * (istep + 4) + ll + 1] + o01_im * in[ispan * (istep + 4) + ll];
                            float t03_re = o02_re * in[ispan * (istep + 6) + ll] - o02_im * in[ispan * (istep + 6) + ll + 1];
                            float t03_im = o02_re * in[ispan * (istep + 6) + ll + 1] + o02_im * in[ispan * (istep + 6) + ll];
                            float t04_re = o03_re * in[ispan * (istep + 8) + ll] - o03_im * in[ispan * (istep + 8) + ll + 1];
                            float t04_im = o03_re * in[ispan * (istep + 8) + ll + 1] + o03_im * in[ispan * (istep + 8) + ll];
                            float t05_re = o04_re * in[ispan * (istep + 10) + ll] - o04_im * in[ispan * (istep + 10) + ll + 1];
                            float t05_im = o04_re * in[ispan * (istep + 10) + ll + 1] + o04_im * in[ispan * (istep + 10) + ll];
                            float t06_re = o05_re * in[ispan * (istep + 12) + ll] - o05_im * in[ispan * (istep + 12) + ll + 1];
                            float t06_im = o05_re * in[ispan * (istep + 12) + ll + 1] + o05_im * in[ispan * (istep + 12) + ll];
                            float t07_re = o06_re * in[ispan * (istep + 14) + ll] - o06_im * in[ispan * (istep + 14) + ll + 1];
                            float t07_im = o06_re * in[ispan * (istep + 14) + ll + 1] + o06_im * in[ispan * (istep + 14) + ll];
                            float t08_re = o07_re * in[ispan * (istep + 16) + ll] - o07_im * in[ispan * (istep + 16) + ll + 1];
                            float t08_im = o07_re * in[ispan * (istep + 16) + ll + 1] + o07_im * in[ispan * (istep + 16) + ll];
                            float t09_re = o08_re * in[ispan * (istep + 18) + ll] - o08_im * in[ispan * (istep + 18) + ll + 1];
                            float t09_im = o08_re * in[ispan * (istep + 18) + ll + 1] + o08_im * in[ispan * (istep + 18) + ll];
                            float t10_re = o09_re * in[ispan * (istep + 20) + ll] - o09_im * in[ispan * (istep + 20) + ll + 1];
                            float t10_im = o09_re * in[ispan * (istep + 20) + ll + 1] + o09_im * in[ispan * (istep + 20) + ll];
                            float t11_re = o10_re * in[ispan * (istep + 22) + ll] - o10_im * in[ispan * (istep + 22) + ll + 1];
                            float t11_im = o10_re * in[ispan * (istep + 22) + ll + 1] + o10_im * in[ispan * (istep + 22) + ll];
                            float t12_re = o11_re * in[ispan * (istep + 24) + ll] - o11_im * in[ispan * (istep + 24) + ll + 1];
                            float t12_im = o11_re * in[ispan * (istep + 24) + ll + 1] + o11_im * in[ispan * (istep + 24) + ll];
                            float t13_re = o12_re * in[ispan * (istep + 26) + ll] - o12_im * in[ispan * (istep + 26) + ll + 1];
                            float t13_im = o12_re * in[ispan * (istep + 26) + ll + 1] + o12_im * in[ispan * (istep + 26) + ll];
                            float t14_re = o13_re * in[ispan * (istep + 28) + ll] - o13_im * in[ispan * (istep + 28) + ll + 1];
                            float t14_im = o13_re * in[ispan * (istep + 28) + ll + 1] + o13_im * in[ispan * (istep + 28) + ll];
                            float t15_re = o14_re * in[ispan * (istep + 30) + ll] - o14_im * in[ispan * (istep + 30) + ll + 1];
                            float t15_im = o14_re * in[ispan * (istep + 30) + ll + 1] + o14_im * in[ispan * (istep + 30) + ll];
                            float t16_re = o15_re * in[ispan * (istep + 32) + ll] - o15_im * in[ispan * (istep + 32) + ll + 1];
                            float t16_im = o15_re * in[ispan * (istep + 32) + ll + 1] + o15_im * in[ispan * (istep + 32) + ll];
                            float t17_re = o16_re * in[ispan * (istep + 34) + ll] - o16_im * in[ispan * (istep + 34) + ll + 1];
                            float t17_im = o16_re * in[ispan * (istep + 34) + ll + 1] + o16_im * in[ispan * (istep + 34) + ll];
                            float t18_re = o17_re * in[ispan * (istep + 36) + ll] - o17_im * in[ispan * (istep + 36) + ll + 1];
                            float t18_im = o17_re * in[ispan * (istep + 36) + ll + 1] + o17_im * in[ispan * (istep + 36) + ll];
                            float t19_re = o18_re * in[ispan * (istep + 38) + ll] - o18_im * in[ispan * (istep + 38) + ll + 1];
                            float t19_im = o18_re * in[ispan * (istep + 38) + ll + 1] + o18_im * in[ispan * (istep + 38) + ll];
                            float t20_re = o19_re * in[ispan * (istep + 40) + ll] - o19_im * in[ispan * (istep + 40) + ll + 1];
                            float t20_im = o19_re * in[ispan * (istep + 40) + ll + 1] + o19_im * in[ispan * (istep + 40) + ll];
                            float t21_re = o20_re * in[ispan * (istep + 42) + ll] - o20_im * in[ispan * (istep + 42) + ll + 1];
                            float t21_im = o20_re * in[ispan * (istep + 42) + ll + 1] + o20_im * in[ispan * (istep + 42) + ll];
                            float t22_re = o21_re * in[ispan * (istep + 44) + ll] - o21_im * in[ispan * (istep + 44) + ll + 1];
                            float t22_im = o21_re * in[ispan * (istep + 44) + ll + 1] + o21_im * in[ispan * (istep + 44) + ll];
                            float t23_re = o22_re * in[ispan * (istep + 46) + ll] - o22_im * in[ispan * (istep + 46) + ll + 1];
                            float t23_im = o22_re * in[ispan * (istep + 46) + ll + 1] + o22_im * in[ispan * (istep + 46) + ll];
                            float t24_re = o23_re * in[ispan * (istep + 48) + ll] - o23_im * in[ispan * (istep + 48) + ll + 1];
                            float t24_im = o23_re * in[ispan * (istep + 48) + ll + 1] + o23_im * in[ispan * (istep + 48) + ll];
                            float t25_re = o24_re * in[ispan * (istep + 50) + ll] - o24_im * in[ispan * (istep + 50) + ll + 1];
                            float t25_im = o24_re * in[ispan * (istep + 50) + ll + 1] + o24_im * in[ispan * (istep + 50) + ll];
                            float t26_re = o25_re * in[ispan * (istep + 52) + ll] - o25_im * in[ispan * (istep + 52) + ll + 1];
                            float t26_im = o25_re * in[ispan * (istep + 52) + ll + 1] + o25_im * in[ispan * (istep + 52) + ll];
                            float t27_re = o26_re * in[ispan * (istep + 54) + ll] - o26_im * in[ispan * (istep + 54) + ll + 1];
                            float t27_im = o26_re * in[ispan * (istep + 54) + ll + 1] + o26_im * in[ispan * (istep + 54) + ll];
                            float t28_re = o27_re * in[ispan * (istep + 56) + ll] - o27_im * in[ispan * (istep + 56) + ll + 1];
                            float t28_im = o27_re * in[ispan * (istep + 56) + ll + 1] + o27_im * in[ispan * (istep + 56) + ll];
                            float t29_re = o28_re * in[ispan * (istep + 58) + ll] - o28_im * in[ispan * (istep + 58) + ll + 1];
                            float t29_im = o28_re * in[ispan * (istep + 58) + ll + 1] + o28_im * in[ispan * (istep + 58) + ll];
                            float t30_re = o29_re * in[ispan * (istep + 60) + ll] - o29_im * in[ispan * (istep + 60) + ll + 1];
                            float t30_im = o29_re * in[ispan * (istep + 60) + ll + 1] + o29_im * in[ispan * (istep + 60) + ll];
                            float t31_re = o30_re * in[ispan * (istep + 62) + ll] - o30_im * in[ispan * (istep + 62) + ll + 1];
                            float t31_im = o30_re * in[ispan * (istep + 62) + ll + 1] + o30_im * in[ispan * (istep + 62) + ll];
                            float x00z_re = t00_re + t16_re;
                            float x16z_re = t00_re - t16_re;
                            float x00z_im = t00_im + t16_im;
                            float x16z_im = t00_im - t16_im;
                            float x01z_re = t01_re + t17_re;
                            float x17z_re = t01_re - t17_re;
                            float x01z_im = t01_im + t17_im;
                            float x17z_im = t01_im - t17_im;
                            float x02z_re = t02_re + t18_re;
                            float x18z_re = t02_re - t18_re;
                            float x02z_im = t02_im + t18_im;
                            float x18z_im = t02_im - t18_im;
                            float x03z_re = t03_re + t19_re;
                            float x19z_re = t03_re - t19_re;
                            float x03z_im = t03_im + t19_im;
                            float x19z_im = t03_im - t19_im;
                            float x04z_re = t04_re + t20_re;
                            float x20z_re = t04_re - t20_re;
                            float x04z_im = t04_im + t20_im;
                            float x20z_im = t04_im - t20_im;
                            float x05z_re = t05_re + t21_re;
                            float x21z_re = t05_re - t21_re;
                            float x05z_im = t05_im + t21_im;
                            float x21z_im = t05_im - t21_im;
                            float x06z_re = t06_re + t22_re;
                            float x22z_re = t06_re - t22_re;
                            float x06z_im = t06_im + t22_im;
                            float x22z_im = t06_im - t22_im;
                            float x07z_re = t07_re + t23_re;
                            float x23z_re = t07_re - t23_re;
                            float x07z_im = t07_im + t23_im;
                            float x23z_im = t07_im - t23_im;
                            float x08z_re = t08_re + t24_re;
                            float x24z_re = t08_re - t24_re;
                            float x08z_im = t08_im + t24_im;
                            float x24z_im = t08_im - t24_im;
                            float x09z_re = t09_re + t25_re;
                            float x25z_re = t09_re - t25_re;
                            float x09z_im = t09_im + t25_im;
                            float x25z_im = t09_im - t25_im;
                            float x10z_re = t10_re + t26_re;
                            float x26z_re = t10_re - t26_re;
                            float x10z_im = t10_im + t26_im;
                            float x26z_im = t10_im - t26_im;
                            float x11z_re = t11_re + t27_re;
                            float x27z_re = t11_re - t27_re;
                            float x11z_im = t11_im + t27_im;
                            float x27z_im = t11_im - t27_im;
                            float x12z_re = t12_re + t28_re;
                            float x28z_re = t12_re - t28_re;
                            float x12z_im = t12_im + t28_im;
                            float x28z_im = t12_im - t28_im;
                            float x13z_re = t13_re + t29_re;
                            float x29z_re = t13_re - t29_re;
                            float x13z_im = t13_im + t29_im;
                            float x29z_im = t13_im - t29_im;
                            float x14z_re = t14_re + t30_re;
                            float x30z_re = t14_re - t30_re;
                            float x14z_im = t14_im + t30_im;
                            float x30z_im = t14_im - t30_im;
                            float x15z_re = t15_re + t31_re;
                            float x31z_re = t15_re - t31_re;
                            float x15z_im = t15_im + t31_im;
                            float x31z_im = t15_im - t31_im;
                            float x00c_re = x00z_re + x08z_re;
                            float x08c_re = x00z_re - x08z_re;
                            float x00c_im = x00z_im + x08z_im;
                            float x08c_im = x00z_im - x08z_im;
                            float x02c_re = x02z_re + x10z_re;
                            float x10e_re = x02z_re - x10z_re;
                            float x02c_im = x02z_im + x10z_im;
                            float x10e_im = x02z_im - x10z_im;
                            float x04c_re = x04z_re + x12z_re;
                            float x12c_re = x04z_re - x12z_re;
                            float x04c_im = x04z_im + x12z_im;
                            float x12c_im = x04z_im - x12z_im;
                            float x06c_re = x06z_re + x14z_re;
                            float x14e_re = x06z_re - x14z_re;
                            float x06c_im = x06z_im + x14z_im;
                            float x14e_im = x06z_im - x14z_im;
                            float x00b_re = x00c_re + x04c_re;
                            float x04b_re = x00c_re - x04c_re;
                            float x02b_re = x02c_re + x06c_re;
                            float x06b_re = x02c_re - x06c_re;
                            float x00a_re = x00b_re + x02b_re;
                            float x02a_re = x00b_re - x02b_re;
                            float x00b_im = x00c_im + x04c_im;
                            float x04b_im = x00c_im - x04c_im;
                            float x02b_im = x02c_im + x06c_im;
                            float x06b_im = x02c_im - x06c_im;
                            float x00a_im = x00b_im + x02b_im;
                            float x02a_im = x00b_im - x02b_im;
                            float x01c_re = x01z_re + x09z_re;
                            float x09f_re = x01z_re - x09z_re;
                            float x01c_im = x01z_im + x09z_im;
                            float x09f_im = x01z_im - x09z_im;
                            float x03c_re = x03z_re + x11z_re;
                            float x11f_re = x03z_re - x11z_re;
                            float x03c_im = x03z_im + x11z_im;
                            float x11f_im = x03z_im - x11z_im;
                            float x05e_re = x05z_re + x13z_re;
                            float x13e_re = x05z_re - x13z_re;
                            float x05e_im = x05z_im + x13z_im;
                            float x13e_im = x05z_im - x13z_im;
                            float x07e_re = x07z_re + x15z_re;
                            float x15f_re = x07z_re - x15z_re;
                            float x07e_im = x07z_im + x15z_im;
                            float x15f_im = x07z_im - x15z_im;
                            float x01b_re = x01c_re + x05e_re;
                            float x05d_re = x01c_re - x05e_re;
                            float x03b_re = x03c_re + x07e_re;
                            float x07d_re = x03c_re - x07e_re;
                            float x01a_re = x01b_re + x03b_re;
                            float x03a_re = x01b_re - x03b_re;
                            float x01b_im = x01c_im + x05e_im;
                            float x05d_im = x01c_im - x05e_im;
                            float x03b_im = x03c_im + x07e_im;
                            float x07d_im = x03c_im - x07e_im;
                            float x01a_im = x01b_im + x03b_im;
                            float x03a_im = x01b_im - x03b_im;
                            out[kk * ispan + 0 * ospan + ll] = x00a_re + x01a_re;
                            out[kk * ispan + 0 * ospan + ll + 1] = x00a_im + x01a_im;
                            out[kk * ispan + 16 * ospan + ll] = x00a_re - x01a_re;
                            out[kk * ispan + 16 * ospan + ll + 1] = x00a_im - x01a_im;
                            out[kk * ispan + 8 * ospan + ll] = x02a_re - x03a_im;
                            out[kk * ispan + 8 * ospan + ll + 1] = x02a_im + x03a_re;
                            out[kk * ispan + 24 * ospan + ll] = x02a_re + x03a_im;
                            out[kk * ispan + 24 * ospan + ll + 1] = x02a_im - x03a_re;
                            float x07c_re = x05d_re + x07d_re;
                            float x05c_re = x05d_re - x07d_re;
                            float x05b_re = 0.70710677f * x05c_re;
                            float x04a_re = x04b_re + x05b_re;
                            float x05a_re = x04b_re - x05b_re;
                            float x07b_re = 0.70710677f * x07c_re;
                            float x06a_im = x06b_re + x07b_re;
                            float x07a_im = x06b_re - x07b_re;
                            float x07c_im = x05d_im + x07d_im;
                            float x05c_im = x05d_im - x07d_im;
                            float x05b_im = 0.70710677f * x05c_im;
                            float x04a_im = x04b_im + x05b_im;
                            float x05a_im = x04b_im - x05b_im;
                            float x07b_im = 0.70710677f * x07c_im;
                            float x06a_re = x06b_im + x07b_im;
                            float x07a_re = x06b_im - x07b_im;
                            out[kk * ispan + 4 * ospan + ll] = x04a_re - x06a_re;
                            out[kk * ispan + 4 * ospan + ll + 1] = x04a_im + x06a_im;
                            out[kk * ispan + 28 * ospan + ll] = x04a_re + x06a_re;
                            out[kk * ispan + 28 * ospan + ll + 1] = x04a_im - x06a_im;
                            out[kk * ispan + 12 * ospan + ll] = x05a_re + x07a_re;
                            out[kk * ispan + 12 * ospan + ll + 1] = x05a_im - x07a_im;
                            out[kk * ispan + 20 * ospan + ll] = x05a_re - x07a_re;
                            out[kk * ispan + 20 * ospan + ll + 1] = x05a_im + x07a_im;
                            float x09e_re = x09f_re - x15f_re;
                            float x15e_re = x09f_re + x15f_re;
                            float x09e_im = x09f_im - x15f_im;
                            float x15e_im = x09f_im + x15f_im;
                            float x11e_re = x11f_re - x13e_re;
                            float x13d_re = x11f_re + x13e_re - x15e_re;
                            float x15d_re = 1.4142135f * x15e_re;
                            float x13c_re = x13d_re + x15d_re;
                            float x15c_re = x13d_re - x15d_re;
                            float x11e_im = x11f_im - x13e_im;
                            float x13d_im = x11f_im + x13e_im - x15e_im;
                            float x15d_im = 1.4142135f * x15e_im;
                            float x13c_im = x13d_im + x15d_im;
                            float x15c_im = x13d_im - x15d_im;
                            float x14d_re = x10e_re + x14e_re;
                            float x10d_re = x10e_re - x14e_re;
                            float x10c_re = 0.70710677f * x10d_re;
                            float x08b_re = x08c_re + x10c_re;
                            float x10b_re = x08c_re - x10c_re;
                            float x14c_re = 0.70710677f * x14d_re;
                            float x14d_im = x10e_im + x14e_im;
                            float x10d_im = x10e_im - x14e_im;
                            float x10c_im = 0.70710677f * x10d_im;
                            float x08b_im = x08c_im + x10c_im;
                            float x10b_im = x08c_im - x10c_im;
                            float x14c_im = 0.70710677f * x14d_im;
                            float x12b_im = -x12c_re - x14c_re;
                            float x14b_im = -x12c_re + x14c_re;
                            float x12b_re = x12c_im + x14c_im;
                            float x14b_re = x12c_im - x14c_im;
                            float x09d_re = x09e_re - x11e_re;
                            float x11d_re = 1.4142135f * x11e_re;
                            float x09c_re = x09d_re + x11d_re;
                            float x11c_re = x09d_re - x11d_re;
                            float x09b_re = 0.9238795f * x09c_re;
                            float x08a_re = x08b_re + x09b_re;
                            float x09a_re = x08b_re - x09b_re;
                            float x09d_im = x09e_im - x11e_im;
                            float x11d_im = 1.4142135f * x11e_im;
                            float x09c_im = x09d_im + x11d_im;
                            float x11c_im = x09d_im - x11d_im;
                            float x09b_im = 0.9238795f * x09c_im;
                            float x08a_im = x08b_im + x09b_im;
                            float x09a_im = x08b_im - x09b_im;
                            float x13b_im = 0.9238795f * x13c_im;
                            float x12a_re = x12b_re + x13b_im;
                            float x13a_re = x12b_re - x13b_im;
                            float x13b_re = 0.9238795f * x13c_re;
                            float x12a_im = x12b_im - x13b_re;
                            float x13a_im = x12b_im + x13b_re;
                            out[kk * ispan + 2 * ospan + ll] = x08a_re - x12a_re;
                            out[kk * ispan + 2 * ospan + ll + 1] = x08a_im - x12a_im;
                            out[kk * ispan + 30 * ospan + ll] = x08a_re + x12a_re;
                            out[kk * ispan + 30 * ospan + ll + 1] = x08a_im + x12a_im;
                            out[kk * ispan + 14 * ospan + ll] = x09a_re + x13a_re;
                            out[kk * ispan + 14 * ospan + ll + 1] = x09a_im + x13a_im;
                            out[kk * ispan + 18 * ospan + ll] = x09a_re - x13a_re;
                            out[kk * ispan + 18 * ospan + ll + 1] = x09a_im - x13a_im;
                            float x11b_re = 0.38268343f * x11c_re;
                            float x10a_re = x10b_re + x11b_re;
                            float x11a_re = x10b_re - x11b_re;
                            float x11b_im = 0.38268343f * x11c_im;
                            float x10a_im = x10b_im + x11b_im;
                            float x11a_im = x10b_im - x11b_im;
                            float x15b_im = 0.38268343f * x15c_im;
                            float x14a_re = x14b_re + x15b_im;
                            float x15a_re = x14b_re - x15b_im;
                            float x15b_re = 0.38268343f * x15c_re;
                            float x14a_im = x14b_im - x15b_re;
                            float x15a_im = x14b_im + x15b_re;
                            out[kk * ispan + 6 * ospan + ll] = x10a_re + x14a_re;
                            out[kk * ispan + 6 * ospan + ll + 1] = x10a_im + x14a_im;
                            out[kk * ispan + 26 * ospan + ll] = x10a_re - x14a_re;
                            out[kk * ispan + 26 * ospan + ll + 1] = x10a_im - x14a_im;
                            out[kk * ispan + 10 * ospan + ll] = x11a_re - x15a_re;
                            out[kk * ispan + 10 * ospan + ll + 1] = x11a_im - x15a_im;
                            out[kk * ispan + 22 * ospan + ll] = x11a_re + x15a_re;
                            out[kk * ispan + 22 * ospan + ll + 1] = x11a_im + x15a_im;
                            float x17g_re = x17z_re - x31z_re;
                            float x31g_re = x17z_re + x31z_re;
                            float x17g_im = x17z_im - x31z_im;
                            float x31g_im = x17z_im + x31z_im;
                            float x21g_re = x21z_re - x27z_re;
                            float x27g_re = x21z_re + x27z_re;
                            float x21g_im = x21z_im - x27z_im;
                            float x27g_im = x21z_im + x27z_im;
                            float x23g_re = x23z_re - x25z_re;
                            float x25g_re = x23z_re + x25z_re;
                            float x23g_im = x23z_im - x25z_im;
                            float x25g_im = x23z_im + x25z_im;
                            float x17f_re = x17g_re - x23g_re;
                            float x21f_re = 1.4142135f * x21g_re;
                            float x17e_re = x17f_re + x21f_re;
                            float x21e_re = x17f_re - x21f_re;
                            float x17f_im = x17g_im - x23g_im;
                            float x21f_im = 1.4142135f * x21g_im;
                            float x17e_im = x17f_im + x21f_im;
                            float x21e_im = x17f_im - x21f_im;
                            float x18f_re = x18z_re - x30z_re;
                            float x30f_re = x18z_re + x30z_re;
                            float x18f_im = x18z_im - x30z_im;
                            float x30f_im = x18z_im + x30z_im;
                            float x22f_re = x22z_re - x26z_re;
                            float x26f_re = x22z_re + x26z_re;
                            float x22f_im = x22z_im - x26z_im;
                            float x26f_im = x22z_im + x26z_im;
                            float x18e_re = x18f_re - x22f_re;
                            float x22e_re = 1.4142135f * x22f_re;
                            float x18d_re = x18e_re + x22e_re;
                            float x22d_re = x18e_re - x22e_re;
                            float x18e_im = x18f_im - x22f_im;
                            float x22e_im = 1.4142135f * x22f_im;
                            float x18d_im = x18e_im + x22e_im;
                            float x22d_im = x18e_im - x22e_im;
                            float x19g_im = x19z_im - x29z_im;
                            float x29g_im = x19z_im + x29z_im;
                            float x19g_re = x19z_re - x29z_re;
                            float x29g_re = x19z_re + x29z_re;
                            float x19f_re = x19g_re - x21g_re;
                            float x23f_re = 1.4142135f * x23g_re;
                            float x19e_re = x19f_re + x23f_re;
                            float x23e_re = x19f_re - x23f_re;
                            float x19f_im = x19g_im - x21g_im;
                            float x23f_im = 1.4142135f * x23g_im;
                            float x19e_im = x19f_im + x23f_im;
                            float x23e_im = x19f_im - x23f_im;
                            float x17d_re = x17e_re - x19e_re;
                            float x19d_re = 1.847759f * x19e_re;
                            float x17c_re = x17d_re + x19d_re;
                            float x19c_re = x17d_re - x19d_re;
                            float x17d_im = x17e_im - x19e_im;
                            float x19d_im = 1.847759f * x19e_im;
                            float x17c_im = x17d_im + x19d_im;
                            float x19c_im = x17d_im - x19d_im;
                            float x21d_re = x21e_re - x23e_re;
                            float x23d_re = 0.76536685f * x23e_re;
                            float x21c_re = x21d_re + x23d_re;
                            float x23c_re = x21d_re - x23d_re;
                            float x21d_im = x21e_im - x23e_im;
                            float x23d_im = 0.76536685f * x23e_im;
                            float x21c_im = x21d_im + x23d_im;
                            float x23c_im = x21d_im - x23d_im;
                            float x25f_re = x25g_re - x31g_re;
                            float x29f_re = 1.4142135f * x29g_re;
                            float x25e_re = x25f_re + x29f_re;
                            float x29e_re = x25f_re - x29f_re;
                            float x25f_im = x25g_im - x31g_im;
                            float x29f_im = 1.4142135f * x29g_im;
                            float x25e_im = x25f_im + x29f_im;
                            float x29e_im = x25f_im - x29f_im;
                            float x26e_re = x26f_re - x30f_re;
                            float x30e_re = 1.4142135f * x30f_re;
                            float x26d_re = x26e_re + x30e_re;
                            float x30d_re = x26e_re - x30e_re;
                            float x26e_im = x26f_im - x30f_im;
                            float x30e_im = 1.4142135f * x30f_im;
                            float x26d_im = x26e_im + x30e_im;
                            float x30d_im = x26e_im - x30e_im;
                            float x27f_re = x27g_re - x29g_re;
                            float x31f_re = 1.4142135f * x31g_re;
                            float x27e_re = x27f_re + x31f_re;
                            float x31e_re = x27f_re - x31f_re;
                            float x27f_im = x27g_im - x29g_im;
                            float x31f_im = 1.4142135f * x31g_im;
                            float x27e_im = x27f_im + x31f_im;
                            float x31e_im = x27f_im - x31f_im;
                            float x25d_re = x25e_re - x27e_re;
                            float x27d_re = 1.847759f * x27e_re;
                            float x27c_re = x25d_re - x27d_re;
                            float x25c_re = x25d_re + x27d_re;
                            float x25d_im = x25e_im - x27e_im;
                            float x27d_im = 1.847759f * x27e_im;
                            float x27c_im = x25d_im - x27d_im;
                            float x25c_im = x25d_im + x27d_im;
                            float x29d_re = x29e_re - x31e_re;
                            float x31d_re = 0.76536685f * x31e_re;
                            float x29c_re = x29d_re + x31d_re;
                            float x31c_re = x29d_re - x31d_re;
                            float x29d_im = x29e_im - x31e_im;
                            float x31d_im = 0.76536685f * x31e_im;
                            float x29c_im = x29d_im + x31d_im;
                            float x31c_im = x29d_im - x31d_im;
                            float x20e_re = x20z_re - x28z_re;
                            float x28e_re = x20z_re + x28z_re;
                            float x20e_im = x20z_im - x28z_im;
                            float x28e_im = x20z_im + x28z_im;
                            float x28d_im = 0.70710677f * x28e_im;
                            float x24c_re = x24z_im + x28d_im;
                            float x28c_re = x24z_im - x28d_im;
                            float x26c_im = 0.9238795f * x26d_im;
                            float x24b_re = x24c_re + x26c_im;
                            float x26b_re = x24c_re - x26c_im;
                            float x25b_im = 0.98078525f * x25c_im;
                            float x24a_re = x24b_re + x25b_im;
                            float x25a_re = x24b_re - x25b_im;
                            float x28d_re = 0.70710677f * x28e_re;
                            float x24c_im = x28d_re + x24z_re;
                            float x28c_im = x28d_re - x24z_re;
                            float x26c_re = 0.9238795f * x26d_re;
                            float x24b_im = x26c_re + x24c_im;
                            float x26b_im = x26c_re - x24c_im;
                            float x25b_re = 0.98078525f * x25c_re;
                            float x24a_im = x25b_re + x24b_im;
                            float x25a_im = x25b_re - x24b_im;
                            float x20d_re = 0.70710677f * x20e_re;
                            float x20c_re = x16z_re - x20d_re;
                            float x16c_re = x16z_re + x20d_re;
                            float x18c_re = 0.9238795f * x18d_re;
                            float x16b_re = x16c_re + x18c_re;
                            float x18b_re = x16c_re - x18c_re;
                            float x17b_re = 0.98078525f * x17c_re;
                            float x16a_re = x16b_re + x17b_re;
                            float x17a_re = x16b_re - x17b_re;
                            float x20d_im = 0.70710677f * x20e_im;
                            float x20c_im = x16z_im - x20d_im;
                            float x16c_im = x16z_im + x20d_im;
                            float x18c_im = 0.9238795f * x18d_im;
                            float x16b_im = x16c_im + x18c_im;
                            float x18b_im = x16c_im - x18c_im;
                            float x17b_im = 0.98078525f * x17c_im;
                            float x16a_im = x16b_im + x17b_im;
                            float x17a_im = x16b_im - x17b_im;
                            out[kk * ispan + 1 * ospan + ll] = x16a_re - x24a_re;
                            out[kk * ispan + 1 * ospan + ll + 1] = x16a_im + x24a_im;
                            out[kk * ispan + 31 * ospan + ll] = x16a_re + x24a_re;
                            out[kk * ispan + 31 * ospan + ll + 1] = x16a_im - x24a_im;
                            out[kk * ispan + 17 * ospan + ll] = x17a_re - x25a_re;
                            out[kk * ispan + 17 * ospan + ll + 1] = x17a_im - x25a_im;
                            out[kk * ispan + 15 * ospan + ll] = x17a_re + x25a_re;
                            out[kk * ispan + 15 * ospan + ll + 1] = x17a_im + x25a_im;
                            float x27b_im = 0.19509032f * x27c_im;
                            float x26a_re = x26b_re + x27b_im;
                            float x27a_re = x26b_re - x27b_im;
                            float x27b_re = 0.19509032f * x27c_re;
                            float x26a_im = x26b_im - x27b_re;
                            float x27a_im = x26b_im + x27b_re;
                            float x19b_re = 0.19509032f * x19c_re;
                            float x18a_re = x18b_re + x19b_re;
                            float x19a_re = x18b_re - x19b_re;
                            float x19b_im = 0.19509032f * x19c_im;
                            float x18a_im = x18b_im + x19b_im;
                            float x19a_im = x18b_im - x19b_im;
                            out[kk * ispan + 7 * ospan + ll] = x18a_re + x26a_re;
                            out[kk * ispan + 7 * ospan + ll + 1] = x18a_im + x26a_im;
                            out[kk * ispan + 25 * ospan + ll] = x18a_re - x26a_re;
                            out[kk * ispan + 25 * ospan + ll + 1] = x18a_im - x26a_im;
                            out[kk * ispan + 9 * ospan + ll] = x19a_re - x27a_re;
                            out[kk * ispan + 9 * ospan + ll + 1] = x19a_im - x27a_im;
                            out[kk * ispan + 23 * ospan + ll] = x19a_re + x27a_re;
                            out[kk * ispan + 23 * ospan + ll + 1] = x19a_im + x27a_im;
                            float x30c_im = 0.38268343f * x30d_im;
                            float x28b_re = x28c_re + x30c_im;
                            float x30b_re = x28c_re - x30c_im;
                            float x29b_im = 0.8314696f * x29c_im;
                            float x28a_re = x28b_re + x29b_im;
                            float x29a_re = x28b_re - x29b_im;
                            float x30c_re = 0.38268343f * x30d_re;
                            float x28b_im = x28c_im - x30c_re;
                            float x30b_im = x28c_im + x30c_re;
                            float x29b_re = 0.8314696f * x29c_re;
                            float x28a_im = x28b_im - x29b_re;
                            float x29a_im = x28b_im + x29b_re;
                            float x22c_re = 0.38268343f * x22d_re;
                            float x20b_re = x20c_re + x22c_re;
                            float x22b_re = x20c_re - x22c_re;
                            float x21b_re = 0.8314696f * x21c_re;
                            float x20a_re = x20b_re + x21b_re;
                            float x21a_re = x20b_re - x21b_re;
                            float x22c_im = 0.38268343f * x22d_im;
                            float x20b_im = x20c_im + x22c_im;
                            float x22b_im = x20c_im - x22c_im;
                            float x21b_im = 0.8314696f * x21c_im;
                            float x20a_im = x20b_im + x21b_im;
                            float x21a_im = x20b_im - x21b_im;
                            out[kk * ispan + 3 * ospan + ll] = x20a_re + x28a_re;
                            out[kk * ispan + 3 * ospan + ll + 1] = x20a_im + x28a_im;
                            out[kk * ispan + 29 * ospan + ll] = x20a_re - x28a_re;
                            out[kk * ispan + 29 * ospan + ll + 1] = x20a_im - x28a_im;
                            out[kk * ispan + 13 * ospan + ll] = x21a_re - x29a_re;
                            out[kk * ispan + 13 * ospan + ll + 1] = x21a_im - x29a_im;
                            out[kk * ispan + 19 * ospan + ll] = x21a_re + x29a_re;
                            out[kk * ispan + 19 * ospan + ll + 1] = x21a_im + x29a_im;
                            float x23b_re = 0.55557024f * x23c_re;
                            float x22a_re = x22b_re + x23b_re;
                            float x23a_re = x22b_re - x23b_re;
                            float x23b_im = 0.55557024f * x23c_im;
                            float x22a_im = x22b_im + x23b_im;
                            float x23a_im = x22b_im - x23b_im;
                            float x31b_im = 0.55557024f * x31c_im;
                            float x30a_re = x30b_re + x31b_im;
                            float x31a_re = x30b_re - x31b_im;
                            float x31b_re = 0.55557024f * x31c_re;
                            float x31a_im = x30b_im + x31b_re;
                            float x30a_im = x30b_im - x31b_re;
                            out[kk * ispan + 5 * ospan + ll] = x22a_re - x30a_re;
                            out[kk * ispan + 5 * ospan + ll + 1] = x22a_im - x30a_im;
                            out[kk * ispan + 27 * ospan + ll] = x22a_re + x30a_re;
                            out[kk * ispan + 27 * ospan + ll + 1] = x22a_im + x30a_im;
                            out[kk * ispan + 11 * ospan + ll] = x23a_re + x31a_re;
                            out[kk * ispan + 11 * ospan + ll + 1] = x23a_im + x31a_im;
                            out[kk * ispan + 21 * ospan + ll] = x23a_re - x31a_re;
                            out[kk * ispan + 21 * ospan + ll + 1] = x23a_im - x31a_im;
                        }
                    }
                } else {
                    for (int ll = 0; ll < ispan * 2; ll += 2) {
                        float t00_re = in[0 * ispan + ll];
                        float t00_im = in[0 * ispan + ll + 1];
                        float t01_re = in[2 * ispan + ll];
                        float t01_im = in[2 * ispan + ll + 1];
                        float t02_re = in[4 * ispan + ll];
                        float t02_im = in[4 * ispan + ll + 1];
                        float t03_re = in[6 * ispan + ll];
                        float t03_im = in[6 * ispan + ll + 1];
                        float t04_re = in[8 * ispan + ll];
                        float t04_im = in[8 * ispan + ll + 1];
                        float t05_re = in[10 * ispan + ll];
                        float t05_im = in[10 * ispan + ll + 1];
                        float t06_re = in[12 * ispan + ll];
                        float t06_im = in[12 * ispan + ll + 1];
                        float t07_re = in[14 * ispan + ll];
                        float t07_im = in[14 * ispan + ll + 1];
                        float t08_re = in[16 * ispan + ll];
                        float t08_im = in[16 * ispan + ll + 1];
                        float t09_re = in[18 * ispan + ll];
                        float t09_im = in[18 * ispan + ll + 1];
                        float t10_re = in[20 * ispan + ll];
                        float t10_im = in[20 * ispan + ll + 1];
                        float t11_re = in[22 * ispan + ll];
                        float t11_im = in[22 * ispan + ll + 1];
                        float t12_re = in[24 * ispan + ll];
                        float t12_im = in[24 * ispan + ll + 1];
                        float t13_re = in[26 * ispan + ll];
                        float t13_im = in[26 * ispan + ll + 1];
                        float t14_re = in[28 * ispan + ll];
                        float t14_im = in[28 * ispan + ll + 1];
                        float t15_re = in[30 * ispan + ll];
                        float t15_im = in[30 * ispan + ll + 1];
                        float t16_re = in[32 * ispan + ll];
                        float t16_im = in[32 * ispan + ll + 1];
                        float t17_re = in[34 * ispan + ll];
                        float t17_im = in[34 * ispan + ll + 1];
                        float t18_re = in[36 * ispan + ll];
                        float t18_im = in[36 * ispan + ll + 1];
                        float t19_re = in[38 * ispan + ll];
                        float t19_im = in[38 * ispan + ll + 1];
                        float t20_re = in[40 * ispan + ll];
                        float t20_im = in[40 * ispan + ll + 1];
                        float t21_re = in[42 * ispan + ll];
                        float t21_im = in[42 * ispan + ll + 1];
                        float t22_re = in[44 * ispan + ll];
                        float t22_im = in[44 * ispan + ll + 1];
                        float t23_re = in[46 * ispan + ll];
                        float t23_im = in[46 * ispan + ll + 1];
                        float t24_re = in[48 * ispan + ll];
                        float t24_im = in[48 * ispan + ll + 1];
                        float t25_re = in[50 * ispan + ll];
                        float t25_im = in[50 * ispan + ll + 1];
                        float t26_re = in[52 * ispan + ll];
                        float t26_im = in[52 * ispan + ll + 1];
                        float t27_re = in[54 * ispan + ll];
                        float t27_im = in[54 * ispan + ll + 1];
                        float t28_re = in[56 * ispan + ll];
                        float t28_im = in[56 * ispan + ll + 1];
                        float t29_re = in[58 * ispan + ll];
                        float t29_im = in[58 * ispan + ll + 1];
                        float t30_re = in[60 * ispan + ll];
                        float t30_im = in[60 * ispan + ll + 1];
                        float t31_re = in[62 * ispan + ll];
                        float t31_im = in[62 * ispan + ll + 1];
                        float x00z_re = t00_re + t16_re;
                        float x16z_re = t00_re - t16_re;
                        float x00z_im = t00_im + t16_im;
                        float x16z_im = t00_im - t16_im;
                        float x01z_re = t01_re + t17_re;
                        float x17z_re = t01_re - t17_re;
                        float x01z_im = t01_im + t17_im;
                        float x17z_im = t01_im - t17_im;
                        float x02z_re = t02_re + t18_re;
                        float x18z_re = t02_re - t18_re;
                        float x02z_im = t02_im + t18_im;
                        float x18z_im = t02_im - t18_im;
                        float x03z_re = t03_re + t19_re;
                        float x19z_re = t03_re - t19_re;
                        float x03z_im = t03_im + t19_im;
                        float x19z_im = t03_im - t19_im;
                        float x04z_re = t04_re + t20_re;
                        float x20z_re = t04_re - t20_re;
                        float x04z_im = t04_im + t20_im;
                        float x20z_im = t04_im - t20_im;
                        float x05z_re = t05_re + t21_re;
                        float x21z_re = t05_re - t21_re;
                        float x05z_im = t05_im + t21_im;
                        float x21z_im = t05_im - t21_im;
                        float x06z_re = t06_re + t22_re;
                        float x22z_re = t06_re - t22_re;
                        float x06z_im = t06_im + t22_im;
                        float x22z_im = t06_im - t22_im;
                        float x07z_re = t07_re + t23_re;
                        float x23z_re = t07_re - t23_re;
                        float x07z_im = t07_im + t23_im;
                        float x23z_im = t07_im - t23_im;
                        float x08z_re = t08_re + t24_re;
                        float x24z_re = t08_re - t24_re;
                        float x08z_im = t08_im + t24_im;
                        float x24z_im = t08_im - t24_im;
                        float x09z_re = t09_re + t25_re;
                        float x25z_re = t09_re - t25_re;
                        float x09z_im = t09_im + t25_im;
                        float x25z_im = t09_im - t25_im;
                        float x10z_re = t10_re + t26_re;
                        float x26z_re = t10_re - t26_re;
                        float x10z_im = t10_im + t26_im;
                        float x26z_im = t10_im - t26_im;
                        float x11z_re = t11_re + t27_re;
                        float x27z_re = t11_re - t27_re;
                        float x11z_im = t11_im + t27_im;
                        float x27z_im = t11_im - t27_im;
                        float x12z_re = t12_re + t28_re;
                        float x28z_re = t12_re - t28_re;
                        float x12z_im = t12_im + t28_im;
                        float x28z_im = t12_im - t28_im;
                        float x13z_re = t13_re + t29_re;
                        float x29z_re = t13_re - t29_re;
                        float x13z_im = t13_im + t29_im;
                        float x29z_im = t13_im - t29_im;
                        float x14z_re = t14_re + t30_re;
                        float x30z_re = t14_re - t30_re;
                        float x14z_im = t14_im + t30_im;
                        float x30z_im = t14_im - t30_im;
                        float x15z_re = t15_re + t31_re;
                        float x31z_re = t15_re - t31_re;
                        float x15z_im = t15_im + t31_im;
                        float x31z_im = t15_im - t31_im;
                        float x00c_re = x00z_re + x08z_re;
                        float x08c_re = x00z_re - x08z_re;
                        float x00c_im = x00z_im + x08z_im;
                        float x08c_im = x00z_im - x08z_im;
                        float x02c_re = x02z_re + x10z_re;
                        float x10e_re = x02z_re - x10z_re;
                        float x02c_im = x02z_im + x10z_im;
                        float x10e_im = x02z_im - x10z_im;
                        float x04c_re = x04z_re + x12z_re;
                        float x12c_re = x04z_re - x12z_re;
                        float x04c_im = x04z_im + x12z_im;
                        float x12c_im = x04z_im - x12z_im;
                        float x06c_re = x06z_re + x14z_re;
                        float x14e_re = x06z_re - x14z_re;
                        float x06c_im = x06z_im + x14z_im;
                        float x14e_im = x06z_im - x14z_im;
                        float x00b_re = x00c_re + x04c_re;
                        float x04b_re = x00c_re - x04c_re;
                        float x02b_re = x02c_re + x06c_re;
                        float x06b_re = x02c_re - x06c_re;
                        float x00a_re = x00b_re + x02b_re;
                        float x02a_re = x00b_re - x02b_re;
                        float x00b_im = x00c_im + x04c_im;
                        float x04b_im = x00c_im - x04c_im;
                        float x02b_im = x02c_im + x06c_im;
                        float x06b_im = x02c_im - x06c_im;
                        float x00a_im = x00b_im + x02b_im;
                        float x02a_im = x00b_im - x02b_im;
                        float x01c_re = x01z_re + x09z_re;
                        float x09f_re = x01z_re - x09z_re;
                        float x01c_im = x01z_im + x09z_im;
                        float x09f_im = x01z_im - x09z_im;
                        float x03c_re = x03z_re + x11z_re;
                        float x11f_re = x03z_re - x11z_re;
                        float x03c_im = x03z_im + x11z_im;
                        float x11f_im = x03z_im - x11z_im;
                        float x05e_re = x05z_re + x13z_re;
                        float x13e_re = x05z_re - x13z_re;
                        float x05e_im = x05z_im + x13z_im;
                        float x13e_im = x05z_im - x13z_im;
                        float x07e_re = x07z_re + x15z_re;
                        float x15f_re = x07z_re - x15z_re;
                        float x07e_im = x07z_im + x15z_im;
                        float x15f_im = x07z_im - x15z_im;
                        float x01b_re = x01c_re + x05e_re;
                        float x05d_re = x01c_re - x05e_re;
                        float x03b_re = x03c_re + x07e_re;
                        float x07d_re = x03c_re - x07e_re;
                        float x01a_re = x01b_re + x03b_re;
                        float x03a_re = x01b_re - x03b_re;
                        float x01b_im = x01c_im + x05e_im;
                        float x05d_im = x01c_im - x05e_im;
                        float x03b_im = x03c_im + x07e_im;
                        float x07d_im = x03c_im - x07e_im;
                        float x01a_im = x01b_im + x03b_im;
                        float x03a_im = x01b_im - x03b_im;
                        out[0 * ospan + ll] = x00a_re + x01a_re;
                        out[0 * ospan + ll + 1] = x00a_im + x01a_im;
                        out[16 * ospan + ll] = x00a_re - x01a_re;
                        out[16 * ospan + ll + 1] = x00a_im - x01a_im;
                        out[8 * ospan + ll] = x02a_re - x03a_im;
                        out[8 * ospan + ll + 1] = x02a_im + x03a_re;
                        out[24 * ospan + ll] = x02a_re + x03a_im;
                        out[24 * ospan + ll + 1] = x02a_im - x03a_re;
                        float x07c_re = x05d_re + x07d_re;
                        float x05c_re = x05d_re - x07d_re;
                        float x05b_re = 0.70710677f * x05c_re;
                        float x04a_re = x04b_re + x05b_re;
                        float x05a_re = x04b_re - x05b_re;
                        float x07b_re = 0.70710677f * x07c_re;
                        float x06a_im = x06b_re + x07b_re;
                        float x07a_im = x06b_re - x07b_re;
                        float x07c_im = x05d_im + x07d_im;
                        float x05c_im = x05d_im - x07d_im;
                        float x05b_im = 0.70710677f * x05c_im;
                        float x04a_im = x04b_im + x05b_im;
                        float x05a_im = x04b_im - x05b_im;
                        float x07b_im = 0.70710677f * x07c_im;
                        float x06a_re = x06b_im + x07b_im;
                        float x07a_re = x06b_im - x07b_im;
                        out[4 * ospan + ll] = x04a_re - x06a_re;
                        out[4 * ospan + ll + 1] = x04a_im + x06a_im;
                        out[28 * ospan + ll] = x04a_re + x06a_re;
                        out[28 * ospan + ll + 1] = x04a_im - x06a_im;
                        out[12 * ospan + ll] = x05a_re + x07a_re;
                        out[12 * ospan + ll + 1] = x05a_im - x07a_im;
                        out[20 * ospan + ll] = x05a_re - x07a_re;
                        out[20 * ospan + ll + 1] = x05a_im + x07a_im;
                        float x09e_re = x09f_re - x15f_re;
                        float x15e_re = x09f_re + x15f_re;
                        float x09e_im = x09f_im - x15f_im;
                        float x15e_im = x09f_im + x15f_im;
                        float x11e_re = x11f_re - x13e_re;
                        float x13d_re = x11f_re + x13e_re - x15e_re;
                        float x15d_re = 1.4142135f * x15e_re;
                        float x13c_re = x13d_re + x15d_re;
                        float x15c_re = x13d_re - x15d_re;
                        float x11e_im = x11f_im - x13e_im;
                        float x13d_im = x11f_im + x13e_im - x15e_im;
                        float x15d_im = 1.4142135f * x15e_im;
                        float x13c_im = x13d_im + x15d_im;
                        float x15c_im = x13d_im - x15d_im;
                        float x14d_re = x10e_re + x14e_re;
                        float x10d_re = x10e_re - x14e_re;
                        float x10c_re = 0.70710677f * x10d_re;
                        float x08b_re = x08c_re + x10c_re;
                        float x10b_re = x08c_re - x10c_re;
                        float x14c_re = 0.70710677f * x14d_re;
                        float x14d_im = x10e_im + x14e_im;
                        float x10d_im = x10e_im - x14e_im;
                        float x10c_im = 0.70710677f * x10d_im;
                        float x08b_im = x08c_im + x10c_im;
                        float x10b_im = x08c_im - x10c_im;
                        float x14c_im = 0.70710677f * x14d_im;
                        float x12b_im = -x12c_re - x14c_re;
                        float x14b_im = -x12c_re + x14c_re;
                        float x12b_re = x12c_im + x14c_im;
                        float x14b_re = x12c_im - x14c_im;
                        float x09d_re = x09e_re - x11e_re;
                        float x11d_re = 1.4142135f * x11e_re;
                        float x09c_re = x09d_re + x11d_re;
                        float x11c_re = x09d_re - x11d_re;
                        float x09b_re = 0.9238795f * x09c_re;
                        float x08a_re = x08b_re + x09b_re;
                        float x09a_re = x08b_re - x09b_re;
                        float x09d_im = x09e_im - x11e_im;
                        float x11d_im = 1.4142135f * x11e_im;
                        float x09c_im = x09d_im + x11d_im;
                        float x11c_im = x09d_im - x11d_im;
                        float x09b_im = 0.9238795f * x09c_im;
                        float x08a_im = x08b_im + x09b_im;
                        float x09a_im = x08b_im - x09b_im;
                        float x13b_im = 0.9238795f * x13c_im;
                        float x12a_re = x12b_re + x13b_im;
                        float x13a_re = x12b_re - x13b_im;
                        float x13b_re = 0.9238795f * x13c_re;
                        float x12a_im = x12b_im - x13b_re;
                        float x13a_im = x12b_im + x13b_re;
                        out[2 * ospan + ll] = x08a_re - x12a_re;
                        out[2 * ospan + ll + 1] = x08a_im - x12a_im;
                        out[30 * ospan + ll] = x08a_re + x12a_re;
                        out[30 * ospan + ll + 1] = x08a_im + x12a_im;
                        out[14 * ospan + ll] = x09a_re + x13a_re;
                        out[14 * ospan + ll + 1] = x09a_im + x13a_im;
                        out[18 * ospan + ll] = x09a_re - x13a_re;
                        out[18 * ospan + ll + 1] = x09a_im - x13a_im;
                        float x11b_re = 0.38268343f * x11c_re;
                        float x10a_re = x10b_re + x11b_re;
                        float x11a_re = x10b_re - x11b_re;
                        float x11b_im = 0.38268343f * x11c_im;
                        float x10a_im = x10b_im + x11b_im;
                        float x11a_im = x10b_im - x11b_im;
                        float x15b_im = 0.38268343f * x15c_im;
                        float x14a_re = x14b_re + x15b_im;
                        float x15a_re = x14b_re - x15b_im;
                        float x15b_re = 0.38268343f * x15c_re;
                        float x14a_im = x14b_im - x15b_re;
                        float x15a_im = x14b_im + x15b_re;
                        out[6 * ospan + ll] = x10a_re + x14a_re;
                        out[6 * ospan + ll + 1] = x10a_im + x14a_im;
                        out[26 * ospan + ll] = x10a_re - x14a_re;
                        out[26 * ospan + ll + 1] = x10a_im - x14a_im;
                        out[10 * ospan + ll] = x11a_re - x15a_re;
                        out[10 * ospan + ll + 1] = x11a_im - x15a_im;
                        out[22 * ospan + ll] = x11a_re + x15a_re;
                        out[22 * ospan + ll + 1] = x11a_im + x15a_im;
                        float x17g_re = x17z_re - x31z_re;
                        float x31g_re = x17z_re + x31z_re;
                        float x17g_im = x17z_im - x31z_im;
                        float x31g_im = x17z_im + x31z_im;
                        float x21g_re = x21z_re - x27z_re;
                        float x27g_re = x21z_re + x27z_re;
                        float x21g_im = x21z_im - x27z_im;
                        float x27g_im = x21z_im + x27z_im;
                        float x23g_re = x23z_re - x25z_re;
                        float x25g_re = x23z_re + x25z_re;
                        float x23g_im = x23z_im - x25z_im;
                        float x25g_im = x23z_im + x25z_im;
                        float x17f_re = x17g_re - x23g_re;
                        float x21f_re = 1.4142135f * x21g_re;
                        float x17e_re = x17f_re + x21f_re;
                        float x21e_re = x17f_re - x21f_re;
                        float x17f_im = x17g_im - x23g_im;
                        float x21f_im = 1.4142135f * x21g_im;
                        float x17e_im = x17f_im + x21f_im;
                        float x21e_im = x17f_im - x21f_im;
                        float x18f_re = x18z_re - x30z_re;
                        float x30f_re = x18z_re + x30z_re;
                        float x18f_im = x18z_im - x30z_im;
                        float x30f_im = x18z_im + x30z_im;
                        float x22f_re = x22z_re - x26z_re;
                        float x26f_re = x22z_re + x26z_re;
                        float x22f_im = x22z_im - x26z_im;
                        float x26f_im = x22z_im + x26z_im;
                        float x18e_re = x18f_re - x22f_re;
                        float x22e_re = 1.4142135f * x22f_re;
                        float x18d_re = x18e_re + x22e_re;
                        float x22d_re = x18e_re - x22e_re;
                        float x18e_im = x18f_im - x22f_im;
                        float x22e_im = 1.4142135f * x22f_im;
                        float x18d_im = x18e_im + x22e_im;
                        float x22d_im = x18e_im - x22e_im;
                        float x19g_im = x19z_im - x29z_im;
                        float x29g_im = x19z_im + x29z_im;
                        float x19g_re = x19z_re - x29z_re;
                        float x29g_re = x19z_re + x29z_re;
                        float x19f_re = x19g_re - x21g_re;
                        float x23f_re = 1.4142135f * x23g_re;
                        float x19e_re = x19f_re + x23f_re;
                        float x23e_re = x19f_re - x23f_re;
                        float x19f_im = x19g_im - x21g_im;
                        float x23f_im = 1.4142135f * x23g_im;
                        float x19e_im = x19f_im + x23f_im;
                        float x23e_im = x19f_im - x23f_im;
                        float x17d_re = x17e_re - x19e_re;
                        float x19d_re = 1.847759f * x19e_re;
                        float x17c_re = x17d_re + x19d_re;
                        float x19c_re = x17d_re - x19d_re;
                        float x17d_im = x17e_im - x19e_im;
                        float x19d_im = 1.847759f * x19e_im;
                        float x17c_im = x17d_im + x19d_im;
                        float x19c_im = x17d_im - x19d_im;
                        float x21d_re = x21e_re - x23e_re;
                        float x23d_re = 0.76536685f * x23e_re;
                        float x21c_re = x21d_re + x23d_re;
                        float x23c_re = x21d_re - x23d_re;
                        float x21d_im = x21e_im - x23e_im;
                        float x23d_im = 0.76536685f * x23e_im;
                        float x21c_im = x21d_im + x23d_im;
                        float x23c_im = x21d_im - x23d_im;
                        float x25f_re = x25g_re - x31g_re;
                        float x29f_re = 1.4142135f * x29g_re;
                        float x25e_re = x25f_re + x29f_re;
                        float x29e_re = x25f_re - x29f_re;
                        float x25f_im = x25g_im - x31g_im;
                        float x29f_im = 1.4142135f * x29g_im;
                        float x25e_im = x25f_im + x29f_im;
                        float x29e_im = x25f_im - x29f_im;
                        float x26e_re = x26f_re - x30f_re;
                        float x30e_re = 1.4142135f * x30f_re;
                        float x26d_re = x26e_re + x30e_re;
                        float x30d_re = x26e_re - x30e_re;
                        float x26e_im = x26f_im - x30f_im;
                        float x30e_im = 1.4142135f * x30f_im;
                        float x26d_im = x26e_im + x30e_im;
                        float x30d_im = x26e_im - x30e_im;
                        float x27f_re = x27g_re - x29g_re;
                        float x31f_re = 1.4142135f * x31g_re;
                        float x27e_re = x27f_re + x31f_re;
                        float x31e_re = x27f_re - x31f_re;
                        float x27f_im = x27g_im - x29g_im;
                        float x31f_im = 1.4142135f * x31g_im;
                        float x27e_im = x27f_im + x31f_im;
                        float x31e_im = x27f_im - x31f_im;
                        float x25d_re = x25e_re - x27e_re;
                        float x27d_re = 1.847759f * x27e_re;
                        float x27c_re = x25d_re - x27d_re;
                        float x25c_re = x25d_re + x27d_re;
                        float x25d_im = x25e_im - x27e_im;
                        float x27d_im = 1.847759f * x27e_im;
                        float x27c_im = x25d_im - x27d_im;
                        float x25c_im = x25d_im + x27d_im;
                        float x29d_re = x29e_re - x31e_re;
                        float x31d_re = 0.76536685f * x31e_re;
                        float x29c_re = x29d_re + x31d_re;
                        float x31c_re = x29d_re - x31d_re;
                        float x29d_im = x29e_im - x31e_im;
                        float x31d_im = 0.76536685f * x31e_im;
                        float x29c_im = x29d_im + x31d_im;
                        float x31c_im = x29d_im - x31d_im;
                        float x20e_re = x20z_re - x28z_re;
                        float x28e_re = x20z_re + x28z_re;
                        float x20e_im = x20z_im - x28z_im;
                        float x28e_im = x20z_im + x28z_im;
                        float x28d_im = 0.70710677f * x28e_im;
                        float x24c_re = x24z_im + x28d_im;
                        float x28c_re = x24z_im - x28d_im;
                        float x26c_im = 0.9238795f * x26d_im;
                        float x24b_re = x24c_re + x26c_im;
                        float x26b_re = x24c_re - x26c_im;
                        float x25b_im = 0.98078525f * x25c_im;
                        float x24a_re = x24b_re + x25b_im;
                        float x25a_re = x24b_re - x25b_im;
                        float x28d_re = 0.70710677f * x28e_re;
                        float x24c_im = x28d_re + x24z_re;
                        float x28c_im = x28d_re - x24z_re;
                        float x26c_re = 0.9238795f * x26d_re;
                        float x24b_im = x26c_re + x24c_im;
                        float x26b_im = x26c_re - x24c_im;
                        float x25b_re = 0.98078525f * x25c_re;
                        float x24a_im = x25b_re + x24b_im;
                        float x25a_im = x25b_re - x24b_im;
                        float x20d_re = 0.70710677f * x20e_re;
                        float x20c_re = x16z_re - x20d_re;
                        float x16c_re = x16z_re + x20d_re;
                        float x18c_re = 0.9238795f * x18d_re;
                        float x16b_re = x16c_re + x18c_re;
                        float x18b_re = x16c_re - x18c_re;
                        float x17b_re = 0.98078525f * x17c_re;
                        float x16a_re = x16b_re + x17b_re;
                        float x17a_re = x16b_re - x17b_re;
                        float x20d_im = 0.70710677f * x20e_im;
                        float x20c_im = x16z_im - x20d_im;
                        float x16c_im = x16z_im + x20d_im;
                        float x18c_im = 0.9238795f * x18d_im;
                        float x16b_im = x16c_im + x18c_im;
                        float x18b_im = x16c_im - x18c_im;
                        float x17b_im = 0.98078525f * x17c_im;
                        float x16a_im = x16b_im + x17b_im;
                        float x17a_im = x16b_im - x17b_im;
                        out[1 * ospan + ll] = x16a_re - x24a_re;
                        out[1 * ospan + ll + 1] = x16a_im + x24a_im;
                        out[31 * ospan + ll] = x16a_re + x24a_re;
                        out[31 * ospan + ll + 1] = x16a_im - x24a_im;
                        out[17 * ospan + ll] = x17a_re - x25a_re;
                        out[17 * ospan + ll + 1] = x17a_im - x25a_im;
                        out[15 * ospan + ll] = x17a_re + x25a_re;
                        out[15 * ospan + ll + 1] = x17a_im + x25a_im;
                        float x27b_im = 0.19509032f * x27c_im;
                        float x26a_re = x26b_re + x27b_im;
                        float x27a_re = x26b_re - x27b_im;
                        float x27b_re = 0.19509032f * x27c_re;
                        float x26a_im = x26b_im - x27b_re;
                        float x27a_im = x26b_im + x27b_re;
                        float x19b_re = 0.19509032f * x19c_re;
                        float x18a_re = x18b_re + x19b_re;
                        float x19a_re = x18b_re - x19b_re;
                        float x19b_im = 0.19509032f * x19c_im;
                        float x18a_im = x18b_im + x19b_im;
                        float x19a_im = x18b_im - x19b_im;
                        out[7 * ospan + ll] = x18a_re + x26a_re;
                        out[7 * ospan + ll + 1] = x18a_im + x26a_im;
                        out[25 * ospan + ll] = x18a_re - x26a_re;
                        out[25 * ospan + ll + 1] = x18a_im - x26a_im;
                        out[9 * ospan + ll] = x19a_re - x27a_re;
                        out[9 * ospan + ll + 1] = x19a_im - x27a_im;
                        out[23 * ospan + ll] = x19a_re + x27a_re;
                        out[23 * ospan + ll + 1] = x19a_im + x27a_im;
                        float x30c_im = 0.38268343f * x30d_im;
                        float x28b_re = x28c_re + x30c_im;
                        float x30b_re = x28c_re - x30c_im;
                        float x29b_im = 0.8314696f * x29c_im;
                        float x28a_re = x28b_re + x29b_im;
                        float x29a_re = x28b_re - x29b_im;
                        float x30c_re = 0.38268343f * x30d_re;
                        float x28b_im = x28c_im - x30c_re;
                        float x30b_im = x28c_im + x30c_re;
                        float x29b_re = 0.8314696f * x29c_re;
                        float x28a_im = x28b_im - x29b_re;
                        float x29a_im = x28b_im + x29b_re;
                        float x22c_re = 0.38268343f * x22d_re;
                        float x20b_re = x20c_re + x22c_re;
                        float x22b_re = x20c_re - x22c_re;
                        float x21b_re = 0.8314696f * x21c_re;
                        float x20a_re = x20b_re + x21b_re;
                        float x21a_re = x20b_re - x21b_re;
                        float x22c_im = 0.38268343f * x22d_im;
                        float x20b_im = x20c_im + x22c_im;
                        float x22b_im = x20c_im - x22c_im;
                        float x21b_im = 0.8314696f * x21c_im;
                        float x20a_im = x20b_im + x21b_im;
                        float x21a_im = x20b_im - x21b_im;
                        out[3 * ospan + ll] = x20a_re + x28a_re;
                        out[3 * ospan + ll + 1] = x20a_im + x28a_im;
                        out[29 * ospan + ll] = x20a_re - x28a_re;
                        out[29 * ospan + ll + 1] = x20a_im - x28a_im;
                        out[13 * ospan + ll] = x21a_re - x29a_re;
                        out[13 * ospan + ll + 1] = x21a_im - x29a_im;
                        out[19 * ospan + ll] = x21a_re + x29a_re;
                        out[19 * ospan + ll + 1] = x21a_im + x29a_im;
                        float x23b_re = 0.55557024f * x23c_re;
                        float x22a_re = x22b_re + x23b_re;
                        float x23a_re = x22b_re - x23b_re;
                        float x23b_im = 0.55557024f * x23c_im;
                        float x22a_im = x22b_im + x23b_im;
                        float x23a_im = x22b_im - x23b_im;
                        float x31b_im = 0.55557024f * x31c_im;
                        float x30a_re = x30b_re + x31b_im;
                        float x31a_re = x30b_re - x31b_im;
                        float x31b_re = 0.55557024f * x31c_re;
                        float x31a_im = x30b_im + x31b_re;
                        float x30a_im = x30b_im - x31b_re;
                        out[5 * ospan + ll] = x22a_re - x30a_re;
                        out[5 * ospan + ll + 1] = x22a_im - x30a_im;
                        out[27 * ospan + ll] = x22a_re + x30a_re;
                        out[27 * ospan + ll + 1] = x22a_im + x30a_im;
                        out[11 * ospan + ll] = x23a_re + x31a_re;
                        out[11 * ospan + ll + 1] = x23a_im + x31a_im;
                        out[21 * ospan + ll] = x23a_re - x31a_re;
                        out[21 * ospan + ll + 1] = x23a_im - x31a_im;
                    }
                }
                break;
            }
        }
    }

    /**
     * Runs one butterfly pass of arbitrary radix over interleaved (re, im) float data.
     *
     * <p>For every group and every complex offset inside the group the pass
     * <ol>
     *   <li>copies input 0 of the butterfly into {@code tt[0..1]} unchanged,</li>
     *   <li>multiplies inputs {@code 1..radixsize-1} by the entry of {@code twids}
     *       at float index {@code row * group * la} and stores the products in
     *       {@code tt},</li>
     *   <li>multiplies the {@code radixsize}-element vector in {@code tt} by the
     *       {@code radixsize x radixsize} complex matrix held row-major in
     *       {@code wn} and writes row {@code r} of the result at stride
     *       {@code 2 * size / radixsize} floats in {@code out}.</li>
     * </ol>
     * When {@code dirflag == 8} (the value of {@code FORWARD}) the twiddle entry is
     * used conjugated and the matrix entry as stored; for every other value the
     * twiddle entry is used as stored and the matrix entry conjugated.
     *
     * <p>The method returns without touching anything when
     * {@code size < radixsize} or {@code la < 1}.
     *
     * @param in        source samples, interleaved re/im
     * @param out       destination samples, interleaved re/im
     * @param twids     twiddle table, interleaved re/im
     * @param size      transform length used to derive the output stride
     * @param la        span of one input sub-sequence; also scales the twiddle index
     * @param dirflag   8 selects the forward variant, anything else the inverse one
     * @param radixsize number of inputs combined by each butterfly
     * @param tt        scratch buffer, at least {@code 2 * radixsize} floats
     * @param wn        butterfly matrix, at least {@code 2 * radixsize * radixsize} floats
     */
    void radixpass_generic(float[] in, float[] out, float[] twids, int size, int la, int dirflag, int radixsize, float[] tt, float[] wn) {
        if (size < radixsize || la < 1) {
            return;
        }
        // Both directions share the same data movement; they differ only in which
        // factor is conjugated. Negating an imaginary part and then using the plain
        // complex product is bit-identical to the hand-expanded conjugate product.
        final boolean forward = dirflag == 8;
        final int outStride = 2 * size / radixsize;
        final int groupLimit = outStride / la;

        for (int group = 0; group < groupLimit; group += 2) {
            final int groupIn = group * radixsize;
            for (int off = 0; off < la * 2; off += 2) {
                // Input 0 never gets a twiddle.
                tt[0] = in[la * (groupIn + 0) + off];
                tt[1] = in[la * (groupIn + 0) + off + 1];

                // Twiddle stage: tt[row] = in[row] * (forward ? conj(tw) : tw).
                for (int row = 1; row < radixsize; ++row) {
                    final int twAt = row * group * la;
                    final float twRe = twids[twAt];
                    final float twIm = forward ? -twids[twAt + 1] : twids[twAt + 1];
                    final int src = la * (groupIn + row * 2) + off;
                    final float srcRe = in[src];
                    final float srcIm = in[src + 1];
                    tt[row * 2] = twRe * srcRe - twIm * srcIm;
                    tt[row * 2 + 1] = twRe * srcIm + twIm * srcRe;
                }

                // Butterfly stage: out[row] = sum over col of (forward ? wn : conj(wn)) * tt[col].
                for (int row = 0; row < radixsize; ++row) {
                    float accRe = 0.0f;
                    float accIm = 0.0f;
                    for (int col = 0; col < radixsize; ++col) {
                        final int cell = (row * radixsize + col) * 2;
                        final float mRe = wn[cell];
                        final float mIm = forward ? wn[cell + 1] : -wn[cell + 1];
                        final float tRe = tt[col * 2];
                        final float tIm = tt[col * 2 + 1];
                        accRe += mRe * tRe - mIm * tIm;
                        accIm += mRe * tIm + mIm * tRe;
                    }
                    final int dst = group * la + row * outStride + off;
                    out[dst] = accRe;
                    out[dst + 1] = accIm;
                }
            }
        }
    }

    /**
     * Scaling hook that does nothing in this Java implementation.
     *
     * <p>The body is empty: every argument is ignored and {@code cin} is left
     * untouched.
     *
     * <p>NOTE(review): presumably output scaling (see the {@code SCALE} flag) is
     * only carried out by the native code path; confirm before relying on
     * scaled results from the pure-Java path.
     *
     * @param cin      data buffer (not read or written here)
     * @param n        ignored
     * @param dirflag  ignored
     * @param typeflag ignored
     * @param packflag ignored
     */
    void fft_func_scale(float[] cin, int n, int dirflag, int typeflag, int packflag) {
    }

    /**
     * Recombines mirrored pairs of interleaved (re, im) values of {@code ain} in place.
     *
     * <p>NOTE(review): this looks like the pack/unpack step that sits between a
     * half-length complex transform and a real-data spectrum; confirm against
     * the callers.
     *
     * <p>What the code does, by direction:
     * <ul>
     *   <li>{@code dirflag == 8} (value of {@code FORWARD}): element 0 becomes
     *       {@code 2*(re+im), 2*(re-im)}; the mirrored pairs are combined while the
     *       twiddle cursor walks down from float index {@code 4*size-2}; finally,
     *       for {@code packflag == 64} (value of {@code UNPACKED}) {@code ain[1]}
     *       is moved to {@code ain[2*size]} with a zero imaginary part and
     *       {@code ain[1]} is cleared, and for {@code packflag == 128} (value of
     *       {@code NONYQUIST}) {@code ain[1]} is simply cleared.</li>
     *   <li>{@code dirflag == 16} (value of {@code INVERSE}): for
     *       {@code packflag == 64} the value at {@code ain[2*size]} is first moved
     *       back to {@code ain[1]} and slots {@code 2*size} and {@code 2*size+1}
     *       are zeroed; element 0 becomes {@code re+im, re-im}; the mirrored pairs
     *       are combined while the twiddle cursor walks up from float index
     *       {@code 2*size+2}.</li>
     *   <li>any other {@code dirflag}, or {@code size < 1}: nothing happens.</li>
     * </ul>
     *
     * @param ain      data buffer, modified in place; needs {@code 2*size+2} floats
     *                 when {@code packflag == 64}
     * @param twids    twiddle table, interleaved re/im
     * @param size     pair loop runs for low float indexes {@code 2, 4, ... <= size}
     * @param dirflag  8 for forward, 16 for inverse
     * @param packflag 64, 128 or anything else (no extra handling)
     */
    void fft_pack(float[] ain, float[] twids, int size, int dirflag, int packflag) {
        if (size < 1) {
            return;
        }
        final boolean forward = dirflag == 8;
        if (!forward && dirflag != 16) {
            return;
        }

        // Inverse + unpacked layout: fold the separately stored value back into slot 1.
        if (!forward && packflag == 64) {
            ain[1] = ain[size * 2];
            ain[size * 2] = 0.0f;
            ain[size * 2 + 1] = 0.0f;
        }

        // Element 0 is handled on its own; only the forward direction doubles it.
        final float head = ain[0];
        final float tail = ain[1];
        final float headSum = head + tail;
        final float headDiff = head - tail;
        ain[0] = forward ? 2.0f * headSum : headSum;
        ain[1] = forward ? 2.0f * headDiff : headDiff;

        // Same butterfly in both directions; only the twiddle walk differs.
        int tw = forward ? size * 4 - 2 : size * 2 + 2;
        final int twStep = forward ? -2 : 2;
        int hi = size * 2 - 2;
        for (int lo = 2; lo <= size; lo += 2) {
            final float loRe = ain[lo];
            final float hiRe = ain[hi];
            final float sumRe = loRe + hiRe;
            final float diffRe = loRe - hiRe;
            final float loIm = ain[lo + 1];
            final float hiIm = ain[hi + 1];
            final float sumIm = loIm + hiIm;
            final float diffIm = loIm - hiIm;
            final float wRe = twids[tw];
            final float wIm = twids[tw + 1];
            final float rotRe = wRe * sumIm + wIm * diffRe;
            final float rotIm = wIm * sumIm - wRe * diffRe;
            // Write order matters: when lo == hi the "hi" values are the ones that stick.
            ain[lo] = sumRe + rotRe;
            ain[lo + 1] = rotIm + diffIm;
            ain[hi] = sumRe - rotRe;
            ain[hi + 1] = rotIm - diffIm;
            hi -= 2;
            tw += twStep;
        }

        if (forward) {
            if (packflag == 64) {
                ain[size * 2] = ain[1];
                ain[size * 2 + 1] = 0.0f;
                ain[1] = 0.0f;
            } else if (packflag == 128) {
                ain[1] = 0.0f;
            }
        }
    }

    /**
     * Stub for the double-precision work routine.
     *
     * <p>No transform is performed: the method only prints a notice to standard
     * output and returns. {@code data} is neither read nor modified.
     *
     * @param data ignored
     */
    protected final void fft_work(double[] data) {
        final String notice = "fft_work is not implemented for double data in Java.  Use native.";
        System.out.println(notice);
    }
}

