/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.manchester.tornado.examples.vectors;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;
import uk.ac.manchester.tornado.api.ImmutableTaskGraph;
import uk.ac.manchester.tornado.api.TaskGraph;
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.TornadoExecutionResult;
import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.exceptions.TornadoExecutionPlanException;
import uk.ac.manchester.tornado.api.math.TornadoMath;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
import uk.ac.manchester.tornado.api.types.collections.VectorFloat16;
import uk.ac.manchester.tornado.api.types.collections.VectorFloat2;
import uk.ac.manchester.tornado.api.types.collections.VectorFloat4;
import uk.ac.manchester.tornado.api.types.collections.VectorFloat8;
import uk.ac.manchester.tornado.api.types.vectors.Float16;
import uk.ac.manchester.tornado.api.types.vectors.Float2;
import uk.ac.manchester.tornado.api.types.vectors.Float4;
import uk.ac.manchester.tornado.api.types.vectors.Float8;
import uk.ac.manchester.tornado.examples.utils.Utils;

public class DFTVector {
    public static final int WARMUP = 100;
    public static final int ITERATIONS = 100;

    /**
     * Scalar DFT kernel over {@link FloatArray} buffers.
     *
     * <p>For every output index {@code k} in {@code [0, n)}, where {@code n} is
     * {@code inReal.getSize()}, accumulates over all input indices {@code t}:
     * <pre>
     *   outReal[k] = sum( inReal[t] * cos(a) + inImag[t] * sin(a))
     *   outImag[k] = sum(-inReal[t] * sin(a) + inImag[t] * cos(a))
     *   a          = 2 * pi * t * k / n
     * </pre>
     * This method is the task body registered by {@code runWithoutVectorTypes}.
     *
     * @param inReal  real parts of the input signal
     * @param inImag  imaginary parts of the input signal
     * @param outReal receives the real part of each output bin
     * @param outImag receives the imaginary part of each output bin
     */
    public static void computeDFT(FloatArray inReal, FloatArray inImag, FloatArray outReal, FloatArray outImag) {
        final int length = inReal.getSize();
        for (int bin = 0; bin < length; bin++) {
            float accReal = 0.0f;
            float accImag = 0.0f;
            for (int sample = 0; sample < length; sample++) {
                final float theta = 2.0f * TornadoMath.floatPI() * (float) sample * (float) bin / (float) length;
                final float cosTheta = TornadoMath.cos(theta);
                final float sinTheta = TornadoMath.sin(theta);
                accReal += inReal.get(sample) * cosTheta + inImag.get(sample) * sinTheta;
                accImag += -inReal.get(sample) * sinTheta + inImag.get(sample) * cosTheta;
            }
            outReal.set(bin, accReal);
            outImag.set(bin, accImag);
        }
    }

    /**
     * DFT-style benchmark kernel written with the JDK Vector API
     * ({@code FloatVector.SPECIES_PREFERRED}).
     *
     * <p>The cos/sin of {@code 2 * pi * i * j / n} are first tabulated for all
     * {@code i, j} in {@code [0, n)}. The outer loop then walks {@code k} in steps
     * of the species length; for each {@code k} it accumulates, lane by lane over
     * {@code t} (also in steps of the species length):
     * <pre>
     *   sumReal +=  inreal[t..] * cos[k][t..] + inimag[t..] * sin[k][t..]
     *   sumImag += -inreal[t..] * sin[k][t..] + inimag[t..] * cos[k][t..]
     * </pre>
     * and stores the resulting vectors at {@code outreal[k..]} / {@code outimag[k..]}.
     * Only table row {@code k} is used for each block, so each stored lane is a
     * lane-wise partial sum rather than a fully reduced DFT bin. Elements at or
     * beyond {@code species.loopBound(n)} are neither read nor written.
     *
     * <p>NOTE(review): the two tables take {@code 2 * n * n} floats and are rebuilt on
     * every call; only rows that are multiples of the species length are read.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input (read at the same indices as {@code inreal})
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTJavaVectorAPI(float[] inreal, float[] inimag, float[] outreal, float[] outimag) {
        int n = inreal.length;

        // Twiddle tables: [i][j] -> cos / sin of 2*pi*i*j/n.
        float[][] cosAngles = new float[n][n];
        float[][] sinAngles = new float[n][n];
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                float angle = 2.0f * TornadoMath.floatPI() * (float) j * (float) i / (float) n;
                cosAngles[i][j] = TornadoMath.cos(angle);
                sinAngles[i][j] = TornadoMath.sin(angle);
            }
        }

        VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        int upperBound = species.loopBound(n);
        int vectorWidth = species.length();

        // Vector of -1.0f used to negate the real input in the imaginary term.
        // The previous code loaded this vector from inimag instead of init, which
        // multiplied by the first input values rather than negating.
        float[] init = new float[vectorWidth];
        Arrays.fill(init, -1.0f);
        FloatVector minusOne = FloatVector.fromArray(species, init, 0);

        for (int k = 0; k < upperBound; k += vectorWidth) {
            FloatVector sumReal = FloatVector.zero(species);
            FloatVector sumImag = FloatVector.zero(species);
            for (int t = 0; t < upperBound; t += vectorWidth) {
                FloatVector cosAngle = FloatVector.fromArray(species, cosAngles[k], t);
                FloatVector sinAngle = FloatVector.fromArray(species, sinAngles[k], t);
                FloatVector va = FloatVector.fromArray(species, inreal, t);
                FloatVector vb = FloatVector.fromArray(species, inimag, t);

                // Real part: re * cos + im * sin
                FloatVector partReal = va.mul(cosAngle).add(vb.mul(sinAngle));
                sumReal = sumReal.add(partReal);

                // Imaginary part: (-re) * sin + im * cos
                FloatVector partAImag = va.mul(minusOne).mul(sinAngle);
                FloatVector partBImag = vb.mul(cosAngle);
                sumImag = sumImag.add(partAImag.add(partBImag));
            }
            sumReal.intoArray(outreal, k);
            sumImag.intoArray(outimag, k);
        }
    }

    /**
     * Parallel-stream variant of the JDK Vector API DFT-style kernel.
     *
     * <p>The cos/sin of {@code 2 * pi * i * j / n} are tabulated first. The range
     * {@code [0, species.loopBound(n))} is then split into blocks of the species
     * length and the blocks are processed by a parallel {@link IntStream}. For the
     * block starting at {@code k}, the kernel accumulates lane by lane over
     * {@code t}:
     * <pre>
     *   sumReal +=  inreal[t..] * cos[k][t..] + inimag[t..] * sin[k][t..]
     *   sumImag += -inreal[t..] * sin[k][t..] + inimag[t..] * cos[k][t..]
     * </pre>
     * and stores the vectors at {@code outreal[k..]} / {@code outimag[k..]}. Each
     * block writes a distinct output range, so the parallel tasks do not overlap.
     * Elements at or beyond the loop bound are neither read nor written.
     *
     * <p>NOTE(review): the two tables take {@code 2 * n * n} floats and are rebuilt on
     * every call; only rows that are multiples of the species length are read.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTJavaVectorAPIWithStreams(float[] inreal, float[] inimag, float[] outreal, float[] outimag) {
        int n = inreal.length;

        // Twiddle tables: [i][j] -> cos / sin of 2*pi*i*j/n.
        float[][] cosAngles = new float[n][n];
        float[][] sinAngles = new float[n][n];
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                float angle = 2.0f * TornadoMath.floatPI() * (float) j * (float) i / (float) n;
                cosAngles[i][j] = TornadoMath.cos(angle);
                sinAngles[i][j] = TornadoMath.sin(angle);
            }
        }

        VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        int upperBound = species.loopBound(n);
        int vectorWidth = species.length();

        // Vector of -1.0f used to negate the real input in the imaginary term.
        // The previous code loaded this vector from inimag instead of init.
        float[] init = new float[vectorWidth];
        Arrays.fill(init, -1.0f);
        FloatVector minusOne = FloatVector.fromArray(species, init, 0);

        // upperBound is a multiple of vectorWidth, so this enumerates k = 0, w, 2w, ...
        IntStream.range(0, upperBound / vectorWidth).parallel().forEach(block -> {
            int k = block * vectorWidth;
            FloatVector sumReal = FloatVector.zero(species);
            FloatVector sumImag = FloatVector.zero(species);
            for (int t = 0; t < upperBound; t += vectorWidth) {
                FloatVector cosAngle = FloatVector.fromArray(species, cosAngles[k], t);
                FloatVector sinAngle = FloatVector.fromArray(species, sinAngles[k], t);
                FloatVector va = FloatVector.fromArray(species, inreal, t);
                FloatVector vb = FloatVector.fromArray(species, inimag, t);

                // Real part: re * cos + im * sin
                FloatVector partReal = va.mul(cosAngle).add(vb.mul(sinAngle));
                sumReal = sumReal.add(partReal);

                // Imaginary part: (-re) * sin + im * cos
                FloatVector partAImag = va.mul(minusOne).mul(sinAngle);
                FloatVector partBImag = vb.mul(cosAngle);
                sumImag = sumImag.add(partAImag.add(partBImag));
            }
            sumReal.intoArray(outreal, k);
            sumImag.intoArray(outimag, k);
        });
    }

    /**
     * DFT-style kernel over two-lane {@link Float2} elements.
     *
     * <p>With {@code n = inreal.getLength()}, each output element {@code k} is the
     * lane-wise sum over all input elements {@code t} of
     * <pre>
     *   real:  in_re[t] * cos(a) + in_im[t] * sin(a)
     *   imag: -in_re[t] * sin(a) + in_im[t] * cos(a)
     * </pre>
     * where lane {@code j} of {@code a} is {@code (2 * pi * k / n) * (2 * t + j)}.
     * This method is the task body registered by {@code runWithVectorTypes2}.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTVector2(VectorFloat2 inreal, VectorFloat2 inimag, VectorFloat2 outreal, VectorFloat2 outimag) {
        final int length = inreal.getLength();
        for (int k = 0; k < length; k++) {
            Float2 sumReal = new Float2(0.0f, 0.0f);
            Float2 sumImag = new Float2(0.0f, 0.0f);
            final float base = 2.0f * TornadoMath.floatPI() * (float) k / (float) length;
            for (int t = 0; t < length; t++) {
                // Scalar index of the first lane of element t.
                final int first = t * 2;
                final Float2 angles = new Float2(base * (float) first, base * (float) (first + 1));
                final Float2 cosines = TornadoMath.cos(angles);
                final Float2 sines = TornadoMath.sin(angles);

                // Real contribution: re * cos + im * sin
                final Float2 realTerm = Float2.add(Float2.mult(inreal.get(t), cosines), Float2.mult(inimag.get(t), sines));
                sumReal = Float2.add(sumReal, realTerm);

                // Imaginary contribution: (-re) * sin + im * cos
                final Float2 negatedReal = Float2.mult(inreal.get(t), new Float2(-1.0f, -1.0f));
                final Float2 imagTerm = Float2.add(Float2.mult(negatedReal, sines), Float2.mult(inimag.get(t), cosines));
                sumImag = Float2.add(sumImag, imagTerm);
            }
            outreal.set(k, sumReal);
            outimag.set(k, sumImag);
        }
    }

    /**
     * DFT-style kernel over four-lane {@link Float4} elements.
     *
     * <p>With {@code n = inreal.getLength()}, each output element {@code k} is the
     * lane-wise sum over all input elements {@code t} of
     * <pre>
     *   real:  in_re[t] * cos(a) + in_im[t] * sin(a)
     *   imag: -in_re[t] * sin(a) + in_im[t] * cos(a)
     * </pre>
     * where lane {@code j} of {@code a} is {@code (2 * pi * k / n) * (4 * t + j)}.
     * This method is the task body registered by {@code runWithVectorTypes4}.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTVector4(VectorFloat4 inreal, VectorFloat4 inimag, VectorFloat4 outreal, VectorFloat4 outimag) {
        final int length = inreal.getLength();
        for (int k = 0; k < length; k++) {
            Float4 sumReal = new Float4(0.0f, 0.0f, 0.0f, 0.0f);
            Float4 sumImag = new Float4(0.0f, 0.0f, 0.0f, 0.0f);
            final float base = 2.0f * TornadoMath.floatPI() * (float) k / (float) length;
            for (int t = 0; t < length; t++) {
                // Scalar index of the first lane of element t.
                final int first = t * 4;
                final Float4 angles = new Float4( //
                        base * (float) first, //
                        base * (float) (first + 1), //
                        base * (float) (first + 2), //
                        base * (float) (first + 3));
                final Float4 cosines = TornadoMath.cos(angles);
                final Float4 sines = TornadoMath.sin(angles);

                // Real contribution: re * cos + im * sin
                final Float4 realTerm = Float4.add(Float4.mult(inreal.get(t), cosines), Float4.mult(inimag.get(t), sines));
                sumReal = Float4.add(sumReal, realTerm);

                // Imaginary contribution: (-re) * sin + im * cos
                final Float4 negatedReal = Float4.mult(inreal.get(t), new Float4(-1.0f, -1.0f, -1.0f, -1.0f));
                final Float4 imagTerm = Float4.add(Float4.mult(negatedReal, sines), Float4.mult(inimag.get(t), cosines));
                sumImag = Float4.add(sumImag, imagTerm);
            }
            outreal.set(k, sumReal);
            outimag.set(k, sumImag);
        }
    }

    /**
     * DFT-style kernel over eight-lane {@link Float8} elements.
     *
     * <p>With {@code n = inreal.getLength()}, each output element {@code k} is the
     * lane-wise sum over all input elements {@code t} of
     * <pre>
     *   real:  in_re[t] * cos(a) + in_im[t] * sin(a)
     *   imag: -in_re[t] * sin(a) + in_im[t] * cos(a)
     * </pre>
     * where lane {@code j} of {@code a} is {@code (2 * pi * k / n) * (8 * t + j)}.
     * This method is the task body registered by {@code runWithVectorTypes8}.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTVector8(VectorFloat8 inreal, VectorFloat8 inimag, VectorFloat8 outreal, VectorFloat8 outimag) {
        final int length = inreal.getLength();
        for (int k = 0; k < length; k++) {
            Float8 sumReal = new Float8(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
            Float8 sumImag = new Float8(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
            final float base = 2.0f * TornadoMath.floatPI() * (float) k / (float) length;
            for (int t = 0; t < length; t++) {
                // Scalar index of the first lane of element t.
                final int first = t * 8;
                final Float8 angles = new Float8( //
                        base * (float) first, //
                        base * (float) (first + 1), //
                        base * (float) (first + 2), //
                        base * (float) (first + 3), //
                        base * (float) (first + 4), //
                        base * (float) (first + 5), //
                        base * (float) (first + 6), //
                        base * (float) (first + 7));
                final Float8 cosines = TornadoMath.cos(angles);
                final Float8 sines = TornadoMath.sin(angles);

                // Real contribution: re * cos + im * sin
                final Float8 realTerm = Float8.add(Float8.mult(inreal.get(t), cosines), Float8.mult(inimag.get(t), sines));
                sumReal = Float8.add(sumReal, realTerm);

                // Imaginary contribution: (-re) * sin + im * cos
                final Float8 negatedReal = Float8.mult(inreal.get(t), new Float8(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
                final Float8 imagTerm = Float8.add(Float8.mult(negatedReal, sines), Float8.mult(inimag.get(t), cosines));
                sumImag = Float8.add(sumImag, imagTerm);
            }
            outreal.set(k, sumReal);
            outimag.set(k, sumImag);
        }
    }

    /**
     * DFT-style kernel over sixteen-lane {@link Float16} elements.
     *
     * <p>With {@code n = inreal.getLength()}, each output element {@code k} is the
     * lane-wise sum over all input elements {@code t} of
     * <pre>
     *   real:  in_re[t] * cos(a) + in_im[t] * sin(a)
     *   imag: -in_re[t] * sin(a) + in_im[t] * cos(a)
     * </pre>
     * where lane {@code j} of {@code a} is {@code (2 * pi * k / n) * (16 * t + j)}.
     * This method is the task body registered by {@code runWithVectorTypes16}.
     *
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real accumulators
     * @param outimag receives the imaginary accumulators
     */
    public static void computeDFTVector16(VectorFloat16 inreal, VectorFloat16 inimag, VectorFloat16 outreal, VectorFloat16 outimag) {
        final int length = inreal.getLength();
        for (int k = 0; k < length; k++) {
            Float16 accReal = new Float16(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
            Float16 accImag = new Float16(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
            final float base = 2.0f * TornadoMath.floatPI() * (float) k / (float) length;
            for (int t = 0; t < length; t++) {
                // Scalar index of the first lane of element t.
                final int first = t * 16;
                final Float16 angles = new Float16( //
                        base * (float) first, //
                        base * (float) (first + 1), //
                        base * (float) (first + 2), //
                        base * (float) (first + 3), //
                        base * (float) (first + 4), //
                        base * (float) (first + 5), //
                        base * (float) (first + 6), //
                        base * (float) (first + 7), //
                        base * (float) (first + 8), //
                        base * (float) (first + 9), //
                        base * (float) (first + 10), //
                        base * (float) (first + 11), //
                        base * (float) (first + 12), //
                        base * (float) (first + 13), //
                        base * (float) (first + 14), //
                        base * (float) (first + 15));
                final Float16 cosines = TornadoMath.cos(angles);
                final Float16 sines = TornadoMath.sin(angles);

                // Real contribution: re * cos + im * sin
                final Float16 realTerm = Float16.add(Float16.mult(inreal.get(t), cosines), Float16.mult(inimag.get(t), sines));
                accReal = Float16.add(accReal, realTerm);

                // Imaginary contribution: (-re) * sin + im * cos
                final Float16 negatedReal = Float16.mult(inreal.get(t), new Float16(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
                final Float16 imagTerm = Float16.add(Float16.mult(negatedReal, sines), Float16.mult(inimag.get(t), cosines));
                accImag = Float16.add(accImag, imagTerm);
            }
            outreal.set(k, accReal);
            outimag.set(k, accImag);
        }
    }

    /**
     * Benchmarks {@link #computeDFTVector4} through a TornadoVM task graph.
     *
     * <p>Allocates {@code size} {@link Float4} elements per buffer, fills element
     * {@code i} of both inputs with {@code 1 / (i + 2)} in every lane, runs
     * {@link #WARMUP} untimed executions followed by {@link #ITERATIONS} measured
     * executions, and passes the collected profiler kernel and total times to
     * {@code Utils.computeStatistics}.
     *
     * @param size   number of {@code Float4} elements in each buffer
     * @param device device the execution plan is bound to
     */
    private static void runWithVectorTypes4(int size, TornadoDevice device) {
        VectorFloat4 inReal = new VectorFloat4(size);
        VectorFloat4 inImag = new VectorFloat4(size);
        VectorFloat4 outReal = new VectorFloat4(size);
        VectorFloat4 outImag = new VectorFloat4(size);
        for (int i = 0; i < size; ++i) {
            float value = 1.0f / (float) (i + 2);
            inReal.set(i, new Float4(value, value, value, value));
            inImag.set(i, new Float4(value, value, value, value));
        }

        // NOTE(review): 0 and 1 are the data-transfer mode codes kept from the
        // decompiled source - presumably DataTransferMode.FIRST_EXECUTION and
        // EVERY_EXECUTION; confirm against the TornadoVM API.
        TaskGraph taskGraph = new TaskGraph("compute") //
                .transferToDevice(0, new Object[]{inReal, inImag}) //
                .task("withVectors4", DFTVector::computeDFTVector4, inReal, inImag, outReal, outImag) //
                .transferToHost(1, new Object[]{outReal, outImag});
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();

        try (TornadoExecutionPlan executionPlan = new TornadoExecutionPlan(new ImmutableTaskGraph[]{immutableTaskGraph})) {
            executionPlan.withDevice(device).withPreCompilation();

            // Warm-up runs: results are discarded.
            for (int i = 0; i < WARMUP; ++i) {
                executionPlan.execute();
            }

            long[] kernelTimers = new long[ITERATIONS];
            long[] totalTimers = new long[ITERATIONS];
            for (int i = 0; i < ITERATIONS; ++i) {
                TornadoExecutionResult executionResult = executionPlan.execute();
                kernelTimers[i] = executionResult.getProfilerResult().getDeviceKernelTime();
                totalTimers[i] = executionResult.getProfilerResult().getTotalTime();
            }
            // Release device buffers explicitly, as the other runWithVectorTypes* variants do.
            executionPlan.freeDeviceMemory();

            System.out.println("Stats KernelTime");
            Utils.computeStatistics(kernelTimers);
            System.out.println("Stats TotalTime");
            Utils.computeStatistics(totalTimers);
        } catch (TornadoExecutionPlanException e) {
            e.printStackTrace();
        }
    }

    /**
     * Benchmarks {@link #computeDFTVector2} through a TornadoVM task graph.
     *
     * <p>Allocates {@code 2 * size} {@link Float2} elements per buffer, fills
     * element {@code i} of both inputs with {@code 1 / (i + 2)} in every lane,
     * runs {@link #WARMUP} untimed executions followed by {@link #ITERATIONS}
     * measured executions, and passes the collected profiler kernel and total
     * times to {@code Utils.computeStatistics}.
     *
     * @param size   base problem size; the buffers hold {@code 2 * size} elements
     * @param device device the execution plan is bound to
     */
    private static void runWithVectorTypes2(int size, TornadoDevice device) {
        final int length = size * 2;
        VectorFloat2 inReal = new VectorFloat2(length);
        VectorFloat2 inImag = new VectorFloat2(length);
        VectorFloat2 outReal = new VectorFloat2(length);
        VectorFloat2 outImag = new VectorFloat2(length);
        for (int i = 0; i < length; ++i) {
            float value = 1.0f / (float) (i + 2);
            inReal.set(i, new Float2(value, value));
            inImag.set(i, new Float2(value, value));
        }

        // NOTE(review): 0 and 1 are the data-transfer mode codes kept from the
        // decompiled source - presumably DataTransferMode.FIRST_EXECUTION and
        // EVERY_EXECUTION; confirm against the TornadoVM API.
        TaskGraph taskGraph = new TaskGraph("compute") //
                .transferToDevice(0, new Object[]{inReal, inImag}) //
                .task("withVectors2", DFTVector::computeDFTVector2, inReal, inImag, outReal, outImag) //
                .transferToHost(1, new Object[]{outReal, outImag});
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();

        try (TornadoExecutionPlan executionPlan = new TornadoExecutionPlan(new ImmutableTaskGraph[]{immutableTaskGraph})) {
            executionPlan.withDevice(device).withPreCompilation();

            // Warm-up runs: results are discarded.
            for (int i = 0; i < WARMUP; ++i) {
                executionPlan.execute();
            }

            long[] kernelTimers = new long[ITERATIONS];
            long[] totalTimers = new long[ITERATIONS];
            for (int i = 0; i < ITERATIONS; ++i) {
                TornadoExecutionResult executionResult = executionPlan.execute();
                kernelTimers[i] = executionResult.getProfilerResult().getDeviceKernelTime();
                totalTimers[i] = executionResult.getProfilerResult().getTotalTime();
            }
            executionPlan.freeDeviceMemory();

            System.out.println("Stats KernelTime");
            Utils.computeStatistics(kernelTimers);
            System.out.println("Stats TotalTime");
            Utils.computeStatistics(totalTimers);
        } catch (TornadoExecutionPlanException e) {
            e.printStackTrace();
        }
    }

    /**
     * Benchmarks {@link #computeDFTVector8} through a TornadoVM task graph.
     *
     * <p>Allocates {@code size / 2} {@link Float8} elements per buffer, fills
     * element {@code i} of both inputs with {@code 1 / (i + 2)} in every lane,
     * runs {@link #WARMUP} untimed executions followed by {@link #ITERATIONS}
     * measured executions, and passes the collected profiler kernel and total
     * times to {@code Utils.computeStatistics}.
     *
     * @param size   base problem size; the buffers hold {@code size / 2} elements
     * @param device device the execution plan is bound to
     */
    private static void runWithVectorTypes8(int size, TornadoDevice device) {
        final int length = size / 2;
        VectorFloat8 inReal = new VectorFloat8(length);
        VectorFloat8 inImag = new VectorFloat8(length);
        VectorFloat8 outReal = new VectorFloat8(length);
        VectorFloat8 outImag = new VectorFloat8(length);
        for (int i = 0; i < length; ++i) {
            float value = 1.0f / (float) (i + 2);
            inReal.set(i, new Float8(value, value, value, value, value, value, value, value));
            inImag.set(i, new Float8(value, value, value, value, value, value, value, value));
        }

        // NOTE(review): 0 and 1 are the data-transfer mode codes kept from the
        // decompiled source - presumably DataTransferMode.FIRST_EXECUTION and
        // EVERY_EXECUTION; confirm against the TornadoVM API.
        TaskGraph taskGraph = new TaskGraph("compute") //
                .transferToDevice(0, new Object[]{inReal, inImag}) //
                .task("withVectors8", DFTVector::computeDFTVector8, inReal, inImag, outReal, outImag) //
                .transferToHost(1, new Object[]{outReal, outImag});
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();

        try (TornadoExecutionPlan executionPlan = new TornadoExecutionPlan(new ImmutableTaskGraph[]{immutableTaskGraph})) {
            executionPlan.withDevice(device).withPreCompilation();

            // Warm-up runs: results are discarded.
            for (int i = 0; i < WARMUP; ++i) {
                executionPlan.execute();
            }

            long[] kernelTimers = new long[ITERATIONS];
            long[] totalTimers = new long[ITERATIONS];
            for (int i = 0; i < ITERATIONS; ++i) {
                TornadoExecutionResult executionResult = executionPlan.execute();
                kernelTimers[i] = executionResult.getProfilerResult().getDeviceKernelTime();
                totalTimers[i] = executionResult.getProfilerResult().getTotalTime();
            }
            executionPlan.freeDeviceMemory();

            System.out.println("Stats KernelTime");
            Utils.computeStatistics(kernelTimers);
            System.out.println("Stats TotalTime");
            Utils.computeStatistics(totalTimers);
        } catch (TornadoExecutionPlanException e) {
            // Report the failure instead of silently dropping it, matching the
            // Float2 / Float4 variants.
            e.printStackTrace();
        }
    }

    /**
     * Benchmarks {@link #computeDFTVector16} through a TornadoVM task graph.
     *
     * <p>Allocates {@code size / 2} {@link Float16} elements per buffer, fills
     * element {@code i} of both inputs with {@code 1 / (i + 2)} in every lane,
     * runs {@link #WARMUP} untimed executions followed by {@link #ITERATIONS}
     * measured executions, and passes the collected profiler kernel and total
     * times to {@code Utils.computeStatistics}.
     *
     * @param size   base problem size; the buffers hold {@code size / 2} elements
     * @param device device the execution plan is bound to
     */
    private static void runWithVectorTypes16(int size, TornadoDevice device) {
        // NOTE(review): the Float2 / Float4 / Float8 variants allocate 2*size, size and
        // size/2 elements (4*size floats each); size/4 would give the same float count
        // here. Kept at size/2 to preserve the existing measurements - confirm intent.
        final int length = size / 2;
        VectorFloat16 inReal = new VectorFloat16(length);
        VectorFloat16 inImag = new VectorFloat16(length);
        VectorFloat16 outReal = new VectorFloat16(length);
        VectorFloat16 outImag = new VectorFloat16(length);
        for (int i = 0; i < length; ++i) {
            float value = 1.0f / (float) (i + 2);
            inReal.set(i, new Float16(value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value));
            inImag.set(i, new Float16(value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value));
        }

        // NOTE(review): 0 and 1 are the data-transfer mode codes kept from the
        // decompiled source - presumably DataTransferMode.FIRST_EXECUTION and
        // EVERY_EXECUTION; confirm against the TornadoVM API.
        TaskGraph taskGraph = new TaskGraph("compute") //
                .transferToDevice(0, new Object[]{inReal, inImag}) //
                .task("withVectors16", DFTVector::computeDFTVector16, inReal, inImag, outReal, outImag) //
                .transferToHost(1, new Object[]{outReal, outImag});
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();

        try (TornadoExecutionPlan executionPlan = new TornadoExecutionPlan(new ImmutableTaskGraph[]{immutableTaskGraph})) {
            executionPlan.withDevice(device).withPreCompilation();

            // Warm-up runs: results are discarded.
            for (int i = 0; i < WARMUP; ++i) {
                executionPlan.execute();
            }

            long[] kernelTimers = new long[ITERATIONS];
            long[] totalTimers = new long[ITERATIONS];
            for (int i = 0; i < ITERATIONS; ++i) {
                TornadoExecutionResult executionResult = executionPlan.execute();
                kernelTimers[i] = executionResult.getProfilerResult().getDeviceKernelTime();
                totalTimers[i] = executionResult.getProfilerResult().getTotalTime();
            }
            executionPlan.freeDeviceMemory();

            System.out.println("Stats KernelTime");
            Utils.computeStatistics(kernelTimers);
            System.out.println("Stats TotalTime");
            Utils.computeStatistics(totalTimers);
        } catch (TornadoExecutionPlanException e) {
            // Report the failure instead of silently dropping it, matching the
            // Float2 / Float4 variants.
            e.printStackTrace();
        }
    }

    /**
     * Scalar DFT evaluated with a parallel {@link IntStream} over output indices.
     *
     * <p>For each {@code k} in {@code [0, size)} the inner loop runs over
     * {@code t} in {@code [0, inreal.getSize())} and accumulates
     * {@code inreal[t] * cos(a) + inimag[t] * sin(a)} into the real output and
     * {@code -inreal[t] * sin(a) + inimag[t] * cos(a)} into the imaginary output,
     * with {@code a = 2 * pi * t * k / inreal.getSize()}. Each stream task writes
     * only index {@code k} of the outputs.
     *
     * @param size    number of output indices to compute
     * @param inreal  real parts of the input
     * @param inimag  imaginary parts of the input
     * @param outreal receives the real part of each output index
     * @param outimag receives the imaginary part of each output index
     */
    private static void computeWithStreams(int size, FloatArray inreal, FloatArray inimag, FloatArray outreal, FloatArray outimag) {
        final int samples = inreal.getSize();
        IntStream.range(0, size).parallel().forEach(bin -> {
            float accReal = 0.0f;
            float accImag = 0.0f;
            for (int t = 0; t < samples; t++) {
                final float theta = 2.0f * TornadoMath.floatPI() * (float) t * (float) bin / (float) samples;
                final float cosTheta = TornadoMath.cos(theta);
                final float sinTheta = TornadoMath.sin(theta);
                accReal += inreal.get(t) * cosTheta + inimag.get(t) * sinTheta;
                accImag += -inreal.get(t) * sinTheta + inimag.get(t) * cosTheta;
            }
            outreal.set(bin, accReal);
            outimag.set(bin, accImag);
        });
    }

    /**
     * Benchmarks {@link #computeWithStreams} on the host.
     *
     * <p>Allocates {@code 4 * size} floats per buffer, fills index {@code i} of both
     * inputs with {@code 1 / (i + 2)}, runs {@link #WARMUP} untimed calls followed
     * by {@link #ITERATIONS} calls timed with {@link System#nanoTime()}, and passes
     * the elapsed times to {@code Utils.computeStatistics}.
     *
     * @param size base problem size; the buffers hold {@code 4 * size} floats
     */
    private static void runWithJavaStreams(int size) {
        final int length = size * 4;
        FloatArray inReal = new FloatArray(length);
        FloatArray inImag = new FloatArray(length);
        FloatArray outReal = new FloatArray(length);
        FloatArray outImag = new FloatArray(length);
        for (int i = 0; i < length; ++i) {
            inReal.set(i, 1.0f / (float) (i + 2));
            inImag.set(i, 1.0f / (float) (i + 2));
        }

        // Warm-up runs: not timed.
        for (int i = 0; i < WARMUP; ++i) {
            DFTVector.computeWithStreams(length, inReal, inImag, outReal, outImag);
        }

        long[] elapsedNanos = new long[ITERATIONS];
        for (int i = 0; i < ITERATIONS; ++i) {
            long start = System.nanoTime();
            DFTVector.computeWithStreams(length, inReal, inImag, outReal, outImag);
            long end = System.nanoTime();
            elapsedNanos[i] = end - start;
        }

        System.out.println("Stats");
        Utils.computeStatistics(elapsedNanos);
    }

    /**
     * Benchmarks the scalar {@link #computeDFT} kernel through a TornadoVM task graph.
     *
     * <p>Allocates {@code 4 * size} floats per buffer, fills index {@code i} of both
     * inputs with {@code 1 / (i + 2)}, runs {@link #WARMUP} untimed executions
     * followed by {@link #ITERATIONS} measured executions, and passes the collected
     * profiler kernel and total times to {@code Utils.computeStatistics}.
     *
     * @param size   base problem size; the buffers hold {@code 4 * size} floats
     * @param device device the execution plan is bound to
     */
    private static void runWithoutVectorTypes(int size, TornadoDevice device) {
        final int length = size * 4;
        FloatArray inReal = new FloatArray(length);
        FloatArray inImag = new FloatArray(length);
        FloatArray outReal = new FloatArray(length);
        FloatArray outImag = new FloatArray(length);
        for (int i = 0; i < length; ++i) {
            inReal.set(i, 1.0f / (float) (i + 2));
            inImag.set(i, 1.0f / (float) (i + 2));
        }

        // NOTE(review): 0 and 1 are the data-transfer mode codes kept from the
        // decompiled source - presumably DataTransferMode.FIRST_EXECUTION and
        // EVERY_EXECUTION; confirm against the TornadoVM API.
        TaskGraph taskGraph = new TaskGraph("s0") //
                .transferToDevice(0, new Object[]{inReal, inImag}) //
                .task("t0", DFTVector::computeDFT, inReal, inImag, outReal, outImag) //
                .transferToHost(1, new Object[]{outReal, outImag});
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();

        try (TornadoExecutionPlan executionPlan = new TornadoExecutionPlan(new ImmutableTaskGraph[]{immutableTaskGraph})) {
            // Select the device before requesting pre-compilation, in the same order
            // as the runWithVectorTypes* variants. NOTE(review): presumably
            // pre-compilation targets the device selected at that point - confirm.
            executionPlan.withDevice(device).withPreCompilation();

            // Warm-up runs: results are discarded.
            for (int i = 0; i < WARMUP; ++i) {
                executionPlan.execute();
            }

            long[] kernelTimers = new long[ITERATIONS];
            long[] totalTimers = new long[ITERATIONS];
            for (int i = 0; i < ITERATIONS; ++i) {
                TornadoExecutionResult executionResult = executionPlan.execute();
                kernelTimers[i] = executionResult.getProfilerResult().getDeviceKernelTime();
                totalTimers[i] = executionResult.getProfilerResult().getTotalTime();
            }
            executionPlan.freeDeviceMemory();

            System.out.println("Stats KernelTime");
            Utils.computeStatistics(kernelTimers);
            System.out.println("Stats TotalTime");
            Utils.computeStatistics(totalTimers);
        } catch (TornadoExecutionPlanException e) {
            e.printStackTrace();
        }
    }

    /**
     * Benchmarks {@link #computeDFTJavaVectorAPI} on the host.
     *
     * <p>Allocates {@code 4 * size} floats per array, fills index {@code i} of both
     * inputs with {@code 1 / (i + 2)}, runs {@link #WARMUP} untimed calls followed
     * by {@link #ITERATIONS} calls timed with {@link System#nanoTime()}, and passes
     * the elapsed times to {@code Utils.computeStatistics}.
     *
     * @param size base problem size; the arrays hold {@code 4 * size} floats
     */
    private static void runWithJavaVectorAPI(int size) {
        final int length = size * 4;
        float[] inReal = new float[length];
        float[] inImag = new float[length];
        float[] outReal = new float[length];
        float[] outImag = new float[length];
        for (int i = 0; i < length; ++i) {
            inReal[i] = 1.0f / (float) (i + 2);
            inImag[i] = 1.0f / (float) (i + 2);
        }

        // Warm-up runs: not timed.
        for (int i = 0; i < WARMUP; ++i) {
            DFTVector.computeDFTJavaVectorAPI(inReal, inImag, outReal, outImag);
        }

        long[] elapsedNanos = new long[ITERATIONS];
        for (int i = 0; i < ITERATIONS; ++i) {
            long start = System.nanoTime();
            DFTVector.computeDFTJavaVectorAPI(inReal, inImag, outReal, outImag);
            long end = System.nanoTime();
            elapsedNanos[i] = end - start;
        }

        System.out.println("Stats");
        Utils.computeStatistics(elapsedNanos);
    }

    /**
     * Benchmarks {@link #computeDFTJavaVectorAPIWithStreams} on the host.
     *
     * <p>Allocates {@code 4 * size} floats per array, fills index {@code i} of both
     * inputs with {@code 1 / (i + 2)}, runs {@link #WARMUP} untimed calls followed
     * by {@link #ITERATIONS} calls timed with {@link System#nanoTime()}, and passes
     * the elapsed times to {@code Utils.computeStatistics}.
     *
     * @param size base problem size; the arrays hold {@code 4 * size} floats
     */
    private static void runWithJavaVectorAPIStreamAPI(int size) {
        final int length = size * 4;
        float[] inReal = new float[length];
        float[] inImag = new float[length];
        float[] outReal = new float[length];
        float[] outImag = new float[length];
        for (int i = 0; i < length; ++i) {
            inReal[i] = 1.0f / (float) (i + 2);
            inImag[i] = 1.0f / (float) (i + 2);
        }

        // Warm-up runs: not timed.
        for (int i = 0; i < WARMUP; ++i) {
            DFTVector.computeDFTJavaVectorAPIWithStreams(inReal, inImag, outReal, outImag);
        }

        long[] elapsedNanos = new long[ITERATIONS];
        for (int i = 0; i < ITERATIONS; ++i) {
            long start = System.nanoTime();
            DFTVector.computeDFTJavaVectorAPIWithStreams(inReal, inImag, outReal, outImag);
            long end = System.nanoTime();
            elapsedNanos[i] = end - start;
        }

        System.out.println("Stats");
        Utils.computeStatistics(elapsedNanos);
    }

    /**
     * Command-line entry point: {@code DFTVector [version] [size]}.
     *
     * <p>{@code version} (default {@code "vector4"}) is matched by prefix, in this
     * order: {@code vector4}, {@code vector2}, {@code vector8}, {@code vector16},
     * {@code stream}, {@code plain}, {@code javaVector}, {@code javaStreamsVector}.
     * {@code size} (default 8192) is the base problem size handed to the selected
     * runner; a value that is not a valid integer is reported on standard error and
     * the default is used instead.
     *
     * @param args optional version selector followed by an optional size
     * @throws IllegalArgumentException if {@code version} matches none of the known prefixes
     */
    public static void main(String[] args) {
        String version = args.length > 0 ? args[0] : "vector4";

        int size = 8192;
        if (args.length > 1) {
            try {
                size = Integer.parseInt(args[1]);
            } catch (NumberFormatException e) {
                // Keep the default, but tell the user the argument was ignored.
                System.err.println("Invalid size '" + args[1] + "', using default " + size);
            }
        }

        // Device lookup happens up front for every variant, including the host-only ones.
        TornadoDevice device = TornadoExecutionPlan.getDevice(0, 0);

        if (version.startsWith("vector4")) {
            DFTVector.runWithVectorTypes4(size, device);
        } else if (version.startsWith("vector2")) {
            DFTVector.runWithVectorTypes2(size, device);
        } else if (version.startsWith("vector8")) {
            DFTVector.runWithVectorTypes8(size, device);
        } else if (version.startsWith("vector16")) {
            DFTVector.runWithVectorTypes16(size, device);
        } else if (version.startsWith("stream")) {
            DFTVector.runWithJavaStreams(size);
        } else if (version.startsWith("plain")) {
            DFTVector.runWithoutVectorTypes(size, device);
        } else if (version.startsWith("javaVector")) {
            DFTVector.runWithJavaVectorAPI(size);
        } else if (version.startsWith("javaStreamsVector")) {
            DFTVector.runWithJavaVectorAPIStreamAPI(size);
        } else {
            throw new IllegalArgumentException("Option not found: " + version);
        }
    }
}

