/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.manchester.tornado.examples.matrices;

import java.util.Arrays;
import java.util.Random;
import uk.ac.manchester.tornado.api.ImmutableTaskGraph;
import uk.ac.manchester.tornado.api.TaskGraph;
import uk.ac.manchester.tornado.api.TornadoBackend;
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.runtime.TornadoRuntimeProvider;
import uk.ac.manchester.tornado.api.types.matrix.Matrix2DFloat;

/**
 * Benchmarks a square single-precision matrix multiplication on two TornadoVM
 * backends and against plain sequential Java.
 *
 * <p>Backend index 0 is reported as "PTX" and backend index 1 as "OpenCL". The
 * OpenCL device is chosen by matching the name of device 0 of backend 0, so both
 * measurements are meant to run on the same physical device.
 *
 * <p>Usage: {@code MatrixMul2D [size]} where {@code size} is the matrix dimension
 * (defaults to {@value #DEFAULT_SIZE}).
 */
public class MatrixMul2D {
    /** Number of untimed runs executed before every measurement. */
    private static final int WARMING_UP_ITERATIONS = 20;
    /** Number of timed runs that are averaged for every measurement. */
    private static final int TIMING_ITERATIONS = 50;
    /** When {@code true}, the accelerated results are compared with the sequential result. */
    private static final boolean CHECK_RESULT = false;
    /** Maximum absolute per-element difference tolerated by the result check. */
    private static final float DELTA = 0.01f;
    /** Matrix dimension used when no usable size argument is supplied. */
    private static final int DEFAULT_SIZE = 512;
    /** Conversion factor from nanoseconds to milliseconds. */
    private static final double NANOS_PER_MILLI = 1.0E6;

    /**
     * Computes {@code C = A x B} for square {@code size x size} matrices.
     *
     * <p>This method is also the task body handed to the task graphs, so its shape is
     * kept deliberately simple.
     * NOTE(review): the loops carry no parallel-loop annotation in this (decompiled)
     * source — confirm against the original whether one was dropped.
     *
     * @param A left operand
     * @param B right operand
     * @param C output matrix, every element in the {@code size x size} range is overwritten
     * @param size number of rows and columns processed
     */
    private static void matrixMultiplication(Matrix2DFloat A, Matrix2DFloat B, Matrix2DFloat C, int size) {
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                float sum = 0.0f;
                for (int k = 0; k < size; ++k) {
                    sum += A.get(i, k) * B.get(k, j);
                }
                C.set(i, j, sum);
            }
        }
    }

    /**
     * Prints both accelerated result matrices to standard output, one row per line.
     *
     * @param size number of rows and columns to print
     * @param matrixCCUDA result produced through backend 0
     * @param matrixCOCL result produced through backend 1
     */
    private static void printMatrices(int size, Matrix2DFloat matrixCCUDA, Matrix2DFloat matrixCOCL) {
        System.out.println("CUDA:");
        printMatrix(size, matrixCCUDA);
        System.out.println("OPENCL:");
        printMatrix(size, matrixCOCL);
    }

    /** Prints the leading {@code size x size} elements of {@code matrix}, cells separated by {@code " | "}. */
    private static void printMatrix(int size, Matrix2DFloat matrix) {
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                System.out.print(" | " + matrix.get(i, j));
            }
            System.out.println(" |");
        }
    }

    /**
     * Reads the matrix dimension from the first command-line argument.
     *
     * <p>A missing, non-numeric or non-positive argument falls back to
     * {@link #DEFAULT_SIZE}; the latter two cases are reported on standard error
     * instead of being ignored silently.
     */
    private static int parseSize(String[] args) {
        if (args.length < 1) {
            return DEFAULT_SIZE;
        }
        try {
            int parsed = Integer.parseInt(args[0]);
            if (parsed > 0) {
                return parsed;
            }
            System.err.println("Matrix size must be positive but was " + parsed + "; using " + DEFAULT_SIZE);
        } catch (NumberFormatException nfe) {
            System.err.println("Invalid matrix size '" + args[0] + "'; using " + DEFAULT_SIZE);
        }
        return DEFAULT_SIZE;
    }

    /**
     * Builds an execution plan that copies {@code a} and {@code b} to the device, runs
     * {@link #matrixMultiplication} as task {@code t0} and copies {@code c} back.
     *
     * <p>The literal transfer modes 0 and 1 are kept exactly as found.
     * NOTE(review): they look like inlined TornadoVM data-transfer-mode constants
     * (first execution / every execution) — confirm against the TornadoVM API.
     */
    private static TornadoExecutionPlan buildPlan(String graphName, Matrix2DFloat a, Matrix2DFloat b, Matrix2DFloat c, int size) {
        // No (Object) casts on the task arguments: they would force the task's type
        // parameters to Object and no longer match matrixMultiplication's signature.
        TaskGraph taskGraph = new TaskGraph(graphName)
                .transferToDevice(0, a, b)
                .task("t0", MatrixMul2D::matrixMultiplication, a, b, c, size)
                .transferToHost(1, c);
        ImmutableTaskGraph immutableTaskGraph = taskGraph.snapshot();
        return new TornadoExecutionPlan(immutableTaskGraph);
    }

    /**
     * Returns the device of {@code backend} whose physical device name equals
     * {@code deviceName} ignoring case, or {@code null} when none matches. If several
     * devices match, the one with the highest index is returned.
     */
    private static TornadoDevice findDeviceByName(TornadoBackend backend, String deviceName) {
        TornadoDevice match = null;
        for (int i = 0; i < backend.getNumDevices(); ++i) {
            TornadoDevice candidate = backend.getDevice(i);
            if (candidate.getPhysicalDevice().getDeviceName().equalsIgnoreCase(deviceName)) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Runs {@code workload} {@link #WARMING_UP_ITERATIONS} times untimed, then
     * {@link #TIMING_ITERATIONS} times timed, and returns the mean duration of the
     * timed runs in milliseconds.
     *
     * <p>{@link System#nanoTime()} is used because it is intended for measuring elapsed
     * time; millisecond wall-clock stamps can round fast runs down to zero, which would
     * turn the derived GFlops and speedup figures into Infinity or NaN.
     */
    private static double averageMillis(Runnable workload) {
        for (int i = 0; i < WARMING_UP_ITERATIONS; ++i) {
            workload.run();
        }
        long[] elapsedNanos = new long[TIMING_ITERATIONS];
        for (int i = 0; i < TIMING_ITERATIONS; ++i) {
            long start = System.nanoTime();
            workload.run();
            elapsedNanos[i] = System.nanoTime() - start;
        }
        return Arrays.stream(elapsedNanos).average().orElse(Double.NaN) / NANOS_PER_MILLI;
    }

    /**
     * Returns {@code true} when every element of {@code actual} is within {@link #DELTA}
     * of the corresponding element of {@code expected}.
     */
    private static boolean matches(Matrix2DFloat expected, Matrix2DFloat actual, int size) {
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                if (Math.abs(expected.get(i, j) - actual.get(i, j)) > DELTA) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Entry point: fills two random input matrices, measures the multiplication on
     * backend 0, on the same-named device of backend 1 and sequentially, then prints
     * GFlops, average time per run and speedup over the sequential version.
     *
     * <p>Exits with status 1 when backend 1 exposes no device with the same name as
     * device 0 of backend 0.
     *
     * @param args optional matrix dimension as the first element
     */
    public static void main(String[] args) {
        final int size = parseSize(args);

        Matrix2DFloat matrixA = new Matrix2DFloat(size, size);
        Matrix2DFloat matrixB = new Matrix2DFloat(size, size);
        Matrix2DFloat matrixCCUDA = new Matrix2DFloat(size, size);
        Matrix2DFloat matrixCOCL = new Matrix2DFloat(size, size);
        Matrix2DFloat matrixCSeq = new Matrix2DFloat(size, size);

        Random r = new Random();
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                matrixA.set(i, j, r.nextFloat());
                matrixB.set(i, j, r.nextFloat());
            }
        }

        // Backend 0 / device 0 is the reference device for the "PTX" measurement.
        TornadoExecutionPlan executorCUDA = buildPlan("cuda_s0", matrixA, matrixB, matrixCCUDA, size);
        TornadoBackend cudaDriver = TornadoRuntimeProvider.getTornadoRuntime().getBackend(0);
        TornadoDevice cudaDevice = cudaDriver.getDevice(0);
        executorCUDA.withDevice(cudaDevice);
        double msecCUDAElapsedTime = averageMillis(() -> executorCUDA.execute());

        // NOTE(review): getBackend(1) presumes a second backend is installed — confirm
        // how the runtime behaves when only one backend is available.
        TornadoExecutionPlan executorOCL = buildPlan("ocl_s0", matrixA, matrixB, matrixCOCL, size);
        TornadoBackend oclDriver = TornadoRuntimeProvider.getTornadoRuntime().getBackend(1);
        TornadoDevice oclDevice = findDeviceByName(oclDriver, cudaDevice.getPhysicalDevice().getDeviceName());
        if (oclDevice == null) {
            System.err.println("There is no device with both OpenCL and CUDA-PTX support");
            System.exit(1);
        }
        executorOCL.withDevice(oclDevice);
        double msecOCLElapsedTime = averageMillis(() -> executorOCL.execute());

        double msecSeqElapsedTime = averageMillis(() -> matrixMultiplication(matrixA, matrixB, matrixCSeq, size));

        if (CHECK_RESULT) {
            boolean correctResult = matches(matrixCSeq, matrixCCUDA, size) && matches(matrixCSeq, matrixCOCL, size);
            System.out.println("\tResult validation: " + (correctResult ? "PASS" : "FAIL"));
        }
        if (size < 5) {
            MatrixMul2D.printMatrices(size, matrixCCUDA, matrixCOCL);
        }

        // One multiply and one add per inner-loop iteration: 2 * size^3 operations.
        double flops = 2.0 * Math.pow(size, 3.0);
        double CUDAGigaFlops = 1.0E-9 * flops / (msecCUDAElapsedTime / 1000.0);
        double OpenCLGigaFlops = 1.0E-9 * flops / (msecOCLElapsedTime / 1000.0);
        double CUDAspeedup = msecSeqElapsedTime / msecCUDAElapsedTime;
        double OpenCLspeedup = msecSeqElapsedTime / msecOCLElapsedTime;
        String formatCUDAFGlops = String.format("%.2f", CUDAGigaFlops);
        String formatOpenCLFGlops = String.format("%.2f", OpenCLGigaFlops);

        // The "Total time" figures are the average duration of one timed run.
        System.out.println("\tOpenCL Execution: " + formatOpenCLFGlops + " GFlops, Total time = " + msecOCLElapsedTime + " ms");
        System.out.println("\tPTX Execution: " + formatCUDAFGlops + " GFlops, Total Time = " + msecCUDAElapsedTime + " ms");
        System.out.println("\tOpenCL Speedup: " + OpenCLspeedup + "x");
        System.out.println("\tPTX Speedup: " + CUDAspeedup + "x");
        System.out.println();
    }
}

