/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.manchester.tornado.runtime.interpreter;

import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import uk.ac.manchester.tornado.api.GridScheduler;
import uk.ac.manchester.tornado.api.KernelContext;
import uk.ac.manchester.tornado.api.WorkerGrid;
import uk.ac.manchester.tornado.api.common.Access;
import uk.ac.manchester.tornado.api.common.Event;
import uk.ac.manchester.tornado.api.common.SchedulableTask;
import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.common.TornadoEvents;
import uk.ac.manchester.tornado.api.enums.TornadoExecutionStatus;
import uk.ac.manchester.tornado.api.enums.TornadoVMBackendType;
import uk.ac.manchester.tornado.api.exceptions.TornadoBailoutRuntimeException;
import uk.ac.manchester.tornado.api.exceptions.TornadoDeviceFP64NotSupported;
import uk.ac.manchester.tornado.api.exceptions.TornadoFailureException;
import uk.ac.manchester.tornado.api.exceptions.TornadoInternalError;
import uk.ac.manchester.tornado.api.exceptions.TornadoMemoryException;
import uk.ac.manchester.tornado.api.exceptions.TornadoRuntimeException;
import uk.ac.manchester.tornado.api.memory.DeviceBufferState;
import uk.ac.manchester.tornado.api.memory.XPUBuffer;
import uk.ac.manchester.tornado.api.profiler.ProfilerType;
import uk.ac.manchester.tornado.api.profiler.TornadoProfiler;
import uk.ac.manchester.tornado.api.runtime.TaskContextInterface;
import uk.ac.manchester.tornado.runtime.EmptyEvent;
import uk.ac.manchester.tornado.runtime.common.BatchConfiguration;
import uk.ac.manchester.tornado.runtime.common.KernelStackFrame;
import uk.ac.manchester.tornado.runtime.common.RuntimeUtilities;
import uk.ac.manchester.tornado.runtime.common.TornadoInstalledCode;
import uk.ac.manchester.tornado.runtime.common.TornadoLogger;
import uk.ac.manchester.tornado.runtime.common.TornadoOptions;
import uk.ac.manchester.tornado.runtime.common.TornadoXPUDevice;
import uk.ac.manchester.tornado.runtime.common.XPUDeviceBufferState;
import uk.ac.manchester.tornado.runtime.graph.TornadoExecutionContext;
import uk.ac.manchester.tornado.runtime.graph.TornadoVMBytecodeResult;
import uk.ac.manchester.tornado.runtime.graph.TornadoVMBytecodes;
import uk.ac.manchester.tornado.runtime.interpreter.DebugInterpreter;
import uk.ac.manchester.tornado.runtime.interpreter.InterpreterUtilities;
import uk.ac.manchester.tornado.runtime.profiler.TimeProfiler;
import uk.ac.manchester.tornado.runtime.tasks.DataObjectState;
import uk.ac.manchester.tornado.runtime.tasks.PrebuiltTask;
import uk.ac.manchester.tornado.runtime.tasks.meta.TaskDataContext;

public class TornadoVMInterpreter {
    /** Event returned by {@code execute} when no marker event is enqueued. */
    private static final Event EMPTY_EVENT = new EmptyEvent();
    /** Capacity of each per-event-list wait array. */
    private static final int MAX_EVENTS = TornadoOptions.MAX_EVENTS;
    /** Snapshot of {@code TornadoOptions.VM_USE_DEPS} taken at construction time. */
    private final boolean useDependencies;
    private final HashMap<Object, Access> objectAccesses;
    private final List<Object> objects;
    /** One entry per element of {@link #objects}, filled by {@code fetchGlobalStates}. */
    private final DataObjectState[] dataObjectStates;
    private final KernelStackFrame[] kernelStackFrame;
    /** Wait lists, one row per event list; unused slots hold -1. */
    private final int[][] events;
    private final int[] eventsIndexes;
    private final TornadoXPUDevice interpreterDevice;
    /** One slot per task scheduled on {@link #interpreterDevice}. */
    private final TornadoInstalledCode[] installedCodes;
    private final List<Object> constants;
    private final List<SchedulableTask> taskExecutionContexts;
    private final List<SchedulableTask> localTaskList;
    private final TornadoExecutionContext graphExecutionContext;
    private final TornadoVMBytecodeResult bytecodeResult;
    private TornadoProfiler timeProfiler;
    /** Accumulated wall-clock time, in seconds, of non-warm-up runs. */
    private double totalTime;
    /** Number of non-warm-up runs completed. */
    private long invocations;
    private boolean finishedWarmup;
    private GridScheduler gridScheduler;
    // Populated only when batch processing is enabled; both maps share the same key set.
    private HashMap<Object, Integer> currentBatchNumberPerObject = new HashMap<>();
    private HashMap<Object, Integer> totalEvenBatchesPerObject = new HashMap<>();
    private TornadoLogger logger = new TornadoLogger(this.getClass());

    public TornadoVMInterpreter(TornadoExecutionContext graphExecutionContext, TornadoVMBytecodeResult bytecodeResult, TornadoProfiler timeProfiler, TornadoXPUDevice device) {
        this.graphExecutionContext = graphExecutionContext;
        this.timeProfiler = timeProfiler;
        this.bytecodeResult = bytecodeResult;
        assert (device != null);
        this.interpreterDevice = device;
        this.useDependencies = TornadoOptions.VM_USE_DEPS;
        this.totalTime = 0.0;
        this.invocations = 0L;
        this.logger.debug("init an instance of a TornadoVM interpreter...");
        this.bytecodeResult.getLong();
        this.kernelStackFrame = graphExecutionContext.getKernelStackFrame();
        this.events = new int[this.bytecodeResult.getInt()][MAX_EVENTS];
        this.eventsIndexes = new int[this.events.length];
        this.localTaskList = graphExecutionContext.getTasksForDevice(this.interpreterDevice.getDeviceContext());
        this.installedCodes = new TornadoInstalledCode[this.localTaskList.size()];
        for (int i = 0; i < this.events.length; ++i) {
            Arrays.fill(this.events[i], -1);
            this.eventsIndexes[i] = 0;
        }
        this.logger.debug("created %d kernelStackFrame", this.kernelStackFrame.length);
        this.logger.debug("created %d event lists", this.events.length);
        this.objectAccesses = graphExecutionContext.getObjectsAccesses();
        this.objects = graphExecutionContext.getObjects();
        this.initBatchDataStructures(graphExecutionContext);
        this.dataObjectStates = new DataObjectState[this.objects.size()];
        this.fetchGlobalStates();
        this.rewindBufferToBegin();
        this.constants = graphExecutionContext.getConstants();
        this.taskExecutionContexts = graphExecutionContext.getTasks();
        this.logger.debug("interpreter for device %s is ready to go", device.toString());
        this.bytecodeResult.mark();
    }

    /**
     * Seeds the per-object batch counters when batch processing is enabled.
     * A batch size of -1 means batching is off, in which case both maps stay empty.
     *
     * @param context execution context providing the batch size
     */
    private void initBatchDataStructures(TornadoExecutionContext context) {
        final long batchSize = context.getBatchSize();
        if (batchSize == -1L) {
            return;
        }
        final int chunkCount = BatchConfiguration.computeChunkSizes(context, batchSize).getTotalChunks();
        for (Object tracked : objects) {
            totalEvenBatchesPerObject.put(tracked, chunkCount);
            currentBatchNumberPerObject.put(tracked, 0);
        }
    }

    /**
     * Replaces the profiler used to record transfer and allocation metrics.
     *
     * @param tornadoProfiler profiler to use from now on
     */
    public void setTimeProfiler(TornadoProfiler tornadoProfiler) {
        timeProfiler = tornadoProfiler;
    }

    /**
     * Refreshes {@code dataObjectStates} so that slot {@code i} holds the data
     * object state of {@code objects.get(i)}, looked up through the execution
     * context with the object's registered access mode.
     */
    public void fetchGlobalStates() {
        int slot = 0;
        for (Object current : objects) {
            final Access mode = objectAccesses.get(current);
            TornadoInternalError.guarantee(current != null, "null object found in TornadoVM", new Object[0]);
            dataObjectStates[slot] = graphExecutionContext.getLocalStateObject(current, mode).getDataObjectState();
            slot++;
        }
    }

    /**
     * Consumes bytecodes up to and including BEGIN. Every bytecode before BEGIN
     * must be CONTEXT; for each one the device index is read and the interpreter
     * device is asked to load its context for this execution plan.
     */
    private void rewindBufferToBegin() {
        for (byte opcode = bytecodeResult.get(); opcode != TornadoVMBytecodes.BEGIN.value(); opcode = bytecodeResult.get()) {
            TornadoInternalError.guarantee(opcode == TornadoVMBytecodes.CONTEXT.value(), "invalid code: 0x%x", new Object[]{opcode});
            final int deviceIndex = bytecodeResult.getInt();
            assert (deviceIndex == interpreterDevice.getDeviceContext().getDeviceIndex());
            logger.debug("loading context %s", interpreterDevice.toString());
            final long start = System.nanoTime();
            interpreterDevice.ensureLoaded(graphExecutionContext.getExecutionPlanId());
            final long end = System.nanoTime();
            final double seconds = (double) (end - start) * 1.0E-9;
            logger.debug("loaded in %.9f s", seconds);
        }
    }

    /**
     * Stores the grid scheduler on this interpreter.
     *
     * @param gridScheduler scheduler to keep for later use
     */
    public void setGridScheduler(GridScheduler gridScheduler) {
        this.gridScheduler = gridScheduler;
    }

    /**
     * Prints the number of completed (non-warm-up) runs together with their mean
     * and total wall-clock time in seconds. With zero runs the mean is NaN.
     */
    public void printTimes() {
        final double meanSeconds = totalTime / (double) invocations;
        System.out.printf("bc: complete %d iterations - %.9f s mean and %.9f s total%n", invocations, meanSeconds, totalTime);
    }

    /**
     * Empties the profile list that each task keeps for this execution plan.
     */
    public void clearProfiles() {
        taskExecutionContexts.forEach(task -> task.meta().getProfiles(graphExecutionContext.getExecutionPlanId()).clear());
    }

    /**
     * Asks the device to dump its events for this execution plan. When the
     * profiler option is off, only an informational message is logged.
     */
    public void dumpEvents() {
        if (TornadoOptions.TORNADO_PROFILER) {
            interpreterDevice.dumpEvents(graphExecutionContext.getExecutionPlanId());
        } else {
            logger.info("profiling and/or event dumping is not enabled");
        }
    }

    /**
     * Prints one line of timing data for every completed event recorded in the
     * given event set.
     *
     * @param eventSet events to report; its profile bit set holds the event ids
     * @param meta     task metadata whose id is printed alongside each event
     * @throws TornadoRuntimeException if the set has at least one profiled event
     *                                 and its device is not a {@code TornadoXPUDevice}
     */
    private void dumpEventProfiled(TornadoEvents eventSet, TaskDataContext meta) {
        final BitSet profiledIds = eventSet.getProfiles();
        for (int id = profiledIds.nextSetBit(0); id != -1; id = profiledIds.nextSetBit(id + 1)) {
            final TornadoDevice owner = eventSet.getDevice();
            if (!(owner instanceof TornadoXPUDevice)) {
                throw new TornadoRuntimeException("TornadoDevice not found");
            }
            final TornadoXPUDevice xpuDevice = (TornadoXPUDevice) owner;
            final Event resolved = xpuDevice.resolveEvent(graphExecutionContext.getExecutionPlanId(), id);
            // Events that have not completed are skipped.
            if (resolved.getStatus() != TornadoExecutionStatus.COMPLETE) {
                continue;
            }
            System.out.printf("task: %s %s %9d %9d %9d %9d %9d%n", xpuDevice.getDeviceName(), meta.getId(), resolved.getElapsedTime(), resolved.getQueuedTime(), resolved.getSubmitTime(), resolved.getStartTime(), resolved.getEndTime());
        }
    }

    /**
     * Prints the profiled events of every task for this execution plan.
     */
    public void dumpProfiles() {
        for (SchedulableTask scheduled : taskExecutionContexts) {
            final TaskDataContext meta = (TaskDataContext) scheduled.meta();
            for (Object recorded : meta.getProfiles(graphExecutionContext.getExecutionPlanId())) {
                dumpEventProfiled((TornadoEvents) recorded, meta);
            }
        }
    }

    /**
     * Runs the bytecodes once in warm-up mode and then records that warm-up
     * has finished.
     */
    public void withPreCompilation() {
        execute(true);
        finishedWarmup = true;
    }

    /**
     * @return whether the execution context reports that a memory limit is in effect
     */
    private boolean isMemoryLimitEnabled() {
        return graphExecutionContext.isMemoryLimited();
    }

    /**
     * Interprets the bytecode stream from the marked position until END (or
     * until the buffer is exhausted) and then resets the buffer to the mark.
     *
     * <p>In warm-up mode (requested by the caller, or forced when
     * {@code TornadoOptions.VIRTUAL_DEVICE_ENABLED} is set) every bytecode is
     * still decoded, but only LAUNCH has an effect: the task is compiled and its
     * call arguments are popped. No allocation, transfer, launch or barrier is
     * carried out, and the run is not counted in the timing statistics.
     *
     * @param isWarmup {@code true} to compile only
     * @return the marker event enqueued after the run when dependencies are
     *         enabled and this is not a warm-up; otherwise {@code EMPTY_EVENT}
     * @throws TornadoMemoryException if a memory limit is enabled and the
     *                                execution context reports it as exceeded
     */
    private Event execute(boolean isWarmup) {
        isWarmup = isWarmup || TornadoOptions.VIRTUAL_DEVICE_ENABLED;
        this.interpreterDevice.enableThreadSharing();
        if (this.isMemoryLimitEnabled() && this.graphExecutionContext.doesExceedExecutionPlanLimit()) {
            throw new TornadoMemoryException("OutofMemoryException due to executionPlan.withMemoryLimit of " + this.graphExecutionContext.getExecutionPlanMemoryLimit());
        }
        long t0 = System.nanoTime();
        int lastEvent = -1;
        this.initWaitEventList();
        // The log buffer exists only for real runs with bytecode logging enabled.
        StringBuilder logBuilder = null;
        if (TornadoOptions.LOG_BYTECODES() && !isWarmup) {
            logBuilder = new StringBuilder();
            logBuilder.append(InterpreterUtilities.debugHighLightHelper("Interpreter instance running bytecodes for: ")).append(this.interpreterDevice).append(InterpreterUtilities.debugHighLightHelper(" Running in thread: ")).append(Thread.currentThread().getName()).append("\n");
        }
        while (this.bytecodeResult.hasRemaining()) {
            byte op = this.bytecodeResult.get();
            if (op == TornadoVMBytecodes.ALLOC.value()) {
                long sizeBatch = this.bytecodeResult.getLong();
                int argSize = this.bytecodeResult.getInt();
                int[] args = new int[argSize];
                for (int i = 0; i < argSize; ++i) {
                    args[i] = this.bytecodeResult.getInt();
                }
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.executeAlloc(logBuilder, args, sizeBatch);
                continue;
            }
            if (op == TornadoVMBytecodes.DEALLOC.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.executeDeAlloc(logBuilder, objectIndex);
                continue;
            }
            if (op == TornadoVMBytecodes.TRANSFER_HOST_TO_DEVICE_ONCE.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                long offset = this.bytecodeResult.getLong();
                long sizeBatch = this.bytecodeResult.getLong();
                int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
                if (isWarmup) {
                    continue;
                }
                this.transferHostToDeviceOnce(logBuilder, objectIndex, offset, eventId, sizeBatch, waitList);
                continue;
            }
            if (op == TornadoVMBytecodes.TRANSFER_HOST_TO_DEVICE_ALWAYS.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                long offset = this.bytecodeResult.getLong();
                long sizeBatch = this.bytecodeResult.getLong();
                int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
                if (isWarmup) {
                    continue;
                }
                this.transferHostToDeviceAlways(logBuilder, objectIndex, offset, eventId, sizeBatch, waitList);
                continue;
            }
            if (op == TornadoVMBytecodes.TRANSFER_DEVICE_TO_HOST_ALWAYS.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                long offset = this.bytecodeResult.getLong();
                long sizeBatch = this.bytecodeResult.getLong();
                // Same -1 guard as the other opcodes, so "no event list" cannot index events[-1].
                int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.transferDeviceToHost(logBuilder, objectIndex, offset, eventId, sizeBatch, waitList);
                continue;
            }
            if (op == TornadoVMBytecodes.TRANSFER_DEVICE_TO_HOST_ALWAYS_BLOCKING.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                long offset = this.bytecodeResult.getLong();
                long sizeBatch = this.bytecodeResult.getLong();
                int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
                if (isWarmup) {
                    continue;
                }
                this.transferDeviceToHostBlocking(logBuilder, objectIndex, offset, eventId, sizeBatch, waitList);
                continue;
            }
            if (op == TornadoVMBytecodes.LAUNCH.value()) {
                int callWrapperIndex = this.bytecodeResult.getInt();
                int taskIndex = this.bytecodeResult.getInt();
                int numArgs = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                long offset = this.bytecodeResult.getLong();
                long batchThreads = this.bytecodeResult.getLong();
                // Compilation happens in both modes; this is the purpose of a warm-up run.
                XPUExecutionFrame executionFrame = this.compileTaskFromBytecodeToBinary(callWrapperIndex, numArgs, eventId, taskIndex, batchThreads);
                if (isWarmup) {
                    this.popArgumentsFromCall(numArgs);
                    continue;
                }
                lastEvent = this.executeLaunch(logBuilder, numArgs, eventId, taskIndex, batchThreads, offset, executionFrame);
                continue;
            }
            if (op == TornadoVMBytecodes.ADD_DEPENDENCY.value()) {
                int eventList = this.bytecodeResult.getInt();
                if (isWarmup) {
                    continue;
                }
                this.executeDependency(logBuilder, lastEvent, eventList);
                continue;
            }
            if (op == TornadoVMBytecodes.ON_DEVICE.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.executeOnDevice(logBuilder, objectIndex, eventId);
                continue;
            }
            if (op == TornadoVMBytecodes.PERSIST.value()) {
                int objectIndex = this.bytecodeResult.getInt();
                int eventId = this.bytecodeResult.getInt();
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.executePersist(logBuilder, objectIndex, eventId);
                continue;
            }
            if (op == TornadoVMBytecodes.BARRIER.value()) {
                int eventId = this.bytecodeResult.getInt();
                int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
                if (isWarmup) {
                    continue;
                }
                lastEvent = this.executeBarrier(logBuilder, eventId, waitList);
                continue;
            }
            if (op == TornadoVMBytecodes.END.value()) {
                if (logBuilder != null) {
                    logBuilder.append("bc: ").append(InterpreterUtilities.debugHighLightBC("END\n")).append("\n");
                }
                break;
            }
            this.throwErrorInterpreter(op);
        }
        Event barrier = EMPTY_EVENT;
        if (!isWarmup) {
            if (this.useDependencies) {
                int event = this.interpreterDevice.enqueueMarker(this.graphExecutionContext.getExecutionPlanId());
                barrier = this.interpreterDevice.resolveEvent(this.graphExecutionContext.getExecutionPlanId(), event);
            }
            if (TornadoOptions.USE_VM_FLUSH) {
                this.interpreterDevice.flush(this.graphExecutionContext.getExecutionPlanId());
            }
        }
        long t1 = System.nanoTime();
        double elapsed = (double) (t1 - t0) * 1.0E-9;
        if (!isWarmup) {
            this.totalTime += elapsed;
            ++this.invocations;
        }
        if (this.graphExecutionContext.meta().isDebug()) {
            this.logger.debug("bc: complete elapsed=%.9f s (%d iterations, %.9f s mean)", elapsed, this.invocations, this.totalTime / (double) this.invocations);
        }
        this.bytecodeResult.reset();
        // Warm-up runs build no log; skip the output instead of printing or dumping null.
        if (logBuilder != null) {
            if (TornadoOptions.PRINT_BYTECODES) {
                System.out.println(logBuilder);
            }
            if (!TornadoOptions.DUMP_BYTECODES.isBlank()) {
                RuntimeUtilities.writeBytecodeToFile(logBuilder);
            }
        }
        return barrier;
    }

    /**
     * Resets every slot of every wait list to -1.
     */
    private void initWaitEventList() {
        for (int list = 0; list < events.length; list++) {
            Arrays.fill(events[list], -1);
        }
    }

    /**
     * Tells whether the object appears in any of the persisted-object
     * collections held by the execution context.
     *
     * @param object candidate object; {@code null} is never persistent
     * @return {@code true} if at least one non-null collection contains it
     */
    private boolean isPersistentObject(Object object) {
        if (object == null || graphExecutionContext == null) {
            return false;
        }
        return graphExecutionContext.getPersistedTaskToObjectsMap()
                .values()
                .stream()
                .anyMatch(persisted -> persisted != null && persisted.contains(object));
    }

    /**
     * Splits the referenced objects into persistent ones and ones that still
     * need to be allocated.
     *
     * @param args indexes into {@code objects}
     * @return the number of persistent objects and the number left to allocate
     */
    private ObjectAllocationInfo countAndClassifyObjects(int[] args) {
        int persistentCount = 0;
        for (int position = 0; position < args.length; position++) {
            if (isPersistentObject(objects.get(args[position]))) {
                persistentCount++;
            }
        }
        return new ObjectAllocationInfo(persistentCount, args.length - persistentCount);
    }

    /**
     * Handles ALLOC: allocates device buffers for every referenced object that
     * is not persistent, and updates the tracked device memory usage with the
     * newly allocated size plus the buffer sizes of the persistent objects.
     *
     * @param logBuilder bytecode log sink, used only when bytecode logging is on
     * @param args       indexes into {@code objects}
     * @param sizeBatch  batch size passed on to the device allocator
     * @return always -1; no event is produced
     */
    private int executeAlloc(StringBuilder logBuilder, int[] args, long sizeBatch) {
        final int pendingCount = this.countAndClassifyObjects(args).objectsToAlloc;
        final Object[] pending = new Object[pendingCount];
        final Access[] pendingAccesses = new Access[pendingCount];
        final DeviceBufferState[] pendingStates = new XPUDeviceBufferState[pendingCount];

        int cursor = 0;
        long persistedBytes = 0L;
        for (int objectIndex : args) {
            final Object candidate = this.objects.get(objectIndex);
            if (this.isPersistentObject(candidate)) {
                // Persistent objects already own a buffer; only its size is accounted for.
                XPUDeviceBufferState persistedState = this.resolveObjectState(objectIndex);
                persistedBytes += persistedState.getXPUBuffer().size();
                continue;
            }
            pending[cursor] = candidate;
            pendingStates[cursor] = this.resolveObjectState(objectIndex);
            pendingAccesses[cursor] = this.objectAccesses.get(candidate);
            cursor++;
        }

        final long allocatedBytes = this.interpreterDevice.allocateObjects(pending, sizeBatch, pendingStates, pendingAccesses);
        final long totalBytes = allocatedBytes + persistedBytes;
        this.increaseBatchNumber(sizeBatch);

        if (TornadoOptions.LOG_BYTECODES()) {
            for (int i = 0; i < pendingStates.length; i++) {
                final DeviceBufferState state = pendingStates[i];
                final long bufferBytes = state.getXPUBuffer().size();
                // Reused buffers are not reported as fresh allocations.
                if (!state.isBufferReused()) {
                    DebugInterpreter.logAllocObject(pending[i], this.interpreterDevice, bufferBytes, sizeBatch, logBuilder);
                }
            }
        }

        this.graphExecutionContext.setCurrentDeviceMemoryUsage(totalBytes);

        if (TornadoOptions.isProfilerEnabled() && allocatedBytes > 0L) {
            for (int i = 0; i < pendingStates.length; i++) {
                this.timeProfiler.addValueToMetric(ProfilerType.ALLOCATION_BYTES, TimeProfiler.NO_TASK_NAME, pendingStates[i].getXPUBuffer().size());
            }
        }
        return -1;
    }

    /**
     * Advances the current batch counter of every tracked object by one, unless
     * the batch size is zero.
     *
     * @param sizeBatch batch size of the allocation just performed
     */
    private void increaseBatchNumber(long sizeBatch) {
        if (sizeBatch == 0L) {
            return;
        }
        for (Object tracked : objects) {
            // Unboxing fails with a NullPointerException if the object has no counter yet.
            final int current = currentBatchNumberPerObject.get(tracked);
            currentBatchNumberPerObject.put(tracked, current + 1);
        }
    }

    /**
     * Handles DEALLOC: releases the device buffer of one object and lowers the
     * tracked device memory usage by the amount the device reports as freed.
     *
     * <p>While batch processing is active and the object has not reached its
     * last batch, the deallocation is skipped.
     *
     * @param tornadoVMBytecodeList bytecode log sink, used only when bytecode logging is on
     * @param objectIndex           index into {@code objects}
     * @return always -1; no event is produced
     */
    private int executeDeAlloc(StringBuilder tornadoVMBytecodeList, int objectIndex) {
        Object object = this.objects.get(objectIndex);
        // Both maps are read below, so both must be populated before they are consulted.
        if (!this.currentBatchNumberPerObject.isEmpty() && !this.totalEvenBatchesPerObject.isEmpty()) {
            int currentBatchNumber = this.currentBatchNumberPerObject.get(object);
            int totalNumberOfBatches = this.totalEvenBatchesPerObject.get(object);
            if (currentBatchNumber < totalNumberOfBatches) {
                return -1;
            }
        }
        XPUDeviceBufferState objectState = this.resolveObjectState(objectIndex);
        long spaceDeallocated = this.interpreterDevice.deallocate(objectState);
        if (TornadoOptions.LOG_BYTECODES() && this.isNotObjectAtomic(object)) {
            // A zero-byte result is logged as a deallocation that did not materialize.
            boolean materializeDealloc = spaceDeallocated != 0L;
            DebugInterpreter.logDeallocObject(object, this.interpreterDevice, tornadoVMBytecodeList, materializeDealloc);
        }
        this.graphExecutionContext.setCurrentDeviceMemoryUsage(this.graphExecutionContext.getCurrentDeviceMemoryUsage() - spaceDeallocated);
        return -1;
    }

    /**
     * Handles ON_DEVICE: logs the object when bytecode logging is on, then
     * resets the event indexes of the given event list.
     *
     * @param logBuilder  bytecode log sink
     * @param objectIndex index into {@code objects}
     * @param eventId     event list to reset
     * @return always -1; no event is produced
     */
    private int executeOnDevice(StringBuilder logBuilder, int objectIndex, int eventId) {
        final Object resident = objects.get(objectIndex);
        final boolean logging = TornadoOptions.LOG_BYTECODES();
        if (logging) {
            DebugInterpreter.logOnDeviceObject(resident, interpreterDevice, logBuilder);
        }
        resetEventIndexes(eventId);
        return -1;
    }

    /**
     * Handles PERSIST: logs the object when bytecode logging is on, then resets
     * the event indexes of the given event list.
     *
     * @param logBuilder  bytecode log sink
     * @param objectIndex index into {@code objects}
     * @param eventId     event list to reset
     * @return always -1; no event is produced
     */
    private int executePersist(StringBuilder logBuilder, int objectIndex, int eventId) {
        Object object = this.objects.get(objectIndex);
        // Use the same predicate that decides whether logBuilder is created, as the
        // sibling handlers do, so dump-only configurations also record PERSIST.
        if (TornadoOptions.LOG_BYTECODES()) {
            DebugInterpreter.logPersistedObject(object, this.interpreterDevice, logBuilder);
        }
        this.resetEventIndexes(eventId);
        return -1;
    }

    /**
     * Handles TRANSFER_HOST_TO_DEVICE_ONCE. A positive batch size streams the
     * object in; otherwise the device is asked to ensure the object is present.
     * {@code KernelContext} objects are skipped.
     *
     * <p>With the profiler enabled, each returned event is waited on and its
     * elapsed time, transferred bytes and dispatch time are added to the profiler.
     *
     * @param logBuilder    bytecode log sink, used only when bytecode logging is on
     * @param objectIndex   index into {@code objects}
     * @param offset        offset passed on to the device
     * @param eventId       event list to reset after the transfer is issued
     * @param sizeBatch     batch size; selects between streaming and ensure-present
     * @param eventWaitList events to wait on, or {@code null}
     */
    private void transferHostToDeviceOnce(StringBuilder logBuilder, int objectIndex, long offset, int eventId, long sizeBatch, int[] eventWaitList) {
        Object object = this.objects.get(objectIndex);
        if (this.isObjectKernelContext(object)) {
            return;
        }
        XPUDeviceBufferState objectState = this.resolveObjectState(objectIndex);
        // Typed list: the ids are unboxed below, which a raw List does not allow.
        List<Integer> allEvents = sizeBatch > 0L
                ? this.interpreterDevice.streamIn(this.graphExecutionContext.getExecutionPlanId(), object, sizeBatch, offset, objectState, eventWaitList)
                : this.interpreterDevice.ensurePresent(this.graphExecutionContext.getExecutionPlanId(), object, objectState, eventWaitList, sizeBatch, offset);
        this.resetEventIndexes(eventId);
        if (TornadoOptions.LOG_BYTECODES() && this.isNotObjectAtomic(object)) {
            long sizeObject = objectState.getXPUBuffer().size();
            DebugInterpreter.logTransferToDeviceOnce(allEvents, object, this.interpreterDevice, sizeObject, sizeBatch, offset, eventId, logBuilder);
        }
        if (TornadoOptions.isProfilerEnabled() && allEvents != null) {
            for (Integer e : allEvents) {
                Event event = this.interpreterDevice.resolveEvent(this.graphExecutionContext.getExecutionPlanId(), e);
                event.waitForEvents(this.graphExecutionContext.getExecutionPlanId());
                long copyInTimer = this.timeProfiler.getTimer(ProfilerType.COPY_IN_TIME);
                copyInTimer += event.getElapsedTime();
                this.timeProfiler.setTimer(ProfilerType.COPY_IN_TIME, copyInTimer);
                this.timeProfiler.addValueToMetric(ProfilerType.TOTAL_COPY_IN_SIZE_BYTES, TimeProfiler.NO_TASK_NAME, objectState.getXPUBuffer().size());
                long dispatchValue = this.timeProfiler.getTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME);
                dispatchValue += event.getDriverDispatchTime();
                this.timeProfiler.setTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME, dispatchValue);
            }
        }
    }

    /**
     * Handles TRANSFER_HOST_TO_DEVICE_ALWAYS: streams the object to the device
     * on every run. {@code KernelContext} objects are skipped.
     *
     * <p>With the profiler enabled, each returned event is waited on and its
     * elapsed time, transferred bytes and dispatch time are added to the profiler.
     *
     * @param logBuilder    bytecode log sink, used only when bytecode logging is on
     * @param objectIndex   index into {@code objects}
     * @param offset        offset passed on to the device
     * @param eventId       event list to reset after the transfer is issued
     * @param sizeBatch     batch size passed on to the device
     * @param eventWaitList events to wait on, or {@code null}
     */
    private void transferHostToDeviceAlways(StringBuilder logBuilder, int objectIndex, long offset, int eventId, long sizeBatch, int[] eventWaitList) {
        Object object = this.objects.get(objectIndex);
        if (this.isObjectKernelContext(object)) {
            return;
        }
        XPUDeviceBufferState objectState = this.resolveObjectState(objectIndex);
        // Typed list: the ids are unboxed below, which a raw List does not allow.
        List<Integer> allEvents = this.interpreterDevice.streamIn(this.graphExecutionContext.getExecutionPlanId(), object, sizeBatch, offset, objectState, eventWaitList);
        this.resetEventIndexes(eventId);
        if (TornadoOptions.LOG_BYTECODES() && this.isNotObjectAtomic(object)) {
            long sizeObject = objectState.getXPUBuffer().size();
            DebugInterpreter.logTransferToDeviceAlways(object, this.interpreterDevice, sizeObject, sizeBatch, offset, eventId, logBuilder);
        }
        if (TornadoOptions.isProfilerEnabled() && allEvents != null) {
            for (Integer e : allEvents) {
                Event event = this.interpreterDevice.resolveEvent(this.graphExecutionContext.getExecutionPlanId(), e);
                event.waitForEvents(this.graphExecutionContext.getExecutionPlanId());
                long copyInTimer = this.timeProfiler.getTimer(ProfilerType.COPY_IN_TIME);
                copyInTimer += event.getElapsedTime();
                this.timeProfiler.setTimer(ProfilerType.COPY_IN_TIME, copyInTimer);
                this.timeProfiler.addValueToMetric(ProfilerType.TOTAL_COPY_IN_SIZE_BYTES, TimeProfiler.NO_TASK_NAME, objectState.getXPUBuffer().size());
                long dispatchValue = this.timeProfiler.getTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME);
                dispatchValue += event.getDriverDispatchTime();
                this.timeProfiler.setTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME, dispatchValue);
            }
        }
    }

    /**
     * Handles TRANSFER_DEVICE_TO_HOST_ALWAYS: copies the object back to the host
     * through the device's {@code streamOutBlocking} call and, with the profiler
     * enabled, records copy-out time, bytes and dispatch time.
     *
     * @param logBuilder    bytecode log sink, used only when bytecode logging is on
     * @param objectIndex   index into {@code objects}
     * @param offset        offset passed on to the device
     * @param eventId       event list to reset after the transfer is issued
     * @param sizeBatch     batch size, used for logging only
     * @param eventWaitList events to wait on, or {@code null}
     * @return the id of the read event, or 0 for a {@code KernelContext} object
     */
    private int transferDeviceToHost(StringBuilder logBuilder, int objectIndex, long offset, int eventId, long sizeBatch, int[] eventWaitList) {
        final Object target = objects.get(objectIndex);
        if (isObjectKernelContext(target)) {
            // NOTE(review): returns 0 here, while other handlers use -1 for "no event" - confirm intent.
            return 0;
        }
        final XPUDeviceBufferState bufferState = resolveObjectState(objectIndex);
        if (TornadoOptions.LOG_BYTECODES()) {
            final long objectBytes = bufferState.getXPUBuffer().size();
            DebugInterpreter.logTransferToHostAlways(target, interpreterDevice, objectBytes, sizeBatch, offset, eventId, logBuilder);
        }
        final int readEvent = interpreterDevice.streamOutBlocking(graphExecutionContext.getExecutionPlanId(), target, offset, bufferState, eventWaitList);
        resetEventIndexes(eventId);
        if (!TornadoOptions.isProfilerEnabled() || readEvent == -1) {
            return readEvent;
        }
        final Event completed = interpreterDevice.resolveEvent(graphExecutionContext.getExecutionPlanId(), readEvent);
        completed.waitForEvents(graphExecutionContext.getExecutionPlanId());
        long copyOut = timeProfiler.getTimer(ProfilerType.COPY_OUT_TIME);
        copyOut += completed.getElapsedTime();
        timeProfiler.setTimer(ProfilerType.COPY_OUT_TIME, copyOut);
        timeProfiler.addValueToMetric(ProfilerType.TOTAL_COPY_OUT_SIZE_BYTES, TimeProfiler.NO_TASK_NAME, bufferState.getXPUBuffer().size());
        long dispatch = timeProfiler.getTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME);
        dispatch += completed.getDriverDispatchTime();
        timeProfiler.setTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME, dispatch);
        return readEvent;
    }

    /**
     * Handles TRANSFER_DEVICE_TO_HOST_ALWAYS_BLOCKING: copies the object back to
     * the host with a blocking stream-out and, with the profiler enabled,
     * records copy-out time, bytes and dispatch time. {@code KernelContext}
     * objects are skipped.
     *
     * @param logBuilder    bytecode log sink, used only when bytecode logging is on
     * @param objectIndex   index into {@code objects}
     * @param offset        offset passed on to the device
     * @param eventId       event list to reset once the transfer has been handled
     * @param sizeBatch     batch size, used for logging only
     * @param eventWaitList events to wait on, or {@code null}
     */
    private void transferDeviceToHostBlocking(StringBuilder logBuilder, int objectIndex, long offset, int eventId, long sizeBatch, int[] eventWaitList) {
        final Object target = objects.get(objectIndex);
        if (isObjectKernelContext(target)) {
            return;
        }
        final XPUDeviceBufferState bufferState = resolveObjectState(objectIndex);
        if (TornadoOptions.LOG_BYTECODES()) {
            final long objectBytes = bufferState.getXPUBuffer().size();
            DebugInterpreter.logTransferToHostAlwaysBlocking(target, interpreterDevice, logBuilder, objectBytes, sizeBatch, offset, eventId);
        }
        final int readEvent = interpreterDevice.streamOutBlocking(graphExecutionContext.getExecutionPlanId(), target, offset, bufferState, eventWaitList);
        final boolean record = TornadoOptions.isProfilerEnabled() && readEvent != -1;
        if (record) {
            final Event completed = interpreterDevice.resolveEvent(graphExecutionContext.getExecutionPlanId(), readEvent);
            completed.waitForEvents(graphExecutionContext.getExecutionPlanId());
            long copyOut = timeProfiler.getTimer(ProfilerType.COPY_OUT_TIME);
            copyOut += completed.getElapsedTime();
            timeProfiler.setTimer(ProfilerType.COPY_OUT_TIME, copyOut);
            timeProfiler.addValueToMetric(ProfilerType.TOTAL_COPY_OUT_SIZE_BYTES, TimeProfiler.NO_TASK_NAME, bufferState.getXPUBuffer().size());
            long dispatch = timeProfiler.getTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME);
            dispatch += completed.getDriverDispatchTime();
            timeProfiler.setTimer(ProfilerType.TOTAL_DISPATCH_DATA_TRANSFERS_TIME, dispatch);
        }
        resetEventIndexes(eventId);
    }

    /**
     * Decides whether an already-compiled task must be rebuilt because the
     * number of batch threads changed.
     *
     * @param taskIndex    global task index
     * @param task         task being launched
     * @param batchThreads batch thread count requested for this launch
     * @return {@code true} if {@code shouldCompile} is false for the installed
     *         code, the task has a non-zero batch thread count, and that count
     *         differs from {@code batchThreads}
     */
    private boolean isRecompilationNeededForLastBatch(int taskIndex, SchedulableTask task, long batchThreads) {
        final TornadoInstalledCode installed = installedCodes[globalToLocalTaskIndex(taskIndex)];
        if (shouldCompile(installed)) {
            return false;
        }
        if (task.getBatchThreads() == 0L) {
            return false;
        }
        return task.getBatchThreads() != batchThreads;
    }

    /**
     * @param currentBatch batch number of the task
     * @param indexInWrite whether the device reported the loop index as used in a write
     * @return {@code true} only for a batch after the first whose loop index is used in a write
     */
    private boolean currentBatchUsesThreadId(int currentBatch, boolean indexInWrite) {
        if (!indexInWrite) {
            return false;
        }
        return currentBatch > 0;
    }

    /**
     * Copies the batch settings of the current launch onto the task.
     *
     * @param task         task to update
     * @param batchThreads batch thread count; always stored on the task
     * @param indexInWrite when set and the task has no batch size yet, the batch
     *                     size is initialised to {@code batchThreads}
     * @param currentBatch batch number, stored only when {@code batchThreads} is non-zero
     */
    private void updateBatchThreads(SchedulableTask task, long batchThreads, boolean indexInWrite, int currentBatch) {
        task.setBatchThreads(batchThreads);
        final boolean batchSizeUnset = task.getBatchSize() == 0L;
        if (batchSizeUnset && indexInWrite) {
            task.setBatchSize(batchThreads);
        }
        if (batchThreads == 0L) {
            return;
        }
        task.setBatchNumber(currentBatch);
    }

    /**
     * Copies the print-kernel flag and the per-backend compiler flags (OpenCL,
     * PTX, SPIR-V, in that order) from the graph execution context's meta into
     * the given task meta.
     *
     * @param meta task-level meta to update
     */
    private void updateMeta(TaskContextInterface meta) {
        meta.setPrintKernelFlag(this.graphExecutionContext.meta().isPrintKernelEnabled());
        TornadoVMBackendType[] backends = { TornadoVMBackendType.OPENCL, TornadoVMBackendType.PTX, TornadoVMBackendType.SPIRV };
        for (TornadoVMBackendType backend : backends) {
            meta.setCompilerFlags(backend, this.graphExecutionContext.meta().getCompilerFlags(backend));
        }
    }

    /**
     * Prepares a task for launch and, when required, installs (compiles) its code
     * on {@code interpreterDevice}.
     *
     * <p>Steps, in order: resolve the kernel stack frame, select the wait list,
     * propagate graph-level meta to the task, invalidate installed code when the
     * batch configuration requires it, update batch bookkeeping, attach the grid
     * scheduler and profiler identifiers, and finally install the code if the
     * cached entry is missing or invalid.
     *
     * @param callWrapperIndex index into {@code kernelStackFrame} for this call
     * @param numArgs          number of arguments used when a new stack frame is created
     * @param eventId          index into {@code events}, or {@code -1} for no wait list
     * @param taskIndex        global index into {@code taskExecutionContexts}
     * @param batchThreads     thread count of the current batch ({@code 0} is treated as "not set")
     * @return a frame holding the resolved stack frame and the wait list (possibly {@code null})
     * @throws TornadoFailureException       if the device context was reset after warmup finished
     * @throws TornadoBailoutRuntimeException if installing the code fails
     * @throws TornadoDeviceFP64NotSupported  propagated unchanged from code installation
     */
    private XPUExecutionFrame compileTaskFromBytecodeToBinary(int callWrapperIndex, int numArgs, int eventId, int taskIndex, long batchThreads) {
        if (this.interpreterDevice.getDeviceContext().wasReset() && this.finishedWarmup) {
            throw new TornadoFailureException("[ERROR] reset() was called after warmup() on device: " + String.valueOf(this.interpreterDevice) + "!");
        }
        boolean redeployOnDevice = this.graphExecutionContext.redeployOnDevice();
        KernelStackFrame kernelStackFrame = this.resolveCallWrapper(callWrapperIndex, numArgs, this.kernelStackFrame, this.interpreterDevice, redeployOnDevice);
        // A wait list is only used when dependencies are enabled and an event list was given.
        int[] waitList = this.useDependencies && eventId != -1 ? this.events[eventId] : null;
        SchedulableTask task = this.taskExecutionContexts.get(taskIndex);
        int currentBatch = task.getBatchNumber();
        TaskContextInterface meta = task.meta();
        this.updateMeta(meta);
        boolean indexInWrite = this.interpreterDevice.loopIndexInWrite(task);
        // Drop the installed code so the shouldCompile() check below triggers a rebuild.
        if (this.isRecompilationNeededForLastBatch(taskIndex, task, batchThreads) || this.currentBatchUsesThreadId(currentBatch, indexInWrite)) {
            task.forceCompilation();
            this.installedCodes[this.globalToLocalTaskIndex(taskIndex)].invalidate();
        }
        this.updateBatchThreads(task, batchThreads, indexInWrite, currentBatch);
        task.enableDefaultThreadScheduler(this.graphExecutionContext.useDefaultThreadScheduler());
        // Attach the grid scheduler only when it has an entry for this task id.
        if (this.gridScheduler != null && this.gridScheduler.get(task.getId()) != null) {
            task.setUseGridScheduler(true);
            task.setGridScheduler(this.gridScheduler);
        }
        if (this.timeProfiler instanceof TimeProfiler) {
            this.timeProfiler.registerBackend(task.getId(), task.getDevice().getTornadoVMBackend().name());
            this.timeProfiler.registerDeviceID(task.getId(), task.meta().getBackendIndex() + ":" + task.meta().getDeviceIndex());
            this.timeProfiler.registerDeviceName(task.getId(), task.getDevice().getPhysicalDevice().getDeviceName());
        }
        if (this.shouldCompile(this.installedCodes[this.globalToLocalTaskIndex(taskIndex)])) {
            task.setDevice((TornadoDevice)this.interpreterDevice);
            try {
                task.attachProfiler(this.timeProfiler);
                // NOTE(review): the last task in taskExecutionContexts is always forced to
                // compile; the reason is not visible in this method - confirm intent.
                if (taskIndex == this.taskExecutionContexts.size() - 1) {
                    task.forceCompilation();
                }
                this.installedCodes[this.globalToLocalTaskIndex((int)taskIndex)] = this.interpreterDevice.installCode(this.graphExecutionContext.getExecutionPlanId(), task);
                this.profilerUpdateForPreCompiledTask(task);
                // After a successful install, advance the task's batch number when batching
                // is active (non-zero thread count) and the loop index is written.
                if (indexInWrite && batchThreads != 0L) {
                    task.setBatchNumber(++currentBatch);
                }
            }
            catch (TornadoBailoutRuntimeException e) {
                // Re-wrap with the task name; the original exception is kept as the cause.
                throw new TornadoBailoutRuntimeException("Unable to compile " + task.getFullName() + "\nThe internal error is: " + e.getMessage() + "\nStacktrace: " + Arrays.toString(e.getStackTrace()), (Exception)((Object)e));
            }
            catch (TornadoDeviceFP64NotSupported e) {
                throw e;
            }
            catch (InternalError e) {
                // NOTE(review): the InternalError is not chained as a cause; only its stack
                // trace is embedded in the message.
                throw new TornadoBailoutRuntimeException("[Internal Error] Unable to compile " + task.getFullName() + "\n" + Arrays.toString(e.getStackTrace()));
            }
        }
        return new XPUExecutionFrame(kernelStackFrame, waitList);
    }

    /**
     * Skips the argument descriptors of a call in {@code bytecodeResult}.
     *
     * <p>For each argument, one {@code get()} and one {@code getInt()} are issued
     * and their results discarded.
     *
     * @param numArgs number of argument descriptors to consume; non-positive
     *                values consume nothing
     */
    private void popArgumentsFromCall(int numArgs) {
        int remaining = numArgs;
        while (remaining-- > 0) {
            this.bytecodeResult.get();
            this.bytecodeResult.getInt();
        }
    }

    /**
     * Executes a LAUNCH bytecode for one task.
     *
     * <p>Resolves the installed code (falling back to the device code cache),
     * rebuilds the kernel stack frame from the argument descriptors read from
     * {@code bytecodeResult}, uploads the atomics buffer when the task uses one,
     * and launches the kernel with or without dependencies.
     *
     * @param logBuilder     sink for bytecode logging (used only when bytecode logging is on)
     * @param numArgs        number of argument descriptors to read from {@code bytecodeResult}
     * @param eventId        event-list index; its write index is reset after a successful launch
     * @param taskIndex      global index into {@code taskExecutionContexts}
     * @param batchThreads   thread count of the current batch, forwarded to the launch call
     * @param offset         batch offset, used for logging only in this method
     * @param executionFrame stack frame and wait list prepared for this task
     * @return the event id returned by the launch call
     * @throws TornadoBailoutRuntimeException if no installed code is available or the launch fails
     * @throws TornadoRuntimeException        if the task meta is not a {@code TaskDataContext}
     */
    private int executeLaunch(StringBuilder logBuilder, int numArgs, int eventId, int taskIndex, long batchThreads, long offset, XPUExecutionFrame executionFrame) {
        SchedulableTask task = this.taskExecutionContexts.get(taskIndex);
        KernelStackFrame stackFrame = executionFrame.stackFrame;
        int[] waitList = executionFrame.waitList;

        // Resolve the installed code, consulting the device cache when the local slot is empty.
        int localIndex = this.globalToLocalTaskIndex(taskIndex);
        if (this.installedCodes[localIndex] == null) {
            this.installedCodes[localIndex] = this.interpreterDevice.getCodeFromCache(this.graphExecutionContext.getExecutionPlanId(), task);
        }
        TornadoInstalledCode installedCode = this.installedCodes[localIndex];
        if (installedCode == null) {
            throw new TornadoBailoutRuntimeException("Code generator Failed");
        }

        int[] atomicsArray;
        if (task instanceof PrebuiltTask) {
            atomicsArray = ((PrebuiltTask) task).getAtomics();
        } else {
            atomicsArray = this.interpreterDevice.checkAtomicsForTask(task);
        }

        // Map each grid dimension to its global work size when a worker grid is registered for the task.
        HashMap<Integer, Integer> threadDeploy = new HashMap<>();
        WorkerGrid workerGrid = this.gridScheduler != null ? this.gridScheduler.get(task.getId()) : null;
        if (workerGrid != null) {
            int dimension = 0;
            for (long maxThread : workerGrid.getGlobalWork()) {
                threadDeploy.put(dimension++, (int) maxThread);
            }
        }
        stackFrame.reset();
        stackFrame.setKernelContext(threadDeploy);

        // Each argument descriptor is one type byte followed by one int index.
        for (int i = 0; i < numArgs; ++i) {
            byte argType = this.bytecodeResult.get();
            int argIndex = this.bytecodeResult.getInt();
            if (argType == TornadoVMBytecodes.PUSH_CONSTANT_ARGUMENT.value()) {
                stackFrame.addCallArgument(this.constants.get(argIndex), false);
                continue;
            }
            if (argType == TornadoVMBytecodes.PUSH_REFERENCE_ARGUMENT.value()) {
                if (this.isObjectKernelContext(this.objects.get(argIndex))) {
                    stackFrame.addCallArgument(new KernelStackFrame.KernelContextArgument(), false);
                    continue;
                }
                DataObjectState globalState = this.resolveGlobalObjectState(argIndex);
                XPUDeviceBufferState objectState = globalState.getDeviceBufferState(this.interpreterDevice);
                if (!this.isObjectInAtomicRegion(objectState, this.interpreterDevice, task)) {
                    stackFrame.addCallArgument(objectState.getXPUBuffer().toBuffer(), true);
                    continue;
                }
                // Atomic-region objects are folded into the atomics array instead of the stack frame.
                atomicsArray = this.interpreterDevice.updateAtomicRegionAndObjectState(task, atomicsArray, i, this.objects.get(argIndex), objectState);
                continue;
            }
            TornadoInternalError.shouldNotReachHere();
        }

        XPUBuffer bufferAtomics = null;
        if (atomicsArray != null) {
            bufferAtomics = this.interpreterDevice.createOrReuseAtomicsBuffer(atomicsArray, Access.READ_WRITE);
            List<Integer> allEvents = bufferAtomics.enqueueWrite(this.graphExecutionContext.getExecutionPlanId(), null, 0L, 0L, null, false);
            if (TornadoOptions.isProfilerEnabled()) {
                // Account the atomics upload under the copy-in timer.
                for (Integer writeEvent : allEvents) {
                    Event event = this.interpreterDevice.resolveEvent(this.graphExecutionContext.getExecutionPlanId(), writeEvent);
                    event.waitForEvents(this.graphExecutionContext.getExecutionPlanId());
                    long copyInTime = this.timeProfiler.getTimer(ProfilerType.COPY_IN_TIME);
                    this.timeProfiler.setTimer(ProfilerType.COPY_IN_TIME, copyInTime + event.getElapsedTime());
                }
            }
            if (TornadoOptions.LOG_BYTECODES()) {
                DebugInterpreter.logStreamInAtomic(bufferAtomics, this.interpreterDevice, eventId, logBuilder);
            }
        }
        if (TornadoOptions.LOG_BYTECODES()) {
            DebugInterpreter.logLaunchTask(task, this.interpreterDevice, batchThreads, offset, eventId, logBuilder);
        }

        TaskContextInterface taskMeta = task.meta();
        if (!(taskMeta instanceof TaskDataContext)) {
            throw new TornadoRuntimeException("task.meta is not instanceof TaskDataContext");
        }
        TaskDataContext dataContext = (TaskDataContext) taskMeta;
        dataContext.attachProfiler(this.timeProfiler);
        dataContext.setGridScheduler(this.gridScheduler);
        dataContext.setThreadInfoEnabled(this.graphExecutionContext.meta().isThreadInfoEnabled());
        try {
            int lastEvent = this.useDependencies
                    ? installedCode.launchWithDependencies(this.graphExecutionContext.getExecutionPlanId(), stackFrame, bufferAtomics, dataContext, batchThreads, waitList)
                    : installedCode.launchWithoutDependencies(this.graphExecutionContext.getExecutionPlanId(), stackFrame, bufferAtomics, dataContext, batchThreads);
            this.resetEventIndexes(eventId);
            return lastEvent;
        } catch (Exception e) {
            if (TornadoOptions.DEBUG) {
                e.printStackTrace();
            }
            throw new TornadoBailoutRuntimeException("Bailout from LAUNCH Bytecode: \nReason: " + String.valueOf(e), e);
        }
    }

    /**
     * Appends {@code lastEvent} to the event list selected by {@code eventId}.
     *
     * <p>Nothing happens when dependencies are disabled or {@code lastEvent} is
     * {@code -1}. Otherwise the event is stored at the list's current write
     * index, which is then advanced by one.
     *
     * @param logBuilder sink for bytecode logging (used only when bytecode logging is on)
     * @param lastEvent  event id to record, or {@code -1} for none
     * @param eventId    index of the event list to append to
     */
    private void executeDependency(StringBuilder logBuilder, int lastEvent, int eventId) {
        if (!this.useDependencies || lastEvent == -1) {
            return;
        }
        if (TornadoOptions.LOG_BYTECODES()) {
            DebugInterpreter.logAddDependency(lastEvent, eventId, logBuilder);
        }
        boolean hasFreeSlot = this.eventsIndexes[eventId] < this.events[eventId].length;
        TornadoInternalError.guarantee(hasFreeSlot, "event list is too small");
        int slot = this.eventsIndexes[eventId];
        this.events[eventId][slot] = lastEvent;
        this.eventsIndexes[eventId] = slot + 1;
    }

    /**
     * Enqueues a marker on {@code interpreterDevice} for the given wait list and
     * resets the write index of the event list.
     *
     * @param logBuilder sink for bytecode logging (used only when bytecode logging is on)
     * @param eventId    event-list index to reset afterwards ({@code -1} resets nothing)
     * @param waitList   events passed to the marker
     * @return the event id returned by the device for the marker
     */
    private int executeBarrier(StringBuilder logBuilder, int eventId, int[] waitList) {
        if (TornadoOptions.LOG_BYTECODES()) {
            DebugInterpreter.logBarrier(eventId, logBuilder);
        }
        final long executionPlanId = this.graphExecutionContext.getExecutionPlanId();
        final int markerEvent = this.interpreterDevice.enqueueMarker(executionPlanId, waitList);
        this.resetEventIndexes(eventId);
        return markerEvent;
    }

    /**
     * Fails the interpreter on an unrecognised bytecode.
     *
     * <p>When the graph meta has debug enabled, the offending opcode is logged
     * first. This method never returns normally.
     *
     * @param op the unrecognised opcode
     * @throws TornadoRuntimeException always
     */
    private void throwErrorInterpreter(byte op) {
        boolean debugEnabled = this.graphExecutionContext.meta().isDebug();
        if (debugEnabled) {
            this.logger.debug("bc: invalid op 0x%x(%d)", op, op);
        }
        throw new TornadoRuntimeException("[ERROR] TornadoVM Bytecode not recognized");
    }

    /**
     * Looks up the buffer state of an object on {@code interpreterDevice}.
     *
     * @param index index into {@code dataObjectStates}
     * @return the device buffer state for that object on the interpreter's device
     */
    private XPUDeviceBufferState resolveObjectState(int index) {
        DataObjectState globalState = this.dataObjectStates[index];
        return globalState.getDeviceBufferState(this.interpreterDevice);
    }

    /**
     * Tests whether the given object is a {@link KernelContext}.
     *
     * @param object object to test; {@code null} yields {@code false}
     * @return {@code true} if {@code object} is a {@code KernelContext} instance
     */
    private boolean isObjectKernelContext(Object object) {
        boolean isKernelContext = object instanceof KernelContext;
        return isKernelContext;
    }

    /**
     * Tests whether the given object is anything other than an
     * {@link AtomicInteger}.
     *
     * @param object object to test; {@code null} yields {@code true}
     * @return {@code false} if {@code object} is an {@code AtomicInteger},
     *         {@code true} otherwise
     */
    private boolean isNotObjectAtomic(Object object) {
        if (object instanceof AtomicInteger) {
            return false;
        }
        return true;
    }

    /**
     * Resets the write index of an event list back to zero.
     *
     * @param eventList index of the event list; {@code -1} means "no list" and
     *                  is ignored
     */
    private void resetEventIndexes(int eventList) {
        if (eventList == -1) {
            return;
        }
        this.eventsIndexes[eventList] = 0;
    }

    /**
     * Returns the kernel stack frame stored at {@code index}, creating a new one
     * on the device when the slot is empty, the stored frame is no longer valid,
     * or a redeploy was requested.
     *
     * @param index            slot in {@code kernelStackFrame}
     * @param numArgs          argument count passed to the device when creating a frame
     * @param kernelStackFrame cache of stack frames; the slot is overwritten on creation
     * @param device           device that creates new stack frames
     * @param redeployOnDevice forces creation of a fresh frame
     * @return the (possibly newly created) frame stored at {@code index}
     */
    private KernelStackFrame resolveCallWrapper(int index, int numArgs, KernelStackFrame[] kernelStackFrame, TornadoXPUDevice device, boolean redeployOnDevice) {
        if (this.graphExecutionContext.meta().isDebug() && redeployOnDevice) {
            this.logger.debug("Recompiling task on device " + String.valueOf(device));
        }
        KernelStackFrame frame = kernelStackFrame[index];
        boolean reusable = frame != null && frame.isValid() && !redeployOnDevice;
        if (!reusable) {
            kernelStackFrame[index] = device.createKernelStackFrame(this.graphExecutionContext.getExecutionPlanId(), numArgs, Access.NONE);
        }
        return kernelStackFrame[index];
    }

    /**
     * Tells whether code has to be (re)installed.
     *
     * @param installedCode currently installed code, may be {@code null}
     * @return {@code true} if there is no installed code or it reports itself
     *         as not valid
     */
    private boolean shouldCompile(TornadoInstalledCode installedCode) {
        if (installedCode == null) {
            return true;
        }
        return !installedCode.isValid();
    }

    /**
     * Translates a global task index into the task's position in
     * {@code localTaskList}.
     *
     * <p>The lookup is done once; the previous form repeated the same
     * {@code get} + {@code indexOf} scan twice per call.
     *
     * @param taskIndex index into {@code taskExecutionContexts}
     * @return position of the task in {@code localTaskList}, or {@code 0} when
     *         the task is not in that list
     */
    private int globalToLocalTaskIndex(int taskIndex) {
        int localIndex = this.localTaskList.indexOf(this.taskExecutionContexts.get(taskIndex));
        // Tasks missing from the local list fall back to slot 0, as before.
        return localIndex == -1 ? 0 : localIndex;
    }

    /**
     * Registers device id and device name with the profiler for prebuilt tasks.
     *
     * <p>Does nothing unless {@code task} is a {@code PrebuiltTask} and
     * {@code timeProfiler} is a {@code TimeProfiler}.
     *
     * @param task task that was just installed
     */
    private void profilerUpdateForPreCompiledTask(SchedulableTask task) {
        if (!(task instanceof PrebuiltTask)) {
            return;
        }
        PrebuiltTask prebuiltTask = (PrebuiltTask) task;
        if (!(this.timeProfiler instanceof TimeProfiler)) {
            return;
        }
        // Device id is reported as "<backendIndex>:<deviceIndex>".
        this.timeProfiler.registerDeviceID(task.getId(), prebuiltTask.meta().getXPUDevice().getBackendIndex() + ":" + prebuiltTask.meta().getDeviceIndex());
        this.timeProfiler.registerDeviceName(task.getId(), prebuiltTask.meta().getXPUDevice().getPhysicalDevice().getDeviceName());
    }

    /**
     * Returns the device-independent state entry of an object.
     *
     * @param index index into {@code dataObjectStates}
     * @return the entry stored at {@code index}
     */
    private DataObjectState resolveGlobalObjectState(int index) {
        DataObjectState globalState = this.dataObjectStates[index];
        return globalState;
    }

    /**
     * Checks whether an object must be treated as part of the atomic region for
     * a task.
     *
     * <p>The device is only consulted when the object state reports an atomic
     * region.
     *
     * @param objectState device buffer state of the object
     * @param device      device queried for the task's atomics parameters
     * @param task        task being launched
     * @return {@code true} when the state has an atomic region and the device
     *         check for the task succeeds
     */
    private boolean isObjectInAtomicRegion(XPUDeviceBufferState objectState, TornadoXPUDevice device, SchedulableTask task) {
        if (!objectState.isAtomicRegionPresent()) {
            return false;
        }
        return device.checkAtomicsParametersForTask(task);
    }

    /**
     * Runs the interpreter through {@code execute(boolean)} with the flag set to
     * {@code true}; any value returned by that call is discarded.
     */
    public void compile() {
        final boolean flag = true;
        this.execute(flag);
    }

    /**
     * Runs the interpreter through {@code execute(boolean)} with the flag set to
     * {@code false}.
     *
     * @return the event produced by that call
     */
    public Event execute() {
        final boolean flag = false;
        Event result = this.execute(flag);
        return result;
    }

    /**
     * Drops every cached installed-code entry by setting all slots of
     * {@code installedCodes} to {@code null}.
     */
    public void clearInstalledCode() {
        final TornadoInstalledCode noCode = null;
        Arrays.fill(this.installedCodes, noCode);
    }

    /**
     * Pairs the kernel stack frame resolved for a task with the wait list to be
     * used when launching it.
     */
    private static class XPUExecutionFrame {
        /** Stack frame that receives the call arguments. */
        private KernelStackFrame stackFrame;
        /** Events to wait on before launch; may be {@code null}. */
        private int[] waitList;

        /**
         * @param frame  kernel stack frame for the task
         * @param events wait list for the launch, or {@code null} for none
         */
        XPUExecutionFrame(KernelStackFrame frame, int[] events) {
            this.waitList = events;
            this.stackFrame = frame;
        }
    }

    /**
     * Immutable pair of object counters.
     *
     * <p>NOTE(review): the meaning of the components is inferred from their
     * names only - presumably the number of persistent objects and the number of
     * objects that need allocation; confirm against the code that creates this
     * record.
     *
     * @param persistentObjectCount first counter
     * @param objectsToAlloc        second counter
     */
    public record ObjectAllocationInfo(int persistentObjectCount, int objectsToAlloc) {
    }
}

