1/* 2 * Copyright (C) 2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8#include "opencl/source/command_queue/gpgpu_walker.h" 9 10namespace NEO { 11template <typename GfxFamily> 12void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler( 13 LinearStream &commandStream, 14 DeviceQueueHw<GfxFamily> &devQueueHw, 15 PreemptionMode preemptionMode, 16 SchedulerKernel &scheduler, 17 IndirectHeap *ssh, 18 IndirectHeap *dsh, 19 bool isCcsUsed) { 20 21 const auto &kernelInfo = scheduler.getKernelInfo(); 22 23 using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; 24 using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; 25 using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; 26 27 const auto &hwInfo = devQueueHw.getDevice().getHardwareInfo(); 28 NEO::PipeControlArgs args; 29 MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args); 30 31 uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex; 32 const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize; 33 const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable; 34 const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA); 35 36 // Program media interface descriptor load 37 HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad( 38 commandStream, 39 offsetInterfaceDescriptor, 40 totalInterfaceDescriptorTableSize); 41 42 DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); 43 44 // Determine SIMD size 45 uint32_t simd = kernelInfo.getMaxSimdSize(); 46 DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20); 47 48 // Patch our kernel constants 49 scheduler.setGlobalWorkOffsetValues(0, 0, 0); 50 scheduler.setGlobalWorkSizeValues(static_cast<uint32_t>(scheduler.getGws()), 1, 1); 51 scheduler.setLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1); 52 scheduler.setLocalWorkSize2Values(static_cast<uint32_t>(scheduler.getLws()), 1, 1); 53 scheduler.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1); 54 scheduler.setNumWorkGroupsValues(static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0); 55 scheduler.setWorkDim(1); 56 57 // Send our indirect object data 58 size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1}; 59 60 // Create indirectHeap for IOH that is located at the end of device enqueue DSH 61 size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler); 62 IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace()); 63 indirectObjectHeap.getSpace(curbeOffset); 64 IndirectHeap *ioh = &indirectObjectHeap; 65 66 // Program the walker. Invokes execution so all state should already be programmed 67 auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>(); 68 GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker; 69 70 bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler); 71 auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler); 72 73 HardwareCommandsHelper<GfxFamily>::sendIndirectState( 74 commandStream, 75 *dsh, 76 *ioh, 77 *ssh, 78 scheduler, 79 scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 80 simd, 81 localWorkSizes, 82 offsetInterfaceDescriptorTable, 83 interfaceDescriptorIndex, 84 preemptionMode, 85 &cmdWalker, 86 nullptr, 87 true, 88 devQueueHw.getDevice()); 89 90 // Implement enabling special WA DisableLSQCROPERFforOCL if needed 91 GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true); 92 93 size_t globalOffsets[3] = {0, 0, 0}; 94 size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; 95 GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes, 96 simd, 1, true, inlineDataProgrammingRequired, 0u); 97 *pGpGpuWalkerCmd = cmdWalker; 98 99 // Implement disabling special WA DisableLSQCROPERFforOCL if needed 100 GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false); 101 102 // Do not put BB_START only when returning in first Scheduler run 103 if (devQueueHw.getSchedulerReturnInstance() != 1) { 104 args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(true, hwInfo); 105 MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args); 106 107 // Add BB Start Cmd to the SLB in the Primary Batch Buffer 108 auto bbStart = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>(); 109 MI_BATCH_BUFFER_START cmdBbStart = GfxFamily::cmdInitBatchBufferStart; 110 cmdBbStart.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH); 111 uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress(); 112 cmdBbStart.setBatchBufferStartAddress(slbAddress); 113 *bbStart = cmdBbStart; 114 } 115} 116} // namespace NEO