/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/residency_container.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/software_tags_manager.h"

#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/tools/source/metrics/metric.h"

#include <limits>
#include <thread>

namespace L0 {

// Creates a fence bound to this command queue and returns its handle.
// Always reports success; any failure handling is inside Fence::create.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandQueueHw<gfxCoreFamily>::createFence(const ze_fence_desc_t *desc,
                                                       ze_fence_handle_t *phFence) {
    *phFence = Fence::create(this, desc);
    return ZE_RESULT_SUCCESS;
}

// Tears down the queue: releases the linear command stream, the internal
// buffer allocations, and finally the queue object itself.
// NOTE: ends with `delete this` — the object must not be used afterwards.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandQueueHw<gfxCoreFamily>::destroy() {
    if (commandStream) {
        delete commandStream;
        commandStream = nullptr;
    }
    buffers.destroy(this->getDevice());
    delete this;
    return ZE_RESULT_SUCCESS;
}

// Submits a batch of closed command lists to the hardware.
//
// High-level phases (order matters — the size estimation must account for
// every command that is later emitted into the `child` stream):
//   1. Validate the command lists against this queue (copy-only match,
//      partition count, cooperative-kernel mixing).
//   2. Estimate preemption/debugger/front-end/command sizes and the
//      residency-container space needed.
//   3. Reserve the linear stream and program preamble state (pipeline
//      select, SBA, preemption, SIP, partition config) as required.
//   4. Chain each command list's buffers via MI_BATCH_BUFFER_START,
//      patching VFE/preemption state between lists where it changes.
//   5. Emit the optional fence signal and the task-count write, terminate
//      the stream, submit, and optionally synchronize.
//
// Returns ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE on validation failure,
// ZE_RESULT_ERROR_UNKNOWN if submission fails, ZE_RESULT_SUCCESS otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
    uint32_t numCommandLists,
    ze_command_list_handle_t *phCommandLists,
    ze_fence_handle_t hFence,
    bool performMigration) {

    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;

    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    // Serialize against other users of the CSR for the whole submission.
    auto lockCSR = csr->obtainUniqueOwnership();

    auto anyCommandListWithCooperativeKernels = false;
    auto anyCommandListWithoutCooperativeKernels = false;

    bool cachedMOCSAllowed = true;

    // Phase 1: validate every command list against this queue.
    for (auto i = 0u; i < numCommandLists; i++) {
        auto commandList = CommandList::fromHandle(phCommandLists[i]);
        // Copy-only command lists may only run on copy-only queues (and vice versa).
        if (peekIsCopyOnlyCommandQueue() != commandList->isCopyOnly()) {
            return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
        }

        // The list must not have been built for more sub-device partitions
        // than this queue has active.
        if (this->activeSubDevices < commandList->partitionCount) {
            return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
        }

        if (commandList->containsCooperativeKernels()) {
            anyCommandListWithCooperativeKernels = true;
        } else {
            anyCommandListWithoutCooperativeKernels = true;
        }
        // If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue requires the uncached MOCS
        if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
            cachedMOCSAllowed = false;
        }
    }

    // Mixing cooperative and regular kernels in one submission is rejected
    // unless explicitly allowed via debug flag.
    bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get();
    if (anyCommandListWithCooperativeKernels && anyCommandListWithoutCooperativeKernels &&
        (!isMixingRegularAndCooperativeKernelsAllowed)) {
        return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
    }

    // Phase 2: size estimation. These accumulators must cover everything
    // emitted into `child` below, or getSpace() would overrun the stream.
    size_t spaceForResidency = 0;
    size_t preemptionSize = 0u;
    size_t debuggerCmdsSize = 0;
    constexpr size_t residencyContainerSpaceForPreemption = 2;
    constexpr size_t residencyContainerSpaceForFence = 1;
    constexpr size_t residencyContainerSpaceForTagWrite = 1;

    NEO::Device *neoDevice = device->getNEODevice();
    auto devicePreemption = device->getDevicePreemptionMode();
    // On the first submission the queue preemption mode is still Initial;
    // adopt the device's mode in that case.
    const bool initialPreemptionMode = commandQueuePreemptionMode == NEO::PreemptionMode::Initial;
    NEO::PreemptionMode cmdQueuePreemption = commandQueuePreemptionMode;
    if (initialPreemptionMode) {
        cmdQueuePreemption = devicePreemption;
    }
    // statePreemption tracks the preemption mode as it evolves through the
    // estimation loop and, later, through the emission loop.
    NEO::PreemptionMode statePreemption = cmdQueuePreemption;

    const bool stateSipRequired = (initialPreemptionMode && devicePreemption == NEO::PreemptionMode::MidThread) ||
                                  (neoDevice->getDebugger() && NEO::Debugger::isDebugEnabled(internalUsage));

    if (initialPreemptionMode) {
        preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice);
    }

    if (stateSipRequired) {
        preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, csr->isRcs());
    }

    preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(devicePreemption, commandQueuePreemptionMode);

    if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) {
        debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(neoDevice->getSourceLevelDebugger() != nullptr);
    }

    if (devicePreemption == NEO::PreemptionMode::MidThread) {
        spaceForResidency += residencyContainerSpaceForPreemption;
    }

    bool directSubmissionEnabled = isCopyOnlyCommandQueue ? csr->isBlitterDirectSubmissionEnabled() : csr->isDirectSubmissionEnabled();
    bool programActivePartitionConfig = csr->isProgramActivePartitionConfigRequired();

    L0::Fence *fence = nullptr;

    device->activateMetricGroups();

    size_t totalCmdBuffers = 0;
    uint32_t perThreadScratchSpaceSize = 0;
    uint32_t perThreadPrivateScratchSize = 0;
    NEO::PageFaultManager *pageFaultManager = nullptr;
    if (performMigration) {
        pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
        // Without a page-fault manager there is nothing to migrate.
        if (pageFaultManager == nullptr) {
            performMigration = false;
        }
    }
    // Per-command-list pass: gather residency, scratch requirements, buffer
    // counts, and the extra stream space needed for preemption transitions.
    for (auto i = 0u; i < numCommandLists; i++) {
        auto commandList = CommandList::fromHandle(phCommandLists[i]);

        bool indirectAllocationsAllowed = commandList->hasIndirectAllocationsAllowed();
        if (indirectAllocationsAllowed) {
            UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls();

            auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
            svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
                                                                         commandList->commandContainer.getResidencyContainer(),
                                                                         unifiedMemoryControls.generateMask());
        }

        totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
        spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
        auto commandListPreemption = commandList->getCommandListPreemptionMode();
        if (statePreemption != commandListPreemption) {
            // Mirrors the transition handling in the emission loop below:
            // optional sync pipe control plus the preemption programming itself.
            if (preemptionCmdSyncProgramming) {
                preemptionSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
            }
            preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
            statePreemption = commandListPreemption;
        }

        // Scratch is sized for the most demanding command list in the batch.
        perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());

        perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());

        if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
            if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
                heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
            }
            for (auto element : commandList->commandContainer.sshAllocations) {
                heapContainer.push_back(element);
            }
        }

        partitionCount = std::max(partitionCount, commandList->partitionCount);
        commandList->csr = csr;
        commandList->makeResidentAndMigrate(performMigration);
    }

    // One MI_BATCH_BUFFER_START per chained command buffer.
    size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START);
    linearStreamSizeEstimate += csr->getCmdsSizeForHardwareContext();

    // Stream terminator: a start (for direct submission ring) or an end.
    if (directSubmissionEnabled) {
        linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_START);
    } else {
        linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END);
    }

    auto csrHw = reinterpret_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(csr);
    if (programActivePartitionConfig) {
        linearStreamSizeEstimate += csrHw->getCmdSizeForActivePartitionConfig();
    }

    const auto &hwInfo = this->device->getHwInfo();
    if (hFence) {
        fence = Fence::fromHandle(hFence);
        spaceForResidency += residencyContainerSpaceForFence;
        // Copy queues signal via MI_FLUSH_DW; compute queues via PIPE_CONTROL post-sync.
        linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
    }

    spaceForResidency += residencyContainerSpaceForTagWrite;

    // Pre-reserve to avoid reallocation while residency entries are added.
    csr->getResidencyAllocations().reserve(spaceForResidency);

    auto scratchSpaceController = csr->getScratchSpaceController();
    bool gsbaStateDirty = false;
    bool frontEndStateDirty = false;
    handleScratchSpace(heapContainer,
                       scratchSpaceController,
                       gsbaStateDirty, frontEndStateDirty,
                       perThreadScratchSpaceSize, perThreadPrivateScratchSize);

    auto &streamProperties = csr->getStreamProperties();
    const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
    auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
    auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
    bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
    if (!isPatchingVfeStateAllowed) {
        // Single front-end state for the whole batch.
        streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, disableOverdispatch,
                                                     isEngineInstanced, hwInfo);
    } else {
        // Per-command-list VFE patching: only the CCS dispatch mode is set here;
        // the rest is applied per list in the emission loop.
        streamProperties.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
    }
    frontEndStateDirty |= streamProperties.frontEndState.isDirty();

    gsbaStateDirty |= csr->getGSBAStateDirty();
    frontEndStateDirty |= csr->getMediaVFEStateDirty();
    if (!isCopyOnlyCommandQueue) {

        if (!gpgpuEnabled) {
            linearStreamSizeEstimate += estimatePipelineSelect();
        }

        linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirty, numCommandLists, phCommandLists);

        if (gsbaStateDirty) {
            linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
        }

        linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
    }

    if (NEO::DebugManager.flags.EnableSWTags.get()) {
        linearStreamSizeEstimate += NEO::SWTagsManager::estimateSpaceForSWTags<GfxFamily>();
    }

    // Final task-count write (see dispatchTaskCountWrite below).
    linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);

    // Phase 3: carve the estimated (aligned) space out of the queue's stream;
    // `padding` is zero-filled at the end to keep the buffer pointer aligned.
    size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
    size_t padding = alignedSize - linearStreamSizeEstimate;
    reserveLinearStreamSize(alignedSize);
    NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);

    const auto globalFenceAllocation = csr->getGlobalFenceAllocation();
    if (globalFenceAllocation) {
        csr->makeResident(*globalFenceAllocation);
    }

    const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
    if (workPartitionAllocation) {
        csr->makeResident(*workPartitionAllocation);
    }

    if (NEO::DebugManager.flags.EnableSWTags.get()) {
        NEO::SWTagsManager *tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager.get();
        UNRECOVERABLE_IF(tagsManager == nullptr);
        csr->makeResident(*tagsManager->getBXMLHeapAllocation());
        csr->makeResident(*tagsManager->getSWTagHeapAllocation());
        tagsManager->insertBXMLHeapAddress<GfxFamily>(child);
        tagsManager->insertSWTagHeapAddress<GfxFamily>(child);
    }

    csr->programHardwareContext(child);

    if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
        csr->makeResident(*device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
    }

    // Preamble state programming — compute queues only; copy queues need none of this.
    if (!isCopyOnlyCommandQueue) {
        if (!gpgpuEnabled) {
            programPipelineSelect(child);
        }

        if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed && neoDevice->getSourceLevelDebugger()) {
            NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
            // Debug commands are programmed once per queue lifetime.
            commandQueueDebugCmdsProgrammed = true;
        }

        if (gsbaStateDirty) {
            // Memory pool of the first list's indirect heap decides the SBA setup.
            auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
            programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child, cachedMOCSAllowed);
        }

        if (initialPreemptionMode) {
            NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation());
        }

        if (stateSipRequired) {
            NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice);
        }

        // Transition from the queue's last-known mode to the batch's starting mode.
        if (cmdQueuePreemption != commandQueuePreemptionMode) {
            NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
                                                               cmdQueuePreemption,
                                                               commandQueuePreemptionMode,
                                                               csr->getPreemptionAllocation());
        }

        // Reset the tracker for the emission loop (the estimation loop above mutated it).
        statePreemption = cmdQueuePreemption;

        const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
                                   (neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));

        if (devicePreemption == NEO::PreemptionMode::MidThread) {
            csr->makeResident(*csr->getPreemptionAllocation());
        }

        if (sipKernelUsed) {
            auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation();
            csr->makeResident(*sipIsa);
        }

        if (NEO::Debugger::isDebugEnabled(internalUsage) && neoDevice->getDebugger()) {
            UNRECOVERABLE_IF(device->getDebugSurface() == nullptr);
            csr->makeResident(*device->getDebugSurface());
        }
    }

    if (programActivePartitionConfig) {
        csrHw->programActivePartitionConfig(child);
    }

    // Phase 4: chain every command list's buffers into the child stream,
    // handling per-list preemption and front-end state transitions.
    for (auto i = 0u; i < numCommandLists; ++i) {
        auto commandList = CommandList::fromHandle(phCommandLists[i]);
        auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
        auto cmdBufferCount = cmdBufferAllocations.size();

        auto commandListPreemption = commandList->getCommandListPreemptionMode();
        if (statePreemption != commandListPreemption) {
            if (NEO::DebugManager.flags.EnableSWTags.get()) {
                // NOTE(review): "ComandList" typo is part of the emitted SW tag
                // payload; left untouched to keep tag output byte-identical.
                neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::PipeControlReasonTag>(
                    child,
                    *neoDevice,
                    "ComandList Preemption Mode update", 0u);
            }

            if (preemptionCmdSyncProgramming) {
                NEO::PipeControlArgs args;
                NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(child, args);
            }
            NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
                                                               commandListPreemption,
                                                               statePreemption,
                                                               csr->getPreemptionAllocation());
            statePreemption = commandListPreemption;
        }

        if (!isCopyOnlyCommandQueue) {
            bool programVfe = frontEndStateDirty;
            if (isPatchingVfeStateAllowed) {
                // Apply the list's required state and reprogram VFE only if it
                // actually differs from the current stream properties.
                auto &requiredStreamState = commandList->getRequiredStreamState();
                streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState);
                programVfe |= streamProperties.frontEndState.isDirty();
            }

            if (programVfe) {
                programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);
                frontEndStateDirty = false;
            }

            if (isPatchingVfeStateAllowed) {
                // Record the state the list leaves behind, for the next iteration.
                auto &finalStreamState = commandList->getFinalStreamState();
                streamProperties.frontEndState.setProperties(finalStreamState.frontEndState);
            }
        }

        patchCommands(*commandList, scratchSpaceController->getScratchPatchAddress());

        // Jump into each of the list's command buffers (secondary batch).
        for (size_t iter = 0; iter < cmdBufferCount; iter++) {
            auto allocation = cmdBufferAllocations[iter];
            NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, allocation->getGpuAddress(), true);
        }

        printfFunctionContainer.insert(printfFunctionContainer.end(),
                                       commandList->getPrintfFunctionContainer().begin(),
                                       commandList->getPrintfFunctionContainer().end());
    }

    if (performMigration) {
        auto commandList = CommandList::fromHandle(phCommandLists[0]);
        commandList->migrateSharedAllocations();
    }

    if (stateSipRequired) {
        NEO::PreemptionHelper::programStateSipEndWa<GfxFamily>(child, *neoDevice);
    }

    // Persist the final preemption mode for the next submission.
    commandQueuePreemptionMode = statePreemption;

    // Phase 5: optional fence signal, task-count write, terminator, submit.
    if (hFence) {
        csr->makeResident(fence->getAllocation());
        if (isCopyOnlyCommandQueue) {
            NEO::MiFlushArgs args;
            args.commandWithPostSync = true;
            NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, args, hwInfo);
        } else {
            NEO::PipeControlArgs args;
            args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(true, hwInfo);
            if (partitionCount > 1) {
                // Multi-tile: each partition writes at its own offset.
                args.workloadPartitionOffset = true;
            }
            fence->setPartitionCount(partitionCount);
            NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
                child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
                fence->getGpuAddress(),
                Fence::STATE_SIGNALED,
                hwInfo,
                args);
        }
    }

    dispatchTaskCountWrite(child, true);

    csr->makeResident(*csr->getTagAllocation());
    void *endingCmd = nullptr;
    if (directSubmissionEnabled) {
        // Record where the terminator lives so direct submission can patch it later.
        endingCmd = child.getSpace(0);
        NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, 0ull, false);
    } else {
        MI_BATCH_BUFFER_END cmd = GfxFamily::cmdInitBatchBufferEnd;
        auto buffer = child.getSpaceForCmd<MI_BATCH_BUFFER_END>();
        *(MI_BATCH_BUFFER_END *)buffer = cmd;
    }

    // Zero-fill the alignment padding reserved above.
    if (padding) {
        void *paddingPtr = child.getSpace(padding);
        memset(paddingPtr, 0, padding);
    }

    auto ret = submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd,
                                 anyCommandListWithCooperativeKernels);

    this->taskCount = csr->peekTaskCount();

    csr->makeSurfacePackNonResident(csr->getResidencyAllocations());

    // Synchronous queues block until this submission completes.
    if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
        this->synchronize(std::numeric_limits<uint64_t>::max());
    }

    this->heapContainer.clear();

    csr->pollForCompletion();
    if (ret) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    return ZE_RESULT_SUCCESS;
}

// Emits the MEDIA_VFE_STATE (front-end) command into `commandStream` with the
// given scratch configuration and clears the CSR's VFE-dirty flag.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    UNRECOVERABLE_IF(csr == nullptr);
    auto &hwInfo = device->getHwInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    auto engineGroupType = hwHelper.getEngineGroupType(csr->getOsContext().getEngineType(),
                                                       csr->getOsContext().getEngineUsage(), hwInfo);
    auto pVfeState = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(&commandStream, hwInfo, engineGroupType);
    NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState,
                                                    hwInfo,
                                                    perThreadScratchSpaceSize,
                                                    scratchAddress,
                                                    device->getMaxNumHwThreads(),
                                                    csr->getStreamProperties());
    csr->setMediaVFEStateDirty(false);
}

// Size of a single front-end (VFE) state programming, in bytes.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSize() {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    return NEO::PreambleHelper<GfxFamily>::getVFECommandsSize();
}

// Estimates the total space needed for front-end state programming across the
// batch. Without per-list VFE patching, at most one programming is needed
// (when the state is dirty). With patching enabled, a scratch copy of the
// stream properties is replayed over every command list to count how many
// times the state would actually be reprogrammed — mirroring the logic of the
// emission loop in executeCommandLists.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandLists(
    bool isFrontEndStateDirty, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) {

    auto singleFrontEndCmdSize = estimateFrontEndCmdSize();
    bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
    if (!isPatchingVfeStateAllowed) {
        // bool * size: 0 or one full VFE programming.
        return isFrontEndStateDirty * singleFrontEndCmdSize;
    }

    // Work on a copy — the real stream properties must not be mutated here.
    auto streamPropertiesCopy = csr->getStreamProperties();
    size_t estimatedSize = 0;

    for (size_t i = 0; i < numCommandLists; i++) {
        auto commandList = CommandList::fromHandle(phCommandLists[i]);
        auto &requiredStreamState = commandList->getRequiredStreamState();
        streamPropertiesCopy.frontEndState.setProperties(requiredStreamState.frontEndState);

        if (isFrontEndStateDirty || streamPropertiesCopy.frontEndState.isDirty()) {
            estimatedSize += singleFrontEndCmdSize;
            isFrontEndStateDirty = false;
        }
        auto &finalStreamState = commandList->getFinalStreamState();
        streamPropertiesCopy.frontEndState.setProperties(finalStreamState.frontEndState);
    }

    return estimatedSize;
}

// Size of the PIPELINE_SELECT programming for this device, in bytes.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelect() {

    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    return NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
}

// Switches the pipeline to GPGPU mode and records that it has been done so
// the selection is not reprogrammed on subsequent submissions.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPipelineSelect(NEO::LinearStream &commandStream) {
    NEO::PipelineSelectArgs args = {0, 0};
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    NEO::PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, args, device->getHwInfo());
    gpgpuEnabled = true;
}

// Emits the post-sync write of the next task count into the CSR's tag
// allocation: MI_FLUSH_DW on copy-only queues, PIPE_CONTROL with
// immediate-data post-sync otherwise. Skipped entirely when the tag is
// updated from wait instead.
// NOTE: `flushDataCache` is currently unused by this implementation.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    UNRECOVERABLE_IF(csr == nullptr);

    if (csr->isUpdateTagFromWaitEnabled()) {
        return;
    }

    auto taskCountToWrite = csr->peekTaskCount() + 1;
    auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());

    const auto &hwInfo = this->device->getHwInfo();
    if (isCopyOnlyCommandQueue) {
        NEO::MiFlushArgs args;
        args.commandWithPostSync = true;
        args.notifyEnable = csr->isUsedNotifyEnableForPostSync();
        NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, args, hwInfo);
    } else {
        NEO::PipeControlArgs args;
        args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(true, hwInfo);
        if (partitionCount > 1) {
            // Multi-tile: each partition writes at its own offset.
            args.workloadPartitionOffset = true;
        }
        args.notifyEnable = csr->isUsedNotifyEnableForPostSync();
        NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
            commandStream,
            POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
            gpuAddress,
            taskCountToWrite,
            hwInfo,
            args);
    }
}

// True when switching MidThread -> Initial preemption requires any stream
// commands for this gfx family (i.e. preemption programming is non-trivial).
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandQueueHw<gfxCoreFamily>::getPreemptionCmdProgramming() {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    return NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(NEO::PreemptionMode::MidThread, NEO::PreemptionMode::Initial) > 0u;
}

} // namespace L0