1/* 2 * Copyright (C) 2019-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8#include "shared/source/aub_mem_dump/aub_mem_dump.h" 9#include "shared/source/command_container/command_encoder.h" 10#include "shared/source/execution_environment/root_device_environment.h" 11#include "shared/source/gmm_helper/gmm.h" 12#include "shared/source/gmm_helper/gmm_helper.h" 13#include "shared/source/helpers/aligned_memory.h" 14#include "shared/source/helpers/basic_math.h" 15#include "shared/source/helpers/constants.h" 16#include "shared/source/helpers/hw_helper.h" 17#include "shared/source/helpers/hw_info.h" 18#include "shared/source/helpers/pipe_control_args.h" 19#include "shared/source/helpers/preamble.h" 20#include "shared/source/helpers/timestamp_packet.h" 21#include "shared/source/memory_manager/allocation_properties.h" 22#include "shared/source/memory_manager/graphics_allocation.h" 23#include "shared/source/os_interface/hw_info_config.h" 24#include "shared/source/os_interface/os_interface.h" 25#include "shared/source/utilities/tag_allocator.h" 26 27namespace NEO { 28 29template <typename Family> 30const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin; 31 32template <typename Family> 33bool HwHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const { 34 if (DebugManager.flags.OverrideBufferSuitableForRenderCompression.get() != -1) { 35 return !!DebugManager.flags.OverrideBufferSuitableForRenderCompression.get(); 36 } 37 return size > KB; 38} 39 40template <typename Family> 41size_t HwHelperHw<Family>::getMax3dImageWidthOrHeight() const { 42 return 16384; 43} 44 45template <typename Family> 46uint64_t HwHelperHw<Family>::getMaxMemAllocSize() const { 47 //With statefull messages we have an allocation cap of 4GB 48 //Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching.. 49 return (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte); 50} 51 52template <typename Family> 53bool HwHelperHw<Family>::isStatelesToStatefullWithOffsetSupported() const { 54 return true; 55} 56 57template <typename Family> 58bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) { 59 return PreambleHelper<Family>::isL3Configurable(hwInfo); 60} 61 62template <typename Family> 63SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) const { 64 if (!debuggingActive) { 65 return SipKernelType::Csr; 66 } 67 return DebugManager.flags.UseBindlessDebugSip.get() ? SipKernelType::DbgBindless : SipKernelType::DbgCsr; 68} 69 70template <typename Family> 71size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const { 72 return 32; 73} 74 75template <typename Family> 76size_t HwHelperHw<Family>::getPaddingForISAAllocation() const { 77 return 512; 78} 79 80template <typename Family> 81uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) const { 82 return 4u; 83} 84 85template <typename Family> 86uint32_t HwHelperHw<Family>::getMaxNumSamplers() const { 87 return 16; 88} 89 90template <typename Family> 91const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const { 92 return *AUBFamilyMapper<Family>::csTraits[engineType]; 93} 94 95template <typename Family> 96bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const { 97 return false; 98} 99 100template <typename GfxFamily> 101inline bool HwHelperHw<GfxFamily>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) { 102 return true; 103} 104 105template <typename Family> 106void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment, 107 void *surfaceStateBuffer, 108 size_t bufferSize, 109 uint64_t gpuVa, 110 size_t offset, 111 uint32_t pitch, 112 GraphicsAllocation *gfxAlloc, 113 bool isReadOnly, 114 uint32_t surfaceType, 115 bool forceNonAuxMode, 116 bool useL1Cache) { 117 using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE; 118 using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; 119 using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; 120 121 auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); 122 auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer); 123 RENDER_SURFACE_STATE state = Family::cmdInitRenderSurfaceState; 124 auto surfaceSize = alignUp(bufferSize, 4); 125 126 SURFACE_STATE_BUFFER_LENGTH Length = {0}; 127 Length.Length = static_cast<uint32_t>(surfaceSize - 1); 128 129 state.setWidth(Length.SurfaceState.Width + 1); 130 state.setHeight(Length.SurfaceState.Height + 1); 131 state.setDepth(Length.SurfaceState.Depth + 1); 132 if (pitch) { 133 state.setSurfacePitch(pitch); 134 } 135 136 // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address 137 auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa; 138 bufferStateAddress += offset; 139 140 auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize; 141 142 state.setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType)); 143 144 state.setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW); 145 state.setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); 146 state.setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); 147 148 state.setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR); 149 state.setVerticalLineStride(0); 150 state.setVerticalLineStrideOffset(0); 151 if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) || 152 isReadOnly) { 153 state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); 154 } else { 155 state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); 156 } 157 if (DebugManager.flags.OverrideMocsIndexForScratchSpace.get() != -1) { 158 auto mocsIndex = static_cast<uint32_t>(DebugManager.flags.OverrideMocsIndexForScratchSpace.get()) << 1; 159 state.setMemoryObjectControlState(mocsIndex); 160 } 161 162 state.setSurfaceBaseAddress(bufferStateAddress); 163 164 bool isCompressionEnabled = gfxAlloc ? gfxAlloc->isCompressionEnabled() : false; 165 if (isCompressionEnabled && !forceNonAuxMode) { 166 // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios 167 EncodeSurfaceState<Family>::setCoherencyType(&state, RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); 168 EncodeSurfaceState<Family>::setBufferAuxParamsForCCS(&state); 169 } else { 170 EncodeSurfaceState<Family>::setCoherencyType(&state, RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT); 171 state.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); 172 } 173 setL1CachePolicy(useL1Cache, &state, rootDeviceEnvironment.getHardwareInfo()); 174 175 *surfaceState = state; 176} 177 178template <typename GfxFamily> 179void NEO::HwHelperHw<GfxFamily>::setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) {} 180 181template <typename Family> 182bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const { 183 if (DebugManager.flags.EnableLocalMemory.get() != -1) { 184 return DebugManager.flags.EnableLocalMemory.get(); 185 } else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) { 186 return true; 187 } 188 189 return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo); 190} 191 192template <typename Family> 193bool HwHelperHw<Family>::is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const { 194 return false; 195} 196 197template <typename Family> 198AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode(const HardwareInfo &hwInfo) { 199 auto mode = HwHelperHw<Family>::defaultAuxTranslationMode; 200 if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) { 201 mode = static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get()); 202 } 203 204 if (mode == AuxTranslationMode::Blit && !hwInfo.capabilityTable.blitterOperationsSupported) { 205 DEBUG_BREAK_IF(true); 206 mode = AuxTranslationMode::Builtin; 207 } 208 209 return mode; 210} 211 212template <typename GfxFamily> 213void MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation( 214 LinearStream &commandStream, 215 POST_SYNC_OPERATION operation, 216 uint64_t gpuAddress, 217 uint64_t immediateData, 218 const HardwareInfo &hwInfo, 219 PipeControlArgs &args) { 220 221 void *commandBuffer = commandStream.getSpace( 222 MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo)); 223 224 MemorySynchronizationCommands<GfxFamily>::setPipeControlAndProgramPostSyncOperation( 225 commandBuffer, 226 operation, 227 gpuAddress, 228 immediateData, 229 hwInfo, 230 args); 231} 232 233template <typename GfxFamily> 234void MemorySynchronizationCommands<GfxFamily>::setPipeControlAndProgramPostSyncOperation( 235 void *&commandsBuffer, 236 POST_SYNC_OPERATION operation, 237 uint64_t gpuAddress, 238 uint64_t immediateData, 239 const HardwareInfo &hwInfo, 240 PipeControlArgs &args) { 241 242 MemorySynchronizationCommands<GfxFamily>::setPipeControlWA(commandsBuffer, gpuAddress, hwInfo); 243 244 setPostSyncExtraProperties(args, hwInfo); 245 MemorySynchronizationCommands<GfxFamily>::setPipeControlWithPostSync(commandsBuffer, operation, gpuAddress, immediateData, args); 246 247 MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, hwInfo); 248} 249 250template <typename GfxFamily> 251void MemorySynchronizationCommands<GfxFamily>::setPipeControlWithPostSync(void *&commandsBuffer, 252 POST_SYNC_OPERATION operation, 253 uint64_t gpuAddress, 254 uint64_t immediateData, 255 PipeControlArgs &args) { 256 PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl; 257 setPipeControl(pipeControl, args); 258 pipeControl.setPostSyncOperation(operation); 259 pipeControl.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL)); 260 pipeControl.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32)); 261 if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { 262 pipeControl.setImmediateData(immediateData); 263 } 264 265 *reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl; 266 commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); 267} 268 269template <typename GfxFamily> 270void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync( 271 LinearStream &commandStream, 272 POST_SYNC_OPERATION operation, 273 uint64_t gpuAddress, 274 uint64_t immediateData, 275 PipeControlArgs &args) { 276 void *pipeControl = commandStream.getSpace(sizeof(PIPE_CONTROL)); 277 setPipeControlWithPostSync(pipeControl, operation, gpuAddress, immediateData, args); 278} 279 280template <typename GfxFamily> 281void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { 282 size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWA(hwInfo); 283 void *commandBuffer = commandStream.getSpace(requiredSize); 284 setPipeControlWA(commandBuffer, gpuAddress, hwInfo); 285} 286 287template <typename GfxFamily> 288void MemorySynchronizationCommands<GfxFamily>::setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) { 289 if (MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(hwInfo)) { 290 PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; 291 MemorySynchronizationCommands<GfxFamily>::setPipeControlWAFlags(cmd); 292 *reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = cmd; 293 commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); 294 295 MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, hwInfo); 296 } 297} 298 299template <typename GfxFamily> 300void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { 301 size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(hwInfo); 302 void *commandBuffer = commandStream.getSpace(requiredSize); 303 setAdditionalSynchronization(commandBuffer, gpuAddress, hwInfo); 304} 305 306template <typename GfxFamily> 307void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) { 308 pipeControl.setCommandStreamerStallEnable(true); 309 pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable); 310 pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable); 311 pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable); 312 pipeControl.setRenderTargetCacheFlushEnable(args.renderTargetCacheFlushEnable); 313 pipeControl.setStateCacheInvalidationEnable(args.stateCacheInvalidationEnable); 314 pipeControl.setTextureCacheInvalidationEnable(args.textureCacheInvalidationEnable); 315 pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable); 316 pipeControl.setGenericMediaStateClear(args.genericMediaStateClear); 317 pipeControl.setTlbInvalidate(args.tlbInvalidation); 318 pipeControl.setNotifyEnable(args.notifyEnable); 319 pipeControl.setDcFlushEnable(args.dcFlushEnable); 320 321 setPipeControlExtraProperties(pipeControl, args); 322 323 if (DebugManager.flags.FlushAllCaches.get()) { 324 pipeControl.setDcFlushEnable(true); 325 pipeControl.setRenderTargetCacheFlushEnable(true); 326 pipeControl.setInstructionCacheInvalidateEnable(true); 327 pipeControl.setTextureCacheInvalidationEnable(true); 328 pipeControl.setPipeControlFlushEnable(true); 329 pipeControl.setVfCacheInvalidationEnable(true); 330 pipeControl.setConstantCacheInvalidationEnable(true); 331 pipeControl.setStateCacheInvalidationEnable(true); 332 pipeControl.setTlbInvalidate(true); 333 } 334 if (DebugManager.flags.DoNotFlushCaches.get()) { 335 pipeControl.setDcFlushEnable(false); 336 pipeControl.setRenderTargetCacheFlushEnable(false); 337 pipeControl.setInstructionCacheInvalidateEnable(false); 338 pipeControl.setTextureCacheInvalidationEnable(false); 339 pipeControl.setPipeControlFlushEnable(false); 340 pipeControl.setVfCacheInvalidationEnable(false); 341 pipeControl.setConstantCacheInvalidationEnable(false); 342 pipeControl.setStateCacheInvalidationEnable(false); 343 } 344} 345 346template <typename GfxFamily> 347bool MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo) { 348 if (isFlushPreferred) { 349 const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); 350 return hwInfoConfig.isDcFlushAllowed(); 351 } 352 return false; 353} 354 355template <typename GfxFamily> 356void MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { 357 using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; 358 PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; 359 MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args); 360 auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>(); 361 *pipeControl = cmd; 362} 363 364template <typename GfxFamily> 365void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithCSStallOnly(LinearStream &commandStream) { 366 using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; 367 PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; 368 cmd.setCommandStreamerStallEnable(true); 369 auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>(); 370 *pipeControl = cmd; 371} 372 373template <typename GfxFamily> 374size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() { 375 return sizeof(typename GfxFamily::PIPE_CONTROL); 376} 377 378template <typename GfxFamily> 379size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) { 380 size_t size = getSizeForSinglePipeControl() + 381 getSizeForPipeControlWA(hwInfo) + 382 getSizeForSingleAdditionalSynchronization(hwInfo); 383 return size; 384} 385 386template <typename GfxFamily> 387size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWA(const HardwareInfo &hwInfo) { 388 size_t size = 0; 389 if (MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(hwInfo)) { 390 size = getSizeForSinglePipeControl() + 391 getSizeForSingleAdditionalSynchronization(hwInfo); 392 } 393 return size; 394} 395 396template <typename GfxFamily> 397void MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) { 398} 399 400template <typename GfxFamily> 401inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo) { 402 return 0u; 403} 404 405template <typename GfxFamily> 406inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) { 407 return 0u; 408} 409 410template <typename GfxFamily> 411uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const { 412 return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9); 413} 414 415template <typename GfxFamily> 416bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) { 417 if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) { 418 return false; 419 } 420 return !isImage1d; 421} 422 423template <typename GfxFamily> 424uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) { 425 if (slmSize == 0u) { 426 return 0u; 427 } 428 slmSize = std::max(slmSize, 1024u); 429 slmSize = Math::nextPowerOfTwo(slmSize); 430 UNRECOVERABLE_IF(slmSize > 64u * KB); 431 return slmSize; 432} 433 434template <typename GfxFamily> 435uint32_t HwHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) { 436 auto value = std::max(slmSize, 1024u); 437 value = Math::nextPowerOfTwo(value); 438 value = Math::getMinLsbSet(value); 439 value = value - 9; 440 DEBUG_BREAK_IF(value > 7); 441 return value * !!slmSize; 442} 443 444template <typename GfxFamily> 445uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) { 446 return hasBarriers; 447} 448 449template <typename GfxFamily> 450inline bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const { 451 return false; 452} 453 454template <typename GfxFamily> 455bool HwHelperHw<GfxFamily>::isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const { 456 const auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); 457 auto lowestHwRevIdWithBug = hwInfoConfig->getHwRevIdFromStepping(lowestSteppingWithBug, hwInfo); 458 auto hwRevIdWithFix = hwInfoConfig->getHwRevIdFromStepping(steppingWithFix, hwInfo); 459 if ((lowestHwRevIdWithBug == CommonConstants::invalidStepping) || (hwRevIdWithFix == CommonConstants::invalidStepping)) { 460 return false; 461 } 462 return (lowestHwRevIdWithBug <= hwInfo.platform.usRevId && hwInfo.platform.usRevId < hwRevIdWithFix); 463} 464 465template <typename GfxFamily> 466bool HwHelperHw<GfxFamily>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) { 467 return false; 468} 469 470template <typename GfxFamily> 471bool HwHelperHw<GfxFamily>::isWaDisableRccRhwoOptimizationRequired() const { 472 return false; 473} 474 475template <typename GfxFamily> 476inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() { 477 return 8u; 478} 479 480template <typename GfxFamily> 481inline bool HwHelperHw<GfxFamily>::isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const { 482 return false; 483} 484 485template <typename GfxFamily> 486inline bool HwHelperHw<GfxFamily>::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const { 487 return allocation.isAllocatedInLocalMemoryPool() && 488 (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed || 489 !allocation.isAllocationLockable()); 490} 491 492template <typename GfxFamily> 493std::unique_ptr<TagAllocatorBase> HwHelperHw<GfxFamily>::createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager, 494 size_t initialTagCount, CommandStreamReceiverType csrType, DeviceBitfield deviceBitfield) const { 495 bool doNotReleaseNodes = (csrType > CommandStreamReceiverType::CSR_HW) || 496 DebugManager.flags.DisableTimestampPacketOptimizations.get(); 497 498 auto tagAlignment = getTimestampPacketAllocatorAlignment(); 499 500 if (DebugManager.flags.OverrideTimestampPacketSize.get() != -1) { 501 if (DebugManager.flags.OverrideTimestampPacketSize.get() == 4) { 502 using TimestampPackets32T = TimestampPackets<uint32_t>; 503 return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, deviceBitfield); 504 } else if (DebugManager.flags.OverrideTimestampPacketSize.get() == 8) { 505 using TimestampPackets64T = TimestampPackets<uint64_t>; 506 return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, deviceBitfield); 507 } else { 508 UNRECOVERABLE_IF(true); 509 } 510 } 511 512 using TimestampPacketType = typename GfxFamily::TimestampPacketType; 513 using TimestampPacketsT = TimestampPackets<TimestampPacketType>; 514 515 return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, deviceBitfield); 516} 517 518template <typename GfxFamily> 519size_t HwHelperHw<GfxFamily>::getTimestampPacketAllocatorAlignment() const { 520 return MemoryConstants::cacheLineSize * 4; 521} 522 523template <typename GfxFamily> 524size_t HwHelperHw<GfxFamily>::getSingleTimestampPacketSize() const { 525 return HwHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw(); 526} 527 528template <typename GfxFamily> 529size_t HwHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw() { 530 if (DebugManager.flags.OverrideTimestampPacketSize.get() != -1) { 531 if (DebugManager.flags.OverrideTimestampPacketSize.get() == 4) { 532 return TimestampPackets<uint32_t>::getSinglePacketSize(); 533 } else if (DebugManager.flags.OverrideTimestampPacketSize.get() == 8) { 534 return TimestampPackets<uint64_t>::getSinglePacketSize(); 535 } else { 536 UNRECOVERABLE_IF(true); 537 } 538 } 539 540 return TimestampPackets<typename GfxFamily::TimestampPacketType>::getSinglePacketSize(); 541} 542 543template <typename GfxFamily> 544size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() { 545 return sizeof(typename GfxFamily::PIPE_CONTROL); 546} 547 548template <typename GfxFamily> 549void MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo) { 550 using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; 551 552 PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>(); 553 PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; 554 555 PipeControlArgs args; 556 args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(true, hwInfo); 557 args.renderTargetCacheFlushEnable = true; 558 args.instructionCacheInvalidateEnable = true; 559 args.textureCacheInvalidationEnable = true; 560 args.pipeControlFlushEnable = true; 561 args.constantCacheInvalidationEnable = true; 562 args.stateCacheInvalidationEnable = true; 563 args.tlbInvalidation = true; 564 MemorySynchronizationCommands<GfxFamily>::setCacheFlushExtraProperties(args); 565 MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args); 566 *pipeControl = cmd; 567} 568 569template <typename GfxFamily> 570const StackVec<size_t, 3> HwHelperHw<GfxFamily>::getDeviceSubGroupSizes() const { 571 return {8, 16, 32}; 572} 573 574template <typename GfxFamily> 575const StackVec<uint32_t, 6> HwHelperHw<GfxFamily>::getThreadsPerEUConfigs() const { 576 return {}; 577} 578 579template <typename GfxFamily> 580void HwHelperHw<GfxFamily>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const {} 581 582template <typename GfxFamily> 583bool HwHelperHw<GfxFamily>::isBankOverrideRequired(const HardwareInfo &hwInfo) const { 584 return false; 585} 586 587template <typename GfxFamily> 588uint32_t HwHelperHw<GfxFamily>::getDefaultThreadArbitrationPolicy() const { 589 return 0; 590} 591 592template <typename GfxFamily> 593bool HwHelperHw<GfxFamily>::useOnlyGlobalTimestamps() const { 594 return false; 595} 596 597template <typename GfxFamily> 598bool HwHelperHw<GfxFamily>::useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const { 599 return !getEnableLocalMemory(hwInfo); 600} 601 602template <typename GfxFamily> 603bool HwHelperHw<GfxFamily>::isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const { 604 return false; 605} 606 607template <typename GfxFamily> 608bool MemorySynchronizationCommands<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { 609 return false; 610} 611 612template <typename GfxFamily> 613bool HwHelperHw<GfxFamily>::isRcsAvailable(const HardwareInfo &hwInfo) const { 614 return true; 615} 616 617template <typename GfxFamily> 618bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const { 619 return true; 620} 621 622template <typename GfxFamily> 623uint32_t HwHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, 624 const HardwareInfo &hwInfo, bool isEngineInstanced) const { 625 return maxWorkGroupCount; 626} 627 628template <typename GfxFamily> 629bool HwHelperHw<GfxFamily>::isKmdMigrationSupported(const HardwareInfo &hwInfo) const { 630 return false; 631} 632 633template <typename GfxFamily> 634bool HwHelperHw<GfxFamily>::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const { 635 return false; 636} 637 638template <typename GfxFamily> 639bool HwHelperHw<GfxFamily>::isCopyOnlyEngineType(EngineGroupType type) const { 640 return NEO::EngineGroupType::Copy == type; 641} 642 643template <typename GfxFamily> 644bool HwHelperHw<GfxFamily>::isSipWANeeded(const HardwareInfo &hwInfo) const { 645 return false; 646} 647 648template <typename GfxFamily> 649bool HwHelperHw<GfxFamily>::isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const { 650 return false; 651} 652 653template <typename GfxFamily> 654uint32_t HwHelperHw<GfxFamily>::getDefaultRevisionId(const HardwareInfo &hwInfo) const { 655 return 0u; 656} 657 658template <typename GfxFamily> 659uint32_t HwHelperHw<GfxFamily>::getNumCacheRegions() const { 660 return 0; 661} 662 663template <typename GfxFamily> 664bool HwHelperHw<GfxFamily>::isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const { 665 return true; 666} 667 668template <typename GfxFamily> 669size_t HwHelperHw<GfxFamily>::getPreemptionAllocationAlignment() const { 670 return 256 * MemoryConstants::kiloByte; 671} 672 673template <typename GfxFamily> 674void HwHelperHw<GfxFamily>::applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const {} 675 676template <typename GfxFamily> 677void HwHelperHw<GfxFamily>::applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const { 678 gmm.resourceParams.Flags.Info.RenderCompressed = isCompressed; 679} 680 681template <typename GfxFamily> 682bool HwHelperHw<GfxFamily>::isEngineTypeRemappingToHwSpecificRequired() const { 683 return false; 684} 685 686template <typename GfxFamily> 687bool HwHelperHw<GfxFamily>::isSipKernelAsHexadecimalArrayPreferred() const { 688 return false; 689} 690 691template <typename GfxFamily> 692void HwHelperHw<GfxFamily>::setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const { 693} 694 695template <typename GfxFamily> 696void HwHelperHw<GfxFamily>::adjustPreemptionSurfaceSize(size_t &csrSize) const { 697} 698 699template <typename GfxFamily> 700void HwHelperHw<GfxFamily>::encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) { 701 EncodeSurfaceState<GfxFamily>::encodeBuffer(args); 702} 703 704template <typename GfxFamily> 705bool HwHelperHw<GfxFamily>::disableL3CacheForDebug() const { 706 return false; 707} 708 709template <typename GfxFamily> 710bool HwHelperHw<GfxFamily>::isRevisionSpecificBinaryBuiltinRequired() const { 711 return false; 712} 713} // namespace NEO 714