1 /* 2 * Copyright (C) 2018-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 #include "shared/source/aub_mem_dump/aub_mem_dump.h" 10 #include "shared/source/built_ins/sip.h" 11 #include "shared/source/command_container/command_encoder.h" 12 #include "shared/source/command_stream/linear_stream.h" 13 #include "shared/source/commands/bxml_generator_glue.h" 14 #include "shared/source/helpers/aux_translation.h" 15 #include "shared/source/helpers/definitions/engine_group_types.h" 16 #include "shared/source/helpers/engine_node_helper.h" 17 #include "shared/source/helpers/options.h" 18 #include "shared/source/utilities/stackvec.h" 19 20 #include "hw_cmds.h" 21 #include "third_party/aub_stream/headers/aubstream.h" 22 23 #include <cstdint> 24 #include <string> 25 #include <type_traits> 26 27 namespace NEO { 28 class GmmHelper; 29 class GraphicsAllocation; 30 class TagAllocatorBase; 31 class Gmm; 32 struct AllocationData; 33 struct AllocationProperties; 34 struct EncodeSurfaceStateArgs; 35 struct EngineControl; 36 struct RootDeviceEnvironment; 37 struct PipeControlArgs; 38 39 class HwHelper { 40 public: 41 static HwHelper &get(GFXCORE_FAMILY gfxCore); 42 virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0; 43 virtual size_t getBindingTableStateSize() const = 0; 44 virtual uint32_t getBindingTableStateAlignement() const = 0; 45 virtual size_t getInterfaceDescriptorDataSize() const = 0; 46 virtual size_t getMaxBarrierRegisterPerSlice() const = 0; 47 virtual size_t getPaddingForISAAllocation() const = 0; 48 virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0; 49 virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const = 0; 50 virtual uint32_t getMaxNumSamplers() const = 0; 51 virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0; 52 virtual bool isL3Configurable(const HardwareInfo &hwInfo) = 0; 53 virtual SipKernelType getSipKernelType(bool debuggingActive) const = 0; 54 virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0; 55 virtual bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const = 0; 56 virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0; 57 virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0; 58 virtual bool hvAlign4Required() const = 0; 59 virtual bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const = 0; 60 virtual bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const = 0; 61 virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0; 62 virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; 63 static bool compressedBuffersSupported(const HardwareInfo &hwInfo); 64 static bool compressedImagesSupported(const HardwareInfo &hwInfo); 65 static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo); 66 virtual bool timestampPacketWriteSupported() const = 0; 67 virtual bool isTimestampWaitSupported() const = 0; 68 virtual size_t getRenderSurfaceStateSize() const = 0; 69 virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment, 70 void *surfaceStateBuffer, 71 size_t bufferSize, 72 uint64_t gpuVa, 73 size_t offset, 74 uint32_t pitch, 75 GraphicsAllocation *gfxAlloc, 76 bool isReadOnly, 77 uint32_t surfaceType, 78 bool forceNonAuxMode, 79 bool useL1Cache) = 0; 80 virtual const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const = 0; 81 virtual EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const = 0; 82 virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0; 83 virtual const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const = 0; 84 virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0; 85 virtual std::string getExtensions() const = 0; 86 static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo); 87 virtual uint32_t getMetricsLibraryGenId() const = 0; 88 virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0; 89 virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0; 90 virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0; 91 virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, 92 uint32_t threadsPerEu) = 0; 93 virtual uint32_t alignSlmSize(uint32_t slmSize) = 0; 94 virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) = 0; 95 96 virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0; 97 virtual bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const = 0; 98 virtual uint32_t getMinimalSIMDSize() = 0; 99 virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0; 100 virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0; 101 virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0; 102 virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0; 103 virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0; 104 virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0; 105 virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo) const = 0; 106 virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0; 107 virtual uint32_t getGlobalTimeStampBits() const = 0; 108 virtual uint32_t getDefaultThreadArbitrationPolicy() const = 0; 109 virtual bool useOnlyGlobalTimestamps() const = 0; 110 virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0; 111 virtual bool packedFormatsSupported() const = 0; 112 virtual bool isAssignEngineRoundRobinSupported() const = 0; 113 virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0; 114 virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0; 115 virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, 116 const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0; 117 virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0; 118 virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0; 119 virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0; 120 virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0; 121 virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0; 122 virtual bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const = 0; 123 virtual aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const = 0; 124 virtual uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const = 0; 125 virtual uint32_t getNumCacheRegions() const = 0; 126 virtual bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const = 0; 127 virtual uint32_t getPlanarYuvMaxHeight() const = 0; 128 virtual size_t getPreemptionAllocationAlignment() const = 0; 129 virtual std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager, 130 size_t initialTagCount, CommandStreamReceiverType csrType, 131 DeviceBitfield deviceBitfield) const = 0; 132 virtual size_t getTimestampPacketAllocatorAlignment() const = 0; 133 virtual size_t getSingleTimestampPacketSize() const = 0; 134 virtual void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const = 0; 135 virtual void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const = 0; 136 virtual bool unTypedDataPortCacheFlushRequired() const = 0; 137 virtual bool isEngineTypeRemappingToHwSpecificRequired() const = 0; 138 139 static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo); 140 static uint32_t getCopyEnginesCount(const HardwareInfo &hwInfo); 141 142 virtual bool isSipKernelAsHexadecimalArrayPreferred() const = 0; 143 virtual void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const = 0; 144 virtual void adjustPreemptionSurfaceSize(size_t &csrSize) const = 0; 145 virtual size_t getSamplerStateSize() const = 0; 146 147 virtual bool isScratchSpaceSurfaceStateAccessible() const = 0; 148 virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0; 149 virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0; 150 virtual size_t getMax3dImageWidthOrHeight() const = 0; 151 virtual uint64_t getMaxMemAllocSize() const = 0; 152 virtual bool isStatelesToStatefullWithOffsetSupported() const = 0; 153 virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) = 0; 154 virtual bool disableL3CacheForDebug() const = 0; 155 virtual bool isRevisionSpecificBinaryBuiltinRequired() const = 0; 156 157 protected: 158 HwHelper() = default; 159 }; 160 161 template <typename GfxFamily> 162 class HwHelperHw : public HwHelper { 163 public: get()164 static HwHelperHw<GfxFamily> &get() { 165 static HwHelperHw<GfxFamily> hwHelper; 166 return hwHelper; 167 } 168 getBindingTableStateSurfaceStatePointer(const void * pBindingTable,uint32_t index)169 uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override { 170 using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; 171 172 const BINDING_TABLE_STATE *bindingTableState = static_cast<const BINDING_TABLE_STATE *>(pBindingTable); 173 return bindingTableState[index].getRawData(0); 174 } 175 getBindingTableStateSize()176 size_t getBindingTableStateSize() const override { 177 using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; 178 return sizeof(BINDING_TABLE_STATE); 179 } 180 getBindingTableStateAlignement()181 uint32_t getBindingTableStateAlignement() const override { 182 using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; 183 return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE; 184 } 185 getInterfaceDescriptorDataSize()186 size_t getInterfaceDescriptorDataSize() const override { 187 using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; 188 return sizeof(INTERFACE_DESCRIPTOR_DATA); 189 } 190 getRenderSurfaceStateSize()191 size_t getRenderSurfaceStateSize() const override { 192 using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; 193 return sizeof(RENDER_SURFACE_STATE); 194 } 195 getSamplerStateSize()196 size_t getSamplerStateSize() const override { 197 using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; 198 return sizeof(SAMPLER_STATE); 199 } 200 getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset)201 uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const override { 202 using DataPortBindlessSurfaceExtendedMessageDescriptor = typename GfxFamily::DataPortBindlessSurfaceExtendedMessageDescriptor; 203 DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {}; 204 messageExtDescriptor.setBindlessSurfaceOffset(surfStateOffset); 205 return messageExtDescriptor.getBindlessSurfaceOffsetToPatch(); 206 } 207 getRenderSurfaceStateBaseAddress(void * renderSurfaceState)208 uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const override { 209 return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfaceBaseAddress(); 210 } 211 getRenderSurfaceStatePitch(void * renderSurfaceState)212 uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const override { 213 return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfacePitch(); 214 } 215 216 const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override; 217 218 size_t getMaxBarrierRegisterPerSlice() const override; 219 220 size_t getPaddingForISAAllocation() const override; 221 222 uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override; 223 224 uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const override; 225 226 uint32_t getMaxNumSamplers() const override; 227 228 void adjustDefaultEngineType(HardwareInfo *pHwInfo) override; 229 230 bool isL3Configurable(const HardwareInfo &hwInfo) override; 231 232 SipKernelType getSipKernelType(bool debuggingActive) const override; 233 234 bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override; 235 236 bool hvAlign4Required() const override; 237 238 bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const override; 239 240 bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override; 241 242 bool timestampPacketWriteSupported() const override; 243 244 bool isTimestampWaitSupported() const override; 245 246 bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override; 247 248 bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override; 249 250 void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment, 251 void *surfaceStateBuffer, 252 size_t bufferSize, 253 uint64_t gpuVa, 254 size_t offset, 255 uint32_t pitch, 256 GraphicsAllocation *gfxAlloc, 257 bool isReadOnly, 258 uint32_t surfaceType, 259 bool forceNonAuxMode, 260 bool useL1Cache) override; 261 262 MOCKABLE_VIRTUAL void setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo); 263 264 const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override; 265 266 EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override; 267 268 const StackVec<size_t, 3> getDeviceSubGroupSizes() const override; 269 270 const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const override; 271 272 bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override; 273 274 std::string getExtensions() const override; 275 276 uint32_t getMetricsLibraryGenId() const override; 277 278 uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override; 279 280 bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override; 281 282 uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override; 283 284 uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override; 285 286 uint32_t alignSlmSize(uint32_t slmSize) override; 287 288 uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) override; 289 290 static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo); 291 292 bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const override; 293 294 bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override; 295 296 bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override; 297 298 static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo); 299 300 bool isWaDisableRccRhwoOptimizationRequired() const override; 301 302 bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const override; 303 304 uint32_t getMinimalSIMDSize() override; 305 306 uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override; 307 308 bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override; 309 310 uint32_t getGlobalTimeStampBits() const override; 311 312 void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const override; 313 314 bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; 315 316 bool isBankOverrideRequired(const HardwareInfo &hwInfo) const override; 317 318 uint32_t getDefaultThreadArbitrationPolicy() const override; 319 320 bool useOnlyGlobalTimestamps() const override; 321 322 bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const override; 323 324 bool packedFormatsSupported() const override; 325 326 bool isRcsAvailable(const HardwareInfo &hwInfo) const override; 327 328 bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override; 329 330 uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, 331 const HardwareInfo &hwInfo, bool isEngineInstanced) const override; 332 333 size_t getMaxFillPaternSizeForCopyEngine() const override; 334 335 bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override; 336 337 bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override; 338 339 bool isCopyOnlyEngineType(EngineGroupType type) const override; 340 341 bool isSipWANeeded(const HardwareInfo &hwInfo) const override; 342 343 bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override; 344 345 aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const override; 346 347 uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const override; 348 349 uint32_t getNumCacheRegions() const override; 350 351 bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const override; 352 353 uint32_t getPlanarYuvMaxHeight() const override; 354 355 size_t getPreemptionAllocationAlignment() const override; 356 357 std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager, 358 size_t initialTagCount, CommandStreamReceiverType csrType, 359 DeviceBitfield deviceBitfield) const override; 360 size_t getTimestampPacketAllocatorAlignment() const override; 361 362 size_t getSingleTimestampPacketSize() const override; 363 static size_t getSingleTimestampPacketSizeHw(); 364 365 void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const override; 366 367 bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const override; 368 369 void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const override; 370 371 bool unTypedDataPortCacheFlushRequired() const override; 372 373 bool isAssignEngineRoundRobinSupported() const override; 374 375 bool isEngineTypeRemappingToHwSpecificRequired() const override; 376 377 bool isSipKernelAsHexadecimalArrayPreferred() const override; 378 379 void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const override; 380 381 void adjustPreemptionSurfaceSize(size_t &csrSize) const override; 382 383 bool isScratchSpaceSurfaceStateAccessible() const override; 384 385 size_t getMax3dImageWidthOrHeight() const override; 386 uint64_t getMaxMemAllocSize() const override; 387 bool isStatelesToStatefullWithOffsetSupported() const override; 388 void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) override; 389 bool disableL3CacheForDebug() const override; 390 bool isRevisionSpecificBinaryBuiltinRequired() const override; 391 392 protected: 393 static const AuxTranslationMode defaultAuxTranslationMode; 394 HwHelperHw() = default; 395 }; 396 397 struct DwordBuilder { 398 static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) { 399 uint32_t dword = initValue; 400 if (set) { 401 dword |= (1 << bitNumberToSet); 402 } 403 if (masked) { 404 dword |= (1 << (bitNumberToSet + 16)); 405 } 406 return dword; 407 }; 408 }; 409 410 template <typename GfxFamily> 411 struct LriHelper { 412 using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; 413 414 static void program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap); 415 }; 416 417 template <typename GfxFamily> 418 struct MemorySynchronizationCommands { 419 using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; 420 using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; 421 422 static void addPipeControlAndProgramPostSyncOperation(LinearStream &commandStream, 423 POST_SYNC_OPERATION operation, 424 uint64_t gpuAddress, 425 uint64_t immediateData, 426 const HardwareInfo &hwInfo, 427 PipeControlArgs &args); 428 static void setPipeControlAndProgramPostSyncOperation(void *&commandsBuffer, 429 POST_SYNC_OPERATION operation, 430 uint64_t gpuAddress, 431 uint64_t immediateData, 432 const HardwareInfo &hwInfo, 433 PipeControlArgs &args); 434 435 static void addPipeControlWithPostSync(LinearStream &commandStream, 436 POST_SYNC_OPERATION operation, 437 uint64_t gpuAddress, 438 uint64_t immediateData, 439 PipeControlArgs &args); 440 static void setPipeControlWithPostSync(void *&commandsBuffer, 441 POST_SYNC_OPERATION operation, 442 uint64_t gpuAddress, 443 uint64_t immediateData, 444 PipeControlArgs &args); 445 446 static void setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo); 447 static void setPipeControlWAFlags(PIPE_CONTROL &pipeControl); 448 449 static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); 450 static void setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo); 451 452 static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); 453 static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo); 454 455 static void addPipeControl(LinearStream &commandStream, PipeControlArgs &args); 456 static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args); 457 458 static void addPipeControlWithCSStallOnly(LinearStream &commandStream); 459 460 static bool isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo); 461 462 static void addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo); 463 static void setCacheFlushExtraProperties(PipeControlArgs &args); 464 465 static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo); 466 static size_t getSizeForPipeControlWA(const HardwareInfo &hwInfo); 467 static size_t getSizeForSinglePipeControl(); 468 static size_t getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo); 469 static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo); 470 static size_t getSizeForFullCacheFlush(); 471 472 static bool isPipeControlWArequired(const HardwareInfo &hwInfo); 473 static bool isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo); 474 475 protected: 476 static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args); 477 }; 478 479 union SURFACE_STATE_BUFFER_LENGTH { 480 uint32_t Length; 481 struct SurfaceState { 482 uint32_t Width : BITFIELD_RANGE(0, 6); 483 uint32_t Height : BITFIELD_RANGE(7, 20); 484 uint32_t Depth : BITFIELD_RANGE(21, 31); 485 } SurfaceState; 486 }; 487 488 } // namespace NEO 489