1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 #include "shared/source/aub_mem_dump/aub_mem_dump.h"
10 #include "shared/source/built_ins/sip.h"
11 #include "shared/source/command_container/command_encoder.h"
12 #include "shared/source/command_stream/linear_stream.h"
13 #include "shared/source/commands/bxml_generator_glue.h"
14 #include "shared/source/helpers/aux_translation.h"
15 #include "shared/source/helpers/definitions/engine_group_types.h"
16 #include "shared/source/helpers/engine_node_helper.h"
17 #include "shared/source/helpers/options.h"
18 #include "shared/source/utilities/stackvec.h"
19 
20 #include "hw_cmds.h"
21 #include "third_party/aub_stream/headers/aubstream.h"
22 
23 #include <cstdint>
24 #include <string>
25 #include <type_traits>
26 
27 namespace NEO {
28 class GmmHelper;
29 class GraphicsAllocation;
30 class TagAllocatorBase;
31 class Gmm;
32 struct AllocationData;
33 struct AllocationProperties;
34 struct EncodeSurfaceStateArgs;
35 struct EngineControl;
36 struct RootDeviceEnvironment;
37 struct PipeControlArgs;
38 
39 class HwHelper {
40   public:
41     static HwHelper &get(GFXCORE_FAMILY gfxCore);
42     virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0;
43     virtual size_t getBindingTableStateSize() const = 0;
44     virtual uint32_t getBindingTableStateAlignement() const = 0;
45     virtual size_t getInterfaceDescriptorDataSize() const = 0;
46     virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
47     virtual size_t getPaddingForISAAllocation() const = 0;
48     virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0;
49     virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const = 0;
50     virtual uint32_t getMaxNumSamplers() const = 0;
51     virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
52     virtual bool isL3Configurable(const HardwareInfo &hwInfo) = 0;
53     virtual SipKernelType getSipKernelType(bool debuggingActive) const = 0;
54     virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0;
55     virtual bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const = 0;
56     virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0;
57     virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0;
58     virtual bool hvAlign4Required() const = 0;
59     virtual bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const = 0;
60     virtual bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const = 0;
61     virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0;
62     virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
63     static bool compressedBuffersSupported(const HardwareInfo &hwInfo);
64     static bool compressedImagesSupported(const HardwareInfo &hwInfo);
65     static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
66     virtual bool timestampPacketWriteSupported() const = 0;
67     virtual bool isTimestampWaitSupported() const = 0;
68     virtual size_t getRenderSurfaceStateSize() const = 0;
69     virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
70                                                 void *surfaceStateBuffer,
71                                                 size_t bufferSize,
72                                                 uint64_t gpuVa,
73                                                 size_t offset,
74                                                 uint32_t pitch,
75                                                 GraphicsAllocation *gfxAlloc,
76                                                 bool isReadOnly,
77                                                 uint32_t surfaceType,
78                                                 bool forceNonAuxMode,
79                                                 bool useL1Cache) = 0;
80     virtual const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const = 0;
81     virtual EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const = 0;
82     virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
83     virtual const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const = 0;
84     virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
85     virtual std::string getExtensions() const = 0;
86     static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
87     virtual uint32_t getMetricsLibraryGenId() const = 0;
88     virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
89     virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
90     virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
91     virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
92                                                    uint32_t threadsPerEu) = 0;
93     virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
94     virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) = 0;
95 
96     virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
97     virtual bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const = 0;
98     virtual uint32_t getMinimalSIMDSize() = 0;
99     virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0;
100     virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
101     virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0;
102     virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0;
103     virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
104     virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0;
105     virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo) const = 0;
106     virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
107     virtual uint32_t getGlobalTimeStampBits() const = 0;
108     virtual uint32_t getDefaultThreadArbitrationPolicy() const = 0;
109     virtual bool useOnlyGlobalTimestamps() const = 0;
110     virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
111     virtual bool packedFormatsSupported() const = 0;
112     virtual bool isAssignEngineRoundRobinSupported() const = 0;
113     virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0;
114     virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0;
115     virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
116                                              const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
117     virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
118     virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
119     virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
120     virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
121     virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
122     virtual bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const = 0;
123     virtual aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const = 0;
124     virtual uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const = 0;
125     virtual uint32_t getNumCacheRegions() const = 0;
126     virtual bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const = 0;
127     virtual uint32_t getPlanarYuvMaxHeight() const = 0;
128     virtual size_t getPreemptionAllocationAlignment() const = 0;
129     virtual std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager,
130                                                                              size_t initialTagCount, CommandStreamReceiverType csrType,
131                                                                              DeviceBitfield deviceBitfield) const = 0;
132     virtual size_t getTimestampPacketAllocatorAlignment() const = 0;
133     virtual size_t getSingleTimestampPacketSize() const = 0;
134     virtual void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const = 0;
135     virtual void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const = 0;
136     virtual bool unTypedDataPortCacheFlushRequired() const = 0;
137     virtual bool isEngineTypeRemappingToHwSpecificRequired() const = 0;
138 
139     static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
140     static uint32_t getCopyEnginesCount(const HardwareInfo &hwInfo);
141 
142     virtual bool isSipKernelAsHexadecimalArrayPreferred() const = 0;
143     virtual void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const = 0;
144     virtual void adjustPreemptionSurfaceSize(size_t &csrSize) const = 0;
145     virtual size_t getSamplerStateSize() const = 0;
146 
147     virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
148     virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
149     virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
150     virtual size_t getMax3dImageWidthOrHeight() const = 0;
151     virtual uint64_t getMaxMemAllocSize() const = 0;
152     virtual bool isStatelesToStatefullWithOffsetSupported() const = 0;
153     virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) = 0;
154     virtual bool disableL3CacheForDebug() const = 0;
155     virtual bool isRevisionSpecificBinaryBuiltinRequired() const = 0;
156 
157   protected:
158     HwHelper() = default;
159 };
160 
161 template <typename GfxFamily>
162 class HwHelperHw : public HwHelper {
163   public:
get()164     static HwHelperHw<GfxFamily> &get() {
165         static HwHelperHw<GfxFamily> hwHelper;
166         return hwHelper;
167     }
168 
getBindingTableStateSurfaceStatePointer(const void * pBindingTable,uint32_t index)169     uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override {
170         using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
171 
172         const BINDING_TABLE_STATE *bindingTableState = static_cast<const BINDING_TABLE_STATE *>(pBindingTable);
173         return bindingTableState[index].getRawData(0);
174     }
175 
getBindingTableStateSize()176     size_t getBindingTableStateSize() const override {
177         using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
178         return sizeof(BINDING_TABLE_STATE);
179     }
180 
getBindingTableStateAlignement()181     uint32_t getBindingTableStateAlignement() const override {
182         using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
183         return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
184     }
185 
getInterfaceDescriptorDataSize()186     size_t getInterfaceDescriptorDataSize() const override {
187         using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
188         return sizeof(INTERFACE_DESCRIPTOR_DATA);
189     }
190 
getRenderSurfaceStateSize()191     size_t getRenderSurfaceStateSize() const override {
192         using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
193         return sizeof(RENDER_SURFACE_STATE);
194     }
195 
getSamplerStateSize()196     size_t getSamplerStateSize() const override {
197         using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
198         return sizeof(SAMPLER_STATE);
199     }
200 
getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset)201     uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const override {
202         using DataPortBindlessSurfaceExtendedMessageDescriptor = typename GfxFamily::DataPortBindlessSurfaceExtendedMessageDescriptor;
203         DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {};
204         messageExtDescriptor.setBindlessSurfaceOffset(surfStateOffset);
205         return messageExtDescriptor.getBindlessSurfaceOffsetToPatch();
206     }
207 
getRenderSurfaceStateBaseAddress(void * renderSurfaceState)208     uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const override {
209         return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfaceBaseAddress();
210     }
211 
getRenderSurfaceStatePitch(void * renderSurfaceState)212     uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const override {
213         return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfacePitch();
214     }
215 
216     const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
217 
218     size_t getMaxBarrierRegisterPerSlice() const override;
219 
220     size_t getPaddingForISAAllocation() const override;
221 
222     uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
223 
224     uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const override;
225 
226     uint32_t getMaxNumSamplers() const override;
227 
228     void adjustDefaultEngineType(HardwareInfo *pHwInfo) override;
229 
230     bool isL3Configurable(const HardwareInfo &hwInfo) override;
231 
232     SipKernelType getSipKernelType(bool debuggingActive) const override;
233 
234     bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override;
235 
236     bool hvAlign4Required() const override;
237 
238     bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const override;
239 
240     bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override;
241 
242     bool timestampPacketWriteSupported() const override;
243 
244     bool isTimestampWaitSupported() const override;
245 
246     bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override;
247 
248     bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
249 
250     void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
251                                         void *surfaceStateBuffer,
252                                         size_t bufferSize,
253                                         uint64_t gpuVa,
254                                         size_t offset,
255                                         uint32_t pitch,
256                                         GraphicsAllocation *gfxAlloc,
257                                         bool isReadOnly,
258                                         uint32_t surfaceType,
259                                         bool forceNonAuxMode,
260                                         bool useL1Cache) override;
261 
262     MOCKABLE_VIRTUAL void setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo);
263 
264     const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override;
265 
266     EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override;
267 
268     const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
269 
270     const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const override;
271 
272     bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override;
273 
274     std::string getExtensions() const override;
275 
276     uint32_t getMetricsLibraryGenId() const override;
277 
278     uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
279 
280     bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
281 
282     uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
283 
284     uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
285 
286     uint32_t alignSlmSize(uint32_t slmSize) override;
287 
288     uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) override;
289 
290     static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo);
291 
292     bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const override;
293 
294     bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
295 
296     bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override;
297 
298     static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
299 
300     bool isWaDisableRccRhwoOptimizationRequired() const override;
301 
302     bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const override;
303 
304     uint32_t getMinimalSIMDSize() override;
305 
306     uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override;
307 
308     bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override;
309 
310     uint32_t getGlobalTimeStampBits() const override;
311 
312     void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const override;
313 
314     bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
315 
316     bool isBankOverrideRequired(const HardwareInfo &hwInfo) const override;
317 
318     uint32_t getDefaultThreadArbitrationPolicy() const override;
319 
320     bool useOnlyGlobalTimestamps() const override;
321 
322     bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const override;
323 
324     bool packedFormatsSupported() const override;
325 
326     bool isRcsAvailable(const HardwareInfo &hwInfo) const override;
327 
328     bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override;
329 
330     uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
331                                      const HardwareInfo &hwInfo, bool isEngineInstanced) const override;
332 
333     size_t getMaxFillPaternSizeForCopyEngine() const override;
334 
335     bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override;
336 
337     bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override;
338 
339     bool isCopyOnlyEngineType(EngineGroupType type) const override;
340 
341     bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
342 
343     bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;
344 
345     aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const override;
346 
347     uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const override;
348 
349     uint32_t getNumCacheRegions() const override;
350 
351     bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const override;
352 
353     uint32_t getPlanarYuvMaxHeight() const override;
354 
355     size_t getPreemptionAllocationAlignment() const override;
356 
357     std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager,
358                                                                      size_t initialTagCount, CommandStreamReceiverType csrType,
359                                                                      DeviceBitfield deviceBitfield) const override;
360     size_t getTimestampPacketAllocatorAlignment() const override;
361 
362     size_t getSingleTimestampPacketSize() const override;
363     static size_t getSingleTimestampPacketSizeHw();
364 
365     void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const override;
366 
367     bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const override;
368 
369     void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const override;
370 
371     bool unTypedDataPortCacheFlushRequired() const override;
372 
373     bool isAssignEngineRoundRobinSupported() const override;
374 
375     bool isEngineTypeRemappingToHwSpecificRequired() const override;
376 
377     bool isSipKernelAsHexadecimalArrayPreferred() const override;
378 
379     void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const override;
380 
381     void adjustPreemptionSurfaceSize(size_t &csrSize) const override;
382 
383     bool isScratchSpaceSurfaceStateAccessible() const override;
384 
385     size_t getMax3dImageWidthOrHeight() const override;
386     uint64_t getMaxMemAllocSize() const override;
387     bool isStatelesToStatefullWithOffsetSupported() const override;
388     void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) override;
389     bool disableL3CacheForDebug() const override;
390     bool isRevisionSpecificBinaryBuiltinRequired() const override;
391 
392   protected:
393     static const AuxTranslationMode defaultAuxTranslationMode;
394     HwHelperHw() = default;
395 };
396 
// Builds a 32-bit value with one selected bit and, optionally, the bit 16 positions above it set.
struct DwordBuilder {
    // Returns initValue with
    //   - bit `bitNumberToSet` OR-ed in when `set` is true, and
    //   - bit `bitNumberToSet + 16` OR-ed in when `masked` is true.
    // The shifted operand is unsigned so that producing bit 31 never shifts into the sign
    // bit of a signed int.
    // Precondition: bitNumberToSet < 32, and bitNumberToSet < 16 when `masked` is true;
    // a shift count of 32 or more is undefined for a 32-bit operand.
    static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) {
        uint32_t dword = initValue;
        if (set) {
            dword |= (1u << bitNumberToSet);
        }
        if (masked) {
            dword |= (1u << (bitNumberToSet + 16));
        }
        return dword;
    }
};
409 
410 template <typename GfxFamily>
411 struct LriHelper {
412     using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
413 
414     static void program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap);
415 };
416 
417 template <typename GfxFamily>
418 struct MemorySynchronizationCommands {
419     using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
420     using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
421 
422     static void addPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
423                                                           POST_SYNC_OPERATION operation,
424                                                           uint64_t gpuAddress,
425                                                           uint64_t immediateData,
426                                                           const HardwareInfo &hwInfo,
427                                                           PipeControlArgs &args);
428     static void setPipeControlAndProgramPostSyncOperation(void *&commandsBuffer,
429                                                           POST_SYNC_OPERATION operation,
430                                                           uint64_t gpuAddress,
431                                                           uint64_t immediateData,
432                                                           const HardwareInfo &hwInfo,
433                                                           PipeControlArgs &args);
434 
435     static void addPipeControlWithPostSync(LinearStream &commandStream,
436                                            POST_SYNC_OPERATION operation,
437                                            uint64_t gpuAddress,
438                                            uint64_t immediateData,
439                                            PipeControlArgs &args);
440     static void setPipeControlWithPostSync(void *&commandsBuffer,
441                                            POST_SYNC_OPERATION operation,
442                                            uint64_t gpuAddress,
443                                            uint64_t immediateData,
444                                            PipeControlArgs &args);
445 
446     static void setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo);
447     static void setPipeControlWAFlags(PIPE_CONTROL &pipeControl);
448 
449     static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
450     static void setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo);
451 
452     static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
453     static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo);
454 
455     static void addPipeControl(LinearStream &commandStream, PipeControlArgs &args);
456     static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
457 
458     static void addPipeControlWithCSStallOnly(LinearStream &commandStream);
459 
460     static bool isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo);
461 
462     static void addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo);
463     static void setCacheFlushExtraProperties(PipeControlArgs &args);
464 
465     static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo);
466     static size_t getSizeForPipeControlWA(const HardwareInfo &hwInfo);
467     static size_t getSizeForSinglePipeControl();
468     static size_t getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo);
469     static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
470     static size_t getSizeForFullCacheFlush();
471 
472     static bool isPipeControlWArequired(const HardwareInfo &hwInfo);
473     static bool isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo);
474 
475   protected:
476     static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
477 };
478 
479 union SURFACE_STATE_BUFFER_LENGTH {
480     uint32_t Length;
481     struct SurfaceState {
482         uint32_t Width : BITFIELD_RANGE(0, 6);
483         uint32_t Height : BITFIELD_RANGE(7, 20);
484         uint32_t Depth : BITFIELD_RANGE(21, 31);
485     } SurfaceState;
486 };
487 
488 } // namespace NEO
489