1/*
2 * Copyright (C) 2019-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8#include "shared/source/aub_mem_dump/aub_mem_dump.h"
9#include "shared/source/command_container/command_encoder.h"
10#include "shared/source/execution_environment/root_device_environment.h"
11#include "shared/source/gmm_helper/gmm.h"
12#include "shared/source/gmm_helper/gmm_helper.h"
13#include "shared/source/helpers/aligned_memory.h"
14#include "shared/source/helpers/basic_math.h"
15#include "shared/source/helpers/constants.h"
16#include "shared/source/helpers/hw_helper.h"
17#include "shared/source/helpers/hw_info.h"
18#include "shared/source/helpers/pipe_control_args.h"
19#include "shared/source/helpers/preamble.h"
20#include "shared/source/helpers/timestamp_packet.h"
21#include "shared/source/memory_manager/allocation_properties.h"
22#include "shared/source/memory_manager/graphics_allocation.h"
23#include "shared/source/os_interface/hw_info_config.h"
24#include "shared/source/os_interface/os_interface.h"
25#include "shared/source/utilities/tag_allocator.h"
26
27namespace NEO {
28
29template <typename Family>
30const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin;
31
32template <typename Family>
33bool HwHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const {
34    if (DebugManager.flags.OverrideBufferSuitableForRenderCompression.get() != -1) {
35        return !!DebugManager.flags.OverrideBufferSuitableForRenderCompression.get();
36    }
37    return size > KB;
38}
39
40template <typename Family>
41size_t HwHelperHw<Family>::getMax3dImageWidthOrHeight() const {
42    return 16384;
43}
44
45template <typename Family>
46uint64_t HwHelperHw<Family>::getMaxMemAllocSize() const {
47    //With statefull messages we have an allocation cap of 4GB
48    //Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching..
49    return (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
50}
51
52template <typename Family>
53bool HwHelperHw<Family>::isStatelesToStatefullWithOffsetSupported() const {
54    return true;
55}
56
57template <typename Family>
58bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) {
59    return PreambleHelper<Family>::isL3Configurable(hwInfo);
60}
61
62template <typename Family>
63SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) const {
64    if (!debuggingActive) {
65        return SipKernelType::Csr;
66    }
67    return DebugManager.flags.UseBindlessDebugSip.get() ? SipKernelType::DbgBindless : SipKernelType::DbgCsr;
68}
69
70template <typename Family>
71size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
72    return 32;
73}
74
75template <typename Family>
76size_t HwHelperHw<Family>::getPaddingForISAAllocation() const {
77    return 512;
78}
79
80template <typename Family>
81uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) const {
82    return 4u;
83}
84
85template <typename Family>
86uint32_t HwHelperHw<Family>::getMaxNumSamplers() const {
87    return 16;
88}
89
90template <typename Family>
91const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const {
92    return *AUBFamilyMapper<Family>::csTraits[engineType];
93}
94
95template <typename Family>
96bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
97    return false;
98}
99
100template <typename GfxFamily>
101inline bool HwHelperHw<GfxFamily>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
102    return true;
103}
104
105template <typename Family>
106void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
107                                                        void *surfaceStateBuffer,
108                                                        size_t bufferSize,
109                                                        uint64_t gpuVa,
110                                                        size_t offset,
111                                                        uint32_t pitch,
112                                                        GraphicsAllocation *gfxAlloc,
113                                                        bool isReadOnly,
114                                                        uint32_t surfaceType,
115                                                        bool forceNonAuxMode,
116                                                        bool useL1Cache) {
117    using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
118    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
119    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
120
121    auto gmmHelper = rootDeviceEnvironment.getGmmHelper();
122    auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);
123    RENDER_SURFACE_STATE state = Family::cmdInitRenderSurfaceState;
124    auto surfaceSize = alignUp(bufferSize, 4);
125
126    SURFACE_STATE_BUFFER_LENGTH Length = {0};
127    Length.Length = static_cast<uint32_t>(surfaceSize - 1);
128
129    state.setWidth(Length.SurfaceState.Width + 1);
130    state.setHeight(Length.SurfaceState.Height + 1);
131    state.setDepth(Length.SurfaceState.Depth + 1);
132    if (pitch) {
133        state.setSurfacePitch(pitch);
134    }
135
136    // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
137    auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
138    bufferStateAddress += offset;
139
140    auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;
141
142    state.setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));
143
144    state.setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
145    state.setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
146    state.setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
147
148    state.setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
149    state.setVerticalLineStride(0);
150    state.setVerticalLineStrideOffset(0);
151    if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
152        isReadOnly) {
153        state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
154    } else {
155        state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
156    }
157    if (DebugManager.flags.OverrideMocsIndexForScratchSpace.get() != -1) {
158        auto mocsIndex = static_cast<uint32_t>(DebugManager.flags.OverrideMocsIndexForScratchSpace.get()) << 1;
159        state.setMemoryObjectControlState(mocsIndex);
160    }
161
162    state.setSurfaceBaseAddress(bufferStateAddress);
163
164    bool isCompressionEnabled = gfxAlloc ? gfxAlloc->isCompressionEnabled() : false;
165    if (isCompressionEnabled && !forceNonAuxMode) {
166        // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
167        EncodeSurfaceState<Family>::setCoherencyType(&state, RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
168        EncodeSurfaceState<Family>::setBufferAuxParamsForCCS(&state);
169    } else {
170        EncodeSurfaceState<Family>::setCoherencyType(&state, RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
171        state.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
172    }
173    setL1CachePolicy(useL1Cache, &state, rootDeviceEnvironment.getHardwareInfo());
174
175    *surfaceState = state;
176}
177
178template <typename GfxFamily>
179void NEO::HwHelperHw<GfxFamily>::setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) {}
180
181template <typename Family>
182bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const {
183    if (DebugManager.flags.EnableLocalMemory.get() != -1) {
184        return DebugManager.flags.EnableLocalMemory.get();
185    } else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
186        return true;
187    }
188
189    return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo);
190}
191
192template <typename Family>
193bool HwHelperHw<Family>::is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const {
194    return false;
195}
196
197template <typename Family>
198AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode(const HardwareInfo &hwInfo) {
199    auto mode = HwHelperHw<Family>::defaultAuxTranslationMode;
200    if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
201        mode = static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
202    }
203
204    if (mode == AuxTranslationMode::Blit && !hwInfo.capabilityTable.blitterOperationsSupported) {
205        DEBUG_BREAK_IF(true);
206        mode = AuxTranslationMode::Builtin;
207    }
208
209    return mode;
210}
211
212template <typename GfxFamily>
213void MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
214    LinearStream &commandStream,
215    POST_SYNC_OPERATION operation,
216    uint64_t gpuAddress,
217    uint64_t immediateData,
218    const HardwareInfo &hwInfo,
219    PipeControlArgs &args) {
220
221    void *commandBuffer = commandStream.getSpace(
222        MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
223
224    MemorySynchronizationCommands<GfxFamily>::setPipeControlAndProgramPostSyncOperation(
225        commandBuffer,
226        operation,
227        gpuAddress,
228        immediateData,
229        hwInfo,
230        args);
231}
232
233template <typename GfxFamily>
234void MemorySynchronizationCommands<GfxFamily>::setPipeControlAndProgramPostSyncOperation(
235    void *&commandsBuffer,
236    POST_SYNC_OPERATION operation,
237    uint64_t gpuAddress,
238    uint64_t immediateData,
239    const HardwareInfo &hwInfo,
240    PipeControlArgs &args) {
241
242    MemorySynchronizationCommands<GfxFamily>::setPipeControlWA(commandsBuffer, gpuAddress, hwInfo);
243
244    setPostSyncExtraProperties(args, hwInfo);
245    MemorySynchronizationCommands<GfxFamily>::setPipeControlWithPostSync(commandsBuffer, operation, gpuAddress, immediateData, args);
246
247    MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, hwInfo);
248}
249
250template <typename GfxFamily>
251void MemorySynchronizationCommands<GfxFamily>::setPipeControlWithPostSync(void *&commandsBuffer,
252                                                                          POST_SYNC_OPERATION operation,
253                                                                          uint64_t gpuAddress,
254                                                                          uint64_t immediateData,
255                                                                          PipeControlArgs &args) {
256    PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
257    setPipeControl(pipeControl, args);
258    pipeControl.setPostSyncOperation(operation);
259    pipeControl.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
260    pipeControl.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
261    if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
262        pipeControl.setImmediateData(immediateData);
263    }
264
265    *reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
266    commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
267}
268
269template <typename GfxFamily>
270void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync(
271    LinearStream &commandStream,
272    POST_SYNC_OPERATION operation,
273    uint64_t gpuAddress,
274    uint64_t immediateData,
275    PipeControlArgs &args) {
276    void *pipeControl = commandStream.getSpace(sizeof(PIPE_CONTROL));
277    setPipeControlWithPostSync(pipeControl, operation, gpuAddress, immediateData, args);
278}
279
280template <typename GfxFamily>
281void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
282    size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWA(hwInfo);
283    void *commandBuffer = commandStream.getSpace(requiredSize);
284    setPipeControlWA(commandBuffer, gpuAddress, hwInfo);
285}
286
287template <typename GfxFamily>
288void MemorySynchronizationCommands<GfxFamily>::setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
289    if (MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(hwInfo)) {
290        PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
291        MemorySynchronizationCommands<GfxFamily>::setPipeControlWAFlags(cmd);
292        *reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = cmd;
293        commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
294
295        MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, hwInfo);
296    }
297}
298
299template <typename GfxFamily>
300void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
301    size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(hwInfo);
302    void *commandBuffer = commandStream.getSpace(requiredSize);
303    setAdditionalSynchronization(commandBuffer, gpuAddress, hwInfo);
304}
305
306template <typename GfxFamily>
307void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
308    pipeControl.setCommandStreamerStallEnable(true);
309    pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
310    pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
311    pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
312    pipeControl.setRenderTargetCacheFlushEnable(args.renderTargetCacheFlushEnable);
313    pipeControl.setStateCacheInvalidationEnable(args.stateCacheInvalidationEnable);
314    pipeControl.setTextureCacheInvalidationEnable(args.textureCacheInvalidationEnable);
315    pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable);
316    pipeControl.setGenericMediaStateClear(args.genericMediaStateClear);
317    pipeControl.setTlbInvalidate(args.tlbInvalidation);
318    pipeControl.setNotifyEnable(args.notifyEnable);
319    pipeControl.setDcFlushEnable(args.dcFlushEnable);
320
321    setPipeControlExtraProperties(pipeControl, args);
322
323    if (DebugManager.flags.FlushAllCaches.get()) {
324        pipeControl.setDcFlushEnable(true);
325        pipeControl.setRenderTargetCacheFlushEnable(true);
326        pipeControl.setInstructionCacheInvalidateEnable(true);
327        pipeControl.setTextureCacheInvalidationEnable(true);
328        pipeControl.setPipeControlFlushEnable(true);
329        pipeControl.setVfCacheInvalidationEnable(true);
330        pipeControl.setConstantCacheInvalidationEnable(true);
331        pipeControl.setStateCacheInvalidationEnable(true);
332        pipeControl.setTlbInvalidate(true);
333    }
334    if (DebugManager.flags.DoNotFlushCaches.get()) {
335        pipeControl.setDcFlushEnable(false);
336        pipeControl.setRenderTargetCacheFlushEnable(false);
337        pipeControl.setInstructionCacheInvalidateEnable(false);
338        pipeControl.setTextureCacheInvalidationEnable(false);
339        pipeControl.setPipeControlFlushEnable(false);
340        pipeControl.setVfCacheInvalidationEnable(false);
341        pipeControl.setConstantCacheInvalidationEnable(false);
342        pipeControl.setStateCacheInvalidationEnable(false);
343    }
344}
345
346template <typename GfxFamily>
347bool MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo) {
348    if (isFlushPreferred) {
349        const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
350        return hwInfoConfig.isDcFlushAllowed();
351    }
352    return false;
353}
354
355template <typename GfxFamily>
356void MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) {
357    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
358    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
359    MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
360    auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
361    *pipeControl = cmd;
362}
363
364template <typename GfxFamily>
365void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithCSStallOnly(LinearStream &commandStream) {
366    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
367    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
368    cmd.setCommandStreamerStallEnable(true);
369    auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
370    *pipeControl = cmd;
371}
372
373template <typename GfxFamily>
374size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() {
375    return sizeof(typename GfxFamily::PIPE_CONTROL);
376}
377
378template <typename GfxFamily>
379size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) {
380    size_t size = getSizeForSinglePipeControl() +
381                  getSizeForPipeControlWA(hwInfo) +
382                  getSizeForSingleAdditionalSynchronization(hwInfo);
383    return size;
384}
385
386template <typename GfxFamily>
387size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWA(const HardwareInfo &hwInfo) {
388    size_t size = 0;
389    if (MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(hwInfo)) {
390        size = getSizeForSinglePipeControl() +
391               getSizeForSingleAdditionalSynchronization(hwInfo);
392    }
393    return size;
394}
395
396template <typename GfxFamily>
397void MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
398}
399
400template <typename GfxFamily>
401inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo) {
402    return 0u;
403}
404
405template <typename GfxFamily>
406inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
407    return 0u;
408}
409
410template <typename GfxFamily>
411uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
412    return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
413}
414
415template <typename GfxFamily>
416bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
417    if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
418        return false;
419    }
420    return !isImage1d;
421}
422
423template <typename GfxFamily>
424uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
425    if (slmSize == 0u) {
426        return 0u;
427    }
428    slmSize = std::max(slmSize, 1024u);
429    slmSize = Math::nextPowerOfTwo(slmSize);
430    UNRECOVERABLE_IF(slmSize > 64u * KB);
431    return slmSize;
432}
433
434template <typename GfxFamily>
435uint32_t HwHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
436    auto value = std::max(slmSize, 1024u);
437    value = Math::nextPowerOfTwo(value);
438    value = Math::getMinLsbSet(value);
439    value = value - 9;
440    DEBUG_BREAK_IF(value > 7);
441    return value * !!slmSize;
442}
443
444template <typename GfxFamily>
445uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
446    return hasBarriers;
447}
448
449template <typename GfxFamily>
450inline bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
451    return false;
452}
453
454template <typename GfxFamily>
455bool HwHelperHw<GfxFamily>::isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const {
456    const auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
457    auto lowestHwRevIdWithBug = hwInfoConfig->getHwRevIdFromStepping(lowestSteppingWithBug, hwInfo);
458    auto hwRevIdWithFix = hwInfoConfig->getHwRevIdFromStepping(steppingWithFix, hwInfo);
459    if ((lowestHwRevIdWithBug == CommonConstants::invalidStepping) || (hwRevIdWithFix == CommonConstants::invalidStepping)) {
460        return false;
461    }
462    return (lowestHwRevIdWithBug <= hwInfo.platform.usRevId && hwInfo.platform.usRevId < hwRevIdWithFix);
463}
464
465template <typename GfxFamily>
466bool HwHelperHw<GfxFamily>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
467    return false;
468}
469
470template <typename GfxFamily>
471bool HwHelperHw<GfxFamily>::isWaDisableRccRhwoOptimizationRequired() const {
472    return false;
473}
474
475template <typename GfxFamily>
476inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
477    return 8u;
478}
479
480template <typename GfxFamily>
481inline bool HwHelperHw<GfxFamily>::isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const {
482    return false;
483}
484
485template <typename GfxFamily>
486inline bool HwHelperHw<GfxFamily>::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const {
487    return allocation.isAllocatedInLocalMemoryPool() &&
488           (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed ||
489            !allocation.isAllocationLockable());
490}
491
492template <typename GfxFamily>
493std::unique_ptr<TagAllocatorBase> HwHelperHw<GfxFamily>::createTimestampPacketAllocator(const std::vector<uint32_t> &rootDeviceIndices, MemoryManager *memoryManager,
494                                                                                        size_t initialTagCount, CommandStreamReceiverType csrType, DeviceBitfield deviceBitfield) const {
495    bool doNotReleaseNodes = (csrType > CommandStreamReceiverType::CSR_HW) ||
496                             DebugManager.flags.DisableTimestampPacketOptimizations.get();
497
498    auto tagAlignment = getTimestampPacketAllocatorAlignment();
499
500    if (DebugManager.flags.OverrideTimestampPacketSize.get() != -1) {
501        if (DebugManager.flags.OverrideTimestampPacketSize.get() == 4) {
502            using TimestampPackets32T = TimestampPackets<uint32_t>;
503            return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, deviceBitfield);
504        } else if (DebugManager.flags.OverrideTimestampPacketSize.get() == 8) {
505            using TimestampPackets64T = TimestampPackets<uint64_t>;
506            return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, deviceBitfield);
507        } else {
508            UNRECOVERABLE_IF(true);
509        }
510    }
511
512    using TimestampPacketType = typename GfxFamily::TimestampPacketType;
513    using TimestampPacketsT = TimestampPackets<TimestampPacketType>;
514
515    return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, deviceBitfield);
516}
517
518template <typename GfxFamily>
519size_t HwHelperHw<GfxFamily>::getTimestampPacketAllocatorAlignment() const {
520    return MemoryConstants::cacheLineSize * 4;
521}
522
523template <typename GfxFamily>
524size_t HwHelperHw<GfxFamily>::getSingleTimestampPacketSize() const {
525    return HwHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw();
526}
527
528template <typename GfxFamily>
529size_t HwHelperHw<GfxFamily>::getSingleTimestampPacketSizeHw() {
530    if (DebugManager.flags.OverrideTimestampPacketSize.get() != -1) {
531        if (DebugManager.flags.OverrideTimestampPacketSize.get() == 4) {
532            return TimestampPackets<uint32_t>::getSinglePacketSize();
533        } else if (DebugManager.flags.OverrideTimestampPacketSize.get() == 8) {
534            return TimestampPackets<uint64_t>::getSinglePacketSize();
535        } else {
536            UNRECOVERABLE_IF(true);
537        }
538    }
539
540    return TimestampPackets<typename GfxFamily::TimestampPacketType>::getSinglePacketSize();
541}
542
543template <typename GfxFamily>
544size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
545    return sizeof(typename GfxFamily::PIPE_CONTROL);
546}
547
548template <typename GfxFamily>
549void MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo) {
550    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
551
552    PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
553    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
554
555    PipeControlArgs args;
556    args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed(true, hwInfo);
557    args.renderTargetCacheFlushEnable = true;
558    args.instructionCacheInvalidateEnable = true;
559    args.textureCacheInvalidationEnable = true;
560    args.pipeControlFlushEnable = true;
561    args.constantCacheInvalidationEnable = true;
562    args.stateCacheInvalidationEnable = true;
563    args.tlbInvalidation = true;
564    MemorySynchronizationCommands<GfxFamily>::setCacheFlushExtraProperties(args);
565    MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
566    *pipeControl = cmd;
567}
568
569template <typename GfxFamily>
570const StackVec<size_t, 3> HwHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
571    return {8, 16, 32};
572}
573
574template <typename GfxFamily>
575const StackVec<uint32_t, 6> HwHelperHw<GfxFamily>::getThreadsPerEUConfigs() const {
576    return {};
577}
578
579template <typename GfxFamily>
580void HwHelperHw<GfxFamily>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const {}
581
582template <typename GfxFamily>
583bool HwHelperHw<GfxFamily>::isBankOverrideRequired(const HardwareInfo &hwInfo) const {
584    return false;
585}
586
587template <typename GfxFamily>
588uint32_t HwHelperHw<GfxFamily>::getDefaultThreadArbitrationPolicy() const {
589    return 0;
590}
591
592template <typename GfxFamily>
593bool HwHelperHw<GfxFamily>::useOnlyGlobalTimestamps() const {
594    return false;
595}
596
597template <typename GfxFamily>
598bool HwHelperHw<GfxFamily>::useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const {
599    return !getEnableLocalMemory(hwInfo);
600}
601
602template <typename GfxFamily>
603bool HwHelperHw<GfxFamily>::isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const {
604    return false;
605}
606
607template <typename GfxFamily>
608bool MemorySynchronizationCommands<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) {
609    return false;
610}
611
612template <typename GfxFamily>
613bool HwHelperHw<GfxFamily>::isRcsAvailable(const HardwareInfo &hwInfo) const {
614    return true;
615}
616
617template <typename GfxFamily>
618bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const {
619    return true;
620}
621
622template <typename GfxFamily>
623uint32_t HwHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
624                                                        const HardwareInfo &hwInfo, bool isEngineInstanced) const {
625    return maxWorkGroupCount;
626}
627
628template <typename GfxFamily>
629bool HwHelperHw<GfxFamily>::isKmdMigrationSupported(const HardwareInfo &hwInfo) const {
630    return false;
631}
632
633template <typename GfxFamily>
634bool HwHelperHw<GfxFamily>::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const {
635    return false;
636}
637
638template <typename GfxFamily>
639bool HwHelperHw<GfxFamily>::isCopyOnlyEngineType(EngineGroupType type) const {
640    return NEO::EngineGroupType::Copy == type;
641}
642
643template <typename GfxFamily>
644bool HwHelperHw<GfxFamily>::isSipWANeeded(const HardwareInfo &hwInfo) const {
645    return false;
646}
647
648template <typename GfxFamily>
649bool HwHelperHw<GfxFamily>::isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const {
650    return false;
651}
652
653template <typename GfxFamily>
654uint32_t HwHelperHw<GfxFamily>::getDefaultRevisionId(const HardwareInfo &hwInfo) const {
655    return 0u;
656}
657
658template <typename GfxFamily>
659uint32_t HwHelperHw<GfxFamily>::getNumCacheRegions() const {
660    return 0;
661}
662
663template <typename GfxFamily>
664bool HwHelperHw<GfxFamily>::isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const {
665    return true;
666}
667
668template <typename GfxFamily>
669size_t HwHelperHw<GfxFamily>::getPreemptionAllocationAlignment() const {
670    return 256 * MemoryConstants::kiloByte;
671}
672
673template <typename GfxFamily>
674void HwHelperHw<GfxFamily>::applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const {}
675
676template <typename GfxFamily>
677void HwHelperHw<GfxFamily>::applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const {
678    gmm.resourceParams.Flags.Info.RenderCompressed = isCompressed;
679}
680
681template <typename GfxFamily>
682bool HwHelperHw<GfxFamily>::isEngineTypeRemappingToHwSpecificRequired() const {
683    return false;
684}
685
686template <typename GfxFamily>
687bool HwHelperHw<GfxFamily>::isSipKernelAsHexadecimalArrayPreferred() const {
688    return false;
689}
690
691template <typename GfxFamily>
692void HwHelperHw<GfxFamily>::setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const {
693}
694
695template <typename GfxFamily>
696void HwHelperHw<GfxFamily>::adjustPreemptionSurfaceSize(size_t &csrSize) const {
697}
698
699template <typename GfxFamily>
700void HwHelperHw<GfxFamily>::encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) {
701    EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
702}
703
704template <typename GfxFamily>
705bool HwHelperHw<GfxFamily>::disableL3CacheForDebug() const {
706    return false;
707}
708
709template <typename GfxFamily>
710bool HwHelperHw<GfxFamily>::isRevisionSpecificBinaryBuiltinRequired() const {
711    return false;
712}
713} // namespace NEO
714