1 /*
2 * Copyright (C) 2019-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/gen12lp/aub_mapper.h"
9 #include "shared/source/gen12lp/hw_cmds.h"
10
11 using Family = NEO::TGLLPFamily;
12
13 #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
14 #include "shared/source/helpers/hw_helper_base.inl"
15 #include "shared/source/helpers/hw_helper_bdw_and_later.inl"
16 #include "shared/source/helpers/hw_helper_tgllp_and_later.inl"
17 #include "shared/source/os_interface/hw_info_config.h"
18
19 #include "engine_node.h"
20
21 namespace NEO {
22
23 template <>
getMax3dImageWidthOrHeight() const24 size_t HwHelperHw<Family>::getMax3dImageWidthOrHeight() const {
25 return 2048;
26 }
27
28 template <>
isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo & hwInfo) const29 bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
30 return isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo);
31 }
32
33 template <>
isWaDisableRccRhwoOptimizationRequired() const34 bool HwHelperHw<Family>::isWaDisableRccRhwoOptimizationRequired() const {
35 return true;
36 }
37
38 template <>
isAdditionalFeatureFlagRequired(const FeatureTable * featureTable) const39 bool HwHelperHw<Family>::isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const {
40 return featureTable->flags.ftrGpGpuMidThreadLevelPreempt;
41 }
42
43 template <>
getComputeUnitsUsedForScratch(const HardwareInfo * pHwInfo) const44 uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
45 /* For ICL+ maxThreadCount equals (EUCount * 8).
46 ThreadCount/EUCount=7 is no longer valid, so we have to force 8 in below formula.
47 This is required to allocate enough scratch space. */
48 return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8;
49 }
50
51 template <>
isLocalMemoryEnabled(const HardwareInfo & hwInfo) const52 bool HwHelperHw<Family>::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const {
53 return hwInfo.featureTable.flags.ftrLocalMemory;
54 }
55
56 template <>
isBufferSizeSuitableForCompression(const size_t size,const HardwareInfo & hwInfo) const57 bool HwHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const {
58 if (DebugManager.flags.OverrideBufferSuitableForRenderCompression.get() != -1) {
59 return !!DebugManager.flags.OverrideBufferSuitableForRenderCompression.get();
60 }
61 return false;
62 }
63
64 template <>
checkResourceCompatibility(GraphicsAllocation & graphicsAllocation)65 bool HwHelperHw<Family>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
66 return !graphicsAllocation.isCompressionEnabled();
67 }
68
69 template <>
getPitchAlignmentForImage(const HardwareInfo * hwInfo) const70 uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) const {
71 if (HwInfoConfig::get(hwInfo->platform.eProductFamily)->imagePitchAlignmentWARequired(*hwInfo)) {
72 return 64u;
73 }
74 return 4u;
75 }
76
77 template <>
getMetricsLibraryGenId() const78 uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
79 return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen12);
80 }
81
82 template <>
getGpgpuEngineInstances(const HardwareInfo & hwInfo) const83 const EngineInstancesContainer HwHelperHw<Family>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
84 auto defaultEngine = getChosenEngineType(hwInfo);
85
86 EngineInstancesContainer engines;
87
88 if (defaultEngine == aub_stream::EngineType::ENGINE_CCS && hwInfo.featureTable.flags.ftrCCSNode && !hwInfo.featureTable.flags.ftrGpGpuMidThreadLevelPreempt) {
89 engines.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular});
90 }
91
92 engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::Regular});
93 engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::LowPriority}); // low priority
94 engines.push_back({defaultEngine, EngineUsage::Internal}); // internal usage
95
96 if (hwInfo.capabilityTable.blitterOperationsSupported) {
97 if (hwInfo.featureTable.ftrBcsInfo.test(0)) {
98 engines.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular});
99 }
100 }
101
102 return engines;
103 };
104
105 template <>
getEngineGroupType(aub_stream::EngineType engineType,EngineUsage engineUsage,const HardwareInfo & hwInfo) const106 EngineGroupType HwHelperHw<Family>::getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const {
107 switch (engineType) {
108 case aub_stream::ENGINE_RCS:
109 return EngineGroupType::RenderCompute;
110 case aub_stream::ENGINE_CCS:
111 return EngineGroupType::Compute;
112 case aub_stream::ENGINE_BCS:
113 return EngineGroupType::Copy;
114 default:
115 UNRECOVERABLE_IF(true);
116 }
117 }
118
119 template <>
getExtensions() const120 std::string HwHelperHw<Family>::getExtensions() const {
121 return "cl_intel_subgroup_local_block_io ";
122 }
123
124 template <>
setPipeControlExtraProperties(PIPE_CONTROL & pipeControl,PipeControlArgs & args)125 inline void MemorySynchronizationCommands<Family>::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
126 pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush);
127
128 if (DebugManager.flags.FlushAllCaches.get()) {
129 pipeControl.setHdcPipelineFlush(true);
130 }
131 if (DebugManager.flags.DoNotFlushCaches.get()) {
132 pipeControl.setHdcPipelineFlush(false);
133 }
134 }
135
136 template <>
setCacheFlushExtraProperties(PipeControlArgs & args)137 void MemorySynchronizationCommands<Family>::setCacheFlushExtraProperties(PipeControlArgs &args) {
138 args.hdcPipelineFlush = true;
139 args.constantCacheInvalidationEnable = false;
140 }
141
142 template <>
useOnlyGlobalTimestamps() const143 bool HwHelperHw<Family>::useOnlyGlobalTimestamps() const {
144 return true;
145 }
146
147 template <>
getMocsIndex(const GmmHelper & gmmHelper,bool l3enabled,bool l1enabled) const148 uint32_t HwHelperHw<Family>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
149 if (l3enabled) {
150 if (DebugManager.flags.ForceL1Caching.get() != 1) {
151 l1enabled = false;
152 }
153
154 if (l1enabled) {
155 return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1;
156 } else {
157 return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
158 }
159 }
160
161 return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
162 }
163
164 template <>
isPipeControlWArequired(const HardwareInfo & hwInfo)165 bool MemorySynchronizationCommands<Family>::isPipeControlWArequired(const HardwareInfo &hwInfo) {
166 return HwInfoConfig::get(hwInfo.platform.eProductFamily)->pipeControlWARequired(hwInfo);
167 }
168
169 template <>
isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo & hwInfo)170 bool MemorySynchronizationCommands<Family>::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) {
171 return MemorySynchronizationCommands<Family>::isPipeControlWArequired(hwInfo);
172 }
173
174 template <>
setExtraAllocationData(AllocationData & allocationData,const AllocationProperties & properties,const HardwareInfo & hwInfo) const175 void HwHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const {
176 const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
177 if (hwInfoConfig.getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed) {
178 if (GraphicsAllocation::isCpuAccessRequired(properties.allocationType)) {
179 allocationData.flags.useSystemMemory = true;
180 }
181 }
182 if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isStorageInfoAdjustmentRequired()) {
183 if (properties.allocationType == GraphicsAllocation::AllocationType::BUFFER && !properties.flags.preferCompressed) {
184 allocationData.storageInfo.isLockable = true;
185 }
186 }
187 }
188
189 template class HwHelperHw<Family>;
190 template class FlatBatchBufferHelperHw<Family>;
191 template struct MemorySynchronizationCommands<Family>;
192 template struct LriHelper<Family>;
193 } // namespace NEO
194