1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_hal_g9.cpp
24 //! \brief Common HAL CM Gen9 functions
25 //!
26
27 #include "cm_hal_g9.h"
28 #include "mhw_render_hwcmd_g9_X.h"
29 #include "renderhal_platform_interface.h"
30 #include "mhw_render.h"
31 #include "hal_oca_interface.h"
32
33 #if defined(ENABLE_KERNELS) && (!defined(_FULL_OPEN_SOURCE))
34 #include "cm_gpucopy_kernel_g9.h"
35 #include "cm_gpuinit_kernel_g9.h"
36 #else
37 unsigned int iGPUCopy_kernel_isa_size_gen9 = 0;
38 unsigned int iGPUInit_kernel_isa_size_Gen9 = 0;
39 unsigned char *pGPUCopy_kernel_isa_gen9 = nullptr;
40 unsigned char *pGPUInit_kernel_isa_Gen9 = nullptr;
41 #endif
42
43 #define CM_NS_PER_TICK_RENDER_G9 (83.333) // For SKL, 83.333 nano seconds per tick in render engine
44 #define CM_NS_PER_TICK_RENDER_G9LP (52.083) //For BXT, 52.083 nano seconds per tick in render engine
45
46 #define PLATFORM_INTEL_BXT 8
47 #define PLATFORM_INTEL_GLK 16
48
49 // Gen9 Surface state tokenized commands - a SURFACE_STATE_G9 command and
50 // a surface state command, either SURFACE_STATE_G9 or SURFACE_STATE_ADV_G9
51 struct PACKET_SURFACE_STATE
52 {
53 SURFACE_STATE_TOKEN_COMMON token;
54 union
55 {
56 mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD cmdSurfaceState;
57 mhw_state_heap_g9_X::MEDIA_SURFACE_STATE_CMD cmdSurfaceStateAdv;
58 };
59 };
60
61 //------------------------------------------------------------------------------
62 //| Purpose: Sets Media Walker Parameters from engineering API for GEN9
63 //| Returns: Result of the operation
64 //------------------------------------------------------------------------------
SetMediaWalkerParams(CM_WALKING_PARAMETERS engineeringParams,PCM_HAL_WALKER_PARAMS walkerParams)65 MOS_STATUS CM_HAL_G9_X::SetMediaWalkerParams(
66 CM_WALKING_PARAMETERS engineeringParams,
67 PCM_HAL_WALKER_PARAMS walkerParams)
68 {
69 mhw_render_g9_X::MEDIA_OBJECT_WALKER_CMD mediaWalkerCmd;
70 mediaWalkerCmd.DW5.Value = engineeringParams.Value[0];
71 walkerParams->scoreboardMask = mediaWalkerCmd.DW5.ScoreboardMask;
72
73 mediaWalkerCmd.DW6.Value = engineeringParams.Value[1];
74 walkerParams->colorCountMinusOne = mediaWalkerCmd.DW6.ColorCountMinusOne;
75 walkerParams->midLoopUnitX = mediaWalkerCmd.DW6.MidLoopUnitX;
76 walkerParams->midLoopUnitY = mediaWalkerCmd.DW6.LocalMidLoopUnitY;
77 walkerParams->middleLoopExtraSteps = mediaWalkerCmd.DW6.MiddleLoopExtraSteps;
78
79 mediaWalkerCmd.DW7.Value = engineeringParams.Value[2];
80 walkerParams->localLoopExecCount = mediaWalkerCmd.DW7.LocalLoopExecCount;
81 walkerParams->globalLoopExecCount = mediaWalkerCmd.DW7.GlobalLoopExecCount;
82
83 mediaWalkerCmd.DW8.Value = engineeringParams.Value[3];
84 walkerParams->blockResolution.x = mediaWalkerCmd.DW8.BlockResolutionX;
85 walkerParams->blockResolution.y = mediaWalkerCmd.DW8.BlockResolutionY;
86
87 mediaWalkerCmd.DW9.Value = engineeringParams.Value[4];
88 walkerParams->localStart.x = mediaWalkerCmd.DW9.LocalStartX;
89 walkerParams->localStart.y = mediaWalkerCmd.DW9.LocalStartY;
90
91 mediaWalkerCmd.DW11.Value = engineeringParams.Value[6];
92 walkerParams->localOutLoopStride.x = mediaWalkerCmd.DW11.LocalOuterLoopStrideX;
93 walkerParams->localOutLoopStride.y = mediaWalkerCmd.DW11.LocalOuterLoopStrideY;
94
95 mediaWalkerCmd.DW12.Value = engineeringParams.Value[7];
96 walkerParams->localInnerLoopUnit.x = mediaWalkerCmd.DW12.LocalInnerLoopUnitX;
97 walkerParams->localInnerLoopUnit.y = mediaWalkerCmd.DW12.LocalInnerLoopUnitY;
98
99 mediaWalkerCmd.DW13.Value = engineeringParams.Value[8];
100 walkerParams->globalResolution.x = mediaWalkerCmd.DW13.GlobalResolutionX;
101 walkerParams->globalResolution.y = mediaWalkerCmd.DW13.GlobalResolutionY;
102
103 mediaWalkerCmd.DW14.Value = engineeringParams.Value[9];
104 walkerParams->globalStart.x = mediaWalkerCmd.DW14.GlobalStartX;
105 walkerParams->globalStart.y = mediaWalkerCmd.DW14.GlobalStartY;
106
107 mediaWalkerCmd.DW15.Value = engineeringParams.Value[10];
108 walkerParams->globalOutlerLoopStride.x = mediaWalkerCmd.DW15.GlobalOuterLoopStrideX;
109 walkerParams->globalOutlerLoopStride.y = mediaWalkerCmd.DW15.GlobalOuterLoopStrideY;
110
111 mediaWalkerCmd.DW16.Value = engineeringParams.Value[11];
112 walkerParams->globalInnerLoopUnit.x = mediaWalkerCmd.DW16.GlobalInnerLoopUnitX;
113 walkerParams->globalInnerLoopUnit.y = mediaWalkerCmd.DW16.GlobalInnerLoopUnitY;
114
115 walkerParams->localEnd.x = 0;
116 walkerParams->localEnd.y = 0;
117
118 return MOS_STATUS_SUCCESS;
119 }
120
SetupHwDebugControl(PRENDERHAL_INTERFACE renderHal,PMOS_COMMAND_BUFFER cmdBuffer)121 MOS_STATUS CM_HAL_G9_X::SetupHwDebugControl(
122 PRENDERHAL_INTERFACE renderHal,
123 PMOS_COMMAND_BUFFER cmdBuffer)
124 {
125 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
126 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
127
128 //---------------------------------------
129 CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal);
130 CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->pMhwMiInterface);
131 CM_CHK_NULL_GOTOFINISH_MOSERROR(cmdBuffer);
132 //---------------------------------------
133
134 MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
135
136 // CS_DEBUG_MODE1, global debug enable
137 loadRegImm.dwRegister = CS_DEBUG_MODE1;
138 loadRegImm.dwData = (CS_DEBUG_MODE1_GLOBAL_DEBUG << 16) | CS_DEBUG_MODE1_GLOBAL_DEBUG;
139 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm));
140
141 // TD_CTL, force thread breakpoint enable
142 // Also enable external exception, because the source-level debugger has to
143 // be able to interrupt runing EU threads.
144 loadRegImm.dwRegister = TD_CTL;
145 loadRegImm.dwData = TD_CTL_FORCE_THREAD_BKPT_ENABLE | TD_CTL_FORCE_EXT_EXCEPTION_ENABLE;
146 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm));
147
148 finish:
149 return eStatus;
150 }
151
152 //------------------------------------------------------------------------------
153 //| Purpose: Registers the sampler 8x8 AVS table: DWORDS 152, 153 and coefficients
154 //| Returns: Result of the operation
155 //------------------------------------------------------------------------------
RegisterSampler8x8AVSTable(PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,PCM_AVS_TABLE_STATE_PARAMS avsTable)156 MOS_STATUS CM_HAL_G9_X::RegisterSampler8x8AVSTable(
157 PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,
158 PCM_AVS_TABLE_STATE_PARAMS avsTable )
159 {
160 MOS_ZeroMemory( &sampler8x8AvsTable->mhwSamplerAvsTableParam, sizeof( sampler8x8AvsTable->mhwSamplerAvsTableParam ) );
161
162 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
163 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
164 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
165 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
166
167 sampler8x8AvsTable->mhwSamplerAvsTableParam.bEnableRGBAdaptive = false;
168 sampler8x8AvsTable->mhwSamplerAvsTableParam.bAdaptiveFilterAllChannels = avsTable->adaptiveFilterAllChannels;
169
170 // Assign the coefficient table;
171 for ( uint32_t i = 0; i < CM_NUM_HW_POLYPHASE_TABLES_G9; i++ )
172 {
173 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[0] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_0;
174 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[1] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_1;
175
176 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[2] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_2;
177 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[3] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_3;
178
179 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[4] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_4;
180 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[5] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_5;
181
182 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[6] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_6;
183 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[7] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_7;
184
185 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[0] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_0;
186 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[1] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_1;
187
188 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[2] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_2;
189 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[3] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_3;
190
191 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[4] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_4;
192 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[5] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_5;
193
194 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[6] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_6;
195 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[7] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_7;
196
197 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[0] = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_2;
198 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[1] = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_3;
199 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[2] = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_4;
200 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[3] = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_5;
201
202 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[0] = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_2;
203 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[1] = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_3;
204 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[2] = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_4;
205 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[3] = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_5;
206 }
207
208 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = avsTable->defaultSharpLevel;
209 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = avsTable->bypassXAF;
210 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = avsTable->bypassYAF;
211
212 if ( !avsTable->bypassXAF && !avsTable->bypassYAF )
213 {
214 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = avsTable->maxDerivative8Pixels;
215 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = avsTable->maxDerivative4Pixels;
216 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = avsTable->transitionArea8Pixels;
217 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = avsTable->transitionArea4Pixels;
218 }
219
220 for ( int i = 0; i < CM_NUM_HW_POLYPHASE_EXTRA_TABLES_G9; i++ )
221 {
222 int src = i + CM_NUM_HW_POLYPHASE_TABLES_G9;
223 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[0] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_0;
224 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[1] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_1;
225
226 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[2] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_2;
227 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[3] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_3;
228
229 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[4] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_4;
230 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[5] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_5;
231
232 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[6] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_6;
233 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[7] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_7;
234
235 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[0] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_0;
236 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[1] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_1;
237
238 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[2] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_2;
239 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[3] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_3;
240
241 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[4] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_4;
242 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[5] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_5;
243
244 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[6] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_6;
245 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[7] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_7;
246
247 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[0] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_2;
248 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[1] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_3;
249 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[2] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_4;
250 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[3] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_5;
251
252 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[0] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_2;
253 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[1] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_3;
254 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[2] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_4;
255 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[3] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_5;
256
257 }
258
259 return MOS_STATUS_SUCCESS;
260 }
261
RegisterSampler8x8(PCM_HAL_SAMPLER_8X8_PARAM param)262 MOS_STATUS CM_HAL_G9_X::RegisterSampler8x8(
263 PCM_HAL_SAMPLER_8X8_PARAM param)
264 {
265 PCM_HAL_STATE state = m_cmState;
266 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
267 int16_t samplerIndex = 0;
268 PMHW_SAMPLER_STATE_PARAM samplerEntry = nullptr;
269 PCM_HAL_SAMPLER_8X8_ENTRY sampler8x8Entry = nullptr;
270
271 if (param->sampler8x8State.stateType == CM_SAMPLER8X8_AVS)
272 {
273 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {
274 if (!state->samplerTable[i].bInUse) {
275 samplerEntry = &state->samplerTable[i];
276 param->handle = (uint32_t)i << 16;
277 samplerEntry->bInUse = true;
278 break;
279 }
280 }
281
282 for (uint32_t i = 0; i < state->cmDeviceParam.maxSampler8x8TableSize; i++) {
283 if (!state->sampler8x8Table[i].inUse) {
284 sampler8x8Entry = &state->sampler8x8Table[i];
285 samplerIndex = (int16_t)i;
286 param->handle |= (uint32_t)(i & 0xffff);
287 sampler8x8Entry->inUse = true;
288 break;
289 }
290 }
291
292 if (!samplerEntry || !sampler8x8Entry) {
293 eStatus = MOS_STATUS_INVALID_PARAMETER;
294 CM_ASSERTMESSAGE("Sampler or AVS table is full");
295 goto finish;
296 }
297
298 //State data from application
299 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_AVS;
300 samplerEntry->ElementType = MHW_Sampler128Elements;
301 samplerEntry->Avs = param->sampler8x8State.avsParam.avsState;
302 samplerEntry->Avs.stateID = samplerIndex;
303 samplerEntry->Avs.iTable8x8_Index = samplerIndex; // Used for calculating the Media offset of 8x8 table
304 samplerEntry->Avs.pMhwSamplerAvsTableParam = &sampler8x8Entry->sampler8x8State.mhwSamplerAvsTableParam;
305
306 if (samplerEntry->Avs.EightTapAFEnable)
307 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = true;
308 else
309 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = false;
310
311 CM_CHK_MOSSTATUS_GOTOFINISH(RegisterSampler8x8AVSTable(&sampler8x8Entry->sampler8x8State,
312 ¶m->sampler8x8State.avsParam.avsTable));
313
314 sampler8x8Entry->sampler8x8State.stateType = CM_SAMPLER8X8_AVS;
315 }
316 else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_MISC)
317 {
318 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
319 {
320 if (!state->samplerTable[i].bInUse)
321 {
322 samplerEntry = &state->samplerTable[i];
323 param->handle = (uint32_t)i << 16;
324 samplerEntry->bInUse = true;
325 break;
326 }
327 }
328
329 if ( samplerEntry == nullptr )
330 {
331 return MOS_STATUS_INVALID_HANDLE;
332 }
333 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_MISC;
334 samplerEntry->ElementType = MHW_Sampler2Elements;
335
336 samplerEntry->Misc.byteHeight = param->sampler8x8State.miscState.DW0.Height;
337 samplerEntry->Misc.byteWidth = param->sampler8x8State.miscState.DW0.Width;
338 samplerEntry->Misc.wRow[0] = param->sampler8x8State.miscState.DW0.Row0;
339 samplerEntry->Misc.wRow[1] = param->sampler8x8State.miscState.DW1.Row1;
340 samplerEntry->Misc.wRow[2] = param->sampler8x8State.miscState.DW1.Row2;
341 samplerEntry->Misc.wRow[3] = param->sampler8x8State.miscState.DW2.Row3;
342 samplerEntry->Misc.wRow[4] = param->sampler8x8State.miscState.DW2.Row4;
343 samplerEntry->Misc.wRow[5] = param->sampler8x8State.miscState.DW3.Row5;
344 samplerEntry->Misc.wRow[6] = param->sampler8x8State.miscState.DW3.Row6;
345 samplerEntry->Misc.wRow[7] = param->sampler8x8State.miscState.DW4.Row7;
346 samplerEntry->Misc.wRow[8] = param->sampler8x8State.miscState.DW4.Row8;
347 samplerEntry->Misc.wRow[9] = param->sampler8x8State.miscState.DW5.Row9;
348 samplerEntry->Misc.wRow[10] = param->sampler8x8State.miscState.DW5.Row10;
349 samplerEntry->Misc.wRow[11] = param->sampler8x8State.miscState.DW6.Row11;
350 samplerEntry->Misc.wRow[12] = param->sampler8x8State.miscState.DW6.Row12;
351 samplerEntry->Misc.wRow[13] = param->sampler8x8State.miscState.DW7.Row13;
352 samplerEntry->Misc.wRow[14] = param->sampler8x8State.miscState.DW7.Row14;
353 }
354 else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_CONV)
355 {
356 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
357 {
358 if (!state->samplerTable[i].bInUse) {
359 samplerEntry = &state->samplerTable[i];
360 param->handle = (uint32_t)i << 16;
361 samplerEntry->bInUse = true;
362 break;
363 }
364 }
365
366 if ( samplerEntry == nullptr )
367 {
368 return MOS_STATUS_INVALID_HANDLE;
369 }
370
371 MOS_ZeroMemory(&samplerEntry->Convolve, sizeof(samplerEntry->Convolve));
372
373 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_CONV;
374
375 samplerEntry->Convolve.ui8Height = param->sampler8x8State.convolveState.height;
376 samplerEntry->Convolve.ui8Width = param->sampler8x8State.convolveState.width;
377 samplerEntry->Convolve.ui8ScaledDownValue = param->sampler8x8State.convolveState.scaleDownValue;
378 samplerEntry->Convolve.ui8SizeOfTheCoefficient = param->sampler8x8State.convolveState.coeffSize;
379
380 samplerEntry->Convolve.ui8MSBWidth = param->sampler8x8State.convolveState.isHorizontal32Mode;
381 samplerEntry->Convolve.ui8MSBHeight = param->sampler8x8State.convolveState.isVertical32Mode;
382 samplerEntry->Convolve.skl_mode = param->sampler8x8State.convolveState.sklMode;
383
384 // Currently use DW0.Reserved0 to save the detailed Convolve Type, the DW0.Reserved0 will be cleared when copy to sampelr heap
385 samplerEntry->Convolve.ui8ConvolveType = param->sampler8x8State.convolveState.nConvolveType;
386 if (samplerEntry->Convolve.skl_mode &&
387 samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
388 {
389 samplerEntry->ElementType = MHW_Sampler128Elements;
390 }
391 else if ((!samplerEntry->Convolve.skl_mode &&
392 samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
393 || samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P)
394 {
395 samplerEntry->ElementType = MHW_Sampler64Elements;
396 }
397 else
398 {
399 samplerEntry->ElementType = MHW_Sampler8Elements;
400 }
401
402 for ( int i = 0; i < CM_NUM_CONVOLVE_ROWS_SKL; i++ )
403 {
404 MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable = &(samplerEntry->Convolve.CoeffTable[i]);
405 CM_HAL_CONVOLVE_COEFF_TABLE *sourceTable = &(param->sampler8x8State.convolveState.table[i]);
406 if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
407 {
408 coeffTable->wFilterCoeff[0] = FloatToS3_12( sourceTable->FilterCoeff_0_0 );
409 coeffTable->wFilterCoeff[1] = FloatToS3_12( sourceTable->FilterCoeff_0_1 );
410 coeffTable->wFilterCoeff[2] = FloatToS3_12( sourceTable->FilterCoeff_0_2 );
411 coeffTable->wFilterCoeff[3] = FloatToS3_12( sourceTable->FilterCoeff_0_3 );
412 coeffTable->wFilterCoeff[4] = FloatToS3_12( sourceTable->FilterCoeff_0_4 );
413 coeffTable->wFilterCoeff[5] = FloatToS3_12( sourceTable->FilterCoeff_0_5 );
414 coeffTable->wFilterCoeff[6] = FloatToS3_12( sourceTable->FilterCoeff_0_6 );
415 coeffTable->wFilterCoeff[7] = FloatToS3_12( sourceTable->FilterCoeff_0_7 );
416 coeffTable->wFilterCoeff[8] = FloatToS3_12( sourceTable->FilterCoeff_0_8 );
417 coeffTable->wFilterCoeff[9] = FloatToS3_12( sourceTable->FilterCoeff_0_9 );
418 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_10 );
419 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_11 );
420 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_12 );
421 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_13 );
422 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_14 );
423 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_15 );
424 }
425 else
426 {
427 coeffTable->wFilterCoeff[0] = FloatToS3_4( sourceTable->FilterCoeff_0_0 );
428 coeffTable->wFilterCoeff[1] = FloatToS3_4( sourceTable->FilterCoeff_0_1 );
429 coeffTable->wFilterCoeff[2] = FloatToS3_4( sourceTable->FilterCoeff_0_2 );
430 coeffTable->wFilterCoeff[3] = FloatToS3_4( sourceTable->FilterCoeff_0_3 );
431 coeffTable->wFilterCoeff[4] = FloatToS3_4( sourceTable->FilterCoeff_0_4 );
432 coeffTable->wFilterCoeff[5] = FloatToS3_4( sourceTable->FilterCoeff_0_5 );
433 coeffTable->wFilterCoeff[6] = FloatToS3_4( sourceTable->FilterCoeff_0_6 );
434 coeffTable->wFilterCoeff[7] = FloatToS3_4( sourceTable->FilterCoeff_0_7 );
435 coeffTable->wFilterCoeff[8] = FloatToS3_4( sourceTable->FilterCoeff_0_8 );
436 coeffTable->wFilterCoeff[9] = FloatToS3_4( sourceTable->FilterCoeff_0_9 );
437 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_10 );
438 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_11 );
439 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_12 );
440 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_13 );
441 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_14 );
442 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_15 );
443 }
444 }
445
446 for ( int i = CM_NUM_CONVOLVE_ROWS_SKL; i < CM_NUM_CONVOLVE_ROWS_SKL * 2; i++ )
447 {
448 MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable = &(samplerEntry->Convolve.CoeffTable[i]);
449 CM_HAL_CONVOLVE_COEFF_TABLE *sourceTable = &(param->sampler8x8State.convolveState.table[i - CM_NUM_CONVOLVE_ROWS_SKL]);
450
451 if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
452 {
453 coeffTable->wFilterCoeff[0] = FloatToS3_12( sourceTable->FilterCoeff_0_16 );
454 coeffTable->wFilterCoeff[1] = FloatToS3_12( sourceTable->FilterCoeff_0_17 );
455 coeffTable->wFilterCoeff[2] = FloatToS3_12( sourceTable->FilterCoeff_0_18 );
456 coeffTable->wFilterCoeff[3] = FloatToS3_12( sourceTable->FilterCoeff_0_19 );
457 coeffTable->wFilterCoeff[4] = FloatToS3_12( sourceTable->FilterCoeff_0_20 );
458 coeffTable->wFilterCoeff[5] = FloatToS3_12( sourceTable->FilterCoeff_0_21 );
459 coeffTable->wFilterCoeff[6] = FloatToS3_12( sourceTable->FilterCoeff_0_22 );
460 coeffTable->wFilterCoeff[7] = FloatToS3_12( sourceTable->FilterCoeff_0_23 );
461 coeffTable->wFilterCoeff[8] = FloatToS3_12( sourceTable->FilterCoeff_0_24 );
462 coeffTable->wFilterCoeff[9] = FloatToS3_12( sourceTable->FilterCoeff_0_25 );
463 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_26 );
464 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_27 );
465 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_28 );
466 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_29 );
467 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_30 );
468 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_31 );
469 }
470 else
471 {
472 coeffTable->wFilterCoeff[0] = FloatToS3_4( sourceTable->FilterCoeff_0_16 );
473 coeffTable->wFilterCoeff[1] = FloatToS3_4( sourceTable->FilterCoeff_0_17 );
474 coeffTable->wFilterCoeff[2] = FloatToS3_4( sourceTable->FilterCoeff_0_18 );
475 coeffTable->wFilterCoeff[3] = FloatToS3_4( sourceTable->FilterCoeff_0_19 );
476 coeffTable->wFilterCoeff[4] = FloatToS3_4( sourceTable->FilterCoeff_0_20 );
477 coeffTable->wFilterCoeff[5] = FloatToS3_4( sourceTable->FilterCoeff_0_21 );
478 coeffTable->wFilterCoeff[6] = FloatToS3_4( sourceTable->FilterCoeff_0_22 );
479 coeffTable->wFilterCoeff[7] = FloatToS3_4( sourceTable->FilterCoeff_0_23 );
480 coeffTable->wFilterCoeff[8] = FloatToS3_4( sourceTable->FilterCoeff_0_24 );
481 coeffTable->wFilterCoeff[9] = FloatToS3_4( sourceTable->FilterCoeff_0_25 );
482 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_26 );
483 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_27 );
484 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_28 );
485 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_29 );
486 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_30 );
487 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_31 );
488 }
489 }
490 }
491
492 finish:
493 return eStatus;
494 }
495
496 /*----------------------------------------------------------------------------
497 | Purpose : Sets surface state memory object control settings
498 | Returns : Result of the operation (the DWORD value is written to surfStateParams->MemObjCtl)
499 \---------------------------------------------------------------------------*/
HwSetSurfaceMemoryObjectControl(uint16_t memObjCtl,PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)500 MOS_STATUS CM_HAL_G9_X::HwSetSurfaceMemoryObjectControl(
501 uint16_t memObjCtl,
502 PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)
503 {
504 PRENDERHAL_INTERFACE renderHal = m_cmState->renderHal;
505 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
506 MOS_HW_RESOURCE_DEF mosUsage;
507 // The memory object control uint16_t is composed with cache type(8:15), memory type(4:7), ages(0:3)
508 mosUsage = (MOS_HW_RESOURCE_DEF)((memObjCtl & CM_MEMOBJCTL_CACHE_MASK) >> 8);
509 if (mosUsage >= MOS_HW_RESOURCE_DEF_MAX)
510 mosUsage = GetDefaultMOCS();
511
512 surfStateParams->MemObjCtl = renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(mosUsage,
513 renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface)).DwordValue;
514
515 return eStatus;
516 }
517
518
519 #if (_RELEASE_INTERNAL || _DEBUG)
520 #if defined (CM_DIRECT_GUC_SUPPORT)
SubmitDummyCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)521 MOS_STATUS CM_HAL_G9_X::SubmitDummyCommands(
522 PMHW_BATCH_BUFFER batchBuffer,
523 int32_t taskId,
524 PCM_HAL_KERNEL_PARAM *kernelParam,
525 void **cmdBuffer)
526 {
527 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
528 PCM_HAL_STATE state = m_cmState;
529 PRENDERHAL_INTERFACE renderHal = state->renderHal;
530 MhwRenderInterface *mhwRender = renderHal->pMhwRenderInterface;
531 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
532 PMOS_INTERFACE osInterface = renderHal->pOsInterface;
533 PMHW_MI_INTERFACE mhwMiInterface = renderHal->pMhwMiInterface;
534 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
535 MHW_ID_LOAD_PARAMS idLoadParams;
536 int32_t remaining = 0;
537 bool enableWalker = state->walkerParams.CmWalkerEnable;
538 bool enableGpGpu = state->taskParam->blGpGpuWalkerEnabled;
539 MOS_COMMAND_BUFFER mosCmdBuffer;
540 int64_t *taskSyncLocation;
541 int32_t syncOffset;
542 int32_t tmp;
543 RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
544
545 MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
546
547 // Get the task sync offset
548 syncOffset = state->pfnGetTaskSyncLocation(taskId);
549
550 // Initialize the location
551 taskSyncLocation = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
552 *taskSyncLocation = CM_INVALID_INDEX;
553 *(taskSyncLocation + 1) = CM_INVALID_INDEX;
554 if (state->cbbEnabled)
555 {
556 *(taskSyncLocation + 2) = CM_INVALID_TAG;
557 }
558
559 // Register batch buffer for rendering
560 if (!enableWalker && !enableGpGpu)
561 {
562 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
563 osInterface,
564 &batchBuffer->OsResource,
565 true,
566 true));
567 }
568 // Register Timestamp Buffer
569 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
570 osInterface,
571 &state->renderTimeStampResource.osResource,
572 true,
573 true));
574 // Allocate all available space, unused buffer will be returned later
575 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
576 remaining = mosCmdBuffer.iRemaining;
577
578 // Linux will just return next sync tag here since currently no frame tracking support
579 //dwFrameId = pRenderHal->pfnEnableFrameTracking(pRenderHal, pOsInterface->CurrentGpuContextOrdinal, &genericPrologParams, &OsResource);
580 //pStateHeap->pCurMediaState->dwSyncTag = dwFrameId;
581
582 // Initialize command buffer and insert prolog
583 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
584
585 //Send the First PipeControl Command to indicate the beginning of execution
586 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
587 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
588 pipeCtlParams.dwResourceOffset = syncOffset;
589 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
590 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
591 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
592
593 // Send Pipeline Select command
594 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
595
596 // issue a PIPE_CONTROL to flush all caches and the stall the CS before
597 // issuing a PIPE_CONTROL to write the timestamp
598 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
599 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
600 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
601 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
602 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
603
604 // issue a PIPE_CONTROL to write timestamp
605 syncOffset += sizeof(uint64_t);
606 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
607 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
608 pipeCtlParams.dwResourceOffset = syncOffset;
609 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
610 pipeCtlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
611 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
612
613 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
614 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
615
616 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
617 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
618 pipeControlParams.bGenericMediaStateClear = true;
619 pipeControlParams.bIndirectStatePointersDisable = true;
620 pipeControlParams.bDisableCSStall = false;
621 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeControlParams));
622
623 //Couple to the BB_START , otherwise GPU Hang without it in Linux KMD
624 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
625
626 // Return unused command buffer space to OS
627 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
628
629 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
630 &mosCmdBuffer,
631 state->nullHwRenderCm))
632
633 if (state->nullHwRenderCm == false)
634 {
635 stateHeap->pCurMediaState->bBusy = true;
636 if (!enableWalker && !enableGpGpu)
637 {
638 batchBuffer->bBusy = true;
639 }
640 }
641
642 // reset API call number of HW threads
643 state->maxHWThreadValues.apiValue = 0;
644
645 state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
646
647 eStatus = MOS_STATUS_SUCCESS;
648
649 finish:
650 // Failed -> discard all changes in Command Buffer
651 if (eStatus != MOS_STATUS_SUCCESS)
652 {
653 // Buffer overflow - display overflow size
654 if (mosCmdBuffer.iRemaining < 0)
655 {
656 CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
657 }
658
659 // Move command buffer back to beginning
660 tmp = remaining - mosCmdBuffer.iRemaining;
661 mosCmdBuffer.iRemaining = remaining;
662 mosCmdBuffer.iOffset -= tmp;
663 mosCmdBuffer.pCmdPtr = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset / sizeof(uint32_t);
664
665 // Return unused command buffer space to OS
666 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
667 }
668
669 return eStatus;
670 }
671 #endif
672 #endif
673
SubmitCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)674 MOS_STATUS CM_HAL_G9_X::SubmitCommands(
675 PMHW_BATCH_BUFFER batchBuffer,
676 int32_t taskId,
677 PCM_HAL_KERNEL_PARAM *kernelParam,
678 void **cmdBuffer)
679 {
680 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
681 PCM_HAL_STATE state = m_cmState;
682 PRENDERHAL_INTERFACE renderHal = state->renderHal;
683 MhwRenderInterface *mhwRender = renderHal->pMhwRenderInterface;
684 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
685 PMOS_INTERFACE osInterface = renderHal->pOsInterface;
686 PMHW_MI_INTERFACE mhwMiInterface = renderHal->pMhwMiInterface;
687 MHW_PIPE_CONTROL_PARAMS pipeCtlParams;
688 MHW_ID_LOAD_PARAMS idLoadParams;
689 int32_t remaining = 0;
690 bool enableWalker = state->walkerParams.CmWalkerEnable;
691 bool enableGpGpu = state->taskParam->blGpGpuWalkerEnabled;
692 MOS_COMMAND_BUFFER mosCmdBuffer;
693 uint32_t syncTag;
694 int64_t *taskSyncLocation;
695 int32_t syncOffset;
696 int32_t tmp;
697 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
698 bool sipEnable = renderHal->bSIPKernel? true: false;
699 bool csrEnable = renderHal->bCSRKernel? true: false;
700 PCM_HAL_BB_ARGS bbCmArgs;
701 RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
702 MOS_RESOURCE *osResource;
703 uint32_t tag;
704 uint32_t tagOffset = 0;
705 CM_HAL_MI_REG_OFFSETS miRegG9 = { REG_TIMESTAMP_BASE_G9, REG_GPR_BASE_G9 };
706 #if (_RELEASE_INTERNAL || _DEBUG)
707 #if defined (CM_DIRECT_GUC_SUPPORT)
708 uint64_t batchbufferaddress;
709 #endif
710 #endif
711
712 MOS_CONTEXT *pOsContext = renderHal->pOsInterface->pOsContext;
713 PMHW_MI_MMIOREGISTERS pMmioRegisters = renderHal->pMhwRenderInterface->GetMmioRegisters();
714
715 MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
716
717 // get the tag
718 tag = renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex);
719
720 // Get the task sync offset
721 syncOffset = state->pfnGetTaskSyncLocation(state, taskId);
722
723 // Initialize the location
724 taskSyncLocation = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
725 *taskSyncLocation = CM_INVALID_INDEX;
726 *(taskSyncLocation + 1) = CM_INVALID_INDEX;
727 if(state->cbbEnabled)
728 {
729 *(taskSyncLocation + 2) = tag;
730 *(taskSyncLocation + 3) = state->renderHal->currentTrackerIndex;
731 }
732
733 // Register batch buffer for rendering
734 if (!enableWalker && !enableGpGpu)
735 {
736 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
737 osInterface,
738 &batchBuffer->OsResource,
739 true,
740 true));
741 }
742 #if (_RELEASE_INTERNAL || _DEBUG)
743 #if !defined(CM_DIRECT_GUC_SUPPORT)
744 // Register Timestamp Buffer
745 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
746 osInterface,
747 &state->renderTimeStampResource.osResource,
748 true,
749 true));
750 #endif
751 #endif
752 // Allocate all available space, unused buffer will be returned later
753 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
754 remaining = mosCmdBuffer.iRemaining;
755 #if (_RELEASE_INTERNAL || _DEBUG)
756 #if defined(CM_DIRECT_GUC_SUPPORT)
757 batchbufferaddress = osInterface->pfnGetResourceGfxAddress(
758 osInterface,
759 &mosCmdBuffer.OsResource);
760 batchbufferaddress += mosCmdBuffer.iOffset;
761 #endif
762 #endif
763 // Update power option of this command;
764 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnUpdatePowerOption( state, &state->powerOption ) );
765
766 // use frame tracking to write the tracker ID to CM tracker resource
767 renderHal->trackerProducer.GetLatestTrackerResource(renderHal->currentTrackerIndex, &osResource, &tagOffset);
768 renderHal->pfnSetupPrologParams(renderHal, &genericPrologParams, osResource, tagOffset, tag);
769 FrameTrackerTokenFlat_SetProducer(&stateHeap->pCurMediaState->trackerToken, &renderHal->trackerProducer);
770 FrameTrackerTokenFlat_Merge(&stateHeap->pCurMediaState->trackerToken, renderHal->currentTrackerIndex, tag);
771
772 // Record registers by unified media profiler in the beginning
773 if (state->perfProfiler != nullptr)
774 {
775 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectStartCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
776 }
777
778 //Send the First PipeControl Command to indicate the beginning of execution
779 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
780 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
781 pipeCtlParams.dwResourceOffset = syncOffset;
782 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
783 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
784 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
785
786 // Initialize command buffer and insert prolog
787 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
788
789 HalOcaInterface::On1stLevelBBStart(mosCmdBuffer, *pOsContext, osInterface->CurrentGpuContextHandle,
790 *renderHal->pMhwMiInterface, *pMmioRegisters);
791
792 // update tracker tag used with CM tracker resource
793 renderHal->trackerProducer.StepForward(renderHal->currentTrackerIndex);
794
795 // Increment sync tag
796 syncTag = stateHeap->dwNextTag++;
797
798 //enable SKL L3 config
799 HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
800 renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
801 mhwRender->SetL3Cache(&mosCmdBuffer);
802
803 if (sipEnable)
804 {
805 CM_CHK_MOSSTATUS_GOTOFINISH(SetupHwDebugControl(renderHal, &mosCmdBuffer));
806 }
807
808 // Adds granularity control for preemption for Gen9.
809 // Supporting Preemption granularity control reg for 3D and GPGPU mode for per ctx and with non-privileged access
810 if ( MEDIA_IS_SKU(state->skuTable, FtrPerCtxtPreemptionGranularityControl ))
811 {
812 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
813 MOS_ZeroMemory( &loadRegImm, sizeof( MHW_MI_LOAD_REGISTER_IMM_PARAMS ) );
814
815 loadRegImm.dwRegister = MHW_RENDER_ENGINE_PREEMPTION_CONTROL_OFFSET;
816
817 // Same reg offset and value for gpgpu pipe and media pipe
818 if ( enableGpGpu )
819 {
820 if ( MEDIA_IS_SKU(state->skuTable, FtrGpGpuThreadGroupLevelPreempt )
821 || MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidThreadLevelPreempt))
822 {
823 //if FtrGpGpuThreadGroupLevelPreempt is true, still program the
824 //it to MID_THREAD_GROUP.Gen9 doesn't support MID_THREAD level
825 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
826 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption( state->renderHal );
827 }
828 else if ( MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidBatchPreempt ))
829 {
830 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
831 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption( state->renderHal );
832 }
833 else
834 {
835 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
836 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
837 }
838 }
839 else
840 {
841 if ( MEDIA_IS_SKU(state->skuTable, FtrMediaThreadGroupLevelPreempt)
842 || MEDIA_IS_SKU(state->skuTable, FtrMediaMidThreadLevelPreempt))
843 {
844 //if FtrMediaMidThreadLevelPreempt is true, still program the
845 //it to MID_THREAD_GROUP.Gen9 doesn't support MID_THREAD.
846 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
847 }
848 else if ( MEDIA_IS_SKU(state->skuTable, FtrMediaMidBatchPreempt))
849 {
850 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
851 }
852 else
853 {
854 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
855 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
856 }
857 }
858 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiLoadRegisterImmCmd(&mosCmdBuffer, &loadRegImm ) );
859 }
860
861 // Send Pipeline Select command
862 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
863
864 // Send State Base Address command
865 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendStateBaseAddress(renderHal, &mosCmdBuffer));
866
867 // Send Surface States
868 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSurfaces(renderHal, &mosCmdBuffer));
869
870 if (enableGpGpu) {
871 if (csrEnable) {
872
873 // Send CS_STALL pipe control
874 //Insert a pipe control as synchronization
875 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
876 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
877 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
878 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
879 pipeCtlParams.bDisableCSStall = 0;
880 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
881
882 }
883
884 if (sipEnable || csrEnable)
885 {
886 // Send SIP State
887 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSipStateCmd(renderHal, &mosCmdBuffer));
888
889 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
890 osInterface,
891 &state->csrResource,
892 true,
893 true));
894
895 // Send csr base addr command
896 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddGpgpuCsrBaseAddrCmd(&mosCmdBuffer, &state->csrResource));
897 }
898 }
899
900 // Setup VFE State params. Each Renderer MUST call pfnSetVfeStateParams().
901 // See comment in pfnSetVfeStateParams() for details.
902 tmp = RENDERHAL_USE_MEDIA_THREADS_MAX;
903 if (state->maxHWThreadValues.userFeatureValue != 0)
904 {
905 if( state->maxHWThreadValues.userFeatureValue < renderHal->pHwCaps->dwMaxThreads)
906 {
907 tmp = state->maxHWThreadValues.userFeatureValue;
908 }
909 }
910 else if (state->maxHWThreadValues.apiValue != 0)
911 {
912 if( state->maxHWThreadValues.apiValue < renderHal->pHwCaps->dwMaxThreads)
913 {
914 tmp = state->maxHWThreadValues.apiValue;
915 }
916 }
917
918 renderHal->pfnSetVfeStateParams(
919 renderHal,
920 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
921 tmp,
922 state->taskParam->vfeCurbeSize,
923 state->taskParam->urbEntrySize,
924 &state->scoreboardParams);
925
926 // Send VFE State
927 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer,
928 renderHal->pRenderHalPltInterface->GetVfeStateParameters()));
929
930 // Send CURBE Load
931 if (state->taskParam->vfeCurbeSize > 0)
932 {
933 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendCurbeLoad(renderHal, &mosCmdBuffer));
934 }
935
936 // Send Interface Descriptor Load
937 if (state->dshEnabled)
938 {
939 PRENDERHAL_DYNAMIC_STATE dynamicState = stateHeap->pCurMediaState->pDynamicState;
940 idLoadParams.dwInterfaceDescriptorStartOffset = dynamicState->memoryBlock.GetOffset() +
941 dynamicState->MediaID.dwOffset;
942 idLoadParams.dwInterfaceDescriptorLength = dynamicState->MediaID.iCount * stateHeap->dwSizeMediaID;
943 }
944 else
945 {
946 idLoadParams.dwInterfaceDescriptorStartOffset = stateHeap->pCurMediaState->dwOffset + stateHeap->dwOffsetMediaID;
947 idLoadParams.dwInterfaceDescriptorLength = renderHal->StateHeapSettings.iMediaIDs * stateHeap->dwSizeMediaID;
948 }
949 idLoadParams.pKernelState = nullptr;
950 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaIDLoadCmd(&mosCmdBuffer, &idLoadParams));
951
952 HalOcaInterface::OnDispatch(mosCmdBuffer, *pOsContext, *renderHal->pMhwMiInterface, *pMmioRegisters);
953
954 if (enableWalker)
955 {
956 // send media walker command, if required
957 for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
958 {
959 // Insert CONDITIONAL_BATCH_BUFFER_END
960 if ( taskParam->conditionalEndBitmap & ((uint64_t)1 << (i)))
961 {
962 // this could be batch buffer end so need to update sync tag, media state flush, write end timestamp
963
964 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSyncTag(renderHal, &mosCmdBuffer));
965
966 // conditionally write timestamp
967 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_OsAddArtifactConditionalPipeControl(&miRegG9, state, &mosCmdBuffer, syncOffset, &taskParam->conditionalBBEndParams[i], tag));
968
969 // Insert conditional batch buffer end
970 mhwMiInterface->AddMiConditionalBatchBufferEndCmd(&mosCmdBuffer, &taskParam->conditionalBBEndParams[i]);
971 }
972
973 //Insert PIPE_CONTROL at two cases:
974 // 1. synchronization is set
975 // 2. the next kernel has dependency pattern
976 if((i > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (i-1))) ||
977 (kernelParam[i]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
978 {
979 //Insert a pipe control as synchronization
980 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
981 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
982 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
983 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
984 pipeCtlParams.bInvalidateTextureCache = true;
985 pipeCtlParams.bFlushRenderTargetCache = true;
986 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
987 }
988
989 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendMediaWalkerState(state, kernelParam[i], &mosCmdBuffer));
990 }
991 }
992 else if (enableGpGpu)
993 {
994 // send GPGPU walker command, if required
995
996 for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
997 {
998 //Insert PIPE_CONTROL as synchronization if synchronization is set
999 if((i > 0) && (taskParam->syncBitmap & ((uint64_t)1 << (i-1))))
1000 {
1001 //Insert a pipe control as synchronization
1002 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1003 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
1004 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
1005 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
1006 pipeCtlParams.bInvalidateTextureCache = true;
1007 pipeCtlParams.bFlushRenderTargetCache = true;
1008 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1009 }
1010
1011 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendGpGpuWalkerState(state, kernelParam[i], &mosCmdBuffer));
1012 }
1013 }
1014 else
1015 {
1016 // Send Start batch buffer command
1017 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferStartCmd(
1018 &mosCmdBuffer,
1019 batchBuffer));
1020
1021 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
1022 bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
1023
1024 if ( (bbCmArgs->refCount == 1) ||
1025 (state->taskParam->reuseBBUpdateMask == 1) )
1026 {
1027 // Add BB end command
1028 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(nullptr, batchBuffer));
1029 }
1030 else //reuse BB
1031 {
1032 // Skip BB end command
1033 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->SkipMiBatchBufferEndBb(batchBuffer));
1034 }
1035
1036 // UnLock the batch buffer
1037 if ( (bbCmArgs->refCount == 1) ||
1038 (state->taskParam->reuseBBUpdateMask == 1) )
1039 {
1040 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnUnlockBB(renderHal, batchBuffer));
1041 }
1042 }
1043
1044 // issue a PIPE_CONTROL to flush all caches and the stall the CS before
1045 // issuing a PIPE_CONTROL to write the timestamp
1046 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1047 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
1048 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
1049 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
1050 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1051
1052 if (state->svmBufferUsed || state->statelessBufferUsed)
1053 {
1054 // Find the SVM/statelessBuffer slot, patch it into this dummy pipe_control
1055 for (uint32_t i = 0; i < state->cmDeviceParam.maxBufferTableSize; i++)
1056 {
1057 //register resource here
1058 if (state->bufferTable[i].address)
1059 {
1060 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
1061 osInterface,
1062 &state->bufferTable[i].osResource,
1063 true,
1064 false));
1065
1066 // sync resource
1067 MOS_SURFACE mosSurface;
1068 MOS_ZeroMemory(&mosSurface, sizeof(mosSurface));
1069 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetResourceInfo(
1070 osInterface,
1071 &state->bufferTable[i].osResource,
1072 &mosSurface));
1073 mosSurface.OsResource = state->bufferTable[i].osResource;
1074
1075 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(HalCm_SurfaceSync(state, &mosSurface, false));
1076 }
1077 }
1078 }
1079
1080 // Send Sync Tag
1081 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
1082
1083 // Update tracker resource
1084 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdateTrackerResource(state, &mosCmdBuffer, tag));
1085
1086 // issue a PIPE_CONTROL to write timestamp
1087 syncOffset += sizeof(uint64_t);
1088 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1089 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
1090 pipeCtlParams.dwResourceOffset = syncOffset;
1091 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
1092 pipeCtlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
1093 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1094
1095 // Record registers by unified media profiler in the end
1096 if (state->perfProfiler != nullptr)
1097 {
1098 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectEndCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
1099 }
1100
1101 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
1102 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
1103
1104 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
1105 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
1106 pipeControlParams.bGenericMediaStateClear = true;
1107 pipeControlParams.bIndirectStatePointersDisable = true;
1108 pipeControlParams.bDisableCSStall = false;
1109 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeControlParams));
1110
1111 if (MEDIA_IS_WA(renderHal->pWaTable, WaSendDummyVFEafterPipelineSelect))
1112 {
1113 MHW_VFE_PARAMS vfeStateParams = {};
1114 vfeStateParams.dwNumberofURBEntries = 1;
1115 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer, &vfeStateParams));
1116 }
1117
1118 HalOcaInterface::On1stLevelBBEnd(mosCmdBuffer, *pOsContext);
1119
1120 //Couple to the BB_START , otherwise GPU Hang without it in KMD.
1121 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
1122
1123 // Return unused command buffer space to OS
1124 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
1125
1126 #if MDF_COMMAND_BUFFER_DUMP
1127 if (state->dumpCommandBuffer)
1128 {
1129 state->pfnDumpCommadBuffer(
1130 state,
1131 &mosCmdBuffer,
1132 offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
1133 mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD::byteSize);
1134 }
1135 #endif
1136
1137 #if MDF_SURFACE_STATE_DUMP
1138 if (state->dumpSurfaceState)
1139 {
1140 state->pfnDumpSurfaceState(
1141 state,
1142 offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
1143 mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD::byteSize);
1144
1145 }
1146 #endif
1147
1148 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGlobalTime(&state->taskTimeStamp->submitTimeInCpu[taskId]));
1149 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGpuTime(state, &state->taskTimeStamp->submitTimeInGpu[taskId]));
1150
1151 // Submit command buffer
1152 #if (_RELEASE_INTERNAL || _DEBUG)
1153 #if defined (CM_DIRECT_GUC_SUPPORT)
1154 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitWorkQueue(osInterface, MOS_GPU_NODE_3D, batchbufferaddress));
1155 #endif
1156 #endif
1157 #if !defined (CM_DIRECT_GUC_SUPPORT)
1158 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
1159 &mosCmdBuffer,
1160 state->nullHwRenderCm));
1161 #endif
1162
1163 if (state->nullHwRenderCm == false)
1164 {
1165 stateHeap->pCurMediaState->bBusy = true;
1166 if ( !enableWalker && !enableGpGpu )
1167 {
1168 batchBuffer->bBusy = true;
1169 batchBuffer->dwSyncTag = syncTag;
1170 }
1171 }
1172
1173 // reset API call number of HW threads
1174 state->maxHWThreadValues.apiValue = 0;
1175
1176 state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
1177
1178 eStatus = MOS_STATUS_SUCCESS;
1179
1180 finish:
1181 // Failed -> discard all changes in Command Buffer
1182 if (eStatus != MOS_STATUS_SUCCESS)
1183 {
1184 // Buffer overflow - display overflow size
1185 if (mosCmdBuffer.iRemaining < 0)
1186 {
1187 CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
1188 }
1189
1190 // Move command buffer back to beginning
1191 tmp = remaining - mosCmdBuffer.iRemaining;
1192 mosCmdBuffer.iRemaining = remaining;
1193 mosCmdBuffer.iOffset -= tmp;
1194 mosCmdBuffer.pCmdPtr = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset/sizeof(uint32_t);
1195
1196 // Return unused command buffer space to OS
1197 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
1198 }
1199
1200 return eStatus;
1201 }
1202
GetCopyKernelIsa(void * & isa,uint32_t & isaSize)1203 MOS_STATUS CM_HAL_G9_X::GetCopyKernelIsa(void *&isa, uint32_t &isaSize)
1204 {
1205 isa = (void *)pGPUCopy_kernel_isa_gen9;
1206 isaSize = iGPUCopy_kernel_isa_size_gen9;
1207
1208 return MOS_STATUS_SUCCESS;
1209 }
1210
GetInitKernelIsa(void * & isa,uint32_t & isaSize)1211 MOS_STATUS CM_HAL_G9_X::GetInitKernelIsa(void *&isa, uint32_t &isaSize)
1212 {
1213 isa = (void *)pGPUInit_kernel_isa_Gen9;
1214 isaSize = iGPUInit_kernel_isa_size_Gen9;
1215
1216 return MOS_STATUS_SUCCESS;
1217 }
1218
UpdatePlatformInfoFromPower(PCM_PLATFORM_INFO platformInfo,bool euSaturated)1219 MOS_STATUS CM_HAL_G9_X::UpdatePlatformInfoFromPower(
1220 PCM_PLATFORM_INFO platformInfo,
1221 bool euSaturated)
1222 {
1223 PCM_HAL_STATE state = m_cmState;
1224 PRENDERHAL_INTERFACE renderHal = state->renderHal;
1225 CM_POWER_OPTION cmPower;
1226
1227 if ( state->requestSingleSlice ||
1228 renderHal->bRequestSingleSlice ||
1229 (state->powerOption.nSlice != 0 && state->powerOption.nSlice < platformInfo->numSlices))
1230 {
1231 platformInfo->numSubSlices = platformInfo->numSubSlices / platformInfo->numSlices;
1232 if (state->powerOption.nSlice > 1)
1233 {
1234 platformInfo->numSubSlices *= state->powerOption.nSlice;
1235 platformInfo->numSlices = state->powerOption.nSlice;
1236 }
1237 else
1238 {
1239 platformInfo->numSlices = 1;
1240 }
1241 }
1242 else if (euSaturated)
1243 {
1244 // No SSD and EU Saturation, request maximum number of slices/subslices/EUs
1245 cmPower.nSlice = (uint16_t)platformInfo->numSlices;
1246 cmPower.nSubSlice = (uint16_t)platformInfo->numSubSlices;
1247 cmPower.nEU = (uint16_t)(platformInfo->numEUsPerSubSlice * platformInfo->numSubSlices);
1248
1249 state->pfnSetPowerOption(state, &cmPower);
1250 }
1251
1252 return MOS_STATUS_SUCCESS;
1253 }
1254
GetMediaWalkerMaxThreadWidth()1255 uint32_t CM_HAL_G9_X::GetMediaWalkerMaxThreadWidth()
1256 {
1257 return CM_MAX_THREADSPACE_WIDTH_SKLUP_FOR_MW;
1258 }
1259
GetMediaWalkerMaxThreadHeight()1260 uint32_t CM_HAL_G9_X::GetMediaWalkerMaxThreadHeight()
1261 {
1262 return CM_MAX_THREADSPACE_HEIGHT_SKLUP_FOR_MW;
1263 }
1264
GetHwSurfaceBTIInfo(PCM_SURFACE_BTI_INFO btiInfo)1265 MOS_STATUS CM_HAL_G9_X::GetHwSurfaceBTIInfo(
1266 PCM_SURFACE_BTI_INFO btiInfo)
1267 {
1268 if (btiInfo == nullptr)
1269 {
1270 return MOS_STATUS_NULL_POINTER;
1271 }
1272
1273 btiInfo->normalSurfaceStart = CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS + \
1274 CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER ;
1275 btiInfo->normalSurfaceEnd = GT_RESERVED_INDEX_START_GEN9_PLUS - 1;
1276 btiInfo->reservedSurfaceStart = CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS;
1277 btiInfo->reservedSurfaceEnd = CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER;
1278
1279 return MOS_STATUS_SUCCESS;
1280 }
1281
SetSuggestedL3Conf(L3_SUGGEST_CONFIG l3Config)1282 MOS_STATUS CM_HAL_G9_X::SetSuggestedL3Conf(
1283 L3_SUGGEST_CONFIG l3Config)
1284 {
1285 if (l3Config >= sizeof(SKL_L3_PLANE)/sizeof(L3ConfigRegisterValues))
1286 {
1287 return MOS_STATUS_INVALID_PARAMETER;
1288 }
1289 return HalCm_SetL3Cache((L3ConfigRegisterValues *)&SKL_L3_PLANE[l3Config],
1290 &m_cmState->l3Settings);
1291 }
1292
AllocateSIPCSRResource()1293 MOS_STATUS CM_HAL_G9_X::AllocateSIPCSRResource()
1294 {
1295 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1296 if (Mos_ResourceIsNull(&m_cmState->sipResource.osResource))
1297 {
1298 CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateSipResource(m_cmState)); // create sip resource if it does not exist
1299 CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateCSRResource(m_cmState));
1300 }
1301
1302 return eStatus;
1303 }
1304
GetGenStepInfo(char * & stepInfoStr)1305 MOS_STATUS CM_HAL_G9_X::GetGenStepInfo(char*& stepInfoStr)
1306 {
1307 const char *cmSteppingInfo[] = { "A", "B", "C", "D", "E", "F",
1308 "G", "H", "I", "J" };
1309 uint32_t genStepId = m_cmState->platform.usRevId;
1310
1311 if (m_steppingTable.size() != 0) //check if the stepping table been overwritten
1312 {
1313 if (genStepId < m_steppingTable.size())
1314 {
1315 stepInfoStr = (char *)m_steppingTable[genStepId];
1316 }
1317 else
1318 {
1319 stepInfoStr = nullptr;
1320 }
1321 }
1322 else
1323 {
1324 if (genStepId < sizeof(cmSteppingInfo)/sizeof(const char *))
1325 {
1326 stepInfoStr = (char *)cmSteppingInfo[genStepId];
1327 }
1328 else
1329 {
1330 stepInfoStr = nullptr;
1331 }
1332 }
1333
1334 return MOS_STATUS_SUCCESS;
1335 }
1336
ColorCountSanityCheck(uint32_t colorCount)1337 int32_t CM_HAL_G9_X::ColorCountSanityCheck(uint32_t colorCount)
1338 {
1339 if (colorCount == CM_INVALID_COLOR_COUNT || colorCount > CM_THREADSPACE_MAX_COLOR_COUNT)
1340 {
1341 CM_ASSERTMESSAGE("Error: Invalid color count.");
1342 return CM_INVALID_ARG_VALUE;
1343 }
1344 return CM_SUCCESS;
1345 }
1346
MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)1347 bool CM_HAL_G9_X::MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)
1348 {
1349 if ( memCtrl > MEMORY_OBJECT_CONTROL_SKL_NO_CACHE )
1350 {
1351 return false;
1352 }
1353
1354 return true;
1355 }
1356
GetConvSamplerIndex(PMHW_SAMPLER_STATE_PARAM samplerParam,char * samplerIndexTable,int32_t nSamp8X8Num,int32_t nSampConvNum)1357 int32_t CM_HAL_G9_X::GetConvSamplerIndex(
1358 PMHW_SAMPLER_STATE_PARAM samplerParam,
1359 char *samplerIndexTable,
1360 int32_t nSamp8X8Num,
1361 int32_t nSampConvNum)
1362 {
1363 int32_t samplerIndex = 0;
1364
1365 if ((samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D) &&
1366 (samplerParam->Convolve.skl_mode))
1367 {
1368 // 2D convolve & SKL+
1369 samplerIndex = 1 + nSampConvNum + nSamp8X8Num;
1370 }
1371 else if (samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1D)
1372 {
1373 // 1D convolve & SKL+
1374 samplerIndex = nSampConvNum;
1375 }
1376 else
1377 {
1378 // 1P convolve SKL+
1379 samplerIndex = 1 + (nSamp8X8Num + nSampConvNum) * 2;
1380 while (samplerIndexTable[samplerIndex] != CM_INVALID_INDEX)
1381 {
1382 samplerIndex += 2;
1383 }
1384
1385 }
1386 return samplerIndex;
1387 }
1388
SetL3CacheConfig(const L3ConfigRegisterValues * values,PCmHalL3Settings cmHalL3Setting)1389 MOS_STATUS CM_HAL_G9_X::SetL3CacheConfig(
1390 const L3ConfigRegisterValues *values,
1391 PCmHalL3Settings cmHalL3Setting)
1392 {
1393 return HalCm_SetL3Cache( values, cmHalL3Setting );
1394 }
1395
GetSamplerParamInfoForSamplerType(PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,SamplerParam & samplerParam)1396 MOS_STATUS CM_HAL_G9_X::GetSamplerParamInfoForSamplerType(
1397 PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,
1398 SamplerParam &samplerParam)
1399 {
1400 const unsigned int samplerElementSize[MAX_ELEMENT_TYPE_COUNT] = {16, 32, 64, 128, 1024, 2048};
1401
1402 // gets element_type
1403 switch (mhwSamplerParam->SamplerType)
1404 {
1405 case MHW_SAMPLER_TYPE_3D:
1406 samplerParam.elementType = MHW_Sampler1Element;
1407 break;
1408 case MHW_SAMPLER_TYPE_MISC:
1409 samplerParam.elementType = MHW_Sampler2Elements;
1410 break;
1411 case MHW_SAMPLER_TYPE_CONV:
1412 if ((!mhwSamplerParam->Convolve.skl_mode &&
1413 mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
1414 || mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P)
1415 {
1416 samplerParam.elementType = MHW_Sampler64Elements;
1417 }
1418 else if (mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1D)
1419 {
1420 samplerParam.elementType = MHW_Sampler8Elements;
1421 }
1422 else
1423 {
1424 samplerParam.elementType = MHW_Sampler128Elements;
1425 }
1426 break;
1427 case MHW_SAMPLER_TYPE_AVS:
1428 samplerParam.elementType = MHW_Sampler128Elements;
1429 break;
1430 default:
1431 break;
1432 }
1433
1434 // bti_stepping for BDW mode convolve or 1P convolve is 2, other cases are 1.
1435 if ((mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV) && ((!mhwSamplerParam->Convolve.skl_mode &&
1436 mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
1437 || mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P))
1438 {
1439 samplerParam.btiStepping = 2;
1440 }
1441 else
1442 {
1443 samplerParam.btiStepping = 1;
1444 }
1445
1446 // gets multiplier
1447 samplerParam.btiMultiplier = samplerElementSize[samplerParam.elementType] / samplerParam.btiStepping;
1448
1449 // gets size
1450 samplerParam.size = samplerElementSize[samplerParam.elementType];
1451
1452 // Temporary solution for conv because MHW use 2048 bytes for all of the convolve samplers.
1453 // size should always be equal to bti_stepping * bti_multiplier except for this one.
1454 if (mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV)
1455 {
1456 samplerParam.size = 2048;
1457 }
1458
1459 return MOS_STATUS_SUCCESS;
1460 }
1461
GetExpectedGtSystemConfig(PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)1462 MOS_STATUS CM_HAL_G9_X::GetExpectedGtSystemConfig(
1463 PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)
1464 {
1465 if (m_genGT == PLATFORM_INTEL_GT1)
1466 {
1467 expectedConfig->numSlices = SKL_GT1_MAX_NUM_SLICES;
1468 expectedConfig->numSubSlices = SKL_GT1_MAX_NUM_SUBSLICES;
1469 }
1470 else if (m_genGT == PLATFORM_INTEL_GT1_5)
1471 {
1472 expectedConfig->numSlices = SKL_GT1_5_MAX_NUM_SLICES;
1473 expectedConfig->numSubSlices = SKL_GT1_5_MAX_NUM_SUBSLICES;
1474 }
1475 else if (m_genGT == PLATFORM_INTEL_GT2)
1476 {
1477 expectedConfig->numSlices = SKL_GT2_MAX_NUM_SLICES;
1478 expectedConfig->numSubSlices = SKL_GT2_MAX_NUM_SUBSLICES;
1479 }
1480 else if (m_genGT == PLATFORM_INTEL_GT3)
1481 {
1482 expectedConfig->numSlices = SKL_GT3_MAX_NUM_SLICES;
1483 expectedConfig->numSubSlices = SKL_GT3_MAX_NUM_SUBSLICES;
1484 }
1485 else if (m_genGT == PLATFORM_INTEL_GT4)
1486 {
1487 expectedConfig->numSlices = SKL_GT4_MAX_NUM_SLICES;
1488 expectedConfig->numSubSlices = SKL_GT4_MAX_NUM_SUBSLICES;
1489 }
1490 else
1491 {
1492 expectedConfig->numSlices = 0;
1493 expectedConfig->numSubSlices = 0;
1494 }
1495
1496 return MOS_STATUS_SUCCESS;
1497 }
1498
ConverTicksToNanoSecondsDefault(uint64_t ticks)1499 uint64_t CM_HAL_G9_X::ConverTicksToNanoSecondsDefault(uint64_t ticks)
1500 {
1501 if (m_platformID == PLATFORM_INTEL_BXT || m_platformID == PLATFORM_INTEL_GLK)
1502 {
1503 return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G9LP);
1504 }
1505 else
1506 {
1507 return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G9);
1508 }
1509 }
1510
1511