1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_hal_g9.cpp
24 //! \brief     Common HAL CM Gen9 functions
25 //!
26 
27 #include "cm_hal_g9.h"
28 #include "mhw_render_hwcmd_g9_X.h"
29 #include "renderhal_platform_interface.h"
30 #include "mhw_render.h"
31 #include "hal_oca_interface.h"
32 
33 #if defined(ENABLE_KERNELS) && (!defined(_FULL_OPEN_SOURCE))
34 #include "cm_gpucopy_kernel_g9.h"
35 #include "cm_gpuinit_kernel_g9.h"
36 #else
// Fallback definitions used when the Gen9 GPU-copy / GPU-init kernel headers
// are not included: each kernel is published as an empty blob (null ISA
// pointer, zero size).
unsigned char *pGPUCopy_kernel_isa_gen9      = nullptr;
unsigned int   iGPUCopy_kernel_isa_size_gen9 = 0;
unsigned char *pGPUInit_kernel_isa_Gen9      = nullptr;
unsigned int   iGPUInit_kernel_isa_size_Gen9 = 0;
41 #endif
42 
// Length of one render-engine tick, in nanoseconds.
#define CM_NS_PER_TICK_RENDER_G9        (83.333)   // SKL: 83.333 ns per tick in render engine
#define CM_NS_PER_TICK_RENDER_G9LP      (52.083)   // BXT: 52.083 ns per tick in render engine

// Numeric platform codes for BXT and GLK.
#define PLATFORM_INTEL_BXT 8
#define PLATFORM_INTEL_GLK 16
48 
49 // Gen9 Surface state tokenized commands - a SURFACE_STATE_G9 command and
50 // a surface state command, either SURFACE_STATE_G9 or SURFACE_STATE_ADV_G9
51 struct PACKET_SURFACE_STATE
52 {
53     SURFACE_STATE_TOKEN_COMMON token;
54     union
55     {
56         mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD cmdSurfaceState;
57         mhw_state_heap_g9_X::MEDIA_SURFACE_STATE_CMD cmdSurfaceStateAdv;
58     };
59 };
60 
61 //------------------------------------------------------------------------------
62 //| Purpose: Sets Media Walker Parameters from engineering API for GEN9
63 //| Returns: Result of the operation
64 //------------------------------------------------------------------------------
SetMediaWalkerParams(CM_WALKING_PARAMETERS engineeringParams,PCM_HAL_WALKER_PARAMS walkerParams)65 MOS_STATUS CM_HAL_G9_X::SetMediaWalkerParams(
66     CM_WALKING_PARAMETERS          engineeringParams,
67     PCM_HAL_WALKER_PARAMS          walkerParams)
68 {
69     mhw_render_g9_X::MEDIA_OBJECT_WALKER_CMD mediaWalkerCmd;
70     mediaWalkerCmd.DW5.Value = engineeringParams.Value[0];
71     walkerParams->scoreboardMask = mediaWalkerCmd.DW5.ScoreboardMask;
72 
73     mediaWalkerCmd.DW6.Value = engineeringParams.Value[1];
74     walkerParams->colorCountMinusOne = mediaWalkerCmd.DW6.ColorCountMinusOne;
75     walkerParams->midLoopUnitX = mediaWalkerCmd.DW6.MidLoopUnitX;
76     walkerParams->midLoopUnitY = mediaWalkerCmd.DW6.LocalMidLoopUnitY;
77     walkerParams->middleLoopExtraSteps = mediaWalkerCmd.DW6.MiddleLoopExtraSteps;
78 
79     mediaWalkerCmd.DW7.Value = engineeringParams.Value[2];
80     walkerParams->localLoopExecCount = mediaWalkerCmd.DW7.LocalLoopExecCount;
81     walkerParams->globalLoopExecCount = mediaWalkerCmd.DW7.GlobalLoopExecCount;
82 
83     mediaWalkerCmd.DW8.Value = engineeringParams.Value[3];
84     walkerParams->blockResolution.x = mediaWalkerCmd.DW8.BlockResolutionX;
85     walkerParams->blockResolution.y = mediaWalkerCmd.DW8.BlockResolutionY;
86 
87     mediaWalkerCmd.DW9.Value = engineeringParams.Value[4];
88     walkerParams->localStart.x = mediaWalkerCmd.DW9.LocalStartX;
89     walkerParams->localStart.y = mediaWalkerCmd.DW9.LocalStartY;
90 
91     mediaWalkerCmd.DW11.Value = engineeringParams.Value[6];
92     walkerParams->localOutLoopStride.x = mediaWalkerCmd.DW11.LocalOuterLoopStrideX;
93     walkerParams->localOutLoopStride.y = mediaWalkerCmd.DW11.LocalOuterLoopStrideY;
94 
95     mediaWalkerCmd.DW12.Value = engineeringParams.Value[7];
96     walkerParams->localInnerLoopUnit.x = mediaWalkerCmd.DW12.LocalInnerLoopUnitX;
97     walkerParams->localInnerLoopUnit.y = mediaWalkerCmd.DW12.LocalInnerLoopUnitY;
98 
99     mediaWalkerCmd.DW13.Value = engineeringParams.Value[8];
100     walkerParams->globalResolution.x = mediaWalkerCmd.DW13.GlobalResolutionX;
101     walkerParams->globalResolution.y = mediaWalkerCmd.DW13.GlobalResolutionY;
102 
103     mediaWalkerCmd.DW14.Value = engineeringParams.Value[9];
104     walkerParams->globalStart.x = mediaWalkerCmd.DW14.GlobalStartX;
105     walkerParams->globalStart.y = mediaWalkerCmd.DW14.GlobalStartY;
106 
107     mediaWalkerCmd.DW15.Value = engineeringParams.Value[10];
108     walkerParams->globalOutlerLoopStride.x = mediaWalkerCmd.DW15.GlobalOuterLoopStrideX;
109     walkerParams->globalOutlerLoopStride.y = mediaWalkerCmd.DW15.GlobalOuterLoopStrideY;
110 
111     mediaWalkerCmd.DW16.Value = engineeringParams.Value[11];
112     walkerParams->globalInnerLoopUnit.x = mediaWalkerCmd.DW16.GlobalInnerLoopUnitX;
113     walkerParams->globalInnerLoopUnit.y = mediaWalkerCmd.DW16.GlobalInnerLoopUnitY;
114 
115     walkerParams->localEnd.x = 0;
116     walkerParams->localEnd.y = 0;
117 
118     return MOS_STATUS_SUCCESS;
119 }
120 
SetupHwDebugControl(PRENDERHAL_INTERFACE renderHal,PMOS_COMMAND_BUFFER cmdBuffer)121 MOS_STATUS CM_HAL_G9_X::SetupHwDebugControl(
122     PRENDERHAL_INTERFACE   renderHal,
123     PMOS_COMMAND_BUFFER    cmdBuffer)
124 {
125     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
126     MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
127 
128     //---------------------------------------
129     CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal);
130     CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->pMhwMiInterface);
131     CM_CHK_NULL_GOTOFINISH_MOSERROR(cmdBuffer);
132     //---------------------------------------
133 
134     MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
135 
136     // CS_DEBUG_MODE1, global debug enable
137     loadRegImm.dwRegister = CS_DEBUG_MODE1;
138     loadRegImm.dwData = (CS_DEBUG_MODE1_GLOBAL_DEBUG << 16) | CS_DEBUG_MODE1_GLOBAL_DEBUG;
139     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm));
140 
141     // TD_CTL, force thread breakpoint enable
142     // Also enable external exception, because the source-level debugger has to
143     // be able to interrupt runing EU threads.
144     loadRegImm.dwRegister = TD_CTL;
145     loadRegImm.dwData = TD_CTL_FORCE_THREAD_BKPT_ENABLE | TD_CTL_FORCE_EXT_EXCEPTION_ENABLE;
146     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm));
147 
148 finish:
149     return eStatus;
150 }
151 
152 //------------------------------------------------------------------------------
153 //| Purpose: Registers the sampler 8x8 AVS table: DWORDS 152, 153 and coefficients
154 //| Returns: Result of the operation
155 //------------------------------------------------------------------------------
RegisterSampler8x8AVSTable(PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,PCM_AVS_TABLE_STATE_PARAMS avsTable)156 MOS_STATUS CM_HAL_G9_X::RegisterSampler8x8AVSTable(
157     PCM_HAL_SAMPLER_8X8_TABLE  sampler8x8AvsTable,
158     PCM_AVS_TABLE_STATE_PARAMS avsTable )
159 {
160     MOS_ZeroMemory( &sampler8x8AvsTable->mhwSamplerAvsTableParam, sizeof( sampler8x8AvsTable->mhwSamplerAvsTableParam ) );
161 
162     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
163     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
164     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels  = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
165     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels  = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
166 
167     sampler8x8AvsTable->mhwSamplerAvsTableParam.bEnableRGBAdaptive         = false;
168     sampler8x8AvsTable->mhwSamplerAvsTableParam.bAdaptiveFilterAllChannels = avsTable->adaptiveFilterAllChannels;
169 
170     // Assign the coefficient table;
171     for ( uint32_t i = 0; i < CM_NUM_HW_POLYPHASE_TABLES_G9; i++ )
172     {
173         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[0] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_0;
174         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[1] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_1;
175 
176         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[2] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_2;
177         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[3] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_3;
178 
179         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[4] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_4;
180         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[5] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_5;
181 
182         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[6] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_6;
183         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroXFilterCoefficient[7] = ( uint8_t )avsTable->tbl0X[ i ].FilterCoeff_0_7;
184 
185         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[0] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_0;
186         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[1] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_1;
187 
188         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[2] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_2;
189         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[3] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_3;
190 
191         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[4] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_4;
192         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[5] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_5;
193 
194         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[6] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_6;
195         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].ZeroYFilterCoefficient[7] = ( uint8_t )avsTable->tbl0Y[ i ].FilterCoeff_0_7;
196 
197         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[0]  = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_2;
198         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[1]  = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_3;
199         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[2]  = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_4;
200         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneXFilterCoefficient[3]  = ( uint8_t )avsTable->tbl1X[ i ].FilterCoeff_0_5;
201 
202         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[0]  = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_2;
203         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[1]  = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_3;
204         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[2]  = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_4;
205         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[ i ].OneYFilterCoefficient[3]  = ( uint8_t )avsTable->tbl1Y[ i ].FilterCoeff_0_5;
206     }
207 
208     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = avsTable->defaultSharpLevel;
209     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = avsTable->bypassXAF;
210     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = avsTable->bypassYAF;
211 
212     if ( !avsTable->bypassXAF  && !avsTable->bypassYAF )
213     {
214         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels  = avsTable->maxDerivative8Pixels;
215         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels  = avsTable->maxDerivative4Pixels;
216         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = avsTable->transitionArea8Pixels;
217         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = avsTable->transitionArea4Pixels;
218     }
219 
220     for ( int i = 0; i < CM_NUM_HW_POLYPHASE_EXTRA_TABLES_G9; i++ )
221     {
222         int src = i + CM_NUM_HW_POLYPHASE_TABLES_G9;
223         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[0] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_0;
224         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[1] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_1;
225 
226         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[2] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_2;
227         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[3] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_3;
228 
229         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[4] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_4;
230         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[5] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_5;
231 
232         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[6] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_6;
233         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroXFilterCoefficient[7] = ( uint8_t )avsTable->tbl0X[ src ].FilterCoeff_0_7;
234 
235         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[0] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_0;
236         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[1] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_1;
237 
238         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[2] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_2;
239         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[3] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_3;
240 
241         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[4] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_4;
242         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[5] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_5;
243 
244         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[6] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_6;
245         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].ZeroYFilterCoefficient[7] = ( uint8_t )avsTable->tbl0Y[ src ].FilterCoeff_0_7;
246 
247         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[0] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_2;
248         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[1] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_3;
249         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[2] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_4;
250         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneXFilterCoefficient[3] = ( uint8_t )avsTable->tbl1X[ src ].FilterCoeff_0_5;
251 
252         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[0] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_2;
253         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[1] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_3;
254         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[2] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_4;
255         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[ i ].OneYFilterCoefficient[3] = ( uint8_t )avsTable->tbl1Y[ src ].FilterCoeff_0_5;
256 
257     }
258 
259     return MOS_STATUS_SUCCESS;
260 }
261 
RegisterSampler8x8(PCM_HAL_SAMPLER_8X8_PARAM param)262 MOS_STATUS CM_HAL_G9_X::RegisterSampler8x8(
263     PCM_HAL_SAMPLER_8X8_PARAM    param)
264 {
265     PCM_HAL_STATE               state = m_cmState;
266     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
267     int16_t                     samplerIndex = 0;
268     PMHW_SAMPLER_STATE_PARAM    samplerEntry = nullptr;
269     PCM_HAL_SAMPLER_8X8_ENTRY   sampler8x8Entry = nullptr;
270 
271     if (param->sampler8x8State.stateType == CM_SAMPLER8X8_AVS)
272     {
273         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {
274             if (!state->samplerTable[i].bInUse) {
275                 samplerEntry = &state->samplerTable[i];
276                 param->handle = (uint32_t)i << 16;
277                 samplerEntry->bInUse = true;
278                 break;
279             }
280         }
281 
282         for (uint32_t i = 0; i < state->cmDeviceParam.maxSampler8x8TableSize; i++) {
283             if (!state->sampler8x8Table[i].inUse) {
284                 sampler8x8Entry = &state->sampler8x8Table[i];
285                 samplerIndex = (int16_t)i;
286                 param->handle |= (uint32_t)(i & 0xffff);
287                 sampler8x8Entry->inUse = true;
288                 break;
289             }
290         }
291 
292         if (!samplerEntry || !sampler8x8Entry) {
293             eStatus = MOS_STATUS_INVALID_PARAMETER;
294             CM_ASSERTMESSAGE("Sampler or AVS table is full");
295             goto finish;
296         }
297 
298         //State data from application
299         samplerEntry->SamplerType                  = MHW_SAMPLER_TYPE_AVS;
300         samplerEntry->ElementType                  = MHW_Sampler128Elements;
301         samplerEntry->Avs                          = param->sampler8x8State.avsParam.avsState;
302         samplerEntry->Avs.stateID                  = samplerIndex;
303         samplerEntry->Avs.iTable8x8_Index          = samplerIndex;  // Used for calculating the Media offset of 8x8 table
304         samplerEntry->Avs.pMhwSamplerAvsTableParam = &sampler8x8Entry->sampler8x8State.mhwSamplerAvsTableParam;
305 
306         if (samplerEntry->Avs.EightTapAFEnable)
307             param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = true;
308         else
309             param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = false;
310 
311         CM_CHK_MOSSTATUS_GOTOFINISH(RegisterSampler8x8AVSTable(&sampler8x8Entry->sampler8x8State,
312                                                     &param->sampler8x8State.avsParam.avsTable));
313 
314         sampler8x8Entry->sampler8x8State.stateType  = CM_SAMPLER8X8_AVS;
315     }
316     else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_MISC)
317     {
318         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
319         {
320             if (!state->samplerTable[i].bInUse)
321             {
322                 samplerEntry = &state->samplerTable[i];
323                 param->handle = (uint32_t)i << 16;
324                 samplerEntry->bInUse = true;
325                 break;
326             }
327         }
328 
329         if ( samplerEntry == nullptr )
330         {
331             return MOS_STATUS_INVALID_HANDLE;
332         }
333         samplerEntry->SamplerType  = MHW_SAMPLER_TYPE_MISC;
334         samplerEntry->ElementType = MHW_Sampler2Elements;
335 
336         samplerEntry->Misc.byteHeight = param->sampler8x8State.miscState.DW0.Height;
337         samplerEntry->Misc.byteWidth  = param->sampler8x8State.miscState.DW0.Width;
338         samplerEntry->Misc.wRow[0]    = param->sampler8x8State.miscState.DW0.Row0;
339         samplerEntry->Misc.wRow[1]    = param->sampler8x8State.miscState.DW1.Row1;
340         samplerEntry->Misc.wRow[2]    = param->sampler8x8State.miscState.DW1.Row2;
341         samplerEntry->Misc.wRow[3]    = param->sampler8x8State.miscState.DW2.Row3;
342         samplerEntry->Misc.wRow[4]    = param->sampler8x8State.miscState.DW2.Row4;
343         samplerEntry->Misc.wRow[5]    = param->sampler8x8State.miscState.DW3.Row5;
344         samplerEntry->Misc.wRow[6]    = param->sampler8x8State.miscState.DW3.Row6;
345         samplerEntry->Misc.wRow[7]    = param->sampler8x8State.miscState.DW4.Row7;
346         samplerEntry->Misc.wRow[8]    = param->sampler8x8State.miscState.DW4.Row8;
347         samplerEntry->Misc.wRow[9]    = param->sampler8x8State.miscState.DW5.Row9;
348         samplerEntry->Misc.wRow[10]   = param->sampler8x8State.miscState.DW5.Row10;
349         samplerEntry->Misc.wRow[11]   = param->sampler8x8State.miscState.DW6.Row11;
350         samplerEntry->Misc.wRow[12]   = param->sampler8x8State.miscState.DW6.Row12;
351         samplerEntry->Misc.wRow[13]   = param->sampler8x8State.miscState.DW7.Row13;
352         samplerEntry->Misc.wRow[14]   = param->sampler8x8State.miscState.DW7.Row14;
353     }
354     else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_CONV)
355     {
356         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
357         {
358             if (!state->samplerTable[i].bInUse) {
359                 samplerEntry = &state->samplerTable[i];
360                 param->handle = (uint32_t)i << 16;
361                 samplerEntry->bInUse = true;
362                 break;
363             }
364         }
365 
366         if ( samplerEntry == nullptr )
367         {
368             return MOS_STATUS_INVALID_HANDLE;
369         }
370 
371         MOS_ZeroMemory(&samplerEntry->Convolve, sizeof(samplerEntry->Convolve));
372 
373         samplerEntry->SamplerType  = MHW_SAMPLER_TYPE_CONV;
374 
375         samplerEntry->Convolve.ui8Height               = param->sampler8x8State.convolveState.height;
376         samplerEntry->Convolve.ui8Width                = param->sampler8x8State.convolveState.width;
377         samplerEntry->Convolve.ui8ScaledDownValue      = param->sampler8x8State.convolveState.scaleDownValue;
378         samplerEntry->Convolve.ui8SizeOfTheCoefficient = param->sampler8x8State.convolveState.coeffSize;
379 
380         samplerEntry->Convolve.ui8MSBWidth    = param->sampler8x8State.convolveState.isHorizontal32Mode;
381         samplerEntry->Convolve.ui8MSBHeight   = param->sampler8x8State.convolveState.isVertical32Mode;
382         samplerEntry->Convolve.skl_mode       = param->sampler8x8State.convolveState.sklMode;
383 
384         // Currently use DW0.Reserved0 to save the detailed Convolve Type, the DW0.Reserved0 will be cleared when copy to sampelr heap
385         samplerEntry->Convolve.ui8ConvolveType = param->sampler8x8State.convolveState.nConvolveType;
386         if (samplerEntry->Convolve.skl_mode &&
387             samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
388         {
389             samplerEntry->ElementType = MHW_Sampler128Elements;
390         }
391         else if ((!samplerEntry->Convolve.skl_mode &&
392                   samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
393                   || samplerEntry->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P)
394         {
395             samplerEntry->ElementType = MHW_Sampler64Elements;
396         }
397         else
398         {
399             samplerEntry->ElementType = MHW_Sampler8Elements;
400         }
401 
402         for ( int i = 0; i < CM_NUM_CONVOLVE_ROWS_SKL; i++ )
403         {
404             MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable  = &(samplerEntry->Convolve.CoeffTable[i]);
405             CM_HAL_CONVOLVE_COEFF_TABLE      *sourceTable = &(param->sampler8x8State.convolveState.table[i]);
406             if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
407             {
408                 coeffTable->wFilterCoeff[0]  = FloatToS3_12( sourceTable->FilterCoeff_0_0 );
409                 coeffTable->wFilterCoeff[1]  = FloatToS3_12( sourceTable->FilterCoeff_0_1 );
410                 coeffTable->wFilterCoeff[2]  = FloatToS3_12( sourceTable->FilterCoeff_0_2 );
411                 coeffTable->wFilterCoeff[3]  = FloatToS3_12( sourceTable->FilterCoeff_0_3 );
412                 coeffTable->wFilterCoeff[4]  = FloatToS3_12( sourceTable->FilterCoeff_0_4 );
413                 coeffTable->wFilterCoeff[5]  = FloatToS3_12( sourceTable->FilterCoeff_0_5 );
414                 coeffTable->wFilterCoeff[6]  = FloatToS3_12( sourceTable->FilterCoeff_0_6 );
415                 coeffTable->wFilterCoeff[7]  = FloatToS3_12( sourceTable->FilterCoeff_0_7 );
416                 coeffTable->wFilterCoeff[8]  = FloatToS3_12( sourceTable->FilterCoeff_0_8 );
417                 coeffTable->wFilterCoeff[9]  = FloatToS3_12( sourceTable->FilterCoeff_0_9 );
418                 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_10 );
419                 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_11 );
420                 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_12 );
421                 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_13 );
422                 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_14 );
423                 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_15 );
424             }
425             else
426             {
427                 coeffTable->wFilterCoeff[0]  = FloatToS3_4( sourceTable->FilterCoeff_0_0 );
428                 coeffTable->wFilterCoeff[1]  = FloatToS3_4( sourceTable->FilterCoeff_0_1 );
429                 coeffTable->wFilterCoeff[2]  = FloatToS3_4( sourceTable->FilterCoeff_0_2 );
430                 coeffTable->wFilterCoeff[3]  = FloatToS3_4( sourceTable->FilterCoeff_0_3 );
431                 coeffTable->wFilterCoeff[4]  = FloatToS3_4( sourceTable->FilterCoeff_0_4 );
432                 coeffTable->wFilterCoeff[5]  = FloatToS3_4( sourceTable->FilterCoeff_0_5 );
433                 coeffTable->wFilterCoeff[6]  = FloatToS3_4( sourceTable->FilterCoeff_0_6 );
434                 coeffTable->wFilterCoeff[7]  = FloatToS3_4( sourceTable->FilterCoeff_0_7 );
435                 coeffTable->wFilterCoeff[8]  = FloatToS3_4( sourceTable->FilterCoeff_0_8 );
436                 coeffTable->wFilterCoeff[9]  = FloatToS3_4( sourceTable->FilterCoeff_0_9 );
437                 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_10 );
438                 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_11 );
439                 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_12 );
440                 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_13 );
441                 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_14 );
442                 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_15 );
443             }
444         }
445 
446         for ( int i = CM_NUM_CONVOLVE_ROWS_SKL; i < CM_NUM_CONVOLVE_ROWS_SKL * 2; i++ )
447         {
448             MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable  = &(samplerEntry->Convolve.CoeffTable[i]);
449             CM_HAL_CONVOLVE_COEFF_TABLE      *sourceTable = &(param->sampler8x8State.convolveState.table[i - CM_NUM_CONVOLVE_ROWS_SKL]);
450 
451             if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
452             {
453                 coeffTable->wFilterCoeff[0]  = FloatToS3_12( sourceTable->FilterCoeff_0_16 );
454                 coeffTable->wFilterCoeff[1]  = FloatToS3_12( sourceTable->FilterCoeff_0_17 );
455                 coeffTable->wFilterCoeff[2]  = FloatToS3_12( sourceTable->FilterCoeff_0_18 );
456                 coeffTable->wFilterCoeff[3]  = FloatToS3_12( sourceTable->FilterCoeff_0_19 );
457                 coeffTable->wFilterCoeff[4]  = FloatToS3_12( sourceTable->FilterCoeff_0_20 );
458                 coeffTable->wFilterCoeff[5]  = FloatToS3_12( sourceTable->FilterCoeff_0_21 );
459                 coeffTable->wFilterCoeff[6]  = FloatToS3_12( sourceTable->FilterCoeff_0_22 );
460                 coeffTable->wFilterCoeff[7]  = FloatToS3_12( sourceTable->FilterCoeff_0_23 );
461                 coeffTable->wFilterCoeff[8]  = FloatToS3_12( sourceTable->FilterCoeff_0_24 );
462                 coeffTable->wFilterCoeff[9]  = FloatToS3_12( sourceTable->FilterCoeff_0_25 );
463                 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_26 );
464                 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_27 );
465                 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_28 );
466                 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_29 );
467                 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_30 );
468                 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_31 );
469             }
470             else
471             {
472                 coeffTable->wFilterCoeff[0]  = FloatToS3_4( sourceTable->FilterCoeff_0_16 );
473                 coeffTable->wFilterCoeff[1]  = FloatToS3_4( sourceTable->FilterCoeff_0_17 );
474                 coeffTable->wFilterCoeff[2]  = FloatToS3_4( sourceTable->FilterCoeff_0_18 );
475                 coeffTable->wFilterCoeff[3]  = FloatToS3_4( sourceTable->FilterCoeff_0_19 );
476                 coeffTable->wFilterCoeff[4]  = FloatToS3_4( sourceTable->FilterCoeff_0_20 );
477                 coeffTable->wFilterCoeff[5]  = FloatToS3_4( sourceTable->FilterCoeff_0_21 );
478                 coeffTable->wFilterCoeff[6]  = FloatToS3_4( sourceTable->FilterCoeff_0_22 );
479                 coeffTable->wFilterCoeff[7]  = FloatToS3_4( sourceTable->FilterCoeff_0_23 );
480                 coeffTable->wFilterCoeff[8]  = FloatToS3_4( sourceTable->FilterCoeff_0_24 );
481                 coeffTable->wFilterCoeff[9]  = FloatToS3_4( sourceTable->FilterCoeff_0_25 );
482                 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_26 );
483                 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_27 );
484                 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_28 );
485                 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_29 );
486                 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_30 );
487                 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_31 );
488             }
489         }
490     }
491 
492 finish:
493     return eStatus;
494 }
495 
496 /*----------------------------------------------------------------------------
497 | Purpose   : Set's surface state memory object control settings
498 | Returns   : dword value
499 \---------------------------------------------------------------------------*/
HwSetSurfaceMemoryObjectControl(uint16_t memObjCtl,PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)500 MOS_STATUS CM_HAL_G9_X::HwSetSurfaceMemoryObjectControl(
501     uint16_t                        memObjCtl,
502     PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)
503 {
504     PRENDERHAL_INTERFACE renderHal = m_cmState->renderHal;
505     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
506     MOS_HW_RESOURCE_DEF mosUsage;
507     // The memory object control uint16_t is composed with cache type(8:15), memory type(4:7), ages(0:3)
508     mosUsage = (MOS_HW_RESOURCE_DEF)((memObjCtl & CM_MEMOBJCTL_CACHE_MASK) >> 8);
509     if (mosUsage >= MOS_HW_RESOURCE_DEF_MAX)
510         mosUsage = GetDefaultMOCS();
511 
512     surfStateParams->MemObjCtl = renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(mosUsage,
513         renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface)).DwordValue;
514 
515     return eStatus;
516 }
517 
518 
519 #if (_RELEASE_INTERNAL || _DEBUG)
520 #if defined (CM_DIRECT_GUC_SUPPORT)
SubmitDummyCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)521 MOS_STATUS CM_HAL_G9_X::SubmitDummyCommands(
522     PMHW_BATCH_BUFFER       batchBuffer,
523     int32_t                 taskId,
524     PCM_HAL_KERNEL_PARAM    *kernelParam,
525     void                    **cmdBuffer)
526 {
527     MOS_STATUS                   eStatus = MOS_STATUS_SUCCESS;
528     PCM_HAL_STATE                state = m_cmState;
529     PRENDERHAL_INTERFACE         renderHal = state->renderHal;
530     MhwRenderInterface           *mhwRender = renderHal->pMhwRenderInterface;
531     PRENDERHAL_STATE_HEAP        stateHeap = renderHal->pStateHeap;
532     PMOS_INTERFACE               osInterface = renderHal->pOsInterface;
533     PMHW_MI_INTERFACE            mhwMiInterface = renderHal->pMhwMiInterface;
534     MHW_PIPE_CONTROL_PARAMS      pipeCtlParams;
535     MHW_ID_LOAD_PARAMS           idLoadParams;
536     int32_t                      remaining = 0;
537     bool                         enableWalker = state->walkerParams.CmWalkerEnable;
538     bool                         enableGpGpu = state->taskParam->blGpGpuWalkerEnabled;
539     MOS_COMMAND_BUFFER           mosCmdBuffer;
540     int64_t                      *taskSyncLocation;
541     int32_t                      syncOffset;
542     int32_t                      tmp;
543     RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
544 
545     MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
546 
547     // Get the task sync offset
548     syncOffset = state->pfnGetTaskSyncLocation(taskId);
549 
550     // Initialize the location
551     taskSyncLocation = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
552     *taskSyncLocation = CM_INVALID_INDEX;
553     *(taskSyncLocation + 1) = CM_INVALID_INDEX;
554     if (state->cbbEnabled)
555     {
556         *(taskSyncLocation + 2) = CM_INVALID_TAG;
557     }
558 
559     // Register batch buffer for rendering
560     if (!enableWalker && !enableGpGpu)
561     {
562         CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
563             osInterface,
564             &batchBuffer->OsResource,
565             true,
566             true));
567     }
568     // Register Timestamp Buffer
569     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
570         osInterface,
571         &state->renderTimeStampResource.osResource,
572         true,
573         true));
574     // Allocate all available space, unused buffer will be returned later
575     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
576     remaining = mosCmdBuffer.iRemaining;
577 
578     // Linux will just return next sync tag here since currently no frame tracking support
579     //dwFrameId = pRenderHal->pfnEnableFrameTracking(pRenderHal, pOsInterface->CurrentGpuContextOrdinal, &genericPrologParams, &OsResource);
580     //pStateHeap->pCurMediaState->dwSyncTag = dwFrameId;
581 
582     // Initialize command buffer and insert prolog
583     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
584 
585     //Send the First PipeControl Command to indicate the beginning of execution
586     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
587     pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
588     pipeCtlParams.dwResourceOffset = syncOffset;
589     pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
590     pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
591     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
592 
593     // Send Pipeline Select command
594     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
595 
596     // issue a PIPE_CONTROL to flush all caches and the stall the CS before
597     // issuing a PIPE_CONTROL to write the timestamp
598     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
599     pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
600     pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
601     pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
602     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
603 
604     // issue a PIPE_CONTROL to write timestamp
605     syncOffset += sizeof(uint64_t);
606     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
607     pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
608     pipeCtlParams.dwResourceOffset = syncOffset;
609     pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
610     pipeCtlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
611     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
612 
613     // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
614     MHW_PIPE_CONTROL_PARAMS pipeControlParams;
615 
616     MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
617     pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
618     pipeControlParams.bGenericMediaStateClear = true;
619     pipeControlParams.bIndirectStatePointersDisable = true;
620     pipeControlParams.bDisableCSStall = false;
621     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeControlParams));
622 
623     //Couple to the BB_START , otherwise GPU Hang without it in Linux KMD
624     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
625 
626     // Return unused command buffer space to OS
627     osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
628 
629     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
630         &mosCmdBuffer,
631         state->nullHwRenderCm))
632 
633     if (state->nullHwRenderCm == false)
634     {
635         stateHeap->pCurMediaState->bBusy = true;
636         if (!enableWalker && !enableGpGpu)
637         {
638             batchBuffer->bBusy = true;
639         }
640     }
641 
642     // reset API call number of HW threads
643     state->maxHWThreadValues.apiValue = 0;
644 
645     state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
646 
647     eStatus = MOS_STATUS_SUCCESS;
648 
649 finish:
650     // Failed -> discard all changes in Command Buffer
651     if (eStatus != MOS_STATUS_SUCCESS)
652     {
653         // Buffer overflow - display overflow size
654         if (mosCmdBuffer.iRemaining < 0)
655         {
656             CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
657         }
658 
659         // Move command buffer back to beginning
660         tmp = remaining - mosCmdBuffer.iRemaining;
661         mosCmdBuffer.iRemaining = remaining;
662         mosCmdBuffer.iOffset -= tmp;
663         mosCmdBuffer.pCmdPtr = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset / sizeof(uint32_t);
664 
665         // Return unused command buffer space to OS
666         osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
667     }
668 
669     return eStatus;
670 }
671 #endif
672 #endif
673 
SubmitCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)674 MOS_STATUS CM_HAL_G9_X::SubmitCommands(
675     PMHW_BATCH_BUFFER       batchBuffer,
676     int32_t                 taskId,
677     PCM_HAL_KERNEL_PARAM    *kernelParam,
678     void                    **cmdBuffer)
679 {
680     MOS_STATUS                   eStatus     = MOS_STATUS_SUCCESS;
681     PCM_HAL_STATE                state       = m_cmState;
682     PRENDERHAL_INTERFACE         renderHal   = state->renderHal;
683     MhwRenderInterface           *mhwRender  = renderHal->pMhwRenderInterface;
684     PRENDERHAL_STATE_HEAP        stateHeap   = renderHal->pStateHeap;
685     PMOS_INTERFACE               osInterface = renderHal->pOsInterface;
686     PMHW_MI_INTERFACE            mhwMiInterface = renderHal->pMhwMiInterface;
687     MHW_PIPE_CONTROL_PARAMS      pipeCtlParams;
688     MHW_ID_LOAD_PARAMS           idLoadParams;
689     int32_t                      remaining   = 0;
690     bool                         enableWalker = state->walkerParams.CmWalkerEnable;
691     bool                         enableGpGpu  = state->taskParam->blGpGpuWalkerEnabled;
692     MOS_COMMAND_BUFFER           mosCmdBuffer;
693     uint32_t                     syncTag;
694     int64_t                      *taskSyncLocation;
695     int32_t                      syncOffset;
696     int32_t                      tmp;
697     PCM_HAL_TASK_PARAM           taskParam = state->taskParam;
698     bool                         sipEnable = renderHal->bSIPKernel? true: false;
699     bool                         csrEnable = renderHal->bCSRKernel? true: false;
700     PCM_HAL_BB_ARGS              bbCmArgs;
701     RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
702     MOS_RESOURCE                 *osResource;
703     uint32_t                     tag;
704     uint32_t                     tagOffset = 0;
705     CM_HAL_MI_REG_OFFSETS  miRegG9 = { REG_TIMESTAMP_BASE_G9, REG_GPR_BASE_G9 };
706 #if (_RELEASE_INTERNAL || _DEBUG)
707 #if defined (CM_DIRECT_GUC_SUPPORT)
708     uint64_t                    batchbufferaddress;
709 #endif
710 #endif
711 
712     MOS_CONTEXT               *pOsContext = renderHal->pOsInterface->pOsContext;
713     PMHW_MI_MMIOREGISTERS     pMmioRegisters = renderHal->pMhwRenderInterface->GetMmioRegisters();
714 
715     MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
716 
717     // get the tag
718     tag = renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex);
719 
720     // Get the task sync offset
721     syncOffset = state->pfnGetTaskSyncLocation(state, taskId);
722 
723     // Initialize the location
724     taskSyncLocation                 = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
725     *taskSyncLocation                = CM_INVALID_INDEX;
726     *(taskSyncLocation + 1)          = CM_INVALID_INDEX;
727     if(state->cbbEnabled)
728     {
729         *(taskSyncLocation + 2)      = tag;
730         *(taskSyncLocation + 3)      = state->renderHal->currentTrackerIndex;
731     }
732 
733     // Register batch buffer for rendering
734     if (!enableWalker && !enableGpGpu)
735     {
736         CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
737             osInterface,
738             &batchBuffer->OsResource,
739             true,
740             true));
741     }
742 #if (_RELEASE_INTERNAL || _DEBUG)
743 #if !defined(CM_DIRECT_GUC_SUPPORT)
744     // Register Timestamp Buffer
745     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
746         osInterface,
747         &state->renderTimeStampResource.osResource,
748         true,
749         true));
750 #endif
751 #endif
752     // Allocate all available space, unused buffer will be returned later
753     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
754     remaining = mosCmdBuffer.iRemaining;
755 #if (_RELEASE_INTERNAL || _DEBUG)
756 #if defined(CM_DIRECT_GUC_SUPPORT)
757     batchbufferaddress = osInterface->pfnGetResourceGfxAddress(
758         osInterface,
759         &mosCmdBuffer.OsResource);
760     batchbufferaddress += mosCmdBuffer.iOffset;
761 #endif
762 #endif
763     // Update power option of this command;
764     CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnUpdatePowerOption( state, &state->powerOption ) );
765 
766     // use frame tracking to write the tracker ID to CM tracker resource
767     renderHal->trackerProducer.GetLatestTrackerResource(renderHal->currentTrackerIndex, &osResource, &tagOffset);
768     renderHal->pfnSetupPrologParams(renderHal, &genericPrologParams, osResource, tagOffset, tag);
769     FrameTrackerTokenFlat_SetProducer(&stateHeap->pCurMediaState->trackerToken, &renderHal->trackerProducer);
770     FrameTrackerTokenFlat_Merge(&stateHeap->pCurMediaState->trackerToken, renderHal->currentTrackerIndex, tag);
771 
772     // Record registers by unified media profiler in the beginning
773     if (state->perfProfiler != nullptr)
774     {
775         CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectStartCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
776     }
777 
778     //Send the First PipeControl Command to indicate the beginning of execution
779     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
780     pipeCtlParams.presDest          = &state->renderTimeStampResource.osResource;
781     pipeCtlParams.dwResourceOffset  = syncOffset;
782     pipeCtlParams.dwPostSyncOp      = MHW_FLUSH_WRITE_TIMESTAMP_REG;
783     pipeCtlParams.dwFlushMode       = MHW_FLUSH_WRITE_CACHE;
784     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
785 
786     // Initialize command buffer and insert prolog
787     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
788 
789     HalOcaInterface::On1stLevelBBStart(mosCmdBuffer, *pOsContext, osInterface->CurrentGpuContextHandle,
790         *renderHal->pMhwMiInterface, *pMmioRegisters);
791 
792     // update tracker tag used with CM tracker resource
793     renderHal->trackerProducer.StepForward(renderHal->currentTrackerIndex);
794 
795     // Increment sync tag
796     syncTag = stateHeap->dwNextTag++;
797 
798     //enable SKL L3 config
799     HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
800     renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
801     mhwRender->SetL3Cache(&mosCmdBuffer);
802 
803     if (sipEnable)
804     {
805         CM_CHK_MOSSTATUS_GOTOFINISH(SetupHwDebugControl(renderHal, &mosCmdBuffer));
806     }
807 
808     // Adds granularity control for preemption for Gen9.
809     // Supporting Preemption granularity control reg for 3D and GPGPU mode for per ctx and with non-privileged access
810     if ( MEDIA_IS_SKU(state->skuTable, FtrPerCtxtPreemptionGranularityControl ))
811     {
812         MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
813         MOS_ZeroMemory( &loadRegImm, sizeof( MHW_MI_LOAD_REGISTER_IMM_PARAMS ) );
814 
815         loadRegImm.dwRegister = MHW_RENDER_ENGINE_PREEMPTION_CONTROL_OFFSET;
816 
817         // Same reg offset and value for gpgpu pipe and media pipe
818         if ( enableGpGpu )
819         {
820             if ( MEDIA_IS_SKU(state->skuTable, FtrGpGpuThreadGroupLevelPreempt )
821                 || MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidThreadLevelPreempt))
822             {
823                 //if FtrGpGpuThreadGroupLevelPreempt is true, still program the
824                 //it to MID_THREAD_GROUP.Gen9 doesn't support MID_THREAD level
825                 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
826                 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption( state->renderHal );
827             }
828             else if ( MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidBatchPreempt ))
829             {
830                 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
831                 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption( state->renderHal );
832             }
833             else
834             {
835                 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
836                 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
837             }
838         }
839         else
840         {
841             if ( MEDIA_IS_SKU(state->skuTable, FtrMediaThreadGroupLevelPreempt)
842                 || MEDIA_IS_SKU(state->skuTable, FtrMediaMidThreadLevelPreempt))
843             {
844                 //if FtrMediaMidThreadLevelPreempt is true, still program the
845                 //it to MID_THREAD_GROUP.Gen9 doesn't support MID_THREAD.
846                 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
847             }
848             else if ( MEDIA_IS_SKU(state->skuTable, FtrMediaMidBatchPreempt))
849             {
850                 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
851             }
852             else
853             {
854                 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
855                 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
856             }
857         }
858         CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiLoadRegisterImmCmd(&mosCmdBuffer, &loadRegImm ) );
859     }
860 
861     // Send Pipeline Select command
862     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
863 
864     // Send State Base Address command
865     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendStateBaseAddress(renderHal, &mosCmdBuffer));
866 
867     // Send Surface States
868     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSurfaces(renderHal, &mosCmdBuffer));
869 
870     if (enableGpGpu) {
871         if (csrEnable) {
872 
873             // Send CS_STALL pipe control
874             //Insert a pipe control as synchronization
875             pipeCtlParams = g_cRenderHal_InitPipeControlParams;
876             pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
877             pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
878             pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
879             pipeCtlParams.bDisableCSStall = 0;
880             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
881 
882         }
883 
884         if (sipEnable || csrEnable)
885         {
886             // Send SIP State
887             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSipStateCmd(renderHal, &mosCmdBuffer));
888 
889             CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
890                 osInterface,
891                 &state->csrResource,
892                 true,
893                 true));
894 
895             // Send csr base addr command
896             CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddGpgpuCsrBaseAddrCmd(&mosCmdBuffer, &state->csrResource));
897         }
898     }
899 
900     // Setup VFE State params. Each Renderer MUST call pfnSetVfeStateParams().
901     // See comment in pfnSetVfeStateParams() for details.
902     tmp = RENDERHAL_USE_MEDIA_THREADS_MAX;
903     if (state->maxHWThreadValues.userFeatureValue != 0)
904     {
905         if( state->maxHWThreadValues.userFeatureValue < renderHal->pHwCaps->dwMaxThreads)
906         {
907             tmp = state->maxHWThreadValues.userFeatureValue;
908         }
909     }
910     else if (state->maxHWThreadValues.apiValue != 0)
911     {
912         if( state->maxHWThreadValues.apiValue < renderHal->pHwCaps->dwMaxThreads)
913         {
914             tmp = state->maxHWThreadValues.apiValue;
915         }
916     }
917 
918     renderHal->pfnSetVfeStateParams(
919         renderHal,
920         MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
921         tmp,
922         state->taskParam->vfeCurbeSize,
923         state->taskParam->urbEntrySize,
924         &state->scoreboardParams);
925 
926     // Send VFE State
927     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer,
928                      renderHal->pRenderHalPltInterface->GetVfeStateParameters()));
929 
930     // Send CURBE Load
931     if (state->taskParam->vfeCurbeSize > 0)
932     {
933         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendCurbeLoad(renderHal, &mosCmdBuffer));
934     }
935 
936     // Send Interface Descriptor Load
937     if (state->dshEnabled)
938     {
939         PRENDERHAL_DYNAMIC_STATE dynamicState = stateHeap->pCurMediaState->pDynamicState;
940         idLoadParams.dwInterfaceDescriptorStartOffset = dynamicState->memoryBlock.GetOffset() +
941                                                         dynamicState->MediaID.dwOffset;
942         idLoadParams.dwInterfaceDescriptorLength      = dynamicState->MediaID.iCount * stateHeap->dwSizeMediaID;
943     }
944     else
945     {
946         idLoadParams.dwInterfaceDescriptorStartOffset = stateHeap->pCurMediaState->dwOffset + stateHeap->dwOffsetMediaID;
947         idLoadParams.dwInterfaceDescriptorLength      = renderHal->StateHeapSettings.iMediaIDs * stateHeap->dwSizeMediaID;
948     }
949     idLoadParams.pKernelState = nullptr;
950     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaIDLoadCmd(&mosCmdBuffer, &idLoadParams));
951 
952     HalOcaInterface::OnDispatch(mosCmdBuffer, *pOsContext, *renderHal->pMhwMiInterface, *pMmioRegisters);
953 
954     if (enableWalker)
955     {
956         // send media walker command, if required
957         for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
958         {
959             // Insert CONDITIONAL_BATCH_BUFFER_END
960             if ( taskParam->conditionalEndBitmap & ((uint64_t)1 << (i)))
961             {
962                 // this could be batch buffer end so need to update sync tag, media state flush, write end timestamp
963 
964                 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSyncTag(renderHal, &mosCmdBuffer));
965 
966                 // conditionally write timestamp
967                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_OsAddArtifactConditionalPipeControl(&miRegG9, state, &mosCmdBuffer, syncOffset, &taskParam->conditionalBBEndParams[i], tag));
968 
969                 // Insert conditional batch buffer end
970                 mhwMiInterface->AddMiConditionalBatchBufferEndCmd(&mosCmdBuffer, &taskParam->conditionalBBEndParams[i]);
971             }
972 
973             //Insert PIPE_CONTROL at two cases:
974             // 1. synchronization is set
975             // 2. the next kernel has dependency pattern
976             if((i > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (i-1))) ||
977                 (kernelParam[i]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
978             {
979                 //Insert a pipe control as synchronization
980                 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
981                 pipeCtlParams.presDest         = &state->renderTimeStampResource.osResource;
982                 pipeCtlParams.dwPostSyncOp     = MHW_FLUSH_NOWRITE;
983                 pipeCtlParams.dwFlushMode      = MHW_FLUSH_CUSTOM;
984                 pipeCtlParams.bInvalidateTextureCache = true;
985                 pipeCtlParams.bFlushRenderTargetCache = true;
986                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
987             }
988 
989             CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendMediaWalkerState(state, kernelParam[i], &mosCmdBuffer));
990         }
991     }
992     else if (enableGpGpu)
993     {
994         // send GPGPU walker command, if required
995 
996         for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
997         {
998             //Insert PIPE_CONTROL as synchronization if synchronization is set
999             if((i > 0) && (taskParam->syncBitmap & ((uint64_t)1 << (i-1))))
1000             {
1001                 //Insert a pipe control as synchronization
1002                 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1003                 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
1004                 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
1005                 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
1006                 pipeCtlParams.bInvalidateTextureCache = true;
1007                 pipeCtlParams.bFlushRenderTargetCache = true;
1008                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1009             }
1010 
1011             CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendGpGpuWalkerState(state, kernelParam[i], &mosCmdBuffer));
1012         }
1013     }
1014     else
1015     {
1016         // Send Start batch buffer command
1017         CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferStartCmd(
1018             &mosCmdBuffer,
1019             batchBuffer));
1020 
1021         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
1022         bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
1023 
1024         if ( (bbCmArgs->refCount == 1) ||
1025                  (state->taskParam->reuseBBUpdateMask == 1) )
1026         {
1027             // Add BB end command
1028             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(nullptr, batchBuffer));
1029         }
1030         else //reuse BB
1031         {
1032             // Skip BB end command
1033             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->SkipMiBatchBufferEndBb(batchBuffer));
1034         }
1035 
1036         // UnLock the batch buffer
1037         if ( (bbCmArgs->refCount == 1) ||
1038              (state->taskParam->reuseBBUpdateMask == 1) )
1039         {
1040             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnUnlockBB(renderHal, batchBuffer));
1041         }
1042     }
1043 
1044     // issue a PIPE_CONTROL to flush all caches and the stall the CS before
1045     // issuing a PIPE_CONTROL to write the timestamp
1046     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1047     pipeCtlParams.presDest      = &state->renderTimeStampResource.osResource;
1048     pipeCtlParams.dwPostSyncOp  = MHW_FLUSH_NOWRITE;
1049     pipeCtlParams.dwFlushMode   = MHW_FLUSH_WRITE_CACHE;
1050     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1051 
1052     if (state->svmBufferUsed || state->statelessBufferUsed)
1053     {
1054         // Find the SVM/statelessBuffer slot, patch it into this dummy pipe_control
1055         for (uint32_t i = 0; i < state->cmDeviceParam.maxBufferTableSize; i++)
1056         {
1057             //register resource here
1058             if (state->bufferTable[i].address)
1059             {
1060                 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
1061                     osInterface,
1062                     &state->bufferTable[i].osResource,
1063                     true,
1064                     false));
1065 
1066                 // sync resource
1067                 MOS_SURFACE mosSurface;
1068                 MOS_ZeroMemory(&mosSurface, sizeof(mosSurface));
1069                 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetResourceInfo(
1070                         osInterface,
1071                         &state->bufferTable[i].osResource,
1072                         &mosSurface));
1073                 mosSurface.OsResource = state->bufferTable[i].osResource;
1074 
1075                 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(HalCm_SurfaceSync(state, &mosSurface, false));
1076             }
1077         }
1078     }
1079 
1080     // Send Sync Tag
1081     CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
1082 
1083     // Update tracker resource
1084     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdateTrackerResource(state, &mosCmdBuffer, tag));
1085 
1086     // issue a PIPE_CONTROL to write timestamp
1087     syncOffset += sizeof(uint64_t);
1088     pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1089     pipeCtlParams.presDest          = &state->renderTimeStampResource.osResource;
1090     pipeCtlParams.dwResourceOffset  = syncOffset;
1091     pipeCtlParams.dwPostSyncOp      = MHW_FLUSH_WRITE_TIMESTAMP_REG;
1092     pipeCtlParams.dwFlushMode       = MHW_FLUSH_READ_CACHE;
1093     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
1094 
1095     // Record registers by unified media profiler in the end
1096     if (state->perfProfiler != nullptr)
1097     {
1098         CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectEndCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
1099     }
1100 
1101     // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
1102     MHW_PIPE_CONTROL_PARAMS pipeControlParams;
1103 
1104     MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
1105     pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
1106     pipeControlParams.bGenericMediaStateClear = true;
1107     pipeControlParams.bIndirectStatePointersDisable = true;
1108     pipeControlParams.bDisableCSStall = false;
1109     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeControlParams));
1110 
1111     if (MEDIA_IS_WA(renderHal->pWaTable, WaSendDummyVFEafterPipelineSelect))
1112     {
1113         MHW_VFE_PARAMS vfeStateParams = {};
1114         vfeStateParams.dwNumberofURBEntries = 1;
1115         CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer, &vfeStateParams));
1116     }
1117 
1118     HalOcaInterface::On1stLevelBBEnd(mosCmdBuffer, *pOsContext);
1119 
1120     //Couple to the BB_START , otherwise GPU Hang without it in KMD.
1121     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
1122 
1123     // Return unused command buffer space to OS
1124     osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
1125 
1126 #if MDF_COMMAND_BUFFER_DUMP
1127     if (state->dumpCommandBuffer)
1128     {
1129         state->pfnDumpCommadBuffer(
1130             state,
1131             &mosCmdBuffer,
1132             offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
1133             mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD::byteSize);
1134     }
1135 #endif
1136 
1137 #if MDF_SURFACE_STATE_DUMP
1138     if (state->dumpSurfaceState)
1139     {
1140         state->pfnDumpSurfaceState(
1141             state,
1142             offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
1143             mhw_state_heap_g9_X::RENDER_SURFACE_STATE_CMD::byteSize);
1144 
1145     }
1146 #endif
1147 
1148     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGlobalTime(&state->taskTimeStamp->submitTimeInCpu[taskId]));
1149     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGpuTime(state, &state->taskTimeStamp->submitTimeInGpu[taskId]));
1150 
1151     // Submit command buffer
1152 #if (_RELEASE_INTERNAL || _DEBUG)
1153 #if defined (CM_DIRECT_GUC_SUPPORT)
1154     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitWorkQueue(osInterface, MOS_GPU_NODE_3D, batchbufferaddress));
1155 #endif
1156 #endif
1157 #if !defined (CM_DIRECT_GUC_SUPPORT)
1158     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
1159         &mosCmdBuffer,
1160         state->nullHwRenderCm));
1161 #endif
1162 
1163     if (state->nullHwRenderCm == false)
1164     {
1165         stateHeap->pCurMediaState->bBusy = true;
1166         if ( !enableWalker && !enableGpGpu )
1167         {
1168             batchBuffer->bBusy     = true;
1169             batchBuffer->dwSyncTag = syncTag;
1170         }
1171     }
1172 
1173     // reset API call number of HW threads
1174     state->maxHWThreadValues.apiValue = 0;
1175 
1176     state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
1177 
1178     eStatus = MOS_STATUS_SUCCESS;
1179 
1180 finish:
1181     // Failed -> discard all changes in Command Buffer
1182     if (eStatus != MOS_STATUS_SUCCESS)
1183     {
1184         // Buffer overflow - display overflow size
1185         if (mosCmdBuffer.iRemaining < 0)
1186         {
1187             CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
1188         }
1189 
1190         // Move command buffer back to beginning
1191         tmp = remaining - mosCmdBuffer.iRemaining;
1192         mosCmdBuffer.iRemaining  = remaining;
1193         mosCmdBuffer.iOffset    -= tmp;
1194         mosCmdBuffer.pCmdPtr     = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset/sizeof(uint32_t);
1195 
1196         // Return unused command buffer space to OS
1197         osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
1198     }
1199 
1200     return eStatus;
1201 }
1202 
GetCopyKernelIsa(void * & isa,uint32_t & isaSize)1203 MOS_STATUS CM_HAL_G9_X::GetCopyKernelIsa(void  *&isa, uint32_t &isaSize)
1204 {
1205     isa = (void *)pGPUCopy_kernel_isa_gen9;
1206     isaSize = iGPUCopy_kernel_isa_size_gen9;
1207 
1208     return MOS_STATUS_SUCCESS;
1209 }
1210 
GetInitKernelIsa(void * & isa,uint32_t & isaSize)1211 MOS_STATUS CM_HAL_G9_X::GetInitKernelIsa(void  *&isa, uint32_t &isaSize)
1212 {
1213     isa = (void *)pGPUInit_kernel_isa_Gen9;
1214     isaSize = iGPUInit_kernel_isa_size_Gen9;
1215 
1216     return MOS_STATUS_SUCCESS;
1217 }
1218 
UpdatePlatformInfoFromPower(PCM_PLATFORM_INFO platformInfo,bool euSaturated)1219 MOS_STATUS CM_HAL_G9_X::UpdatePlatformInfoFromPower(
1220     PCM_PLATFORM_INFO platformInfo,
1221     bool              euSaturated)
1222 {
1223     PCM_HAL_STATE              state     = m_cmState;
1224     PRENDERHAL_INTERFACE       renderHal = state->renderHal;
1225     CM_POWER_OPTION            cmPower;
1226 
1227     if ( state->requestSingleSlice ||
1228          renderHal->bRequestSingleSlice ||
1229         (state->powerOption.nSlice != 0 && state->powerOption.nSlice < platformInfo->numSlices))
1230     {
1231         platformInfo->numSubSlices = platformInfo->numSubSlices / platformInfo->numSlices;
1232         if (state->powerOption.nSlice > 1)
1233         {
1234             platformInfo->numSubSlices *= state->powerOption.nSlice;
1235             platformInfo->numSlices     = state->powerOption.nSlice;
1236         }
1237         else
1238         {
1239             platformInfo->numSlices     = 1;
1240         }
1241     }
1242     else if (euSaturated)
1243     {
1244         // No SSD and EU Saturation, request maximum number of slices/subslices/EUs
1245         cmPower.nSlice    = (uint16_t)platformInfo->numSlices;
1246         cmPower.nSubSlice = (uint16_t)platformInfo->numSubSlices;
1247         cmPower.nEU       = (uint16_t)(platformInfo->numEUsPerSubSlice * platformInfo->numSubSlices);
1248 
1249         state->pfnSetPowerOption(state, &cmPower);
1250     }
1251 
1252     return MOS_STATUS_SUCCESS;
1253 }
1254 
GetMediaWalkerMaxThreadWidth()1255 uint32_t CM_HAL_G9_X::GetMediaWalkerMaxThreadWidth()
1256 {
1257     return CM_MAX_THREADSPACE_WIDTH_SKLUP_FOR_MW;
1258 }
1259 
GetMediaWalkerMaxThreadHeight()1260 uint32_t CM_HAL_G9_X::GetMediaWalkerMaxThreadHeight()
1261 {
1262     return CM_MAX_THREADSPACE_HEIGHT_SKLUP_FOR_MW;
1263 }
1264 
GetHwSurfaceBTIInfo(PCM_SURFACE_BTI_INFO btiInfo)1265 MOS_STATUS CM_HAL_G9_X::GetHwSurfaceBTIInfo(
1266                       PCM_SURFACE_BTI_INFO btiInfo)
1267 {
1268     if (btiInfo == nullptr)
1269     {
1270         return MOS_STATUS_NULL_POINTER;
1271     }
1272 
1273     btiInfo->normalSurfaceStart      =  CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS + \
1274                         CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER ;
1275     btiInfo->normalSurfaceEnd        =  GT_RESERVED_INDEX_START_GEN9_PLUS - 1;
1276     btiInfo->reservedSurfaceStart    =  CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS;
1277     btiInfo->reservedSurfaceEnd      =  CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER;
1278 
1279     return MOS_STATUS_SUCCESS;
1280 }
1281 
SetSuggestedL3Conf(L3_SUGGEST_CONFIG l3Config)1282 MOS_STATUS CM_HAL_G9_X::SetSuggestedL3Conf(
1283             L3_SUGGEST_CONFIG l3Config)
1284 {
1285     if (l3Config >= sizeof(SKL_L3_PLANE)/sizeof(L3ConfigRegisterValues))
1286     {
1287         return MOS_STATUS_INVALID_PARAMETER;
1288     }
1289     return HalCm_SetL3Cache((L3ConfigRegisterValues *)&SKL_L3_PLANE[l3Config],
1290                                      &m_cmState->l3Settings);
1291 }
1292 
AllocateSIPCSRResource()1293 MOS_STATUS CM_HAL_G9_X::AllocateSIPCSRResource()
1294 {
1295     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1296     if (Mos_ResourceIsNull(&m_cmState->sipResource.osResource))
1297     {
1298         CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateSipResource(m_cmState)); // create  sip resource if it does not exist
1299         CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateCSRResource(m_cmState));
1300     }
1301 
1302     return eStatus;
1303 }
1304 
GetGenStepInfo(char * & stepInfoStr)1305 MOS_STATUS CM_HAL_G9_X::GetGenStepInfo(char*& stepInfoStr)
1306 {
1307     const char *cmSteppingInfo[] = { "A", "B", "C", "D", "E", "F",
1308                                          "G", "H", "I", "J" };
1309     uint32_t genStepId = m_cmState->platform.usRevId;
1310 
1311     if (m_steppingTable.size() != 0) //check if the stepping table been overwritten
1312     {
1313         if (genStepId < m_steppingTable.size())
1314         {
1315             stepInfoStr = (char *)m_steppingTable[genStepId];
1316         }
1317         else
1318         {
1319             stepInfoStr = nullptr;
1320         }
1321     }
1322     else
1323     {
1324         if (genStepId < sizeof(cmSteppingInfo)/sizeof(const char *))
1325         {
1326             stepInfoStr = (char *)cmSteppingInfo[genStepId];
1327         }
1328         else
1329         {
1330             stepInfoStr = nullptr;
1331     }
1332     }
1333 
1334     return MOS_STATUS_SUCCESS;
1335 }
1336 
ColorCountSanityCheck(uint32_t colorCount)1337 int32_t CM_HAL_G9_X::ColorCountSanityCheck(uint32_t colorCount)
1338 {
1339     if (colorCount == CM_INVALID_COLOR_COUNT || colorCount > CM_THREADSPACE_MAX_COLOR_COUNT)
1340     {
1341         CM_ASSERTMESSAGE("Error: Invalid color count.");
1342         return CM_INVALID_ARG_VALUE;
1343     }
1344     return CM_SUCCESS;
1345 }
1346 
MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)1347 bool CM_HAL_G9_X::MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)
1348 {
1349     if ( memCtrl > MEMORY_OBJECT_CONTROL_SKL_NO_CACHE )
1350     {
1351         return false;
1352     }
1353 
1354     return true;
1355 }
1356 
GetConvSamplerIndex(PMHW_SAMPLER_STATE_PARAM samplerParam,char * samplerIndexTable,int32_t nSamp8X8Num,int32_t nSampConvNum)1357 int32_t CM_HAL_G9_X::GetConvSamplerIndex(
1358     PMHW_SAMPLER_STATE_PARAM  samplerParam,
1359     char                      *samplerIndexTable,
1360     int32_t                   nSamp8X8Num,
1361     int32_t                   nSampConvNum)
1362 {
1363     int32_t samplerIndex = 0;
1364 
1365     if ((samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D) &&
1366         (samplerParam->Convolve.skl_mode))
1367     {
1368         // 2D convolve & SKL+
1369         samplerIndex = 1 + nSampConvNum + nSamp8X8Num;
1370     }
1371     else if (samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1D)
1372     {
1373         // 1D convolve & SKL+
1374         samplerIndex = nSampConvNum;
1375     }
1376     else
1377     {
1378         // 1P convolve SKL+
1379         samplerIndex = 1 + (nSamp8X8Num + nSampConvNum) * 2;
1380         while (samplerIndexTable[samplerIndex] != CM_INVALID_INDEX)
1381         {
1382             samplerIndex += 2;
1383         }
1384 
1385     }
1386     return samplerIndex;
1387 }
1388 
SetL3CacheConfig(const L3ConfigRegisterValues * values,PCmHalL3Settings cmHalL3Setting)1389 MOS_STATUS CM_HAL_G9_X::SetL3CacheConfig(
1390             const L3ConfigRegisterValues *values,
1391             PCmHalL3Settings cmHalL3Setting)
1392 {
1393     return HalCm_SetL3Cache( values, cmHalL3Setting );
1394 }
1395 
GetSamplerParamInfoForSamplerType(PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,SamplerParam & samplerParam)1396 MOS_STATUS CM_HAL_G9_X::GetSamplerParamInfoForSamplerType(
1397             PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,
1398             SamplerParam  &samplerParam)
1399 {
1400     const unsigned int samplerElementSize[MAX_ELEMENT_TYPE_COUNT] = {16, 32, 64, 128, 1024, 2048};
1401 
1402     // gets element_type
1403     switch (mhwSamplerParam->SamplerType)
1404     {
1405         case MHW_SAMPLER_TYPE_3D:
1406             samplerParam.elementType = MHW_Sampler1Element;
1407             break;
1408         case MHW_SAMPLER_TYPE_MISC:
1409             samplerParam.elementType = MHW_Sampler2Elements;
1410             break;
1411         case MHW_SAMPLER_TYPE_CONV:
1412             if ((!mhwSamplerParam->Convolve.skl_mode &&
1413                  mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
1414                 || mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P)
1415             {
1416                 samplerParam.elementType = MHW_Sampler64Elements;
1417             }
1418             else if (mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1D)
1419             {
1420                 samplerParam.elementType = MHW_Sampler8Elements;
1421             }
1422             else
1423             {
1424                 samplerParam.elementType = MHW_Sampler128Elements;
1425             }
1426             break;
1427         case MHW_SAMPLER_TYPE_AVS:
1428             samplerParam.elementType = MHW_Sampler128Elements;
1429             break;
1430         default:
1431             break;
1432     }
1433 
1434     // bti_stepping for BDW mode convolve or 1P convolve is 2, other cases are 1.
1435     if ((mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV) && ((!mhwSamplerParam->Convolve.skl_mode &&
1436                                                                        mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D)
1437                                                                       || mhwSamplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1P))
1438     {
1439         samplerParam.btiStepping = 2;
1440     }
1441     else
1442     {
1443         samplerParam.btiStepping = 1;
1444     }
1445 
1446     // gets multiplier
1447     samplerParam.btiMultiplier = samplerElementSize[samplerParam.elementType] / samplerParam.btiStepping;
1448 
1449     // gets size
1450     samplerParam.size = samplerElementSize[samplerParam.elementType];
1451 
1452     // Temporary solution for conv because MHW use 2048 bytes for all of the convolve samplers.
1453     // size should always be equal to bti_stepping * bti_multiplier except for this one.
1454     if (mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV)
1455     {
1456         samplerParam.size = 2048;
1457     }
1458 
1459     return MOS_STATUS_SUCCESS;
1460 }
1461 
GetExpectedGtSystemConfig(PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)1462 MOS_STATUS CM_HAL_G9_X::GetExpectedGtSystemConfig(
1463     PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)
1464 {
1465     if (m_genGT == PLATFORM_INTEL_GT1)
1466     {
1467         expectedConfig->numSlices    = SKL_GT1_MAX_NUM_SLICES;
1468         expectedConfig->numSubSlices = SKL_GT1_MAX_NUM_SUBSLICES;
1469     }
1470     else if (m_genGT == PLATFORM_INTEL_GT1_5)
1471     {
1472         expectedConfig->numSlices    = SKL_GT1_5_MAX_NUM_SLICES;
1473         expectedConfig->numSubSlices = SKL_GT1_5_MAX_NUM_SUBSLICES;
1474     }
1475     else if (m_genGT == PLATFORM_INTEL_GT2)
1476     {
1477         expectedConfig->numSlices    = SKL_GT2_MAX_NUM_SLICES;
1478         expectedConfig->numSubSlices = SKL_GT2_MAX_NUM_SUBSLICES;
1479     }
1480     else if (m_genGT == PLATFORM_INTEL_GT3)
1481     {
1482         expectedConfig->numSlices    = SKL_GT3_MAX_NUM_SLICES;
1483         expectedConfig->numSubSlices = SKL_GT3_MAX_NUM_SUBSLICES;
1484     }
1485     else if (m_genGT == PLATFORM_INTEL_GT4)
1486     {
1487         expectedConfig->numSlices    = SKL_GT4_MAX_NUM_SLICES;
1488         expectedConfig->numSubSlices = SKL_GT4_MAX_NUM_SUBSLICES;
1489     }
1490     else
1491     {
1492         expectedConfig->numSlices    = 0;
1493         expectedConfig->numSubSlices = 0;
1494     }
1495 
1496     return MOS_STATUS_SUCCESS;
1497 }
1498 
ConverTicksToNanoSecondsDefault(uint64_t ticks)1499 uint64_t CM_HAL_G9_X::ConverTicksToNanoSecondsDefault(uint64_t ticks)
1500 {
1501     if (m_platformID == PLATFORM_INTEL_BXT || m_platformID == PLATFORM_INTEL_GLK)
1502     {
1503         return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G9LP);
1504     }
1505     else
1506     {
1507         return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G9);
1508     }
1509 }
1510 
1511