/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Wei Lin<wei.w.lin@intel.com>
 *     Yuting Yang<yuting.yang@intel.com>
 */
28 #include "hal_cm.h"
29 #include "hw_interface_g75.h"
30 #include "hal_cm_g75.h"
31
HalCm_SubmitCommands_g75(PCM_HAL_STATE pState,PGENHW_BATCH_BUFFER pBatchBuffer,INT iTaskId,PCM_HAL_KERNEL_PARAM * pKernels,PVOID * ppCmdBuffer)32 GENOS_STATUS HalCm_SubmitCommands_g75(PCM_HAL_STATE pState,
33 PGENHW_BATCH_BUFFER pBatchBuffer,
34 INT iTaskId,
35 PCM_HAL_KERNEL_PARAM * pKernels,
36 PVOID * ppCmdBuffer)
37 {
38 GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
39 PGENHW_HW_INTERFACE pHwInterface = pState->pHwInterface;
40 PGENOS_INTERFACE pOsInterface = pHwInterface->pOsInterface;
41 PIPELINE_SELECT_CMD_G5 cmd_select =
42 *(pHwInterface->pHwCommands->pPipelineSelectMedia);
43 INT iRemaining = 0;
44 BOOL enableWalker = pState->WalkerParams.CmWalkerEnable;
45 BOOL enableGpGpu = pState->pTaskParam->blGpGpuWalkerEnabled;
46 GENOS_COMMAND_BUFFER CmdBuffer;
47 GENHW_L3_CACHE_CONFIG L3CacheConfig;
48 DWORD dwSyncTag;
49 PINT64 pTaskSyncLocation;
50 INT iSyncOffset;
51 INT iTmp;
52 PCM_HAL_TASK_PARAM pTaskParam = pState->pTaskParam;
53
54 GENOS_ZeroMemory(&CmdBuffer, sizeof(GENOS_COMMAND_BUFFER));
55
56 iSyncOffset = pState->pfnGetTaskSyncLocation(iTaskId);
57
58 pTaskSyncLocation = (PINT64) (pState->TsResource.pData + iSyncOffset);
59 *pTaskSyncLocation = CM_INVALID_INDEX;
60 *(pTaskSyncLocation + 1) = CM_INVALID_INDEX;
61
62 if (!enableWalker && !enableGpGpu) {
63 CM_HRESULT2GENOSSTATUS_AND_CHECK
64 (pOsInterface->pfnRegisterResource
65 (pOsInterface, &pBatchBuffer->OsResource, TRUE, TRUE));
66 }
67 CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnRegisterResource
68 (pOsInterface,
69 &pState->TsResource.OsResource, TRUE,
70 TRUE));
71
72 CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnGetCommandBuffer
73 (pOsInterface, &CmdBuffer));
74 iRemaining = CmdBuffer.iRemaining;
75
76 if (pState->bEUSaturationEnabled) {
77 CM_CHK_GENOSSTATUS
78 (pState->pfnSendCommandBufferHeaderEUSaturation
79 (pState, &CmdBuffer));
80 } else {
81 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendCommandBufferHeader
82 (pHwInterface, &CmdBuffer));
83 }
84
85 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
86 &CmdBuffer,
87 &pState->
88 TsResource.OsResource,
89 TRUE, iSyncOffset,
90 GFX3DCONTROLOP_WRITETIMESTAMP,
91 GFX3DFLUSH_WRITE_CACHE,
92 0));
93
94 dwSyncTag = pHwInterface->pGeneralStateHeap->dwNextTag++;
95
96 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendSyncTag
97 (pHwInterface, &CmdBuffer));
98
99 if (pState->L3Config.L3_SQCREG1 || pState->L3Config.L3_CNTLREG2 || pState->L3Config.L3_CNTLREG3){
100 L3CacheConfig.dwL3SQCReg1 = pState->L3Config.L3_SQCREG1;
101 L3CacheConfig.dwL3CntlReg2 = pState->L3Config.L3_CNTLREG2;
102 L3CacheConfig.dwL3CntlReg3 = pState->L3Config.L3_CNTLREG3;
103 }
104 else {
105 L3CacheConfig.dwL3SQCReg1 = CM_CONFIG_SQCREG1_VALUE_G75;
106 L3CacheConfig.dwL3CntlReg2 = pState->bSLMMode ? CM_CONFIG_CNTLREG2_VALUE_G75_SLM: CM_CONFIG_CNTLREG2_VALUE_G75_NONSLM;
107 L3CacheConfig.dwL3CntlReg3 = pState->bSLMMode ? CM_CONFIG_CNTLREG3_VALUE_G75_SLM : CM_CONFIG_CNTLREG3_VALUE_G75_NONSLM;
108 }
109 L3CacheConfig.dwL3LRA1Reg = CM_CONFIG_L3LRA1_VALUE_G75;
110
111 HalCm_HwSendL3CacheConfig_g75(pState, &CmdBuffer,&L3CacheConfig);
112
113 if (pHwInterface->bSysRoutine) {
114 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendDebugCtl(pHwInterface,
115 &CmdBuffer));
116 }
117
118 if (enableGpGpu) {
119 cmd_select.DW0.PipelineSelect = GFXPIPELINE_GPGPU;
120 }
121 CM_CHK_GENOSSTATUS(IntelGen_OsAddCommand(&CmdBuffer,
122 &cmd_select,
123 sizeof
124 (PIPELINE_SELECT_CMD_G5)));
125
126 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendStateBaseAddr
127 (pHwInterface, &CmdBuffer));
128
129 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendSurfaces
130 (pHwInterface, &CmdBuffer));
131
132 if (pHwInterface->bSysRoutine) {
133 // Send the SIP_STATE if we loaded a system routine
134 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendStateSip(pHwInterface,
135 &CmdBuffer));
136 }
137
138 iTmp = GENHW_USE_MEDIA_THREADS_MAX;
139 if (pState->MaxHWThreadValues.registryValue != 0) {
140 if (pState->MaxHWThreadValues.registryValue <
141 pHwInterface->pHwCaps->dwMaxThreads) {
142 iTmp = pState->MaxHWThreadValues.registryValue;
143 }
144 } else if (pState->MaxHWThreadValues.APIValue != 0) {
145 if (pState->MaxHWThreadValues.APIValue <
146 pHwInterface->pHwCaps->dwMaxThreads) {
147 iTmp = pState->MaxHWThreadValues.APIValue;
148 }
149 }
150
151 pHwInterface->pfnSetVfeStateParams(pHwInterface,
152 0,
153 iTmp,
154 pState->pTaskParam->dwVfeCurbeSize,
155 pState->pTaskParam->dwUrbEntrySize,
156 &pState->ScoreboardParams);
157
158 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendVfeState
159 (pHwInterface, &CmdBuffer, enableGpGpu));
160
161 if (pState->pTaskParam->dwVfeCurbeSize > 0) {
162 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendCurbeLoad
163 (pHwInterface, &CmdBuffer));
164 }
165 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendIDLoad
166 (pHwInterface, &CmdBuffer));
167
168 if (enableWalker) {
169 for (UINT i = 0; i < pState->pTaskParam->uiNumKernels; i++) {
170 if ((i > 0)
171 &&
172 ((pTaskParam->uiSyncBitmap &
173 ((UINT64) 1 << (i - 1)))
174 || (pKernels[i]->
175 CmKernelThreadSpaceParam.patternType !=
176 CM_DEPENDENCY_NONE))) {
177 CM_CHK_GENOSSTATUS
178 (pHwInterface->pfnSendPipeControl
179 (pHwInterface, &CmdBuffer,
180 &pState->TsResource.OsResource, FALSE, 0,
181 GFX3DCONTROLOP_NOWRITE,
182 GFX3DFLUSH_WRITE_CACHE, 0));
183 }
184
185 CM_CHK_GENOSSTATUS(pState->pfnSendMediaWalkerState
186 (pState, pKernels[i], &CmdBuffer));
187 }
188 } else if (enableGpGpu) {
189 for (UINT i = 0; i < pState->pTaskParam->uiNumKernels; i++) {
190 if ((i > 0)
191 && (pTaskParam->uiSyncBitmap &
192 ((UINT64) 1 << (i - 1)))) {
193 CM_CHK_GENOSSTATUS
194 (pHwInterface->pfnSendPipeControl
195 (pHwInterface, &CmdBuffer,
196 &pState->TsResource.OsResource, FALSE, 0,
197 GFX3DCONTROLOP_NOWRITE,
198 GFX3DFLUSH_WRITE_CACHE, 0));
199 }
200
201 CM_CHK_GENOSSTATUS(pState->pfnSendGpGpuWalkerState
202 (pState, pKernels[i], &CmdBuffer));
203 }
204 } else {
205 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendBatchBufferStart
206 (pHwInterface, &CmdBuffer, pBatchBuffer));
207
208 if ((pBatchBuffer->pBBRenderData->BbArgs.BbCmArgs.uiRefCount ==
209 1) || (pState->pTaskParam->reuseBBUpdateMask == 1)) {
210 pHwInterface->pfnAddBatchBufferEndCmdBb(pHwInterface,
211 pBatchBuffer);
212 } else {
213 pHwInterface->pfnSkipBatchBufferEndCmdBb(pHwInterface,
214 pBatchBuffer);
215 }
216
217 if ((pBatchBuffer->pBBRenderData->BbArgs.BbCmArgs.uiRefCount ==
218 1) || (pState->pTaskParam->reuseBBUpdateMask == 1)) {
219 CM_CHK_GENOSSTATUS(pHwInterface->pfnUnlockBB
220 (pHwInterface, pBatchBuffer));
221 }
222 }
223
224 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
225 &CmdBuffer,
226 &pState->
227 TsResource.OsResource,
228 FALSE, 0,
229 GFX3DCONTROLOP_NOWRITE,
230 GFX3DFLUSH_WRITE_CACHE,
231 0));
232
233 iSyncOffset += sizeof(UINT64);
234 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
235 &CmdBuffer,
236 &pState->
237 TsResource.OsResource,
238 TRUE, iSyncOffset,
239 GFX3DCONTROLOP_WRITETIMESTAMP,
240 GFX3DFLUSH_READ_CACHE,
241 0));
242
243 CM_CHK_GENOSSTATUS(pHwInterface->pfnSendBatchBufferEnd
244 (pHwInterface, &CmdBuffer));
245
246 pOsInterface->pfnReturnCommandBuffer(pOsInterface, &CmdBuffer);
247
248 CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnSubmitCommandBuffer
249 (pOsInterface, &CmdBuffer,
250 pState->bNullHwRenderCm));
251
252 if (pState->bNullHwRenderCm == FALSE) {
253 pHwInterface->pGeneralStateHeap->pCurMediaState->bBusy = TRUE;
254 if (!enableWalker && !enableGpGpu) {
255 pBatchBuffer->bBusy = TRUE;
256 pBatchBuffer->dwSyncTag = dwSyncTag;
257 }
258 }
259 pState->MaxHWThreadValues.APIValue = 0;
260
261 pState->bEUSaturationEnabled = FALSE;
262 pState->bEUSaturationNoSSD = FALSE;
263
264 if (ppCmdBuffer) {
265 drm_intel_bo_reference(CmdBuffer.OsResource.bo);
266 *ppCmdBuffer = CmdBuffer.OsResource.bo;
267 }
268
269 hr = GENOS_STATUS_SUCCESS;
270
271 finish:
272 if (hr != GENOS_STATUS_SUCCESS) {
273 if (CmdBuffer.iRemaining < 0) {
274 GENHW_PUBLIC_ASSERTMESSAGE
275 ("Command Buffer overflow by %d bytes.",
276 -CmdBuffer.iRemaining);
277 }
278 iTmp = iRemaining - CmdBuffer.iRemaining;
279 CmdBuffer.iRemaining = iRemaining;
280 CmdBuffer.iOffset -= iTmp;
281 CmdBuffer.pCmdPtr =
282 CmdBuffer.pCmdBase + CmdBuffer.iOffset / sizeof(DWORD);
283
284 pOsInterface->pfnReturnCommandBuffer(pOsInterface, &CmdBuffer);
285 }
286
287 return hr;
288 }
289
HalCm_HwSetSurfaceMemoryObjectControl_g75(PCM_HAL_STATE pState,WORD wMemObjCtl,PGENHW_SURFACE_STATE_PARAMS pParams)290 GENOS_STATUS HalCm_HwSetSurfaceMemoryObjectControl_g75(PCM_HAL_STATE pState,
291 WORD wMemObjCtl,
292 PGENHW_SURFACE_STATE_PARAMS
293 pParams)
294 {
295 GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
296 const WORD rawCacheType = (wMemObjCtl & CM_MEMOBJCTL_CACHE_MASK) >> 8;
297
298 CM_HAL_MEMORY_OBJECT_CONTROL_G75 cache_type =
299 (CM_HAL_MEMORY_OBJECT_CONTROL_G75) rawCacheType;
300
301 if (rawCacheType == CM_INVALID_MEMOBJCTL) {
302 cache_type = CM_MEMORY_OBJECT_CONTROL_L3_LLC_ELLC_WB_CACHED;
303 }
304
305 if (cache_type < CM_MEMORY_OBJECT_CONTROL_USE_PTE
306 || cache_type > CM_MEMORY_OBJECT_CONTROL_L3_ELLC_WB_CACHED) {
307 hr = GENOS_STATUS_UNKNOWN;
308 return hr;
309 }
310
311 pParams->MemObjCtl = cache_type;
312
313 return hr;
314 }
315
HalCm_HwSendL3CacheConfig_g75(PCM_HAL_STATE pState,PGENOS_COMMAND_BUFFER pCmdBuffer,PGENHW_L3_CACHE_CONFIG pL3CacheConfig)316 VOID HalCm_HwSendL3CacheConfig_g75(PCM_HAL_STATE pState,
317 PGENOS_COMMAND_BUFFER pCmdBuffer,
318 PGENHW_L3_CACHE_CONFIG pL3CacheConfig)
319 {
320 GENHW_LOAD_REGISTER_IMM_PARAM LoadRegImm;
321 PGENHW_HW_INTERFACE pHwInterface = pState->pHwInterface;
322
323 GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
324 LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_CNTLREG2_G75;
325 LoadRegImm.dwData = pL3CacheConfig->dwL3CntlReg2;
326 pHwInterface->pfnSendLoadRegImmCmd
327 (pHwInterface, pCmdBuffer, &LoadRegImm);
328
329 GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
330 LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_CNTLREG3_G75;
331 LoadRegImm.dwData = pL3CacheConfig->dwL3CntlReg3;
332
333 pHwInterface->pfnSendLoadRegImmCmd
334 (pHwInterface, pCmdBuffer, &LoadRegImm);
335
336 GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
337 LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_SQCREG1_G75;
338 LoadRegImm.dwData = pL3CacheConfig->dwL3SQCReg1;
339
340 pHwInterface->pfnSendLoadRegImmCmd
341 (pHwInterface, pCmdBuffer, &LoadRegImm);
342
343 GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
344 LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_L3LRA1_G75;
345 LoadRegImm.dwData = pL3CacheConfig->dwL3LRA1Reg;
346
347 pHwInterface->pfnSendLoadRegImmCmd
348 (pHwInterface, pCmdBuffer, &LoadRegImm);
349
350 return;
351 }
352
HalCm_GetPerThreadScratchSpaceSize_g75()353 UINT HalCm_GetPerThreadScratchSpaceSize_g75()
354 {
355 return CM_MAX_SPILL_SIZE_PER_THREAD_HSW_BDW;
356 }
357
HalCm_AddMediaStateFlushBb_g75(PGENHW_HW_INTERFACE pHwInterface,PGENHW_BATCH_BUFFER pBatchBuffer)358 GENOS_STATUS HalCm_AddMediaStateFlushBb_g75(PGENHW_HW_INTERFACE pHwInterface,
359 PGENHW_BATCH_BUFFER pBatchBuffer)
360 {
361 return GENOS_STATUS_SUCCESS;
362 }
363
/*
 * HalCm_GetTaskSyncLocation_g75
 *
 * Converts a task id into an offset: each task owns
 * CM_SYNC_QWORD_PER_TASK entries of sizeof(UINT64) bytes, so the result is
 * iTaskId * CM_SYNC_QWORD_PER_TASK * sizeof(UINT64), converted to INT.
 */
INT HalCm_GetTaskSyncLocation_g75(INT iTaskId)
{
    /* Kept as one expression so the arithmetic types match the original. */
    const INT iSlotOffset =
        (iTaskId * CM_SYNC_QWORD_PER_TASK * sizeof(UINT64));

    return iSlotOffset;
}
368
HalCm_GetCurbeBlockAlignSize_g75()369 INT HalCm_GetCurbeBlockAlignSize_g75()
370 {
371 return GENHW_CURBE_BLOCK_ALIGN_G7;
372 }
373
HalCm_GetUserDefinedThreadCountPerThreadGroup_g75(PCM_HAL_STATE pState,UINT * pThreadsPerThreadGroup)374 GENOS_STATUS HalCm_GetUserDefinedThreadCountPerThreadGroup_g75(PCM_HAL_STATE
375 pState,
376 UINT *
377 pThreadsPerThreadGroup)
378 {
379 GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
380 int threads_per_eu = 0;
381 int eu_per_subslice = 0;
382 if (pState->pHwInterface->Platform.GtType == GTTYPE_GT1) {
383 threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT1;
384 eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT1;
385 } else if (pState->pHwInterface->Platform.GtType == GTTYPE_GT2) {
386 threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT2;
387 eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT2;
388 } else if (pState->pHwInterface->Platform.GtType == GTTYPE_GT3) {
389 threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT3;
390 eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT3;
391 } else {
392 threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT2;
393 eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT2;
394 }
395
396 *pThreadsPerThreadGroup = threads_per_eu * eu_per_subslice;
397 return hr;
398 }
399