/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Wei Lin<wei.w.lin@intel.com>
 *     Yuting Yang<yuting.yang@intel.com>
 */
28 #include "hal_cm.h"
29 #include "hw_interface_g75.h"
30 #include "hal_cm_g75.h"
31 
HalCm_SubmitCommands_g75(PCM_HAL_STATE pState,PGENHW_BATCH_BUFFER pBatchBuffer,INT iTaskId,PCM_HAL_KERNEL_PARAM * pKernels,PVOID * ppCmdBuffer)32 GENOS_STATUS HalCm_SubmitCommands_g75(PCM_HAL_STATE pState,
33 				      PGENHW_BATCH_BUFFER pBatchBuffer,
34 				      INT iTaskId,
35 				      PCM_HAL_KERNEL_PARAM * pKernels,
36 				      PVOID * ppCmdBuffer)
37 {
38 	GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
39 	PGENHW_HW_INTERFACE pHwInterface = pState->pHwInterface;
40 	PGENOS_INTERFACE pOsInterface = pHwInterface->pOsInterface;
41 	PIPELINE_SELECT_CMD_G5 cmd_select =
42 	    *(pHwInterface->pHwCommands->pPipelineSelectMedia);
43 	INT iRemaining = 0;
44 	BOOL enableWalker = pState->WalkerParams.CmWalkerEnable;
45 	BOOL enableGpGpu = pState->pTaskParam->blGpGpuWalkerEnabled;
46 	GENOS_COMMAND_BUFFER CmdBuffer;
47 	GENHW_L3_CACHE_CONFIG L3CacheConfig;
48 	DWORD dwSyncTag;
49 	PINT64 pTaskSyncLocation;
50 	INT iSyncOffset;
51 	INT iTmp;
52 	PCM_HAL_TASK_PARAM pTaskParam = pState->pTaskParam;
53 
54 	GENOS_ZeroMemory(&CmdBuffer, sizeof(GENOS_COMMAND_BUFFER));
55 
56 	iSyncOffset = pState->pfnGetTaskSyncLocation(iTaskId);
57 
58 	pTaskSyncLocation = (PINT64) (pState->TsResource.pData + iSyncOffset);
59 	*pTaskSyncLocation = CM_INVALID_INDEX;
60 	*(pTaskSyncLocation + 1) = CM_INVALID_INDEX;
61 
62 	if (!enableWalker && !enableGpGpu) {
63 		CM_HRESULT2GENOSSTATUS_AND_CHECK
64 		    (pOsInterface->pfnRegisterResource
65 		     (pOsInterface, &pBatchBuffer->OsResource, TRUE, TRUE));
66 	}
67 	CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnRegisterResource
68 					 (pOsInterface,
69 					  &pState->TsResource.OsResource, TRUE,
70 					  TRUE));
71 
72 	CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnGetCommandBuffer
73 					 (pOsInterface, &CmdBuffer));
74 	iRemaining = CmdBuffer.iRemaining;
75 
76 	if (pState->bEUSaturationEnabled) {
77 		CM_CHK_GENOSSTATUS
78 		    (pState->pfnSendCommandBufferHeaderEUSaturation
79 		     (pState, &CmdBuffer));
80 	} else {
81 		CM_CHK_GENOSSTATUS(pHwInterface->pfnSendCommandBufferHeader
82 				   (pHwInterface, &CmdBuffer));
83 	}
84 
85 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
86 							    &CmdBuffer,
87 							    &pState->
88 							    TsResource.OsResource,
89 							    TRUE, iSyncOffset,
90 							    GFX3DCONTROLOP_WRITETIMESTAMP,
91 							    GFX3DFLUSH_WRITE_CACHE,
92 							    0));
93 
94 	dwSyncTag = pHwInterface->pGeneralStateHeap->dwNextTag++;
95 
96 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendSyncTag
97 			   (pHwInterface, &CmdBuffer));
98 
99 	if (pState->L3Config.L3_SQCREG1 || pState->L3Config.L3_CNTLREG2 || pState->L3Config.L3_CNTLREG3){
100 		L3CacheConfig.dwL3SQCReg1  = pState->L3Config.L3_SQCREG1;
101 		L3CacheConfig.dwL3CntlReg2 = pState->L3Config.L3_CNTLREG2;
102 		L3CacheConfig.dwL3CntlReg3 = pState->L3Config.L3_CNTLREG3;
103 	}
104 	else {
105 		L3CacheConfig.dwL3SQCReg1  = CM_CONFIG_SQCREG1_VALUE_G75;
106 		L3CacheConfig.dwL3CntlReg2 = pState->bSLMMode ? CM_CONFIG_CNTLREG2_VALUE_G75_SLM: CM_CONFIG_CNTLREG2_VALUE_G75_NONSLM;
107 		L3CacheConfig.dwL3CntlReg3 = pState->bSLMMode ? CM_CONFIG_CNTLREG3_VALUE_G75_SLM : CM_CONFIG_CNTLREG3_VALUE_G75_NONSLM;
108 	}
109 	L3CacheConfig.dwL3LRA1Reg  = CM_CONFIG_L3LRA1_VALUE_G75;
110 
111 	HalCm_HwSendL3CacheConfig_g75(pState, &CmdBuffer,&L3CacheConfig);
112 
113 	if (pHwInterface->bSysRoutine) {
114 		CM_CHK_GENOSSTATUS(pHwInterface->pfnSendDebugCtl(pHwInterface,
115 								 &CmdBuffer));
116 	}
117 
118 	if (enableGpGpu) {
119 		cmd_select.DW0.PipelineSelect = GFXPIPELINE_GPGPU;
120 	}
121 	CM_CHK_GENOSSTATUS(IntelGen_OsAddCommand(&CmdBuffer,
122 						 &cmd_select,
123 						 sizeof
124 						 (PIPELINE_SELECT_CMD_G5)));
125 
126 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendStateBaseAddr
127 			   (pHwInterface, &CmdBuffer));
128 
129 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendSurfaces
130 			   (pHwInterface, &CmdBuffer));
131 
132 	if (pHwInterface->bSysRoutine) {
133 		// Send the SIP_STATE if we loaded a system routine
134 		CM_CHK_GENOSSTATUS(pHwInterface->pfnSendStateSip(pHwInterface,
135 								 &CmdBuffer));
136 	}
137 
138 	iTmp = GENHW_USE_MEDIA_THREADS_MAX;
139 	if (pState->MaxHWThreadValues.registryValue != 0) {
140 		if (pState->MaxHWThreadValues.registryValue <
141 		    pHwInterface->pHwCaps->dwMaxThreads) {
142 			iTmp = pState->MaxHWThreadValues.registryValue;
143 		}
144 	} else if (pState->MaxHWThreadValues.APIValue != 0) {
145 		if (pState->MaxHWThreadValues.APIValue <
146 		    pHwInterface->pHwCaps->dwMaxThreads) {
147 			iTmp = pState->MaxHWThreadValues.APIValue;
148 		}
149 	}
150 
151 	pHwInterface->pfnSetVfeStateParams(pHwInterface,
152 					   0,
153 					   iTmp,
154 					   pState->pTaskParam->dwVfeCurbeSize,
155 					   pState->pTaskParam->dwUrbEntrySize,
156 					   &pState->ScoreboardParams);
157 
158 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendVfeState
159 			   (pHwInterface, &CmdBuffer, enableGpGpu));
160 
161 	if (pState->pTaskParam->dwVfeCurbeSize > 0) {
162 		CM_CHK_GENOSSTATUS(pHwInterface->pfnSendCurbeLoad
163 				   (pHwInterface, &CmdBuffer));
164 	}
165 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendIDLoad
166 			   (pHwInterface, &CmdBuffer));
167 
168 	if (enableWalker) {
169 		for (UINT i = 0; i < pState->pTaskParam->uiNumKernels; i++) {
170 			if ((i > 0)
171 			    &&
172 			    ((pTaskParam->uiSyncBitmap &
173 			      ((UINT64) 1 << (i - 1)))
174 			     || (pKernels[i]->
175 				 CmKernelThreadSpaceParam.patternType !=
176 				 CM_DEPENDENCY_NONE))) {
177 				CM_CHK_GENOSSTATUS
178 				    (pHwInterface->pfnSendPipeControl
179 				     (pHwInterface, &CmdBuffer,
180 				      &pState->TsResource.OsResource, FALSE, 0,
181 				      GFX3DCONTROLOP_NOWRITE,
182 				      GFX3DFLUSH_WRITE_CACHE, 0));
183 			}
184 
185 			CM_CHK_GENOSSTATUS(pState->pfnSendMediaWalkerState
186 					   (pState, pKernels[i], &CmdBuffer));
187 		}
188 	} else if (enableGpGpu) {
189 		for (UINT i = 0; i < pState->pTaskParam->uiNumKernels; i++) {
190 			if ((i > 0)
191 			    && (pTaskParam->uiSyncBitmap &
192 				((UINT64) 1 << (i - 1)))) {
193 				CM_CHK_GENOSSTATUS
194 				    (pHwInterface->pfnSendPipeControl
195 				     (pHwInterface, &CmdBuffer,
196 				      &pState->TsResource.OsResource, FALSE, 0,
197 				      GFX3DCONTROLOP_NOWRITE,
198 				      GFX3DFLUSH_WRITE_CACHE, 0));
199 			}
200 
201 			CM_CHK_GENOSSTATUS(pState->pfnSendGpGpuWalkerState
202 					   (pState, pKernels[i], &CmdBuffer));
203 		}
204 	} else {
205 		CM_CHK_GENOSSTATUS(pHwInterface->pfnSendBatchBufferStart
206 				   (pHwInterface, &CmdBuffer, pBatchBuffer));
207 
208 		if ((pBatchBuffer->pBBRenderData->BbArgs.BbCmArgs.uiRefCount ==
209 		     1) || (pState->pTaskParam->reuseBBUpdateMask == 1)) {
210 			pHwInterface->pfnAddBatchBufferEndCmdBb(pHwInterface,
211 								pBatchBuffer);
212 		} else {
213 			pHwInterface->pfnSkipBatchBufferEndCmdBb(pHwInterface,
214 								 pBatchBuffer);
215 		}
216 
217 		if ((pBatchBuffer->pBBRenderData->BbArgs.BbCmArgs.uiRefCount ==
218 		     1) || (pState->pTaskParam->reuseBBUpdateMask == 1)) {
219 			CM_CHK_GENOSSTATUS(pHwInterface->pfnUnlockBB
220 					   (pHwInterface, pBatchBuffer));
221 		}
222 	}
223 
224 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
225 							    &CmdBuffer,
226 							    &pState->
227 							    TsResource.OsResource,
228 							    FALSE, 0,
229 							    GFX3DCONTROLOP_NOWRITE,
230 							    GFX3DFLUSH_WRITE_CACHE,
231 							    0));
232 
233 	iSyncOffset += sizeof(UINT64);
234 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendPipeControl(pHwInterface,
235 							    &CmdBuffer,
236 							    &pState->
237 							    TsResource.OsResource,
238 							    TRUE, iSyncOffset,
239 							    GFX3DCONTROLOP_WRITETIMESTAMP,
240 							    GFX3DFLUSH_READ_CACHE,
241 							    0));
242 
243 	CM_CHK_GENOSSTATUS(pHwInterface->pfnSendBatchBufferEnd
244 			   (pHwInterface, &CmdBuffer));
245 
246 	pOsInterface->pfnReturnCommandBuffer(pOsInterface, &CmdBuffer);
247 
248 	CM_HRESULT2GENOSSTATUS_AND_CHECK(pOsInterface->pfnSubmitCommandBuffer
249 					 (pOsInterface, &CmdBuffer,
250 					  pState->bNullHwRenderCm));
251 
252 	if (pState->bNullHwRenderCm == FALSE) {
253 		pHwInterface->pGeneralStateHeap->pCurMediaState->bBusy = TRUE;
254 		if (!enableWalker && !enableGpGpu) {
255 			pBatchBuffer->bBusy = TRUE;
256 			pBatchBuffer->dwSyncTag = dwSyncTag;
257 		}
258 	}
259 	pState->MaxHWThreadValues.APIValue = 0;
260 
261 	pState->bEUSaturationEnabled = FALSE;
262 	pState->bEUSaturationNoSSD = FALSE;
263 
264 	if (ppCmdBuffer) {
265 		drm_intel_bo_reference(CmdBuffer.OsResource.bo);
266 		*ppCmdBuffer = CmdBuffer.OsResource.bo;
267 	}
268 
269 	hr = GENOS_STATUS_SUCCESS;
270 
271  finish:
272 	if (hr != GENOS_STATUS_SUCCESS) {
273 		if (CmdBuffer.iRemaining < 0) {
274 			GENHW_PUBLIC_ASSERTMESSAGE
275 			    ("Command Buffer overflow by %d bytes.",
276 			     -CmdBuffer.iRemaining);
277 		}
278 		iTmp = iRemaining - CmdBuffer.iRemaining;
279 		CmdBuffer.iRemaining = iRemaining;
280 		CmdBuffer.iOffset -= iTmp;
281 		CmdBuffer.pCmdPtr =
282 		    CmdBuffer.pCmdBase + CmdBuffer.iOffset / sizeof(DWORD);
283 
284 		pOsInterface->pfnReturnCommandBuffer(pOsInterface, &CmdBuffer);
285 	}
286 
287 	return hr;
288 }
289 
HalCm_HwSetSurfaceMemoryObjectControl_g75(PCM_HAL_STATE pState,WORD wMemObjCtl,PGENHW_SURFACE_STATE_PARAMS pParams)290 GENOS_STATUS HalCm_HwSetSurfaceMemoryObjectControl_g75(PCM_HAL_STATE pState,
291 						       WORD wMemObjCtl,
292 						       PGENHW_SURFACE_STATE_PARAMS
293 						       pParams)
294 {
295 	GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
296 	const WORD rawCacheType = (wMemObjCtl & CM_MEMOBJCTL_CACHE_MASK) >> 8;
297 
298 	CM_HAL_MEMORY_OBJECT_CONTROL_G75 cache_type =
299 	    (CM_HAL_MEMORY_OBJECT_CONTROL_G75) rawCacheType;
300 
301 	if (rawCacheType == CM_INVALID_MEMOBJCTL) {
302 		cache_type = CM_MEMORY_OBJECT_CONTROL_L3_LLC_ELLC_WB_CACHED;
303 	}
304 
305 	if (cache_type < CM_MEMORY_OBJECT_CONTROL_USE_PTE
306 	    || cache_type > CM_MEMORY_OBJECT_CONTROL_L3_ELLC_WB_CACHED) {
307 		hr = GENOS_STATUS_UNKNOWN;
308 		return hr;
309 	}
310 
311 	pParams->MemObjCtl = cache_type;
312 
313 	return hr;
314 }
315 
HalCm_HwSendL3CacheConfig_g75(PCM_HAL_STATE pState,PGENOS_COMMAND_BUFFER pCmdBuffer,PGENHW_L3_CACHE_CONFIG pL3CacheConfig)316 VOID HalCm_HwSendL3CacheConfig_g75(PCM_HAL_STATE pState,
317 				   PGENOS_COMMAND_BUFFER pCmdBuffer,
318 				   PGENHW_L3_CACHE_CONFIG  pL3CacheConfig)
319 {
320 	GENHW_LOAD_REGISTER_IMM_PARAM LoadRegImm;
321 	PGENHW_HW_INTERFACE pHwInterface = pState->pHwInterface;
322 
323 	GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
324 	LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_CNTLREG2_G75;
325 	LoadRegImm.dwData = pL3CacheConfig->dwL3CntlReg2;
326 	pHwInterface->pfnSendLoadRegImmCmd
327 	    (pHwInterface, pCmdBuffer, &LoadRegImm);
328 
329 	GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
330 	LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_CNTLREG3_G75;
331 	LoadRegImm.dwData = pL3CacheConfig->dwL3CntlReg3;
332 
333 	pHwInterface->pfnSendLoadRegImmCmd
334 	    (pHwInterface, pCmdBuffer, &LoadRegImm);
335 
336 	GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
337 	LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_SQCREG1_G75;
338 	LoadRegImm.dwData = pL3CacheConfig->dwL3SQCReg1;
339 
340 	pHwInterface->pfnSendLoadRegImmCmd
341             (pHwInterface, pCmdBuffer, &LoadRegImm);
342 
343 	GENOS_ZeroMemory(&LoadRegImm, sizeof(GENHW_LOAD_REGISTER_IMM_PARAM));
344 	LoadRegImm.dwRegisterAddress = GENHW_REG_L3_CACHE_L3LRA1_G75;
345 	LoadRegImm.dwData = pL3CacheConfig->dwL3LRA1Reg;
346 
347 	pHwInterface->pfnSendLoadRegImmCmd
348 	    (pHwInterface, pCmdBuffer, &LoadRegImm);
349 
350 	return;
351 }
352 
HalCm_GetPerThreadScratchSpaceSize_g75()353 UINT HalCm_GetPerThreadScratchSpaceSize_g75()
354 {
355 	return CM_MAX_SPILL_SIZE_PER_THREAD_HSW_BDW;
356 }
357 
HalCm_AddMediaStateFlushBb_g75(PGENHW_HW_INTERFACE pHwInterface,PGENHW_BATCH_BUFFER pBatchBuffer)358 GENOS_STATUS HalCm_AddMediaStateFlushBb_g75(PGENHW_HW_INTERFACE pHwInterface,
359 					    PGENHW_BATCH_BUFFER pBatchBuffer)
360 {
361 	return GENOS_STATUS_SUCCESS;
362 }
363 
/*
 * Returns the offset of a task's sync data: iTaskId times
 * CM_SYNC_QWORD_PER_TASK 64-bit words, expressed in bytes.
 *
 * The product is evaluated in the type produced by sizeof (as before);
 * the cast only makes the existing narrowing back to INT explicit.
 */
INT HalCm_GetTaskSyncLocation_g75(INT iTaskId)
{
	return (INT) (iTaskId * CM_SYNC_QWORD_PER_TASK * sizeof(UINT64));
}
368 
HalCm_GetCurbeBlockAlignSize_g75()369 INT HalCm_GetCurbeBlockAlignSize_g75()
370 {
371 	return GENHW_CURBE_BLOCK_ALIGN_G7;
372 }
373 
HalCm_GetUserDefinedThreadCountPerThreadGroup_g75(PCM_HAL_STATE pState,UINT * pThreadsPerThreadGroup)374 GENOS_STATUS HalCm_GetUserDefinedThreadCountPerThreadGroup_g75(PCM_HAL_STATE
375 							       pState,
376 							       UINT *
377 							       pThreadsPerThreadGroup)
378 {
379 	GENOS_STATUS hr = GENOS_STATUS_SUCCESS;
380 	int threads_per_eu = 0;
381 	int eu_per_subslice = 0;
382 	if (pState->pHwInterface->Platform.GtType == GTTYPE_GT1) {
383 		threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT1;
384 		eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT1;
385 	} else if (pState->pHwInterface->Platform.GtType == GTTYPE_GT2) {
386 		threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT2;
387 		eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT2;
388 	} else if (pState->pHwInterface->Platform.GtType == GTTYPE_GT3) {
389 		threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT3;
390 		eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT3;
391 	} else {
392 		threads_per_eu = GENHW_CM_THREADS_PER_EU_HSW_GT2;
393 		eu_per_subslice = GENHW_CM_EU_PER_SUBSLICE_HSW_GT2;
394 	}
395 
396 	*pThreadsPerThreadGroup = threads_per_eu * eu_per_subslice;
397 	return hr;
398 }
399