1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
//! \file cm_media_state.cpp
//! \brief Contains Class CmMediaState definitions
25 //!
26
27 #include "cm_media_state.h"
28 #include "cm_kernel_ex.h"
29 #include <string>
30 #include <iostream>
31 #include <sstream>
32
33 using namespace CMRT_UMD;
34 using namespace std;
35
CmMediaState(CM_HAL_STATE * cmhal)36 CmMediaState::CmMediaState(CM_HAL_STATE *cmhal):
37 m_cmhal(cmhal),
38 m_heapMgr(nullptr),
39 m_curbeOffsetInternal(0),
40 m_mediaIDOffsetInternal(0),
41 m_samplerHeapOffsetInternal(0),
42 m_scratchSpaceOffsetExternal(0),
43 m_totalCurbeSize(0),
44 m_totalMediaIDSize(0),
45 m_totalSamplerHeapSize(0),
46 m_totalScratchSpaceSize(0),
47 m_mediaIDSize(0),
48 m_scratchSizePerThread(0),
49 m_state(_Empty)
50 {
51 }
52
~CmMediaState()53 CmMediaState::~CmMediaState()
54 {
55 if (m_state == _Allocated)
56 {
57 Submit();
58 }
59 }
60
Initialize(HeapManager * heapMgr)61 MOS_STATUS CmMediaState::Initialize(HeapManager *heapMgr)
62 {
63 if (heapMgr == nullptr || m_cmhal == nullptr)
64 {
65 return MOS_STATUS_NULL_POINTER;
66 }
67 m_heapMgr = heapMgr;
68
69 m_mediaIDSize = m_cmhal->renderHal->pHwSizes->dwSizeInterfaceDescriptor;
70
71 MOS_ZeroMemory(m_curbeOffsets, sizeof(m_curbeOffsets));
72 MOS_ZeroMemory(m_samplerOffsets, sizeof(m_samplerOffsets));
73 MOS_ZeroMemory(m_next3dSamplerOffsets, sizeof(m_next3dSamplerOffsets));
74 MOS_ZeroMemory(m_nextAvsSamplerOffsets, sizeof(m_nextAvsSamplerOffsets));
75 MOS_ZeroMemory(m_nextIndStateOffsets, sizeof(m_nextIndStateOffsets));
76 MOS_ZeroMemory(m_samplerCount, sizeof(m_samplerCount));
77
78 return MOS_STATUS_SUCCESS;
79 }
80
Submit()81 MOS_STATUS CmMediaState::Submit()
82 {
83 std::vector<MemoryBlock> blocks;
84 blocks.push_back(m_memoryBlock);
85 CM_CHK_MOSSTATUS_RETURN(m_heapMgr->SubmitBlocks(blocks));
86 m_state = _Submitted;
87
88 return MOS_STATUS_SUCCESS;
89 }
90
// Directory separator used when building dump file names:
// "/" when ANDROID or LINUX is defined, "\\" for every other build.
#if !defined(ANDROID) && !defined(LINUX)
#define PLATFORM_DIR_SEPERATOR "\\"
#else
#define PLATFORM_DIR_SEPERATOR "/"
#endif
96
Dump()97 void CmMediaState::Dump()
98 {
99 #if MDF_CURBE_DATA_DUMP
100 if (m_cmhal->dumpCurbeData)
101 {
102 char curbeFileNamePrefix[MAX_PATH];
103 char idFileNamePrefix[MAX_PATH];
104 static int fileCount = 0;
105 stringstream curbeFilename;
106 curbeFilename << "HALCM_Curbe_Data_Dumps" << PLATFORM_DIR_SEPERATOR << "curbe_" << fileCount << ".fast.log";
107 stringstream idFilename;
108 idFilename << "HALCM_Curbe_Data_Dumps" << PLATFORM_DIR_SEPERATOR << "id_" << fileCount << ".fast.log";
109
110 ++fileCount;
111
112 GetLogFileLocation(curbeFilename.str().c_str(), curbeFileNamePrefix);
113 GetLogFileLocation(idFilename.str().c_str(), idFileNamePrefix);
114
115 m_memoryBlock.Dump(curbeFileNamePrefix, m_curbeOffsetInternal, m_totalCurbeSize);
116 m_memoryBlock.Dump(idFilename.str(), m_mediaIDOffsetInternal, m_totalMediaIDSize);
117 }
118 #endif
119 }
120
Allocate(CmKernelEx ** kernels,int count,uint32_t trackerIndex,uint32_t trackerID)121 MOS_STATUS CmMediaState::Allocate(CmKernelEx **kernels, int count, uint32_t trackerIndex, uint32_t trackerID)
122 {
123 // calculate the curbe size
124 m_curbeOffsetInternal = 0;
125 uint32_t offset = 0;
126 uint32_t totalCurbeSize = 0;
127 for (int i = 0; i < count; i++)
128 {
129 CmKernelEx *kernel = kernels[i];
130 uint32_t curbeSize = kernel->GetCurbeSize();
131 m_curbeOffsets[i] = totalCurbeSize;
132 totalCurbeSize += curbeSize;
133 }
134 m_totalCurbeSize = totalCurbeSize;
135
136 // calculate the sampler
137 m_samplerHeapOffsetInternal = MOS_ALIGN_CEIL(m_totalCurbeSize, MHW_SAMPLER_STATE_ALIGN);
138 uint32_t totalHeapSize = 0;
139 uint32_t maxSpillSize = 0;
140 for (int i = 0; i < count; i++)
141 {
142 CmKernelEx *kernel = kernels[i];
143 uint32_t heapSize = UpdateHeapSizeAndOffsets(kernel, i);
144 m_samplerOffsets[i] = totalHeapSize;
145 totalHeapSize += heapSize;
146
147 // get the spill size
148 maxSpillSize = MOS_MAX(maxSpillSize, kernel->GetSpillMemUsed());
149 }
150 m_totalSamplerHeapSize = totalHeapSize;
151
152 // calculate the media id
153 m_mediaIDOffsetInternal = m_samplerHeapOffsetInternal + m_totalSamplerHeapSize;
154 m_totalMediaIDSize = count * m_mediaIDSize;
155
156 // caculate the scratch space
157 uint32_t tempScratchOffset = m_mediaIDOffsetInternal + m_totalMediaIDSize;
158 if (maxSpillSize > 0 && (!m_cmhal->cmHalInterface->IsSeparateScratch()))
159 {
160 uint32_t perThreadScratchSpace = 1024;
161 for (perThreadScratchSpace; perThreadScratchSpace < maxSpillSize; perThreadScratchSpace <<= 1);
162
163 // get max thread number
164 MEDIA_SYSTEM_INFO *gtSystemInfo = m_cmhal->osInterface->pfnGetGtSystemInfo(m_cmhal->osInterface);
165 uint32_t numHWThreadsPerEU = gtSystemInfo->ThreadCount / gtSystemInfo->EUCount;
166 uint32_t maxHWThreads = gtSystemInfo->MaxEuPerSubSlice * numHWThreadsPerEU * gtSystemInfo->MaxSubSlicesSupported;
167 // add additional 1k, because the offset of scratch space needs to be 1k aligned
168 m_totalScratchSpaceSize = maxHWThreads * perThreadScratchSpace + MHW_SCRATCH_SPACE_ALIGN;
169 m_scratchSizePerThread = perThreadScratchSpace;
170 // change the extend step in gdsh
171 uint32_t currentExtendSize = m_heapMgr->GetExtendSize();
172 if (currentExtendSize < m_totalScratchSpaceSize)
173 {
174 // update extend size for scratch space
175 m_heapMgr->SetExtendHeapSize(m_totalScratchSpaceSize);
176 }
177 }
178
179 // allocate the memory block
180 uint32_t totalSize = tempScratchOffset + m_totalScratchSpaceSize;
181 PrepareMemoryBlock(totalSize, trackerIndex, trackerID);
182
183 // adjust the offset of scratch space to be 1k aligned
184 if (maxSpillSize > 0 && (!m_cmhal->cmHalInterface->IsSeparateScratch()))
185 {
186 uint32_t scratchOffsetExternal = m_memoryBlock.GetOffset() + tempScratchOffset;
187 m_scratchSpaceOffsetExternal = MOS_ALIGN_CEIL(scratchOffsetExternal, MHW_SCRATCH_SPACE_ALIGN);
188 }
189
190 return MOS_STATUS_SUCCESS;
191 }
192
PrepareMemoryBlock(uint32_t size,uint32_t trackerIndex,uint32_t trackerID)193 MOS_STATUS CmMediaState::PrepareMemoryBlock(uint32_t size, uint32_t trackerIndex, uint32_t trackerID)
194 {
195 uint32_t spaceNeeded = 0;
196 std::vector<MemoryBlock> blocks;
197 std::vector<uint32_t> blockSizes;
198 MemoryBlockManager::AcquireParams acquireParams =
199 MemoryBlockManager::AcquireParams(trackerID, blockSizes);
200 acquireParams.m_trackerIndex = trackerIndex;
201 if (blockSizes.empty())
202 {
203 blockSizes.emplace_back(size);
204 }
205 else
206 {
207 blockSizes[0] = size;
208 }
209
210 m_heapMgr->AcquireSpace(acquireParams, blocks, spaceNeeded);
211
212 if (blocks.empty())
213 {
214 MHW_RENDERHAL_ASSERTMESSAGE("No blocks were acquired");
215 return MOS_STATUS_UNKNOWN;
216 }
217 if (!(blocks[0].IsValid()))
218 {
219 MHW_RENDERHAL_ASSERTMESSAGE("No blocks were acquired");
220 return MOS_STATUS_UNKNOWN;
221 }
222
223 m_memoryBlock = blocks[0];
224
225 // zero memory block
226 m_memoryBlock.AddData(nullptr, 0, 0, true);
227
228 m_state = _Allocated;
229 return MOS_STATUS_SUCCESS;
230 }
231
LoadCurbe(CmKernelEx * kernel,int index)232 MOS_STATUS CmMediaState::LoadCurbe(CmKernelEx *kernel, int index)
233 {
234 return LoadCurbe(kernel->GetCurbe(), kernel->GetCurbeSize(), index);
235 }
236
LoadCurbe(uint8_t * curbe,uint32_t size,int index)237 MOS_STATUS CmMediaState::LoadCurbe(uint8_t *curbe, uint32_t size, int index)
238 {
239 if (m_state != _Allocated)
240 {
241 MHW_RENDERHAL_ASSERTMESSAGE("Media State not allocated yet");
242 return MOS_STATUS_UNKNOWN;
243 }
244
245 m_memoryBlock.AddData(curbe, m_curbeOffsetInternal+m_curbeOffsets[index], size);
246 return MOS_STATUS_SUCCESS;
247 }
248
LoadMediaID(CmKernelEx * kernel,int index,uint32_t btOffset,CmThreadGroupSpace * threadGroupSpace)249 MOS_STATUS CmMediaState::LoadMediaID(CmKernelEx *kernel, int index, uint32_t btOffset, CmThreadGroupSpace *threadGroupSpace)
250 {
251 if (m_state != _Allocated)
252 {
253 MHW_RENDERHAL_ASSERTMESSAGE("Media State not allocated yet");
254 return MOS_STATUS_UNKNOWN;
255 }
256 MHW_ID_ENTRY_PARAMS params;
257 uint32_t mediaStateOffset = m_memoryBlock.GetOffset();
258
259 // Get states, params
260 params.dwMediaIdOffset = mediaStateOffset + m_mediaIDOffsetInternal;
261 params.iMediaId = index;
262 params.dwKernelOffset = kernel->GetOffsetInIsh();
263 params.dwSamplerOffset = GetSamplerHeapOffset(index);
264 params.dwSamplerCount = MOS_MIN(4, (GetSamplerCount(index) + 3 ) / 4);
265 params.dwBindingTableOffset = btOffset;
266 params.iCurbeOffset = m_curbeOffsets[index];
267 params.iCurbeLength = kernel->GetCurbeSizePerThread();
268 if (threadGroupSpace == nullptr)
269 {
270 params.bBarrierEnable = false;
271 params.bGlobalBarrierEnable = false; //It's only applied for BDW+
272 params.dwNumberofThreadsInGPGPUGroup = 1;
273 params.dwSharedLocalMemorySize = 0;
274 params.iCrsThdConDataRdLn = 0;
275 }
276 else
277 {
278 uint32_t threadW = 0;
279 uint32_t threadH = 0;
280 uint32_t threadD = 0;
281 uint32_t groupW = 0;
282 uint32_t groupH = 0;
283 uint32_t groupD = 0;
284 threadGroupSpace->GetThreadGroupSpaceSize(threadW,
285 threadH,
286 threadD,
287 groupW,
288 groupH,
289 groupD);
290 params.bBarrierEnable = (kernel->GetBarrierMode() != CM_NO_BARRIER);
291 params.bGlobalBarrierEnable = (kernel->GetBarrierMode() == CM_GLOBAL_BARRIER);
292 params.dwNumberofThreadsInGPGPUGroup = threadW * threadH * threadD;
293 params.dwSharedLocalMemorySize =
294 m_cmhal->renderHal->pfnEncodeSLMSize(m_cmhal->renderHal, kernel->GetSLMSize());;
295 params.iCrsThdConDataRdLn = kernel->GetCurbeSizeCrossThread();
296 }
297
298 params.memoryBlock = &m_memoryBlock;
299
300 CM_CHK_MOSSTATUS_RETURN(m_cmhal->renderHal->pMhwStateHeap->AddInterfaceDescriptorData(¶ms));
301
302 return MOS_STATUS_SUCCESS;
303 }
304
UpdateHeapSizeAndOffsets(CmKernelEx * kernel,uint32_t kernelIdx)305 uint32_t CmMediaState::UpdateHeapSizeAndOffsets(CmKernelEx *kernel, uint32_t kernelIdx)
306 {
307 uint32_t count3D; // not include the reserved
308 uint32_t countAVS; // not include the reserved
309 uint32_t reservedCount3D = 0;
310 kernel->GetSamplerCount(&count3D, &countAVS);
311 std::map<int, void *>reservedSamplers = kernel->GetReservedSamplerBteIndex();
312
313 m_samplerCount[kernelIdx] = count3D + countAVS + reservedSamplers.size();
314
315 if (m_samplerCount[kernelIdx] ==0)
316 {
317 // no sampler in the kernel
318 return 0;
319 }
320
321 // simplified the sampler allocations
322 // reserved samplers
323 // avs samplers
324 // 3d samplers
325 // 3d indirect states
326
327 // get the area of reserved samplers
328 uint32_t reservedEnd = 0;
329 uint32_t heapSize = 0;
330 for (auto it = reservedSamplers.begin(); it != reservedSamplers.end(); it ++)
331 {
332 int bteIndex = it->first;
333 MHW_SAMPLER_STATE_PARAM *param = (MHW_SAMPLER_STATE_PARAM *)it->second;
334 uint32_t elementSize;
335 if (param->SamplerType == MHW_SAMPLER_TYPE_3D)
336 {
337 reservedCount3D ++;
338 elementSize = m_3dSamplerElementSize;
339 }
340 else
341 {
342 elementSize = m_avsSamplerElementSize;
343 }
344 uint32_t end = (bteIndex + 1) * elementSize;
345 reservedEnd = MOS_MAX(reservedEnd, end);
346 }
347 heapSize = MOS_ALIGN_CEIL(reservedEnd, MHW_SAMPLER_STATE_ALIGN);
348 m_nextAvsSamplerOffsets[kernelIdx] = heapSize;
349 heapSize += countAVS*m_avsSamplerElementSize;
350 m_next3dSamplerOffsets[kernelIdx] = heapSize;
351 heapSize += count3D*m_3dSamplerElementSize;
352 heapSize = MOS_ALIGN_CEIL(heapSize, 1 << MHW_SAMPLER_INDIRECT_SHIFT);
353 m_nextIndStateOffsets[kernelIdx] = heapSize;
354 heapSize += (reservedCount3D + count3D) * m_cmhal->renderHal->pMhwStateHeap->m_HwSizes.dwSizeSamplerIndirectState;
355 heapSize = MOS_ALIGN_CEIL(heapSize, MHW_SAMPLER_STATE_ALIGN);
356
357 return heapSize;
358 }
359
AddSampler(void * samplerParam,int index,int bteIndex)360 int CmMediaState::AddSampler(void *samplerParam, int index, int bteIndex)
361 {
362 uint32_t offset;
363 MHW_SAMPLER_STATE_PARAM *param = (MHW_SAMPLER_STATE_PARAM *)samplerParam;
364 uint32_t elementSize = (param->SamplerType == MHW_SAMPLER_TYPE_3D)?m_3dSamplerElementSize:m_avsSamplerElementSize;
365 if (bteIndex == -1)
366 {
367 uint32_t *nextOffset = (param->SamplerType == MHW_SAMPLER_TYPE_3D)?m_next3dSamplerOffsets:m_nextAvsSamplerOffsets;
368 offset = nextOffset[index];
369 nextOffset[index] += elementSize;
370 }
371 else
372 {
373 offset = bteIndex * elementSize;
374 }
375
376 if (param->SamplerType == MHW_SAMPLER_TYPE_3D)
377 {
378 param->Unorm.IndirectStateOffset = m_samplerHeapOffsetInternal + m_samplerOffsets[index] + m_nextIndStateOffsets[index];
379 m_nextIndStateOffsets[index] += m_cmhal->renderHal->pMhwStateHeap->m_HwSizes.dwSizeSamplerIndirectState;
380 }
381
382 uint32_t heapOffset = m_samplerHeapOffsetInternal + m_samplerOffsets[index] + offset;
383 MOS_STATUS ret = m_cmhal->renderHal->pMhwStateHeap->AddSamplerStateData(heapOffset, &m_memoryBlock, param);
384 if (ret != MOS_STATUS_SUCCESS)
385 {
386 return -1;
387 }
388 return offset/elementSize;
389 }
390