1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
//!
//! \file      cm_media_state.cpp
//! \brief     Contains Class CmMediaState definitions
//!
26 
27 #include "cm_media_state.h"
28 #include "cm_kernel_ex.h"
29 #include <string>
30 #include <iostream>
31 #include <sstream>
32 
33 using namespace CMRT_UMD;
34 using namespace std;
35 
CmMediaState(CM_HAL_STATE * cmhal)36 CmMediaState::CmMediaState(CM_HAL_STATE *cmhal):
37     m_cmhal(cmhal),
38     m_heapMgr(nullptr),
39     m_curbeOffsetInternal(0),
40     m_mediaIDOffsetInternal(0),
41     m_samplerHeapOffsetInternal(0),
42     m_scratchSpaceOffsetExternal(0),
43     m_totalCurbeSize(0),
44     m_totalMediaIDSize(0),
45     m_totalSamplerHeapSize(0),
46     m_totalScratchSpaceSize(0),
47     m_mediaIDSize(0),
48     m_scratchSizePerThread(0),
49     m_state(_Empty)
50 {
51 }
52 
~CmMediaState()53 CmMediaState::~CmMediaState()
54 {
55     if (m_state == _Allocated)
56     {
57         Submit();
58     }
59 }
60 
Initialize(HeapManager * heapMgr)61 MOS_STATUS CmMediaState::Initialize(HeapManager *heapMgr)
62 {
63     if (heapMgr == nullptr || m_cmhal == nullptr)
64     {
65         return MOS_STATUS_NULL_POINTER;
66     }
67     m_heapMgr = heapMgr;
68 
69     m_mediaIDSize = m_cmhal->renderHal->pHwSizes->dwSizeInterfaceDescriptor;
70 
71     MOS_ZeroMemory(m_curbeOffsets, sizeof(m_curbeOffsets));
72     MOS_ZeroMemory(m_samplerOffsets, sizeof(m_samplerOffsets));
73     MOS_ZeroMemory(m_next3dSamplerOffsets, sizeof(m_next3dSamplerOffsets));
74     MOS_ZeroMemory(m_nextAvsSamplerOffsets, sizeof(m_nextAvsSamplerOffsets));
75     MOS_ZeroMemory(m_nextIndStateOffsets, sizeof(m_nextIndStateOffsets));
76     MOS_ZeroMemory(m_samplerCount, sizeof(m_samplerCount));
77 
78     return MOS_STATUS_SUCCESS;
79 }
80 
Submit()81 MOS_STATUS CmMediaState::Submit()
82 {
83     std::vector<MemoryBlock> blocks;
84     blocks.push_back(m_memoryBlock);
85     CM_CHK_MOSSTATUS_RETURN(m_heapMgr->SubmitBlocks(blocks));
86     m_state = _Submitted;
87 
88     return MOS_STATUS_SUCCESS;
89 }
90 
// Directory separator used when composing dump file paths:
// "/" when ANDROID or LINUX is defined, "\\" otherwise.
#if !defined(ANDROID) && !defined(LINUX)
#define PLATFORM_DIR_SEPERATOR   "\\"
#else
#define PLATFORM_DIR_SEPERATOR   "/"
#endif
96 
Dump()97 void CmMediaState::Dump()
98 {
99 #if MDF_CURBE_DATA_DUMP
100     if (m_cmhal->dumpCurbeData)
101     {
102         char curbeFileNamePrefix[MAX_PATH];
103         char idFileNamePrefix[MAX_PATH];
104         static int fileCount = 0;
105         stringstream curbeFilename;
106         curbeFilename << "HALCM_Curbe_Data_Dumps" << PLATFORM_DIR_SEPERATOR << "curbe_" << fileCount << ".fast.log";
107         stringstream idFilename;
108         idFilename << "HALCM_Curbe_Data_Dumps" << PLATFORM_DIR_SEPERATOR << "id_" << fileCount << ".fast.log";
109 
110         ++fileCount;
111 
112         GetLogFileLocation(curbeFilename.str().c_str(), curbeFileNamePrefix,
113                            m_cmhal->osInterface->pOsContext);
114         GetLogFileLocation(idFilename.str().c_str(), idFileNamePrefix,
115                            m_cmhal->osInterface->pOsContext);
116 
117         m_memoryBlock.Dump(curbeFileNamePrefix, m_curbeOffsetInternal, m_totalCurbeSize);
118         m_memoryBlock.Dump(idFilename.str(), m_mediaIDOffsetInternal, m_totalMediaIDSize);
119     }
120 #endif
121 }
122 
Allocate(CmKernelEx ** kernels,int count,uint32_t trackerIndex,uint32_t trackerID)123 MOS_STATUS CmMediaState::Allocate(CmKernelEx **kernels, int count, uint32_t trackerIndex, uint32_t trackerID)
124 {
125     // calculate the curbe size
126     m_curbeOffsetInternal = 0;
127     uint32_t offset = 0;
128     uint32_t totalCurbeSize = 0;
129     for (int i = 0; i < count; i++)
130     {
131         CmKernelEx *kernel = kernels[i];
132         uint32_t curbeSize = kernel->GetCurbeSize();
133         m_curbeOffsets[i] = totalCurbeSize;
134         totalCurbeSize += curbeSize;
135     }
136     m_totalCurbeSize = totalCurbeSize;
137 
138     // calculate the sampler
139     m_samplerHeapOffsetInternal = MOS_ALIGN_CEIL(m_totalCurbeSize, MHW_SAMPLER_STATE_ALIGN);
140     uint32_t totalHeapSize = 0;
141     uint32_t maxSpillSize = 0;
142     for (int i = 0; i < count; i++)
143     {
144         CmKernelEx *kernel = kernels[i];
145         uint32_t heapSize = UpdateHeapSizeAndOffsets(kernel, i);
146         m_samplerOffsets[i] = totalHeapSize;
147         totalHeapSize += heapSize;
148 
149         // get the spill size
150         maxSpillSize = MOS_MAX(maxSpillSize, kernel->GetSpillMemUsed());
151     }
152     m_totalSamplerHeapSize = totalHeapSize;
153 
154     // calculate the media id
155     m_mediaIDOffsetInternal = m_samplerHeapOffsetInternal + m_totalSamplerHeapSize;
156     m_totalMediaIDSize = count * m_mediaIDSize;
157 
158     // caculate the scratch space
159     uint32_t tempScratchOffset = m_mediaIDOffsetInternal + m_totalMediaIDSize;
160     if (maxSpillSize > 0 && (!m_cmhal->cmHalInterface->IsSeparateScratch()))
161     {
162         uint32_t perThreadScratchSpace = 1024;
163         for (perThreadScratchSpace; perThreadScratchSpace < maxSpillSize; perThreadScratchSpace <<= 1);
164 
165         // get max thread number
166         MEDIA_SYSTEM_INFO *gtSystemInfo = m_cmhal->osInterface->pfnGetGtSystemInfo(m_cmhal->osInterface);
167         uint32_t numHWThreadsPerEU = gtSystemInfo->ThreadCount / gtSystemInfo->EUCount;
168         uint32_t maxHWThreads = gtSystemInfo->MaxEuPerSubSlice * numHWThreadsPerEU * gtSystemInfo->MaxSubSlicesSupported;
169         // add additional 1k, because the offset of scratch space needs to be 1k aligned
170         m_totalScratchSpaceSize = maxHWThreads * perThreadScratchSpace + MHW_SCRATCH_SPACE_ALIGN;
171         m_scratchSizePerThread = perThreadScratchSpace;
172         // change the extend step in gdsh
173         uint32_t currentExtendSize = m_heapMgr->GetExtendSize();
174         if (currentExtendSize < m_totalScratchSpaceSize)
175         {
176             // update extend size for scratch space
177             m_heapMgr->SetExtendHeapSize(m_totalScratchSpaceSize);
178         }
179     }
180 
181     // allocate the memory block
182     uint32_t totalSize = tempScratchOffset + m_totalScratchSpaceSize;
183     PrepareMemoryBlock(totalSize, trackerIndex, trackerID);
184 
185     // adjust the offset of scratch space to be 1k aligned
186     if (maxSpillSize > 0 && (!m_cmhal->cmHalInterface->IsSeparateScratch()))
187     {
188         uint32_t scratchOffsetExternal = m_memoryBlock.GetOffset() + tempScratchOffset;
189         m_scratchSpaceOffsetExternal = MOS_ALIGN_CEIL(scratchOffsetExternal, MHW_SCRATCH_SPACE_ALIGN);
190     }
191 
192     return MOS_STATUS_SUCCESS;
193 }
194 
PrepareMemoryBlock(uint32_t size,uint32_t trackerIndex,uint32_t trackerID)195 MOS_STATUS CmMediaState::PrepareMemoryBlock(uint32_t size, uint32_t trackerIndex, uint32_t trackerID)
196 {
197     uint32_t   spaceNeeded = 0;
198     std::vector<MemoryBlock> blocks;
199     std::vector<uint32_t> blockSizes;
200     MemoryBlockManager::AcquireParams acquireParams =
201         MemoryBlockManager::AcquireParams(trackerID, blockSizes);
202     acquireParams.m_trackerIndex = trackerIndex;
203     if (blockSizes.empty())
204     {
205         blockSizes.emplace_back(size);
206     }
207     else
208     {
209         blockSizes[0] = size;
210     }
211 
212     m_heapMgr->AcquireSpace(acquireParams, blocks, spaceNeeded);
213 
214     if (blocks.empty())
215     {
216         MHW_RENDERHAL_ASSERTMESSAGE("No blocks were acquired");
217         return MOS_STATUS_UNKNOWN;
218     }
219     if (!(blocks[0].IsValid()))
220     {
221         MHW_RENDERHAL_ASSERTMESSAGE("No blocks were acquired");
222         return MOS_STATUS_UNKNOWN;
223     }
224 
225     m_memoryBlock = blocks[0];
226 
227     // zero memory block
228     m_memoryBlock.AddData(nullptr, 0, 0, true);
229 
230     m_state = _Allocated;
231     return MOS_STATUS_SUCCESS;
232 }
233 
LoadCurbe(CmKernelEx * kernel,int index)234 MOS_STATUS CmMediaState::LoadCurbe(CmKernelEx *kernel, int index)
235 {
236     return LoadCurbe(kernel->GetCurbe(), kernel->GetCurbeSize(), index);
237 }
238 
LoadCurbe(uint8_t * curbe,uint32_t size,int index)239 MOS_STATUS CmMediaState::LoadCurbe(uint8_t *curbe, uint32_t size, int index)
240 {
241     if (m_state != _Allocated)
242     {
243         MHW_RENDERHAL_ASSERTMESSAGE("Media State not allocated yet");
244         return MOS_STATUS_UNKNOWN;
245     }
246 
247     m_memoryBlock.AddData(curbe, m_curbeOffsetInternal+m_curbeOffsets[index], size);
248     return MOS_STATUS_SUCCESS;
249 }
250 
LoadMediaID(CmKernelEx * kernel,int index,uint32_t btOffset,CmThreadGroupSpace * threadGroupSpace)251 MOS_STATUS CmMediaState::LoadMediaID(CmKernelEx *kernel, int index, uint32_t btOffset, CmThreadGroupSpace *threadGroupSpace)
252 {
253     if (m_state != _Allocated)
254     {
255         MHW_RENDERHAL_ASSERTMESSAGE("Media State not allocated yet");
256         return MOS_STATUS_UNKNOWN;
257     }
258     MHW_ID_ENTRY_PARAMS params;
259     uint32_t mediaStateOffset = m_memoryBlock.GetOffset();
260 
261     // Get states, params
262     params.dwMediaIdOffset = mediaStateOffset + m_mediaIDOffsetInternal;
263     params.iMediaId = index;
264     params.dwKernelOffset = kernel->GetOffsetInIsh();
265     params.dwSamplerOffset = GetSamplerHeapOffset(index);
266     params.dwSamplerCount = MOS_MIN(4, (GetSamplerCount(index) + 3 ) / 4);
267     params.dwBindingTableOffset = btOffset;
268     params.iCurbeOffset = m_curbeOffsets[index];
269     params.iCurbeLength = kernel->GetCurbeSizePerThread();
270     if (threadGroupSpace == nullptr)
271     {
272         params.bBarrierEnable = false;
273         params.bGlobalBarrierEnable = false;    //It's only applied for BDW+
274         params.dwNumberofThreadsInGPGPUGroup = 1;
275         params.dwSharedLocalMemorySize = 0;
276         params.iCrsThdConDataRdLn = 0;
277     }
278     else
279     {
280         uint32_t threadW = 0;
281         uint32_t threadH = 0;
282         uint32_t threadD = 0;
283         uint32_t groupW = 0;
284         uint32_t groupH = 0;
285         uint32_t groupD = 0;
286         threadGroupSpace->GetThreadGroupSpaceSize(threadW,
287                                                   threadH,
288                                                   threadD,
289                                                   groupW,
290                                                   groupH,
291                                                   groupD);
292         params.bBarrierEnable = (kernel->GetBarrierMode() != CM_NO_BARRIER);
293         params.bGlobalBarrierEnable = (kernel->GetBarrierMode() == CM_GLOBAL_BARRIER);
294         params.dwNumberofThreadsInGPGPUGroup = threadW * threadH * threadD;
295         params.dwSharedLocalMemorySize =
296             m_cmhal->renderHal->pfnEncodeSLMSize(m_cmhal->renderHal, kernel->GetSLMSize());;
297         params.iCrsThdConDataRdLn = kernel->GetCurbeSizeCrossThread();
298     }
299 
300     params.memoryBlock = &m_memoryBlock;
301 
302     CM_CHK_MOSSTATUS_RETURN(m_cmhal->renderHal->pMhwStateHeap->AddInterfaceDescriptorData(&params));
303 
304     return MOS_STATUS_SUCCESS;
305 }
306 
UpdateHeapSizeAndOffsets(CmKernelEx * kernel,uint32_t kernelIdx)307 uint32_t CmMediaState::UpdateHeapSizeAndOffsets(CmKernelEx *kernel, uint32_t kernelIdx)
308 {
309     uint32_t count3D; // not include the reserved
310     uint32_t countAVS; // not include the reserved
311     uint32_t reservedCount3D = 0;
312     kernel->GetSamplerCount(&count3D, &countAVS);
313     std::map<int, void *>reservedSamplers = kernel->GetReservedSamplerBteIndex();
314 
315     m_samplerCount[kernelIdx] = count3D + countAVS + reservedSamplers.size();
316 
317     if (m_samplerCount[kernelIdx] ==0)
318     {
319         // no sampler in the kernel
320         return 0;
321     }
322 
323     // simplified the sampler allocations
324     // reserved samplers
325     // avs samplers
326     // 3d samplers
327     // 3d indirect states
328 
329     // get the area of reserved samplers
330     uint32_t reservedEnd = 0;
331     uint32_t heapSize = 0;
332     for (auto it = reservedSamplers.begin(); it != reservedSamplers.end(); it ++)
333     {
334         int bteIndex = it->first;
335         MHW_SAMPLER_STATE_PARAM *param = (MHW_SAMPLER_STATE_PARAM *)it->second;
336         uint32_t elementSize;
337         if (param->SamplerType == MHW_SAMPLER_TYPE_3D)
338         {
339             reservedCount3D ++;
340             elementSize = m_3dSamplerElementSize;
341         }
342         else
343         {
344             elementSize = m_avsSamplerElementSize;
345         }
346         uint32_t end = (bteIndex + 1) * elementSize;
347         reservedEnd = MOS_MAX(reservedEnd, end);
348     }
349     heapSize = MOS_ALIGN_CEIL(reservedEnd, MHW_SAMPLER_STATE_ALIGN);
350     m_nextAvsSamplerOffsets[kernelIdx] = heapSize;
351     heapSize += countAVS*m_avsSamplerElementSize;
352     m_next3dSamplerOffsets[kernelIdx] = heapSize;
353     heapSize += count3D*m_3dSamplerElementSize;
354     heapSize = MOS_ALIGN_CEIL(heapSize, 1 << MHW_SAMPLER_INDIRECT_SHIFT);
355     m_nextIndStateOffsets[kernelIdx] = heapSize;
356     heapSize += (reservedCount3D + count3D) * m_cmhal->renderHal->pMhwStateHeap->m_HwSizes.dwSizeSamplerIndirectState;
357     heapSize = MOS_ALIGN_CEIL(heapSize, MHW_SAMPLER_STATE_ALIGN);
358 
359     return heapSize;
360 }
361 
AddSampler(void * samplerParam,int index,int bteIndex)362 int CmMediaState::AddSampler(void *samplerParam, int index, int bteIndex)
363 {
364     uint32_t offset;
365     MHW_SAMPLER_STATE_PARAM *param = (MHW_SAMPLER_STATE_PARAM *)samplerParam;
366     uint32_t elementSize = (param->SamplerType == MHW_SAMPLER_TYPE_3D)?m_3dSamplerElementSize:m_avsSamplerElementSize;
367     if (bteIndex == -1)
368     {
369         uint32_t *nextOffset = (param->SamplerType == MHW_SAMPLER_TYPE_3D)?m_next3dSamplerOffsets:m_nextAvsSamplerOffsets;
370         offset = nextOffset[index];
371         nextOffset[index] += elementSize;
372     }
373     else
374     {
375         offset = bteIndex * elementSize;
376     }
377 
378     if (param->SamplerType == MHW_SAMPLER_TYPE_3D)
379     {
380         param->Unorm.IndirectStateOffset = m_samplerHeapOffsetInternal + m_samplerOffsets[index] + m_nextIndStateOffsets[index];
381         m_nextIndStateOffsets[index] += m_cmhal->renderHal->pMhwStateHeap->m_HwSizes.dwSizeSamplerIndirectState;
382     }
383 
384     uint32_t heapOffset = m_samplerHeapOffsetInternal + m_samplerOffsets[index] + offset;
385     MOS_STATUS ret = m_cmhal->renderHal->pMhwStateHeap->AddSamplerStateData(heapOffset, &m_memoryBlock, param);
386     if (ret != MOS_STATUS_SUCCESS)
387     {
388         return -1;
389     }
390     return offset/elementSize;
391 }
392