1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42
43 namespace Addr
44 {
45
46 /**
47 ************************************************************************************************************************
48 * Gfx9HwlInit
49 *
50 * @brief
51 * Creates an Gfx9Lib object.
52 *
53 * @return
54 * Returns an Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59 return V2::Gfx9Lib::CreateObj(pClient);
60 }
61
62 namespace V2
63 {
64
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 // Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68
69 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
70 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
71 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
72 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
73 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
74 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
75
76 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
77 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
78 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
79 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
80
81 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
82 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
83 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
84 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
85
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90
91 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
92 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
93 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
94 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
95
96 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
97 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
98 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
99 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
100
101 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
102 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
103 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
104 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
105
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
111 };
112
113 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
114
115 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
116
117 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
118
119 /**
120 ************************************************************************************************************************
121 * Gfx9Lib::Gfx9Lib
122 *
123 * @brief
124 * Constructor
125 *
126 ************************************************************************************************************************
127 */
Gfx9Lib(const Client * pClient)128 Gfx9Lib::Gfx9Lib(const Client* pClient)
129 :
130 Lib(pClient)
131 {
132 memset(&m_settings, 0, sizeof(m_settings));
133 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
134 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
135 m_metaEqOverrideIndex = 0;
136 }
137
138 /**
139 ************************************************************************************************************************
140 * Gfx9Lib::~Gfx9Lib
141 *
142 * @brief
143 * Destructor
144 ************************************************************************************************************************
145 */
~Gfx9Lib()146 Gfx9Lib::~Gfx9Lib()
147 {
148 }
149
150 /**
151 ************************************************************************************************************************
152 * Gfx9Lib::HwlComputeHtileInfo
153 *
154 * @brief
155 * Interface function stub of AddrComputeHtilenfo
156 *
157 * @return
158 * ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
164 ) const
165 {
166 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
167 pIn->swizzleMode);
168
169 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
170
171 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
172
173 if ((numPipeTotal == 1) && (numRbTotal == 1))
174 {
175 numCompressBlkPerMetaBlkLog2 = 10;
176 }
177 else
178 {
179 if (m_settings.applyAliasFix)
180 {
181 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
182 }
183 else
184 {
185 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
186 }
187 }
188
189 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190
191 Dim3d metaBlkDim = {8, 8, 1};
192 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194 UINT_32 heightAmp = totalAmpBits - widthAmp;
195 metaBlkDim.w <<= widthAmp;
196 metaBlkDim.h <<= heightAmp;
197
198 #if DEBUG
199 Dim3d metaBlkDimDbg = {8, 8, 1};
200 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201 {
202 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204 {
205 metaBlkDimDbg.h <<= 1;
206 }
207 else
208 {
209 metaBlkDimDbg.w <<= 1;
210 }
211 }
212 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214
215 UINT_32 numMetaBlkX;
216 UINT_32 numMetaBlkY;
217 UINT_32 numMetaBlkZ;
218
219 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222
223 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
224 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225
226 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
227 {
228 align *= (numPipeTotal >> 1);
229 }
230
231 align = Max(align, metaBlkSize);
232
233 if (m_settings.metaBaseAlignFix)
234 {
235 align = Max(align, GetBlockSize(pIn->swizzleMode));
236 }
237
238 if (m_settings.htileAlignFix)
239 {
240 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
241 const INT_32 htileCachelineSizeLog2 = 11;
242 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
243
244 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
245
246 align <<= rbMaskPadding;
247 }
248
249 pOut->pitch = numMetaBlkX * metaBlkDim.w;
250 pOut->height = numMetaBlkY * metaBlkDim.h;
251 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
252
253 pOut->metaBlkWidth = metaBlkDim.w;
254 pOut->metaBlkHeight = metaBlkDim.h;
255 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
256
257 pOut->baseAlign = align;
258 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
259
260 return ADDR_OK;
261 }
262
263 /**
264 ************************************************************************************************************************
265 * Gfx9Lib::HwlComputeCmaskInfo
266 *
267 * @brief
268 * Interface function stub of AddrComputeCmaskInfo
269 *
270 * @return
271 * ADDR_E_RETURNCODE
272 ************************************************************************************************************************
273 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const274 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
275 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
276 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
277 ) const
278 {
279 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
280
281 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
282 pIn->swizzleMode);
283
284 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
285
286 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
287
288 if ((numPipeTotal == 1) && (numRbTotal == 1))
289 {
290 numCompressBlkPerMetaBlkLog2 = 13;
291 }
292 else
293 {
294 if (m_settings.applyAliasFix)
295 {
296 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
297 }
298 else
299 {
300 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
301 }
302
303 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
304 }
305
306 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
307
308 Dim2d metaBlkDim = {8, 8};
309 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
310 UINT_32 heightAmp = totalAmpBits >> 1;
311 UINT_32 widthAmp = totalAmpBits - heightAmp;
312 metaBlkDim.w <<= widthAmp;
313 metaBlkDim.h <<= heightAmp;
314
315 #if DEBUG
316 Dim2d metaBlkDimDbg = {8, 8};
317 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
318 {
319 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
320 {
321 metaBlkDimDbg.h <<= 1;
322 }
323 else
324 {
325 metaBlkDimDbg.w <<= 1;
326 }
327 }
328 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
329 #endif
330
331 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
332 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
333 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
334
335 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
336
337 if (m_settings.metaBaseAlignFix)
338 {
339 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
340 }
341
342 pOut->pitch = numMetaBlkX * metaBlkDim.w;
343 pOut->height = numMetaBlkY * metaBlkDim.h;
344 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
345 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
346 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
347
348 pOut->metaBlkWidth = metaBlkDim.w;
349 pOut->metaBlkHeight = metaBlkDim.h;
350
351 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
352
353 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
354 UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
355 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
356 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
357 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
358
359 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
360 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
361 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
362
363 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
364
365 // Generate the CMASK address equation.
366 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
367 bool checked = false;
368 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
369 CoordTerm &bit = (*eq)[b];
370
371 unsigned c;
372 for (c = 0; c < bit.getsize(); c++) {
373 Coordinate &coord = bit[c];
374 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
375 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
376 }
377 for (; c < 5; c++)
378 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
379 }
380
381 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
382 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
383 CoordTerm &prev = (*eq)[b - 1];
384 CoordTerm &cur = (*eq)[b];
385
386 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
387 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
388 prev[0].getord() + 1 == cur[0].getord())
389 pOut->equation.gfx9.num_bits = b;
390 else
391 break;
392 }
393
394 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
395 pIn->swizzleMode);
396
397 return ADDR_OK;
398 }
399
400 /**
401 ************************************************************************************************************************
402 * Gfx9Lib::GetMetaMipInfo
403 *
404 * @brief
405 * Get meta mip info
406 *
407 * @return
408 * N/A
409 ************************************************************************************************************************
410 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const411 VOID Gfx9Lib::GetMetaMipInfo(
412 UINT_32 numMipLevels, ///< [in] number of mip levels
413 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
414 BOOL_32 dataThick, ///< [in] data surface is thick
415 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
416 UINT_32 mip0Width, ///< [in] mip0 width
417 UINT_32 mip0Height, ///< [in] mip0 height
418 UINT_32 mip0Depth, ///< [in] mip0 depth
419 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
420 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
421 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
422 const
423 {
424 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
425 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
426 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
427 UINT_32 tailWidth = pMetaBlkDim->w;
428 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
429 UINT_32 tailDepth = pMetaBlkDim->d;
430 BOOL_32 inTail = FALSE;
431 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
432
433 if (numMipLevels > 1)
434 {
435 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
436 {
437 // Z major
438 major = ADDR_MAJOR_Z;
439 }
440 else if (numMetaBlkX >= numMetaBlkY)
441 {
442 // X major
443 major = ADDR_MAJOR_X;
444 }
445 else
446 {
447 // Y major
448 major = ADDR_MAJOR_Y;
449 }
450
451 inTail = ((mip0Width <= tailWidth) &&
452 (mip0Height <= tailHeight) &&
453 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
454
455 if (inTail == FALSE)
456 {
457 UINT_32 orderLimit;
458 UINT_32 *pMipDim;
459 UINT_32 *pOrderDim;
460
461 if (major == ADDR_MAJOR_Z)
462 {
463 // Z major
464 pMipDim = &numMetaBlkY;
465 pOrderDim = &numMetaBlkZ;
466 orderLimit = 4;
467 }
468 else if (major == ADDR_MAJOR_X)
469 {
470 // X major
471 pMipDim = &numMetaBlkY;
472 pOrderDim = &numMetaBlkX;
473 orderLimit = 4;
474 }
475 else
476 {
477 // Y major
478 pMipDim = &numMetaBlkX;
479 pOrderDim = &numMetaBlkY;
480 orderLimit = 2;
481 }
482
483 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
484 {
485 *pMipDim += 2;
486 }
487 else
488 {
489 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
490 }
491 }
492 }
493
494 if (pInfo != NULL)
495 {
496 UINT_32 mipWidth = mip0Width;
497 UINT_32 mipHeight = mip0Height;
498 UINT_32 mipDepth = mip0Depth;
499 Dim3d mipCoord = {0};
500
501 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
502 {
503 if (inTail)
504 {
505 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
506 pMetaBlkDim);
507 break;
508 }
509 else
510 {
511 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
512 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
513 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
514
515 pInfo[mip].inMiptail = FALSE;
516 pInfo[mip].startX = mipCoord.w;
517 pInfo[mip].startY = mipCoord.h;
518 pInfo[mip].startZ = mipCoord.d;
519 pInfo[mip].width = mipWidth;
520 pInfo[mip].height = mipHeight;
521 pInfo[mip].depth = dataThick ? mipDepth : 1;
522
523 if ((mip >= 3) || (mip & 1))
524 {
525 switch (major)
526 {
527 case ADDR_MAJOR_X:
528 mipCoord.w += mipWidth;
529 break;
530 case ADDR_MAJOR_Y:
531 mipCoord.h += mipHeight;
532 break;
533 case ADDR_MAJOR_Z:
534 mipCoord.d += mipDepth;
535 break;
536 default:
537 break;
538 }
539 }
540 else
541 {
542 switch (major)
543 {
544 case ADDR_MAJOR_X:
545 mipCoord.h += mipHeight;
546 break;
547 case ADDR_MAJOR_Y:
548 mipCoord.w += mipWidth;
549 break;
550 case ADDR_MAJOR_Z:
551 mipCoord.h += mipHeight;
552 break;
553 default:
554 break;
555 }
556 }
557
558 mipWidth = Max(mipWidth >> 1, 1u);
559 mipHeight = Max(mipHeight >> 1, 1u);
560 mipDepth = Max(mipDepth >> 1, 1u);
561
562 inTail = ((mipWidth <= tailWidth) &&
563 (mipHeight <= tailHeight) &&
564 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
565 }
566 }
567 }
568
569 *pNumMetaBlkX = numMetaBlkX;
570 *pNumMetaBlkY = numMetaBlkY;
571 *pNumMetaBlkZ = numMetaBlkZ;
572 }
573
574 /**
575 ************************************************************************************************************************
576 * Gfx9Lib::HwlComputeDccInfo
577 *
578 * @brief
579 * Interface function to compute DCC key info
580 *
581 * @return
582 * ADDR_E_RETURNCODE
583 ************************************************************************************************************************
584 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const585 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
586 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
587 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
588 ) const
589 {
590 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
591 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
592 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
593
594 if (dataLinear)
595 {
596 metaLinear = TRUE;
597 }
598 else if (metaLinear == TRUE)
599 {
600 pipeAligned = FALSE;
601 }
602
603 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
604
605 if (metaLinear)
606 {
607 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
608 ADDR_ASSERT_ALWAYS();
609
610 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
611 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
612 }
613 else
614 {
615 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
616
617 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
618
619 UINT_32 numFrags = Max(pIn->numFrags, 1u);
620 UINT_32 numSlices = Max(pIn->numSlices, 1u);
621
622 minMetaBlkSize /= numFrags;
623
624 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
625
626 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
627
628 if ((numPipeTotal > 1) || (numRbTotal > 1))
629 {
630 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
631
632 numCompressBlkPerMetaBlk =
633 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
634
635 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
636 {
637 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
638 }
639 }
640
641 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
642 Dim3d metaBlkDim = compressBlkDim;
643
644 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
645 {
646 if ((metaBlkDim.h < metaBlkDim.w) ||
647 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
648 {
649 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
650 {
651 metaBlkDim.h <<= 1;
652 }
653 else
654 {
655 metaBlkDim.d <<= 1;
656 }
657 }
658 else
659 {
660 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
661 {
662 metaBlkDim.w <<= 1;
663 }
664 else
665 {
666 metaBlkDim.d <<= 1;
667 }
668 }
669 }
670
671 UINT_32 numMetaBlkX;
672 UINT_32 numMetaBlkY;
673 UINT_32 numMetaBlkZ;
674
675 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
676 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
677 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
678
679 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
680
681 if (numFrags > m_maxCompFrag)
682 {
683 sizeAlign *= (numFrags / m_maxCompFrag);
684 }
685
686 if (m_settings.metaBaseAlignFix)
687 {
688 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
689 }
690
691 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
692 numCompressBlkPerMetaBlk * numFrags;
693 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
694 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
695
696 pOut->pitch = numMetaBlkX * metaBlkDim.w;
697 pOut->height = numMetaBlkY * metaBlkDim.h;
698 pOut->depth = numMetaBlkZ * metaBlkDim.d;
699
700 pOut->compressBlkWidth = compressBlkDim.w;
701 pOut->compressBlkHeight = compressBlkDim.h;
702 pOut->compressBlkDepth = compressBlkDim.d;
703
704 pOut->metaBlkWidth = metaBlkDim.w;
705 pOut->metaBlkHeight = metaBlkDim.h;
706 pOut->metaBlkDepth = metaBlkDim.d;
707 pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
708
709 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
710 pOut->fastClearSizePerSlice =
711 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
712
713 // Get the DCC address equation (copied from DccAddrFromCoord)
714 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
715 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
716 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
717 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
718 UINT_32 metaBlkDepthLog2 = Log2(pOut->metaBlkDepth);
719 UINT_32 compBlkWidthLog2 = Log2(pOut->compressBlkWidth);
720 UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
721 UINT_32 compBlkDepthLog2 = Log2(pOut->compressBlkDepth);
722
723 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
724 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
725 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
726 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
727
728 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
729
730 // Generate the DCC address equation.
731 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
732 bool checked = false;
733 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
734 CoordTerm &bit = (*eq)[b];
735
736 unsigned c;
737 for (c = 0; c < bit.getsize(); c++) {
738 Coordinate &coord = bit[c];
739 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
740 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
741 }
742 for (; c < 5; c++)
743 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
744 }
745
746 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
747 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
748 CoordTerm &prev = (*eq)[b - 1];
749 CoordTerm &cur = (*eq)[b];
750
751 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
752 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
753 prev[0].getord() + 1 == cur[0].getord())
754 pOut->equation.gfx9.num_bits = b;
755 else
756 break;
757 }
758
759 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
760 pIn->swizzleMode);
761 }
762
763 return ADDR_OK;
764 }
765
766 /**
767 ************************************************************************************************************************
768 * Gfx9Lib::HwlComputeMaxBaseAlignments
769 *
770 * @brief
771 * Gets maximum alignments
772 * @return
773 * maximum alignments
774 ************************************************************************************************************************
775 */
HwlComputeMaxBaseAlignments() const776 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
777 {
778 return Size64K;
779 }
780
781 /**
782 ************************************************************************************************************************
783 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
784 *
785 * @brief
786 * Gets maximum alignments for metadata
787 * @return
788 * maximum alignments for metadata
789 ************************************************************************************************************************
790 */
HwlComputeMaxMetaBaseAlignments() const791 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
792 {
793 // Max base alignment for Htile
794 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
795 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
796
797 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
798 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
799 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
800 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
801
802 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
803
804 if (maxNumPipeTotal > 2)
805 {
806 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
807 }
808
809 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
810
811 if (m_settings.metaBaseAlignFix)
812 {
813 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
814 }
815
816 if (m_settings.htileAlignFix)
817 {
818 maxBaseAlignHtile *= maxNumPipeTotal;
819 }
820
821 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
822
823 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
824 UINT_32 maxBaseAlignDcc3D = 65536;
825
826 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
827 {
828 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
829 }
830
831 // Max base alignment for Msaa Dcc
832 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
833
834 if (m_settings.metaBaseAlignFix)
835 {
836 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
837 }
838
839 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
840 }
841
842 /**
843 ************************************************************************************************************************
844 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
845 *
846 * @brief
847 * Interface function stub of AddrComputeCmaskAddrFromCoord
848 *
849 * @return
850 * ADDR_E_RETURNCODE
851 ************************************************************************************************************************
852 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)853 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
854 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
855 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
856 {
857 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
858 input.size = sizeof(input);
859 input.cMaskFlags = pIn->cMaskFlags;
860 input.colorFlags = pIn->colorFlags;
861 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
862 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
863 input.numSlices = Max(pIn->numSlices, 1u);
864 input.swizzleMode = pIn->swizzleMode;
865 input.resourceType = pIn->resourceType;
866
867 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
868 output.size = sizeof(output);
869
870 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
871
872 if (returnCode == ADDR_OK)
873 {
874 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
875 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
876 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
877 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
878
879 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
880 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
881 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
882
883 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
884
885 UINT_32 xb = pIn->x / output.metaBlkWidth;
886 UINT_32 yb = pIn->y / output.metaBlkHeight;
887 UINT_32 zb = pIn->slice;
888
889 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
890 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
891 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
892
893 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
894 UINT_64 address = pMetaEq->solve(coords);
895
896 pOut->addr = address >> 1;
897 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
898
899
900 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
901 pIn->swizzleMode);
902
903 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
904
905 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
906 }
907
908 return returnCode;
909 }
910
911 /**
912 ************************************************************************************************************************
913 * Gfx9Lib::HwlComputeHtileAddrFromCoord
914 *
915 * @brief
916 * Interface function stub of AddrComputeHtileAddrFromCoord
917 *
918 * @return
919 * ADDR_E_RETURNCODE
920 ************************************************************************************************************************
921 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)922 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
923 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
924 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
925 {
926 ADDR_E_RETURNCODE returnCode = ADDR_OK;
927
928 if (pIn->numMipLevels > 1)
929 {
930 returnCode = ADDR_NOTIMPLEMENTED;
931 }
932 else
933 {
934 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
935 input.size = sizeof(input);
936 input.hTileFlags = pIn->hTileFlags;
937 input.depthFlags = pIn->depthflags;
938 input.swizzleMode = pIn->swizzleMode;
939 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
940 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
941 input.numSlices = Max(pIn->numSlices, 1u);
942 input.numMipLevels = Max(pIn->numMipLevels, 1u);
943
944 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
945 output.size = sizeof(output);
946
947 returnCode = ComputeHtileInfo(&input, &output);
948
949 if (returnCode == ADDR_OK)
950 {
951 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
952 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
953 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
954 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
955
956 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
957 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
958 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
959
960 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
961
962 UINT_32 xb = pIn->x / output.metaBlkWidth;
963 UINT_32 yb = pIn->y / output.metaBlkHeight;
964 UINT_32 zb = pIn->slice;
965
966 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
967 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
968 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
969
970 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
971 UINT_64 address = pMetaEq->solve(coords);
972
973 pOut->addr = address >> 1;
974
975 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
976 pIn->swizzleMode);
977
978 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
979
980 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
981 }
982 }
983
984 return returnCode;
985 }
986
987 /**
988 ************************************************************************************************************************
989 * Gfx9Lib::HwlComputeHtileCoordFromAddr
990 *
991 * @brief
992 * Interface function stub of AddrComputeHtileCoordFromAddr
993 *
994 * @return
995 * ADDR_E_RETURNCODE
996 ************************************************************************************************************************
997 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)998 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
999 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
1000 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
1001 {
1002 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1003
1004 if (pIn->numMipLevels > 1)
1005 {
1006 returnCode = ADDR_NOTIMPLEMENTED;
1007 }
1008 else
1009 {
1010 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
1011 input.size = sizeof(input);
1012 input.hTileFlags = pIn->hTileFlags;
1013 input.swizzleMode = pIn->swizzleMode;
1014 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
1015 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1016 input.numSlices = Max(pIn->numSlices, 1u);
1017 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1018
1019 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1020 output.size = sizeof(output);
1021
1022 returnCode = ComputeHtileInfo(&input, &output);
1023
1024 if (returnCode == ADDR_OK)
1025 {
1026 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1027 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1028 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1029 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
1030
1031 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1032 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1033 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1034
1035 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1036
1037 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1038 pIn->swizzleMode);
1039
1040 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1041
1042 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1043
1044 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1045 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1046
1047 UINT_32 coords[NUM_DIMS];
1048 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1049
1050 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1051 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1052 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1053 }
1054 }
1055
1056 return returnCode;
1057 }
1058
1059 /**
1060 ************************************************************************************************************************
1061 * Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1062 *
1063 * @brief
1064 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1065 *
1066 * @return
1067 * ADDR_E_RETURNCODE
1068 ************************************************************************************************************************
1069 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1070 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1071 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1072 {
1073 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1074
1075 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1076 {
1077 returnCode = ADDR_NOTSUPPORTED;
1078 }
1079 else if ((pIn->pitch == 0) ||
1080 (pIn->height == 0) ||
1081 (pIn->compressBlkWidth == 0) ||
1082 (pIn->compressBlkHeight == 0) ||
1083 (pIn->compressBlkDepth == 0) ||
1084 (pIn->metaBlkWidth == 0) ||
1085 (pIn->metaBlkHeight == 0) ||
1086 (pIn->metaBlkDepth == 0) ||
1087 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1088 {
1089 returnCode = ADDR_NOTSUPPORTED;
1090 }
1091
1092 return returnCode;
1093 }
1094
1095 /**
1096 ************************************************************************************************************************
1097 * Gfx9Lib::HwlComputeDccAddrFromCoord
1098 *
1099 * @brief
1100 * Interface function stub of AddrComputeDccAddrFromCoord
1101 *
1102 * @return
1103 * N/A
1104 ************************************************************************************************************************
1105 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1106 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1107 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
1108 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1109 {
1110 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1111 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1112 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
1113 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1114 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
1115 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
1116 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1117 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
1118
1119 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1120 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1121 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1122 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1123
1124 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1125
1126 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1127 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1128 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1129
1130 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1131 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1132 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1133
1134 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1135 UINT_64 address = pMetaEq->solve(coords);
1136
1137 pOut->addr = address >> 1;
1138
1139 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1140 pIn->swizzleMode);
1141
1142 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1143
1144 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1145 }
1146
1147 /**
1148 ************************************************************************************************************************
1149 * Gfx9Lib::HwlInitGlobalParams
1150 *
1151 * @brief
1152 * Initializes global parameters
1153 *
1154 * @return
1155 * TRUE if all settings are valid
1156 *
1157 ************************************************************************************************************************
1158 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1159 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1160 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1161 {
1162 BOOL_32 valid = TRUE;
1163
1164 if (m_settings.isArcticIsland)
1165 {
1166 GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1167
1168 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1169
1170 // These values are copied from CModel code
1171 switch (gbAddrConfig.bits.NUM_PIPES)
1172 {
1173 case ADDR_CONFIG_1_PIPE:
1174 m_pipes = 1;
1175 m_pipesLog2 = 0;
1176 break;
1177 case ADDR_CONFIG_2_PIPE:
1178 m_pipes = 2;
1179 m_pipesLog2 = 1;
1180 break;
1181 case ADDR_CONFIG_4_PIPE:
1182 m_pipes = 4;
1183 m_pipesLog2 = 2;
1184 break;
1185 case ADDR_CONFIG_8_PIPE:
1186 m_pipes = 8;
1187 m_pipesLog2 = 3;
1188 break;
1189 case ADDR_CONFIG_16_PIPE:
1190 m_pipes = 16;
1191 m_pipesLog2 = 4;
1192 break;
1193 case ADDR_CONFIG_32_PIPE:
1194 m_pipes = 32;
1195 m_pipesLog2 = 5;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1203 {
1204 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1205 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1206 m_pipeInterleaveLog2 = 8;
1207 break;
1208 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1209 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1210 m_pipeInterleaveLog2 = 9;
1211 break;
1212 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1213 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1214 m_pipeInterleaveLog2 = 10;
1215 break;
1216 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1217 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1218 m_pipeInterleaveLog2 = 11;
1219 break;
1220 default:
1221 ADDR_ASSERT_ALWAYS();
1222 break;
1223 }
1224
1225 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1226 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1227 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1228
1229 switch (gbAddrConfig.bits.NUM_BANKS)
1230 {
1231 case ADDR_CONFIG_1_BANK:
1232 m_banks = 1;
1233 m_banksLog2 = 0;
1234 break;
1235 case ADDR_CONFIG_2_BANK:
1236 m_banks = 2;
1237 m_banksLog2 = 1;
1238 break;
1239 case ADDR_CONFIG_4_BANK:
1240 m_banks = 4;
1241 m_banksLog2 = 2;
1242 break;
1243 case ADDR_CONFIG_8_BANK:
1244 m_banks = 8;
1245 m_banksLog2 = 3;
1246 break;
1247 case ADDR_CONFIG_16_BANK:
1248 m_banks = 16;
1249 m_banksLog2 = 4;
1250 break;
1251 default:
1252 ADDR_ASSERT_ALWAYS();
1253 break;
1254 }
1255
1256 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1257 {
1258 case ADDR_CONFIG_1_SHADER_ENGINE:
1259 m_se = 1;
1260 m_seLog2 = 0;
1261 break;
1262 case ADDR_CONFIG_2_SHADER_ENGINE:
1263 m_se = 2;
1264 m_seLog2 = 1;
1265 break;
1266 case ADDR_CONFIG_4_SHADER_ENGINE:
1267 m_se = 4;
1268 m_seLog2 = 2;
1269 break;
1270 case ADDR_CONFIG_8_SHADER_ENGINE:
1271 m_se = 8;
1272 m_seLog2 = 3;
1273 break;
1274 default:
1275 ADDR_ASSERT_ALWAYS();
1276 break;
1277 }
1278
1279 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1280 {
1281 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1282 m_rbPerSe = 1;
1283 m_rbPerSeLog2 = 0;
1284 break;
1285 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1286 m_rbPerSe = 2;
1287 m_rbPerSeLog2 = 1;
1288 break;
1289 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1290 m_rbPerSe = 4;
1291 m_rbPerSeLog2 = 2;
1292 break;
1293 default:
1294 ADDR_ASSERT_ALWAYS();
1295 break;
1296 }
1297
1298 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1299 {
1300 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1301 m_maxCompFrag = 1;
1302 m_maxCompFragLog2 = 0;
1303 break;
1304 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1305 m_maxCompFrag = 2;
1306 m_maxCompFragLog2 = 1;
1307 break;
1308 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1309 m_maxCompFrag = 4;
1310 m_maxCompFragLog2 = 2;
1311 break;
1312 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1313 m_maxCompFrag = 8;
1314 m_maxCompFragLog2 = 3;
1315 break;
1316 default:
1317 ADDR_ASSERT_ALWAYS();
1318 break;
1319 }
1320
1321 if ((m_rbPerSeLog2 == 1) &&
1322 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1323 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1324 {
1325 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1326
1327 ADDR_ASSERT(m_settings.isRaven == FALSE);
1328
1329 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1330
1331 if (m_settings.isVega12)
1332 {
1333 m_settings.htileCacheRbConflict = 1;
1334 }
1335 }
1336
1337 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1338 m_blockVarSizeLog2 = 0;
1339 }
1340 else
1341 {
1342 valid = FALSE;
1343 ADDR_NOT_IMPLEMENTED();
1344 }
1345
1346 if (valid)
1347 {
1348 InitEquationTable();
1349 }
1350
1351 return valid;
1352 }
1353
1354 /**
1355 ************************************************************************************************************************
1356 * Gfx9Lib::HwlConvertChipFamily
1357 *
1358 * @brief
1359 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1360 * @return
1361 * ChipFamily
1362 ************************************************************************************************************************
1363 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1364 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1365 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1366 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1367 {
1368 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1369
1370 switch (uChipFamily)
1371 {
1372 case FAMILY_AI:
1373 m_settings.isArcticIsland = 1;
1374 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1375 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1376 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1377 m_settings.isDce12 = 1;
1378
1379 if (m_settings.isVega10 == 0)
1380 {
1381 m_settings.htileAlignFix = 1;
1382 m_settings.applyAliasFix = 1;
1383 }
1384
1385 m_settings.metaBaseAlignFix = 1;
1386
1387 m_settings.depthPipeXorDisable = 1;
1388 break;
1389 case FAMILY_RV:
1390 m_settings.isArcticIsland = 1;
1391
1392 if (ASICREV_IS_RAVEN(uChipRevision))
1393 {
1394 m_settings.isRaven = 1;
1395
1396 m_settings.depthPipeXorDisable = 1;
1397 }
1398
1399 if (ASICREV_IS_RAVEN2(uChipRevision))
1400 {
1401 m_settings.isRaven = 1;
1402 }
1403
1404 if (m_settings.isRaven == 0)
1405 {
1406 m_settings.htileAlignFix = 1;
1407 m_settings.applyAliasFix = 1;
1408 }
1409
1410 m_settings.isDcn1 = m_settings.isRaven;
1411
1412 if (ASICREV_IS_RENOIR(uChipRevision))
1413 {
1414 m_settings.isRaven = 1;
1415 m_settings.isDcn2 = 1;
1416 }
1417
1418 m_settings.metaBaseAlignFix = 1;
1419 break;
1420
1421 default:
1422 ADDR_ASSERT(!"No Chip found");
1423 break;
1424 }
1425
1426 return family;
1427 }
1428
1429 /**
1430 ************************************************************************************************************************
1431 * Gfx9Lib::InitRbEquation
1432 *
1433 * @brief
1434 * Init RB equation
1435 * @return
1436 * N/A
1437 ************************************************************************************************************************
1438 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1439 VOID Gfx9Lib::GetRbEquation(
1440 CoordEq* pRbEq, ///< [out] rb equation
1441 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1442 UINT_32 numSeLog2) ///< [in] number of shader engine
1443 const
1444 {
1445 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1446 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1447 Coordinate cx(DIM_X, rbRegion);
1448 Coordinate cy(DIM_Y, rbRegion);
1449
1450 UINT_32 start = 0;
1451 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1452
1453 // Clear the rb equation
1454 pRbEq->resize(0);
1455 pRbEq->resize(numRbTotalLog2);
1456
1457 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1458 {
1459 // Special case when more than 1 SE, and 2 RB per SE
1460 (*pRbEq)[0].add(cx);
1461 (*pRbEq)[0].add(cy);
1462 cx++;
1463 cy++;
1464
1465 if (m_settings.applyAliasFix == false)
1466 {
1467 (*pRbEq)[0].add(cy);
1468 }
1469
1470 (*pRbEq)[0].add(cy);
1471 start++;
1472 }
1473
1474 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1475
1476 for (UINT_32 i = 0; i < numBits; i++)
1477 {
1478 UINT_32 idx =
1479 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1480
1481 if ((i % 2) == 1)
1482 {
1483 (*pRbEq)[idx].add(cx);
1484 cx++;
1485 }
1486 else
1487 {
1488 (*pRbEq)[idx].add(cy);
1489 cy++;
1490 }
1491 }
1492 }
1493
1494 /**
1495 ************************************************************************************************************************
1496 * Gfx9Lib::GetDataEquation
1497 *
1498 * @brief
1499 * Get data equation for fmask and Z
1500 * @return
1501 * N/A
1502 ************************************************************************************************************************
1503 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1504 VOID Gfx9Lib::GetDataEquation(
1505 CoordEq* pDataEq, ///< [out] data surface equation
1506 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1507 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1508 AddrResourceType resourceType, ///< [in] data surface resource type
1509 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1510 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1511 const
1512 {
1513 Coordinate cx(DIM_X, 0);
1514 Coordinate cy(DIM_Y, 0);
1515 Coordinate cz(DIM_Z, 0);
1516 Coordinate cs(DIM_S, 0);
1517
1518 // Clear the equation
1519 pDataEq->resize(0);
1520 pDataEq->resize(27);
1521
1522 if (dataSurfaceType == Gfx9DataColor)
1523 {
1524 if (IsLinear(swizzleMode))
1525 {
1526 Coordinate cm(DIM_M, 0);
1527
1528 pDataEq->resize(49);
1529
1530 for (UINT_32 i = 0; i < 49; i++)
1531 {
1532 (*pDataEq)[i].add(cm);
1533 cm++;
1534 }
1535 }
1536 else if (IsThick(resourceType, swizzleMode))
1537 {
1538 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1539 UINT_32 i;
1540 if (IsStandardSwizzle(resourceType, swizzleMode))
1541 {
1542 // Standard 3d swizzle
1543 // Fill in bottom x bits
1544 for (i = elementBytesLog2; i < 4; i++)
1545 {
1546 (*pDataEq)[i].add(cx);
1547 cx++;
1548 }
1549 // Fill in 2 bits of y and then z
1550 for (i = 4; i < 6; i++)
1551 {
1552 (*pDataEq)[i].add(cy);
1553 cy++;
1554 }
1555 for (i = 6; i < 8; i++)
1556 {
1557 (*pDataEq)[i].add(cz);
1558 cz++;
1559 }
1560 if (elementBytesLog2 < 2)
1561 {
1562 // fill in z & y bit
1563 (*pDataEq)[8].add(cz);
1564 (*pDataEq)[9].add(cy);
1565 cz++;
1566 cy++;
1567 }
1568 else if (elementBytesLog2 == 2)
1569 {
1570 // fill in y and x bit
1571 (*pDataEq)[8].add(cy);
1572 (*pDataEq)[9].add(cx);
1573 cy++;
1574 cx++;
1575 }
1576 else
1577 {
1578 // fill in 2 x bits
1579 (*pDataEq)[8].add(cx);
1580 cx++;
1581 (*pDataEq)[9].add(cx);
1582 cx++;
1583 }
1584 }
1585 else
1586 {
1587 // Z 3d swizzle
1588 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1589 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1590 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1591 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1592 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1593 {
1594 (*pDataEq)[i].add(cz);
1595 cz++;
1596 }
1597 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1598 {
1599 // add an x and z
1600 (*pDataEq)[6].add(cx);
1601 (*pDataEq)[7].add(cz);
1602 cx++;
1603 cz++;
1604 }
1605 else if (elementBytesLog2 == 2)
1606 {
1607 // add a y and z
1608 (*pDataEq)[6].add(cy);
1609 (*pDataEq)[7].add(cz);
1610 cy++;
1611 cz++;
1612 }
1613 // add y and x
1614 (*pDataEq)[8].add(cy);
1615 (*pDataEq)[9].add(cx);
1616 cy++;
1617 cx++;
1618 }
1619 // Fill in bit 10 and up
1620 pDataEq->mort3d( cz, cy, cx, 10 );
1621 }
1622 else if (IsThin(resourceType, swizzleMode))
1623 {
1624 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1625 // Color 2D
1626 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1627 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1628 UINT_32 i;
1629 // Fill in bottom x bits
1630 for (i = elementBytesLog2; i < 4; i++)
1631 {
1632 (*pDataEq)[i].add(cx);
1633 cx++;
1634 }
1635 // Fill in bottom y bits
1636 for (i = 4; i < 4 + microYBits; i++)
1637 {
1638 (*pDataEq)[i].add(cy);
1639 cy++;
1640 }
1641 // Fill in last of the micro_x bits
1642 for (i = 4 + microYBits; i < 8; i++)
1643 {
1644 (*pDataEq)[i].add(cx);
1645 cx++;
1646 }
1647 // Fill in x/y bits below sample split
1648 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1649 // Fill in sample bits
1650 for (i = 0; i < numSamplesLog2; i++)
1651 {
1652 cs.set(DIM_S, i);
1653 (*pDataEq)[tileSplitStart + i].add(cs);
1654 }
1655 // Fill in x/y bits above sample split
1656 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1657 {
1658 pDataEq->mort2d(cx, cy, blockSizeLog2);
1659 }
1660 else
1661 {
1662 pDataEq->mort2d(cy, cx, blockSizeLog2);
1663 }
1664 }
1665 else
1666 {
1667 ADDR_ASSERT_ALWAYS();
1668 }
1669 }
1670 else
1671 {
1672 // Fmask or depth
1673 UINT_32 sampleStart = elementBytesLog2;
1674 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1675 UINT_32 ymajStart = 6 + numSamplesLog2;
1676
1677 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1678 {
1679 cs.set(DIM_S, s);
1680 (*pDataEq)[sampleStart + s].add(cs);
1681 }
1682
1683 // Put in the x-major order pixel bits
1684 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1685 // Put in the y-major order pixel bits
1686 pDataEq->mort2d(cy, cx, ymajStart);
1687 }
1688 }
1689
1690 /**
1691 ************************************************************************************************************************
1692 * Gfx9Lib::GetPipeEquation
1693 *
1694 * @brief
1695 * Get pipe equation
1696 * @return
1697 * N/A
1698 ************************************************************************************************************************
1699 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1700 VOID Gfx9Lib::GetPipeEquation(
1701 CoordEq* pPipeEq, ///< [out] pipe equation
1702 CoordEq* pDataEq, ///< [in] data equation
1703 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1704 UINT_32 numPipeLog2, ///< [in] number of pipes
1705 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1706 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1707 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1708 AddrResourceType resourceType ///< [in] data surface resource type
1709 ) const
1710 {
1711 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1712 CoordEq dataEq;
1713
1714 pDataEq->copy(dataEq);
1715
1716 if (dataSurfaceType == Gfx9DataColor)
1717 {
1718 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1719 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1720 }
1721
1722 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1723
1724 // This section should only apply to z/stencil, maybe fmask
1725 // If the pipe bit is below the comp block size,
1726 // then keep moving up the address until we find a bit that is above
1727 UINT_32 pipeStart = 0;
1728
1729 if (dataSurfaceType != Gfx9DataColor)
1730 {
1731 Coordinate tileMin(DIM_X, 3);
1732
1733 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1734 {
1735 pipeStart++;
1736 }
1737
1738 // if pipe is 0, then the first pipe bit is above the comp block size,
1739 // so we don't need to do anything
1740 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1741 // we will get the same pipe equation
1742 if (pipeStart != 0)
1743 {
1744 for (UINT_32 i = 0; i < numPipeLog2; i++)
1745 {
1746 // Copy the jth bit above pipe interleave to the current pipe equation bit
1747 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1748 }
1749 }
1750 }
1751
1752 if (IsPrt(swizzleMode))
1753 {
1754 // Clear out bits above the block size if prt's are enabled
1755 dataEq.resize(blockSizeLog2);
1756 dataEq.resize(48);
1757 }
1758
1759 if (IsXor(swizzleMode))
1760 {
1761 CoordEq xorMask;
1762
1763 if (IsThick(resourceType, swizzleMode))
1764 {
1765 CoordEq xorMask2;
1766
1767 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1768
1769 xorMask.resize(numPipeLog2);
1770
1771 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1772 {
1773 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1774 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1775 }
1776 }
1777 else
1778 {
1779 // Xor in the bits above the pipe+gpu bits
1780 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1781
1782 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1783 {
1784 Coordinate co;
1785 CoordEq xorMask2;
1786 // if 1xaa and not prt, then xor in the z bits
1787 xorMask2.resize(0);
1788 xorMask2.resize(numPipeLog2);
1789 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1790 {
1791 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1792 xorMask2[pipeIdx].add(co);
1793 }
1794
1795 pPipeEq->xorin(xorMask2);
1796 }
1797 }
1798
1799 xorMask.reverse();
1800 pPipeEq->xorin(xorMask);
1801 }
1802 }
1803 /**
1804 ************************************************************************************************************************
1805 * Gfx9Lib::GetMetaEquation
1806 *
1807 * @brief
1808 * Get meta equation for cmask/htile/DCC
1809 * @return
1810 * Pointer to a calculated meta equation
1811 ************************************************************************************************************************
1812 */
GetMetaEquation(const MetaEqParams & metaEqParams)1813 const CoordEq* Gfx9Lib::GetMetaEquation(
1814 const MetaEqParams& metaEqParams)
1815 {
1816 UINT_32 cachedMetaEqIndex;
1817
1818 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1819 {
1820 if (memcmp(&metaEqParams,
1821 &m_cachedMetaEqKey[cachedMetaEqIndex],
1822 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1823 {
1824 break;
1825 }
1826 }
1827
1828 CoordEq* pMetaEq = NULL;
1829
1830 if (cachedMetaEqIndex < MaxCachedMetaEq)
1831 {
1832 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1833 }
1834 else
1835 {
1836 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1837
1838 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1839
1840 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1841
1842 GenMetaEquation(pMetaEq,
1843 metaEqParams.maxMip,
1844 metaEqParams.elementBytesLog2,
1845 metaEqParams.numSamplesLog2,
1846 metaEqParams.metaFlag,
1847 metaEqParams.dataSurfaceType,
1848 metaEqParams.swizzleMode,
1849 metaEqParams.resourceType,
1850 metaEqParams.metaBlkWidthLog2,
1851 metaEqParams.metaBlkHeightLog2,
1852 metaEqParams.metaBlkDepthLog2,
1853 metaEqParams.compBlkWidthLog2,
1854 metaEqParams.compBlkHeightLog2,
1855 metaEqParams.compBlkDepthLog2);
1856 }
1857
1858 return pMetaEq;
1859 }
1860
1861 /**
1862 ************************************************************************************************************************
1863 * Gfx9Lib::GenMetaEquation
1864 *
1865 * @brief
1866 * Get meta equation for cmask/htile/DCC
1867 * @return
1868 * N/A
1869 ************************************************************************************************************************
1870 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1871 VOID Gfx9Lib::GenMetaEquation(
1872 CoordEq* pMetaEq, ///< [out] meta equation
1873 UINT_32 maxMip, ///< [in] max mip Id
1874 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1875 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1876 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1877 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1878 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1879 AddrResourceType resourceType, ///< [in] data surface resource type
1880 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1881 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1882 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1883 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1884 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1885 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1886 const
1887 {
1888 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1889 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1890
1891 // Get the correct data address and rb equation
1892 CoordEq dataEq;
1893 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1894 elementBytesLog2, numSamplesLog2);
1895
1896 // Get pipe and rb equations
1897 CoordEq pipeEquation;
1898 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1899 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1900 numPipeTotalLog2 = pipeEquation.getsize();
1901
1902 if (metaFlag.linear)
1903 {
1904 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1905 ADDR_ASSERT_ALWAYS();
1906
1907 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1908
1909 dataEq.copy(*pMetaEq);
1910
1911 if (IsLinear(swizzleMode))
1912 {
1913 if (metaFlag.pipeAligned)
1914 {
1915 // Remove the pipe bits
1916 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1917 pMetaEq->shift(-shift, pipeInterleaveLog2);
1918 }
1919 // Divide by comp block size, which for linear (which is always color) is 256 B
1920 pMetaEq->shift(-8);
1921
1922 if (metaFlag.pipeAligned)
1923 {
1924 // Put pipe bits back in
1925 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1926
1927 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1928 {
1929 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1930 }
1931 }
1932 }
1933
1934 pMetaEq->shift(1);
1935 }
1936 else
1937 {
1938 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1939 UINT_32 compFragLog2 =
1940 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1941 maxCompFragLog2 : numSamplesLog2;
1942
1943 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1944
1945 // Make sure the metaaddr is cleared
1946 pMetaEq->resize(0);
1947 pMetaEq->resize(27);
1948
1949 if (IsThick(resourceType, swizzleMode))
1950 {
1951 Coordinate cx(DIM_X, 0);
1952 Coordinate cy(DIM_Y, 0);
1953 Coordinate cz(DIM_Z, 0);
1954
1955 if (maxMip > 0)
1956 {
1957 pMetaEq->mort3d(cy, cx, cz);
1958 }
1959 else
1960 {
1961 pMetaEq->mort3d(cx, cy, cz);
1962 }
1963 }
1964 else
1965 {
1966 Coordinate cx(DIM_X, 0);
1967 Coordinate cy(DIM_Y, 0);
1968 Coordinate cs;
1969
1970 if (maxMip > 0)
1971 {
1972 pMetaEq->mort2d(cy, cx, compFragLog2);
1973 }
1974 else
1975 {
1976 pMetaEq->mort2d(cx, cy, compFragLog2);
1977 }
1978
1979 //------------------------------------------------------------------------------------------------------------------------
1980 // Put the compressible fragments at the lsb
1981 // the uncompressible frags will be at the msb of the micro address
1982 //------------------------------------------------------------------------------------------------------------------------
1983 for (UINT_32 s = 0; s < compFragLog2; s++)
1984 {
1985 cs.set(DIM_S, s);
1986 (*pMetaEq)[s].add(cs);
1987 }
1988 }
1989
1990 // Keep a copy of the pipe equations
1991 CoordEq origPipeEquation;
1992 pipeEquation.copy(origPipeEquation);
1993
1994 Coordinate co;
1995 // filter out everything under the compressed block size
1996 co.set(DIM_X, compBlkWidthLog2);
1997 pMetaEq->Filter('<', co, 0, DIM_X);
1998 co.set(DIM_Y, compBlkHeightLog2);
1999 pMetaEq->Filter('<', co, 0, DIM_Y);
2000 co.set(DIM_Z, compBlkDepthLog2);
2001 pMetaEq->Filter('<', co, 0, DIM_Z);
2002
2003 // For non-color, filter out sample bits
2004 if (dataSurfaceType != Gfx9DataColor)
2005 {
2006 co.set(DIM_X, 0);
2007 pMetaEq->Filter('<', co, 0, DIM_S);
2008 }
2009
2010 // filter out everything above the metablock size
2011 co.set(DIM_X, metaBlkWidthLog2 - 1);
2012 pMetaEq->Filter('>', co, 0, DIM_X);
2013 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2014 pMetaEq->Filter('>', co, 0, DIM_Y);
2015 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2016 pMetaEq->Filter('>', co, 0, DIM_Z);
2017
2018 // filter out everything above the metablock size for the channel bits
2019 co.set(DIM_X, metaBlkWidthLog2 - 1);
2020 pipeEquation.Filter('>', co, 0, DIM_X);
2021 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2022 pipeEquation.Filter('>', co, 0, DIM_Y);
2023 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2024 pipeEquation.Filter('>', co, 0, DIM_Z);
2025
2026 // Make sure we still have the same number of channel bits
2027 if (pipeEquation.getsize() != numPipeTotalLog2)
2028 {
2029 ADDR_ASSERT_ALWAYS();
2030 }
2031
2032 // Loop through all channel and rb bits,
2033 // and make sure these components exist in the metadata address
2034 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2035 {
2036 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2037 {
2038 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2039 {
2040 ADDR_ASSERT_ALWAYS();
2041 }
2042 }
2043 }
2044
2045 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
2046 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2047 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2048 CoordEq origRbEquation;
2049
2050 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2051
2052 CoordEq rbEquation = origRbEquation;
2053
2054 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2055 {
2056 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2057 {
2058 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2059 {
2060 ADDR_ASSERT_ALWAYS();
2061 }
2062 }
2063 }
2064
2065 if (m_settings.applyAliasFix)
2066 {
2067 co.set(DIM_Z, -1);
2068 }
2069
2070 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2071 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2072 {
2073 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2074 {
2075 BOOL_32 isRbEquationInPipeEquation = FALSE;
2076
2077 if (m_settings.applyAliasFix)
2078 {
2079 CoordTerm filteredPipeEq;
2080 filteredPipeEq = pipeEquation[j];
2081
2082 filteredPipeEq.Filter('>', co, 0, DIM_Z);
2083
2084 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2085 }
2086 else
2087 {
2088 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2089 }
2090
2091 if (isRbEquationInPipeEquation)
2092 {
2093 rbEquation[i].Clear();
2094 }
2095 }
2096 }
2097
2098 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2099
2100 // Loop through each bit of the channel, get the smallest coordinate,
2101 // and remove it from the metaaddr, and rb_equation
2102 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2103 {
2104 pipeEquation[i].getsmallest(co);
2105
2106 UINT_32 old_size = pMetaEq->getsize();
2107 pMetaEq->Filter('=', co);
2108 UINT_32 new_size = pMetaEq->getsize();
2109 if (new_size != old_size-1)
2110 {
2111 ADDR_ASSERT_ALWAYS();
2112 }
2113 pipeEquation.remove(co);
2114 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2115 {
2116 if (rbEquation[j].remove(co))
2117 {
2118 // if we actually removed something from this bit, then add the remaining
2119 // channel bits, as these can be removed for this bit
2120 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2121 {
2122 if (pipeEquation[i][k] != co)
2123 {
2124 rbEquation[j].add(pipeEquation[i][k]);
2125 rbAppendedWithPipeBits[j] = true;
2126 }
2127 }
2128 }
2129 }
2130 }
2131
2132 // Loop through the rb bits and see what remain;
2133 // filter out the smallest coordinate if it remains
2134 UINT_32 rbBitsLeft = 0;
2135 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2136 {
2137 BOOL_32 isRbEqAppended = FALSE;
2138
2139 if (m_settings.applyAliasFix)
2140 {
2141 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2142 }
2143 else
2144 {
2145 isRbEqAppended = (rbEquation[i].getsize() > 0);
2146 }
2147
2148 if (isRbEqAppended)
2149 {
2150 rbBitsLeft++;
2151 rbEquation[i].getsmallest(co);
2152 UINT_32 old_size = pMetaEq->getsize();
2153 pMetaEq->Filter('=', co);
2154 UINT_32 new_size = pMetaEq->getsize();
2155 if (new_size != old_size - 1)
2156 {
2157 // assert warning
2158 }
2159 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2160 {
2161 if (rbEquation[j].remove(co))
2162 {
2163 // if we actually removed something from this bit, then add the remaining
2164 // rb bits, as these can be removed for this bit
2165 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2166 {
2167 if (rbEquation[i][k] != co)
2168 {
2169 rbEquation[j].add(rbEquation[i][k]);
2170 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2171 }
2172 }
2173 }
2174 }
2175 }
2176 }
2177
2178 // capture the size of the metaaddr
2179 UINT_32 metaSize = pMetaEq->getsize();
2180 // resize to 49 bits...make this a nibble address
2181 pMetaEq->resize(49);
2182 // Concatenate the macro address above the current address
2183 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2184 {
2185 co.set(DIM_M, j);
2186 (*pMetaEq)[i].add(co);
2187 }
2188
2189 // Multiply by meta element size (in nibbles)
2190 if (dataSurfaceType == Gfx9DataColor)
2191 {
2192 pMetaEq->shift(1);
2193 }
2194 else if (dataSurfaceType == Gfx9DataDepthStencil)
2195 {
2196 pMetaEq->shift(3);
2197 }
2198
2199 //------------------------------------------------------------------------------------------
2200 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2201 // Shift up from pipe interleave number of channel
2202 // and rb bits left, and uncompressed fragments
2203 //------------------------------------------------------------------------------------------
2204
2205 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2206
2207 // Put in the channel bits
2208 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2209 {
2210 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2211 }
2212
2213 // Put in remaining rb bits
2214 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2215 {
2216 BOOL_32 isRbEqAppended = FALSE;
2217
2218 if (m_settings.applyAliasFix)
2219 {
2220 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2221 }
2222 else
2223 {
2224 isRbEqAppended = (rbEquation[i].getsize() > 0);
2225 }
2226
2227 if (isRbEqAppended)
2228 {
2229 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2230 // Mark any rb bit we add in to the rb mask
2231 j++;
2232 }
2233 }
2234
2235 //------------------------------------------------------------------------------------------
2236 // Put in the uncompressed fragment bits
2237 //------------------------------------------------------------------------------------------
2238 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2239 {
2240 co.set(DIM_S, compFragLog2 + i);
2241 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2242 }
2243 }
2244 }
2245
2246 /**
2247 ************************************************************************************************************************
2248 * Gfx9Lib::IsEquationSupported
2249 *
2250 * @brief
2251 * Check if equation is supported for given swizzle mode and resource type.
2252 *
2253 * @return
2254 * TRUE if supported
2255 ************************************************************************************************************************
2256 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2257 BOOL_32 Gfx9Lib::IsEquationSupported(
2258 AddrResourceType rsrcType,
2259 AddrSwizzleMode swMode,
2260 UINT_32 elementBytesLog2) const
2261 {
2262 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2263 (IsValidSwMode(swMode) == TRUE) &&
2264 (IsLinear(swMode) == FALSE) &&
2265 (((IsTex2d(rsrcType) == TRUE) &&
2266 ((elementBytesLog2 < 4) ||
2267 ((IsRotateSwizzle(swMode) == FALSE) &&
2268 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2269 ((IsTex3d(rsrcType) == TRUE) &&
2270 (IsRotateSwizzle(swMode) == FALSE) &&
2271 (IsBlock256b(swMode) == FALSE)));
2272
2273 return supported;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 * Gfx9Lib::InitEquationTable
2279 *
2280 * @brief
2281 * Initialize Equation table.
2282 *
2283 * @return
2284 * N/A
2285 ************************************************************************************************************************
2286 */
InitEquationTable()2287 VOID Gfx9Lib::InitEquationTable()
2288 {
2289 memset(m_equationTable, 0, sizeof(m_equationTable));
2290
2291 // Loop all possible resource type (2D/3D)
2292 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2293 {
2294 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2295
2296 // Loop all possible swizzle mode
2297 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2298 {
2299 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2300
2301 // Loop all possible bpp
2302 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2303 {
2304 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2305
2306 // Check if the input is supported
2307 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2308 {
2309 ADDR_EQUATION equation;
2310 ADDR_E_RETURNCODE retCode;
2311
2312 memset(&equation, 0, sizeof(ADDR_EQUATION));
2313
2314 // Generate the equation
2315 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2316 {
2317 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2318 }
2319 else if (IsThin(rsrcType, swMode))
2320 {
2321 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2322 }
2323 else
2324 {
2325 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2326 }
2327
2328 // Only fill the equation into the table if the return code is ADDR_OK,
2329 // otherwise if the return code is not ADDR_OK, it indicates this is not
2330 // a valid input, we do nothing but just fill invalid equation index
2331 // into the lookup table.
2332 if (retCode == ADDR_OK)
2333 {
2334 equationIndex = m_numEquations;
2335 ADDR_ASSERT(equationIndex < EquationTableSize);
2336
2337 m_equationTable[equationIndex] = equation;
2338
2339 m_numEquations++;
2340 }
2341 else
2342 {
2343 ADDR_ASSERT_ALWAYS();
2344 }
2345 }
2346
2347 // Fill the index into the lookup table, if the combination is not supported
2348 // fill the invalid equation index
2349 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2350 }
2351 }
2352 }
2353 }
2354
2355 /**
2356 ************************************************************************************************************************
2357 * Gfx9Lib::HwlGetEquationIndex
2358 *
2359 * @brief
2360 * Interface function stub of GetEquationIndex
2361 *
2362 * @return
2363 * ADDR_E_RETURNCODE
2364 ************************************************************************************************************************
2365 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2366 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2367 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2368 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2369 ) const
2370 {
2371 AddrResourceType rsrcType = pIn->resourceType;
2372 AddrSwizzleMode swMode = pIn->swizzleMode;
2373 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2374 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2375
2376 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2377 {
2378 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2379 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2380
2381 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2382 }
2383
2384 if (pOut->pMipInfo != NULL)
2385 {
2386 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2387 {
2388 pOut->pMipInfo[i].equationIndex = index;
2389 }
2390 }
2391
2392 return index;
2393 }
2394
2395 /**
2396 ************************************************************************************************************************
2397 * Gfx9Lib::HwlComputeBlock256Equation
2398 *
2399 * @brief
2400 * Interface function stub of ComputeBlock256Equation
2401 *
2402 * @return
2403 * ADDR_E_RETURNCODE
2404 ************************************************************************************************************************
2405 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2406 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2407 AddrResourceType rsrcType,
2408 AddrSwizzleMode swMode,
2409 UINT_32 elementBytesLog2,
2410 ADDR_EQUATION* pEquation) const
2411 {
2412 ADDR_E_RETURNCODE ret = ADDR_OK;
2413
2414 pEquation->numBits = 8;
2415
2416 UINT_32 i = 0;
2417 for (; i < elementBytesLog2; i++)
2418 {
2419 InitChannel(1, 0 , i, &pEquation->addr[i]);
2420 }
2421
2422 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2423
2424 const UINT_32 maxBitsUsed = 4;
2425 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2426 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2427
2428 for (i = 0; i < maxBitsUsed; i++)
2429 {
2430 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2431 InitChannel(1, 1, i, &y[i]);
2432 }
2433
2434 if (IsStandardSwizzle(rsrcType, swMode))
2435 {
2436 switch (elementBytesLog2)
2437 {
2438 case 0:
2439 pixelBit[0] = x[0];
2440 pixelBit[1] = x[1];
2441 pixelBit[2] = x[2];
2442 pixelBit[3] = x[3];
2443 pixelBit[4] = y[0];
2444 pixelBit[5] = y[1];
2445 pixelBit[6] = y[2];
2446 pixelBit[7] = y[3];
2447 break;
2448 case 1:
2449 pixelBit[0] = x[0];
2450 pixelBit[1] = x[1];
2451 pixelBit[2] = x[2];
2452 pixelBit[3] = y[0];
2453 pixelBit[4] = y[1];
2454 pixelBit[5] = y[2];
2455 pixelBit[6] = x[3];
2456 break;
2457 case 2:
2458 pixelBit[0] = x[0];
2459 pixelBit[1] = x[1];
2460 pixelBit[2] = y[0];
2461 pixelBit[3] = y[1];
2462 pixelBit[4] = y[2];
2463 pixelBit[5] = x[2];
2464 break;
2465 case 3:
2466 pixelBit[0] = x[0];
2467 pixelBit[1] = y[0];
2468 pixelBit[2] = y[1];
2469 pixelBit[3] = x[1];
2470 pixelBit[4] = x[2];
2471 break;
2472 case 4:
2473 pixelBit[0] = y[0];
2474 pixelBit[1] = y[1];
2475 pixelBit[2] = x[0];
2476 pixelBit[3] = x[1];
2477 break;
2478 default:
2479 ADDR_ASSERT_ALWAYS();
2480 ret = ADDR_INVALIDPARAMS;
2481 break;
2482 }
2483 }
2484 else if (IsDisplaySwizzle(rsrcType, swMode))
2485 {
2486 switch (elementBytesLog2)
2487 {
2488 case 0:
2489 pixelBit[0] = x[0];
2490 pixelBit[1] = x[1];
2491 pixelBit[2] = x[2];
2492 pixelBit[3] = y[1];
2493 pixelBit[4] = y[0];
2494 pixelBit[5] = y[2];
2495 pixelBit[6] = x[3];
2496 pixelBit[7] = y[3];
2497 break;
2498 case 1:
2499 pixelBit[0] = x[0];
2500 pixelBit[1] = x[1];
2501 pixelBit[2] = x[2];
2502 pixelBit[3] = y[0];
2503 pixelBit[4] = y[1];
2504 pixelBit[5] = y[2];
2505 pixelBit[6] = x[3];
2506 break;
2507 case 2:
2508 pixelBit[0] = x[0];
2509 pixelBit[1] = x[1];
2510 pixelBit[2] = y[0];
2511 pixelBit[3] = x[2];
2512 pixelBit[4] = y[1];
2513 pixelBit[5] = y[2];
2514 break;
2515 case 3:
2516 pixelBit[0] = x[0];
2517 pixelBit[1] = y[0];
2518 pixelBit[2] = x[1];
2519 pixelBit[3] = x[2];
2520 pixelBit[4] = y[1];
2521 break;
2522 case 4:
2523 pixelBit[0] = x[0];
2524 pixelBit[1] = y[0];
2525 pixelBit[2] = x[1];
2526 pixelBit[3] = y[1];
2527 break;
2528 default:
2529 ADDR_ASSERT_ALWAYS();
2530 ret = ADDR_INVALIDPARAMS;
2531 break;
2532 }
2533 }
2534 else if (IsRotateSwizzle(swMode))
2535 {
2536 switch (elementBytesLog2)
2537 {
2538 case 0:
2539 pixelBit[0] = y[0];
2540 pixelBit[1] = y[1];
2541 pixelBit[2] = y[2];
2542 pixelBit[3] = x[1];
2543 pixelBit[4] = x[0];
2544 pixelBit[5] = x[2];
2545 pixelBit[6] = x[3];
2546 pixelBit[7] = y[3];
2547 break;
2548 case 1:
2549 pixelBit[0] = y[0];
2550 pixelBit[1] = y[1];
2551 pixelBit[2] = y[2];
2552 pixelBit[3] = x[0];
2553 pixelBit[4] = x[1];
2554 pixelBit[5] = x[2];
2555 pixelBit[6] = x[3];
2556 break;
2557 case 2:
2558 pixelBit[0] = y[0];
2559 pixelBit[1] = y[1];
2560 pixelBit[2] = x[0];
2561 pixelBit[3] = y[2];
2562 pixelBit[4] = x[1];
2563 pixelBit[5] = x[2];
2564 break;
2565 case 3:
2566 pixelBit[0] = y[0];
2567 pixelBit[1] = x[0];
2568 pixelBit[2] = y[1];
2569 pixelBit[3] = x[1];
2570 pixelBit[4] = x[2];
2571 break;
2572 default:
2573 ADDR_ASSERT_ALWAYS();
2574 case 4:
2575 ret = ADDR_INVALIDPARAMS;
2576 break;
2577 }
2578 }
2579 else
2580 {
2581 ADDR_ASSERT_ALWAYS();
2582 ret = ADDR_INVALIDPARAMS;
2583 }
2584
2585 // Post validation
2586 if (ret == ADDR_OK)
2587 {
2588 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2589 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2590 (microBlockDim.w * (1 << elementBytesLog2)));
2591 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2592 }
2593
2594 return ret;
2595 }
2596
2597 /**
2598 ************************************************************************************************************************
2599 * Gfx9Lib::HwlComputeThinEquation
2600 *
2601 * @brief
2602 * Interface function stub of ComputeThinEquation
2603 *
2604 * @return
2605 * ADDR_E_RETURNCODE
2606 ************************************************************************************************************************
2607 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2608 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2609 AddrResourceType rsrcType,
2610 AddrSwizzleMode swMode,
2611 UINT_32 elementBytesLog2,
2612 ADDR_EQUATION* pEquation) const
2613 {
2614 ADDR_E_RETURNCODE ret = ADDR_OK;
2615
2616 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2617
2618 UINT_32 maxXorBits = blockSizeLog2;
2619 if (IsNonPrtXor(swMode))
2620 {
2621 // For non-prt-xor, maybe need to initialize some more bits for xor
2622 // The highest xor bit used in equation will be max the following 3 items:
2623 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2624 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2625 // 3. blockSizeLog2
2626
2627 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2628 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2629 GetPipeXorBits(blockSizeLog2) +
2630 2 * GetBankXorBits(blockSizeLog2));
2631 }
2632
2633 const UINT_32 maxBitsUsed = 14;
2634 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2635 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2636 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2637
2638 const UINT_32 extraXorBits = 16;
2639 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2640 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2641
2642 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2643 {
2644 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2645 InitChannel(1, 1, i, &y[i]);
2646 }
2647
2648 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2649
2650 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2651 {
2652 InitChannel(1, 0 , i, &pixelBit[i]);
2653 }
2654
2655 UINT_32 xIdx = 0;
2656 UINT_32 yIdx = 0;
2657 UINT_32 lowBits = 0;
2658
2659 if (IsZOrderSwizzle(swMode))
2660 {
2661 if (elementBytesLog2 <= 3)
2662 {
2663 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2664 {
2665 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2666 }
2667
2668 lowBits = 6;
2669 }
2670 else
2671 {
2672 ret = ADDR_INVALIDPARAMS;
2673 }
2674 }
2675 else
2676 {
2677 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2678
2679 if (ret == ADDR_OK)
2680 {
2681 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2682 xIdx = Log2(microBlockDim.w);
2683 yIdx = Log2(microBlockDim.h);
2684 lowBits = 8;
2685 }
2686 }
2687
2688 if (ret == ADDR_OK)
2689 {
2690 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2691 {
2692 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2693 }
2694
2695 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2696 {
2697 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2698 }
2699
2700 if (IsXor(swMode))
2701 {
2702 // Fill XOR bits
2703 UINT_32 pipeStart = m_pipeInterleaveLog2;
2704 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2705
2706 UINT_32 bankStart = pipeStart + pipeXorBits;
2707 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2708
2709 for (UINT_32 i = 0; i < pipeXorBits; i++)
2710 {
2711 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2712 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2713 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2714
2715 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2716 }
2717
2718 for (UINT_32 i = 0; i < bankXorBits; i++)
2719 {
2720 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2721 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2722 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2723
2724 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2725 }
2726
2727 if (IsPrt(swMode) == FALSE)
2728 {
2729 for (UINT_32 i = 0; i < pipeXorBits; i++)
2730 {
2731 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2732 }
2733
2734 for (UINT_32 i = 0; i < bankXorBits; i++)
2735 {
2736 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2737 }
2738 }
2739 }
2740
2741 pEquation->numBits = blockSizeLog2;
2742 }
2743
2744 return ret;
2745 }
2746
2747 /**
2748 ************************************************************************************************************************
2749 * Gfx9Lib::HwlComputeThickEquation
2750 *
2751 * @brief
2752 * Interface function stub of ComputeThickEquation
2753 *
2754 * @return
2755 * ADDR_E_RETURNCODE
2756 ************************************************************************************************************************
2757 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2758 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2759 AddrResourceType rsrcType,
2760 AddrSwizzleMode swMode,
2761 UINT_32 elementBytesLog2,
2762 ADDR_EQUATION* pEquation) const
2763 {
2764 ADDR_E_RETURNCODE ret = ADDR_OK;
2765
2766 ADDR_ASSERT(IsTex3d(rsrcType));
2767
2768 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2769
2770 UINT_32 maxXorBits = blockSizeLog2;
2771 if (IsNonPrtXor(swMode))
2772 {
2773 // For non-prt-xor, maybe need to initialize some more bits for xor
2774 // The highest xor bit used in equation will be max the following 3:
2775 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2776 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2777 // 3. blockSizeLog2
2778
2779 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2780 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2781 GetPipeXorBits(blockSizeLog2) +
2782 3 * GetBankXorBits(blockSizeLog2));
2783 }
2784
2785 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2786 {
2787 InitChannel(1, 0 , i, &pEquation->addr[i]);
2788 }
2789
2790 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2791
2792 const UINT_32 maxBitsUsed = 12;
2793 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2794 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2795 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2796 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2797
2798 const UINT_32 extraXorBits = 24;
2799 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2800 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2801
2802 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2803 {
2804 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2805 InitChannel(1, 1, i, &y[i]);
2806 InitChannel(1, 2, i, &z[i]);
2807 }
2808
2809 if (IsZOrderSwizzle(swMode))
2810 {
2811 switch (elementBytesLog2)
2812 {
2813 case 0:
2814 pixelBit[0] = x[0];
2815 pixelBit[1] = y[0];
2816 pixelBit[2] = x[1];
2817 pixelBit[3] = y[1];
2818 pixelBit[4] = z[0];
2819 pixelBit[5] = z[1];
2820 pixelBit[6] = x[2];
2821 pixelBit[7] = z[2];
2822 pixelBit[8] = y[2];
2823 pixelBit[9] = x[3];
2824 break;
2825 case 1:
2826 pixelBit[0] = x[0];
2827 pixelBit[1] = y[0];
2828 pixelBit[2] = x[1];
2829 pixelBit[3] = y[1];
2830 pixelBit[4] = z[0];
2831 pixelBit[5] = z[1];
2832 pixelBit[6] = z[2];
2833 pixelBit[7] = y[2];
2834 pixelBit[8] = x[2];
2835 break;
2836 case 2:
2837 pixelBit[0] = x[0];
2838 pixelBit[1] = y[0];
2839 pixelBit[2] = x[1];
2840 pixelBit[3] = z[0];
2841 pixelBit[4] = y[1];
2842 pixelBit[5] = z[1];
2843 pixelBit[6] = y[2];
2844 pixelBit[7] = x[2];
2845 break;
2846 case 3:
2847 pixelBit[0] = x[0];
2848 pixelBit[1] = y[0];
2849 pixelBit[2] = z[0];
2850 pixelBit[3] = x[1];
2851 pixelBit[4] = z[1];
2852 pixelBit[5] = y[1];
2853 pixelBit[6] = x[2];
2854 break;
2855 case 4:
2856 pixelBit[0] = x[0];
2857 pixelBit[1] = y[0];
2858 pixelBit[2] = z[0];
2859 pixelBit[3] = z[1];
2860 pixelBit[4] = y[1];
2861 pixelBit[5] = x[1];
2862 break;
2863 default:
2864 ADDR_ASSERT_ALWAYS();
2865 ret = ADDR_INVALIDPARAMS;
2866 break;
2867 }
2868 }
2869 else if (IsStandardSwizzle(rsrcType, swMode))
2870 {
2871 switch (elementBytesLog2)
2872 {
2873 case 0:
2874 pixelBit[0] = x[0];
2875 pixelBit[1] = x[1];
2876 pixelBit[2] = x[2];
2877 pixelBit[3] = x[3];
2878 pixelBit[4] = y[0];
2879 pixelBit[5] = y[1];
2880 pixelBit[6] = z[0];
2881 pixelBit[7] = z[1];
2882 pixelBit[8] = z[2];
2883 pixelBit[9] = y[2];
2884 break;
2885 case 1:
2886 pixelBit[0] = x[0];
2887 pixelBit[1] = x[1];
2888 pixelBit[2] = x[2];
2889 pixelBit[3] = y[0];
2890 pixelBit[4] = y[1];
2891 pixelBit[5] = z[0];
2892 pixelBit[6] = z[1];
2893 pixelBit[7] = z[2];
2894 pixelBit[8] = y[2];
2895 break;
2896 case 2:
2897 pixelBit[0] = x[0];
2898 pixelBit[1] = x[1];
2899 pixelBit[2] = y[0];
2900 pixelBit[3] = y[1];
2901 pixelBit[4] = z[0];
2902 pixelBit[5] = z[1];
2903 pixelBit[6] = y[2];
2904 pixelBit[7] = x[2];
2905 break;
2906 case 3:
2907 pixelBit[0] = x[0];
2908 pixelBit[1] = y[0];
2909 pixelBit[2] = y[1];
2910 pixelBit[3] = z[0];
2911 pixelBit[4] = z[1];
2912 pixelBit[5] = x[1];
2913 pixelBit[6] = x[2];
2914 break;
2915 case 4:
2916 pixelBit[0] = y[0];
2917 pixelBit[1] = y[1];
2918 pixelBit[2] = z[0];
2919 pixelBit[3] = z[1];
2920 pixelBit[4] = x[0];
2921 pixelBit[5] = x[1];
2922 break;
2923 default:
2924 ADDR_ASSERT_ALWAYS();
2925 ret = ADDR_INVALIDPARAMS;
2926 break;
2927 }
2928 }
2929 else
2930 {
2931 ADDR_ASSERT_ALWAYS();
2932 ret = ADDR_INVALIDPARAMS;
2933 }
2934
2935 if (ret == ADDR_OK)
2936 {
2937 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2938 UINT_32 xIdx = Log2(microBlockDim.w);
2939 UINT_32 yIdx = Log2(microBlockDim.h);
2940 UINT_32 zIdx = Log2(microBlockDim.d);
2941
2942 pixelBit = pEquation->addr;
2943
2944 const UINT_32 lowBits = 10;
2945 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2946 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2947
2948 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2949 {
2950 if ((i % 3) == 0)
2951 {
2952 pixelBit[i] = x[xIdx++];
2953 }
2954 else if ((i % 3) == 1)
2955 {
2956 pixelBit[i] = z[zIdx++];
2957 }
2958 else
2959 {
2960 pixelBit[i] = y[yIdx++];
2961 }
2962 }
2963
2964 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2965 {
2966 if ((i % 3) == 0)
2967 {
2968 xorExtra[i - blockSizeLog2] = x[xIdx++];
2969 }
2970 else if ((i % 3) == 1)
2971 {
2972 xorExtra[i - blockSizeLog2] = z[zIdx++];
2973 }
2974 else
2975 {
2976 xorExtra[i - blockSizeLog2] = y[yIdx++];
2977 }
2978 }
2979
2980 if (IsXor(swMode))
2981 {
2982 // Fill XOR bits
2983 UINT_32 pipeStart = m_pipeInterleaveLog2;
2984 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2985 for (UINT_32 i = 0; i < pipeXorBits; i++)
2986 {
2987 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2988 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2989 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2990
2991 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2992
2993 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2994 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2995 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2996
2997 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2998 }
2999
3000 UINT_32 bankStart = pipeStart + pipeXorBits;
3001 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
3002 for (UINT_32 i = 0; i < bankXorBits; i++)
3003 {
3004 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
3005 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
3006 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
3007
3008 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
3009
3010 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
3011 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
3012 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
3013
3014 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
3015 }
3016 }
3017
3018 pEquation->numBits = blockSizeLog2;
3019 }
3020
3021 return ret;
3022 }
3023
3024 /**
3025 ************************************************************************************************************************
3026 * Gfx9Lib::IsValidDisplaySwizzleMode
3027 *
3028 * @brief
3029 * Check if a swizzle mode is supported by display engine
3030 *
3031 * @return
3032 * TRUE is swizzle mode is supported by display engine
3033 ************************************************************************************************************************
3034 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3035 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3036 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3037 {
3038 BOOL_32 support = FALSE;
3039
3040 const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3041
3042 if (m_settings.isDce12)
3043 {
3044 if (pIn->bpp == 32)
3045 {
3046 support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3047 }
3048 else if (pIn->bpp <= 64)
3049 {
3050 support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3051 }
3052 }
3053 else if (m_settings.isDcn1)
3054 {
3055 if (pIn->bpp < 64)
3056 {
3057 support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3058 }
3059 else if (pIn->bpp == 64)
3060 {
3061 support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3062 }
3063 }
3064 else if (m_settings.isDcn2)
3065 {
3066 if (pIn->bpp < 64)
3067 {
3068 support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3069 }
3070 else if (pIn->bpp == 64)
3071 {
3072 support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3073 }
3074 }
3075 else
3076 {
3077 ADDR_NOT_IMPLEMENTED();
3078 }
3079
3080 return support;
3081 }
3082
3083 /**
3084 ************************************************************************************************************************
3085 * Gfx9Lib::HwlComputePipeBankXor
3086 *
3087 * @brief
3088 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3089 *
3090 * @return
3091 * PipeBankXor value
3092 ************************************************************************************************************************
3093 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3094 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3095 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3096 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3097 {
3098 if (IsXor(pIn->swizzleMode))
3099 {
3100 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3101 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3102 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3103
3104 UINT_32 pipeXor = 0;
3105 UINT_32 bankXor = 0;
3106
3107 const UINT_32 bankMask = (1 << bankBits) - 1;
3108 const UINT_32 index = pIn->surfIndex & bankMask;
3109
3110 const UINT_32 bpp = pIn->flags.fmask ?
3111 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3112 if (bankBits == 4)
3113 {
3114 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3115 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3116
3117 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3118 }
3119 else if (bankBits > 0)
3120 {
3121 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3122 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3123 bankXor = (index * bankIncrease) & bankMask;
3124 }
3125
3126 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3127 }
3128 else
3129 {
3130 pOut->pipeBankXor = 0;
3131 }
3132
3133 return ADDR_OK;
3134 }
3135
3136 /**
3137 ************************************************************************************************************************
3138 * Gfx9Lib::HwlComputeSlicePipeBankXor
3139 *
3140 * @brief
3141 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3142 *
3143 * @return
3144 * PipeBankXor value
3145 ************************************************************************************************************************
3146 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3147 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3148 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3149 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3150 {
3151 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3152 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3153 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3154
3155 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3156 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3157
3158 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3159
3160 return ADDR_OK;
3161 }
3162
3163 /**
3164 ************************************************************************************************************************
3165 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3166 *
3167 * @brief
3168 * Compute sub resource offset to support swizzle pattern
3169 *
3170 * @return
3171 * Offset
3172 ************************************************************************************************************************
3173 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3174 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3175 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3176 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3177 {
3178 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3179
3180 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3181 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3182 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3183 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3184 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3185 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3186
3187 pOut->offset = pIn->slice * pIn->sliceSize +
3188 pIn->macroBlockOffset +
3189 (pIn->mipTailOffset ^ pipeBankXor) -
3190 static_cast<UINT_64>(pipeBankXor);
3191 return ADDR_OK;
3192 }
3193
3194 /**
3195 ************************************************************************************************************************
3196 * Gfx9Lib::ValidateNonSwModeParams
3197 *
3198 * @brief
3199 * Validate compute surface info params except swizzle mode
3200 *
3201 * @return
3202 * TRUE if parameters are valid, FALSE otherwise
3203 ************************************************************************************************************************
3204 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3205 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3206 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3207 {
3208 BOOL_32 valid = TRUE;
3209
3210 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3211 {
3212 ADDR_ASSERT_ALWAYS();
3213 valid = FALSE;
3214 }
3215
3216 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3217 {
3218 ADDR_ASSERT_ALWAYS();
3219 valid = FALSE;
3220 }
3221
3222 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3223 const BOOL_32 msaa = (pIn->numFrags > 1);
3224 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3225
3226 const AddrResourceType rsrcType = pIn->resourceType;
3227 const BOOL_32 tex3d = IsTex3d(rsrcType);
3228 const BOOL_32 tex2d = IsTex2d(rsrcType);
3229 const BOOL_32 tex1d = IsTex1d(rsrcType);
3230
3231 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3232 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3233 const BOOL_32 display = flags.display || flags.rotated;
3234 const BOOL_32 stereo = flags.qbStereo;
3235 const BOOL_32 fmask = flags.fmask;
3236
3237 // Resource type check
3238 if (tex1d)
3239 {
3240 if (msaa || zbuffer || display || stereo || isBc || fmask)
3241 {
3242 ADDR_ASSERT_ALWAYS();
3243 valid = FALSE;
3244 }
3245 }
3246 else if (tex2d)
3247 {
3248 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3249 {
3250 ADDR_ASSERT_ALWAYS();
3251 valid = FALSE;
3252 }
3253 }
3254 else if (tex3d)
3255 {
3256 if (msaa || zbuffer || display || stereo || fmask)
3257 {
3258 ADDR_ASSERT_ALWAYS();
3259 valid = FALSE;
3260 }
3261 }
3262 else
3263 {
3264 ADDR_ASSERT_ALWAYS();
3265 valid = FALSE;
3266 }
3267
3268 return valid;
3269 }
3270
3271 /**
3272 ************************************************************************************************************************
3273 * Gfx9Lib::ValidateSwModeParams
3274 *
3275 * @brief
3276 * Validate compute surface info related to swizzle mode
3277 *
3278 * @return
3279 * TRUE if parameters are valid, FALSE otherwise
3280 ************************************************************************************************************************
3281 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3282 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3283 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3284 {
3285 BOOL_32 valid = TRUE;
3286
3287 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3288 {
3289 ADDR_ASSERT_ALWAYS();
3290 valid = FALSE;
3291 }
3292
3293 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3294 const BOOL_32 msaa = (pIn->numFrags > 1);
3295 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3296 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3297
3298 const AddrResourceType rsrcType = pIn->resourceType;
3299 const BOOL_32 tex3d = IsTex3d(rsrcType);
3300 const BOOL_32 tex2d = IsTex2d(rsrcType);
3301 const BOOL_32 tex1d = IsTex1d(rsrcType);
3302
3303 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3304 const BOOL_32 linear = IsLinear(swizzle);
3305 const BOOL_32 blk256B = IsBlock256b(swizzle);
3306 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3307
3308 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3309 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3310 const BOOL_32 color = flags.color;
3311 const BOOL_32 texture = flags.texture;
3312 const BOOL_32 display = flags.display || flags.rotated;
3313 const BOOL_32 prt = flags.prt;
3314 const BOOL_32 fmask = flags.fmask;
3315
3316 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3317 const BOOL_32 zMaxMip = tex3d && mipmap &&
3318 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3319
3320 // Misc check
3321 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3322 {
3323 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3324 ADDR_ASSERT_ALWAYS();
3325 valid = FALSE;
3326 }
3327
3328 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3329 {
3330 ADDR_ASSERT_ALWAYS();
3331 valid = FALSE;
3332 }
3333
3334 if ((pIn->bpp == 96) && (linear == FALSE))
3335 {
3336 ADDR_ASSERT_ALWAYS();
3337 valid = FALSE;
3338 }
3339
3340 if (prt && isNonPrtXor)
3341 {
3342 ADDR_ASSERT_ALWAYS();
3343 valid = FALSE;
3344 }
3345
3346 // Resource type check
3347 if (tex1d)
3348 {
3349 if (linear == FALSE)
3350 {
3351 ADDR_ASSERT_ALWAYS();
3352 valid = FALSE;
3353 }
3354 }
3355
3356 // Swizzle type check
3357 if (linear)
3358 {
3359 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3360 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3361 {
3362 ADDR_ASSERT_ALWAYS();
3363 valid = FALSE;
3364 }
3365 }
3366 else if (IsZOrderSwizzle(swizzle))
3367 {
3368 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3369 {
3370 ADDR_ASSERT_ALWAYS();
3371 valid = FALSE;
3372 }
3373 }
3374 else if (IsStandardSwizzle(swizzle))
3375 {
3376 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3377 {
3378 ADDR_ASSERT_ALWAYS();
3379 valid = FALSE;
3380 }
3381 }
3382 else if (IsDisplaySwizzle(swizzle))
3383 {
3384 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3385 {
3386 ADDR_ASSERT_ALWAYS();
3387 valid = FALSE;
3388 }
3389 }
3390 else if (IsRotateSwizzle(swizzle))
3391 {
3392 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3393 {
3394 ADDR_ASSERT_ALWAYS();
3395 valid = FALSE;
3396 }
3397 }
3398 else
3399 {
3400 ADDR_ASSERT_ALWAYS();
3401 valid = FALSE;
3402 }
3403
3404 // Block type check
3405 if (blk256B)
3406 {
3407 if (prt || zbuffer || tex3d || mipmap || msaa)
3408 {
3409 ADDR_ASSERT_ALWAYS();
3410 valid = FALSE;
3411 }
3412 }
3413
3414 return valid;
3415 }
3416
3417 /**
3418 ************************************************************************************************************************
3419 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3420 *
3421 * @brief
3422 * Compute surface info sanity check
3423 *
3424 * @return
3425 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3426 ************************************************************************************************************************
3427 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3428 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3429 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3430 {
3431 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3432 }
3433
3434 /**
3435 ************************************************************************************************************************
3436 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3437 *
3438 * @brief
3439 * Internal function to get suggested surface information for cliet to use
3440 *
3441 * @return
3442 * ADDR_E_RETURNCODE
3443 ************************************************************************************************************************
3444 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3445 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3446 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3447 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3448 {
3449 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3450 ElemLib* pElemLib = GetElemLib();
3451
3452 UINT_32 bpp = pIn->bpp;
3453 UINT_32 width = Max(pIn->width, 1u);
3454 UINT_32 height = Max(pIn->height, 1u);
3455 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3456 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3457
3458 if (pIn->flags.fmask)
3459 {
3460 bpp = GetFmaskBpp(numSamples, numFrags);
3461 numFrags = 1;
3462 numSamples = 1;
3463 pOut->resourceType = ADDR_RSRC_TEX_2D;
3464 }
3465 else
3466 {
3467 // Set format to INVALID will skip this conversion
3468 if (pIn->format != ADDR_FMT_INVALID)
3469 {
3470 UINT_32 expandX, expandY;
3471
3472 // Don't care for this case
3473 ElemMode elemMode = ADDR_UNCOMPRESSED;
3474
3475 // Get compression/expansion factors and element mode which indicates compression/expansion
3476 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3477 &elemMode,
3478 &expandX,
3479 &expandY);
3480
3481 UINT_32 basePitch = 0;
3482 GetElemLib()->AdjustSurfaceInfo(elemMode,
3483 expandX,
3484 expandY,
3485 &bpp,
3486 &basePitch,
3487 &width,
3488 &height);
3489 }
3490
3491 // The output may get changed for volume(3D) texture resource in future
3492 pOut->resourceType = pIn->resourceType;
3493 }
3494
3495 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3496 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3497 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3498 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3499
3500 // Pre sanity check on non swizzle mode parameters
3501 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3502 localIn.flags = pIn->flags;
3503 localIn.resourceType = pOut->resourceType;
3504 localIn.format = pIn->format;
3505 localIn.bpp = bpp;
3506 localIn.width = width;
3507 localIn.height = height;
3508 localIn.numSlices = numSlices;
3509 localIn.numMipLevels = numMipLevels;
3510 localIn.numSamples = numSamples;
3511 localIn.numFrags = numFrags;
3512
3513 if (ValidateNonSwModeParams(&localIn))
3514 {
3515 // Forbid swizzle mode(s) by client setting
3516 ADDR2_SWMODE_SET allowedSwModeSet = {};
3517 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3518 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3519 allowedSwModeSet.value |=
3520 pIn->forbiddenBlock.macroThin4KB ? 0 :
3521 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3522 allowedSwModeSet.value |=
3523 pIn->forbiddenBlock.macroThick4KB ? 0 :
3524 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3525 allowedSwModeSet.value |=
3526 pIn->forbiddenBlock.macroThin64KB ? 0 :
3527 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3528 allowedSwModeSet.value |=
3529 pIn->forbiddenBlock.macroThick64KB ? 0 :
3530 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3531
3532 if (pIn->preferredSwSet.value != 0)
3533 {
3534 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3535 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3536 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3537 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3538 }
3539
3540 if (pIn->noXor)
3541 {
3542 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3543 }
3544
3545 if (pIn->maxAlign > 0)
3546 {
3547 if (pIn->maxAlign < Size64K)
3548 {
3549 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3550 }
3551
3552 if (pIn->maxAlign < Size4K)
3553 {
3554 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3555 }
3556
3557 if (pIn->maxAlign < Size256)
3558 {
3559 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3560 }
3561 }
3562
3563 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3564 switch (pOut->resourceType)
3565 {
3566 case ADDR_RSRC_TEX_1D:
3567 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3568 break;
3569
3570 case ADDR_RSRC_TEX_2D:
3571 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3572
3573 if (bpp > 64)
3574 {
3575 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3576 }
3577 break;
3578
3579 case ADDR_RSRC_TEX_3D:
3580 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3581
3582 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3583 {
3584 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3585 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3586 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3587 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3588 }
3589
3590 if ((bpp == 128) && pIn->flags.color)
3591 {
3592 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3593 }
3594
3595 if (pIn->flags.view3dAs2dArray)
3596 {
3597 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3598 }
3599 break;
3600
3601 default:
3602 ADDR_ASSERT_ALWAYS();
3603 allowedSwModeSet.value = 0;
3604 break;
3605 }
3606
3607 if (pIn->format == ADDR_FMT_32_32_32)
3608 {
3609 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3610 }
3611
3612 if (ElemLib::IsBlockCompressed(pIn->format))
3613 {
3614 if (pIn->flags.texture)
3615 {
3616 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3617 }
3618 else
3619 {
3620 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3621 }
3622 }
3623
3624 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3625 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3626 {
3627 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3628 }
3629
3630 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3631 {
3632 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3633
3634 if (pIn->flags.noMetadata == FALSE)
3635 {
3636 if (pIn->flags.depth &&
3637 pIn->flags.texture &&
3638 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3639 {
3640 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3641 // equation from wrong address within memory range a tile covered and use the
3642 // garbage data for compressed Z reading which finally leads to corruption.
3643 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3644 }
3645
3646 if (m_settings.htileCacheRbConflict &&
3647 (pIn->flags.depth || pIn->flags.stencil) &&
3648 (numSlices > 1) &&
3649 (pIn->flags.metaRbUnaligned == FALSE) &&
3650 (pIn->flags.metaPipeUnaligned == FALSE))
3651 {
3652 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3653 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3654 }
3655 }
3656 }
3657
3658 if (msaa)
3659 {
3660 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3661 }
3662
3663 if ((numFrags > 1) &&
3664 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3665 {
3666 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3667 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3668 }
3669
3670 if (numMipLevels > 1)
3671 {
3672 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3673 }
3674
3675 if (displayRsrc)
3676 {
3677 if (m_settings.isDce12)
3678 {
3679 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3680 }
3681 else if (m_settings.isDcn1)
3682 {
3683 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3684 }
3685 else if (m_settings.isDcn2)
3686 {
3687 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3688 }
3689 else
3690 {
3691 ADDR_NOT_IMPLEMENTED();
3692 }
3693 }
3694
3695 if (allowedSwModeSet.value != 0)
3696 {
3697 #if DEBUG
3698 // Post sanity check, at least AddrLib should accept the output generated by its own
3699 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3700
3701 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3702 {
3703 if (validateSwModeSet & 1)
3704 {
3705 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3706 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3707 }
3708
3709 validateSwModeSet >>= 1;
3710 }
3711 #endif
3712
3713 pOut->validSwModeSet = allowedSwModeSet;
3714 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3715 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3716 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3717
3718 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3719
3720 if (pOut->clientPreferredSwSet.value == 0)
3721 {
3722 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3723 }
3724
3725 // Apply optional restrictions
3726 if (pIn->flags.needEquation)
3727 {
3728 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3729 }
3730
3731 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3732 {
3733 pOut->swizzleMode = ADDR_SW_LINEAR;
3734 }
3735 else
3736 {
3737 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3738
3739 if ((height > 1) && (computeMinSize == FALSE))
3740 {
3741 // Always ignore linear swizzle mode if:
3742 // 1. This is a (2D/3D) resource with height > 1
3743 // 2. Client doesn't require computing minimize size
3744 allowedSwModeSet.swLinear = 0;
3745 }
3746
3747 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3748
3749 // Determine block size if there are 2 or more block type candidates
3750 if (IsPow2(allowedBlockSet.value) == FALSE)
3751 {
3752 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3753
3754 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3755 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3756 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3757 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3758
3759 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3760 {
3761 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3762 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3763 }
3764
3765 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3766
3767 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3768 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3769 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3770 UINT_32 minSizeBlk = AddrBlockMicro;
3771 UINT_64 minSize = 0;
3772
3773 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3774
3775 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3776 {
3777 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3778 {
3779 localIn.swizzleMode = swMode[i];
3780
3781 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3782 {
3783 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3784 }
3785 else
3786 {
3787 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3788 }
3789
3790 if (returnCode == ADDR_OK)
3791 {
3792 padSize[i] = localOut.surfSize;
3793
3794 if ((minSize == 0) ||
3795 BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3796 {
3797 minSize = padSize[i];
3798 minSizeBlk = i;
3799 }
3800 }
3801 else
3802 {
3803 ADDR_ASSERT_ALWAYS();
3804 break;
3805 }
3806 }
3807 }
3808
3809 if (pIn->memoryBudget > 1.0)
3810 {
3811 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3812 // smaller-block type again in coming loop
3813 switch (minSizeBlk)
3814 {
3815 case AddrBlockThick64KB:
3816 allowedBlockSet.macroThin64KB = 0;
3817 case AddrBlockThin64KB:
3818 allowedBlockSet.macroThick4KB = 0;
3819 case AddrBlockThick4KB:
3820 allowedBlockSet.macroThin4KB = 0;
3821 case AddrBlockThin4KB:
3822 allowedBlockSet.micro = 0;
3823 case AddrBlockMicro:
3824 allowedBlockSet.linear = 0;
3825 case AddrBlockLinear:
3826 break;
3827
3828 default:
3829 ADDR_ASSERT_ALWAYS();
3830 break;
3831 }
3832
3833 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3834 {
3835 if ((i != minSizeBlk) &&
3836 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3837 {
3838 if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3839 {
3840 // Clear the block type if the memory waste is unacceptable
3841 allowedBlockSet.value &= ~(1u << (i - 1));
3842 }
3843 }
3844 }
3845
3846 // Remove linear block type if 2 or more block types are allowed
3847 if (IsPow2(allowedBlockSet.value) == FALSE)
3848 {
3849 allowedBlockSet.linear = 0;
3850 }
3851
3852 // Select the biggest allowed block type
3853 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3854
3855 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3856 {
3857 minSizeBlk = AddrBlockLinear;
3858 }
3859 }
3860
3861 switch (minSizeBlk)
3862 {
3863 case AddrBlockLinear:
3864 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3865 break;
3866
3867 case AddrBlockMicro:
3868 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3869 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3870 break;
3871
3872 case AddrBlockThin4KB:
3873 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3874 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3875 break;
3876
3877 case AddrBlockThick4KB:
3878 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3879 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3880 break;
3881
3882 case AddrBlockThin64KB:
3883 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3884 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3885 break;
3886
3887 case AddrBlockThick64KB:
3888 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3889 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3890 break;
3891
3892 default:
3893 ADDR_ASSERT_ALWAYS();
3894 allowedSwModeSet.value = 0;
3895 break;
3896 }
3897 }
3898
3899 // Block type should be determined.
3900 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3901
3902 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3903
3904 // Determine swizzle type if there are 2 or more swizzle type candidates
3905 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3906 {
3907 if (ElemLib::IsBlockCompressed(pIn->format))
3908 {
3909 if (allowedSwSet.sw_D)
3910 {
3911 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3912 }
3913 else
3914 {
3915 ADDR_ASSERT(allowedSwSet.sw_S);
3916 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3917 }
3918 }
3919 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3920 {
3921 if (allowedSwSet.sw_S)
3922 {
3923 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3924 }
3925 else if (allowedSwSet.sw_D)
3926 {
3927 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3928 }
3929 else
3930 {
3931 ADDR_ASSERT(allowedSwSet.sw_R);
3932 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3933 }
3934 }
3935 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3936 {
3937 if (pIn->flags.color && allowedSwSet.sw_D)
3938 {
3939 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3940 }
3941 else if (allowedSwSet.sw_Z)
3942 {
3943 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3944 }
3945 else
3946 {
3947 ADDR_ASSERT(allowedSwSet.sw_S);
3948 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3949 }
3950 }
3951 else
3952 {
3953 if (pIn->flags.rotated && allowedSwSet.sw_R)
3954 {
3955 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3956 }
3957 else if (allowedSwSet.sw_D)
3958 {
3959 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3960 }
3961 else if (allowedSwSet.sw_S)
3962 {
3963 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3964 }
3965 else
3966 {
3967 ADDR_ASSERT(allowedSwSet.sw_Z);
3968 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3969 }
3970 }
3971
3972 // Swizzle type should be determined.
3973 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3974 }
3975
3976 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3977 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3978 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3979 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3980 }
3981
3982 returnCode = ADDR_OK;
3983 }
3984 else
3985 {
3986 // Invalid combination...
3987 ADDR_ASSERT_ALWAYS();
3988 }
3989 }
3990 else
3991 {
3992 // Invalid combination...
3993 ADDR_ASSERT_ALWAYS();
3994 }
3995
3996 return returnCode;
3997 }
3998
3999 /**
4000 ************************************************************************************************************************
4001 * Gfx9Lib::ComputeStereoInfo
4002 *
4003 * @brief
4004 * Compute height alignment and right eye pipeBankXor for stereo surface
4005 *
4006 * @return
4007 * Error code
4008 *
4009 ************************************************************************************************************************
4010 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const4011 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
4012 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4013 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
4014 UINT_32* pHeightAlign
4015 ) const
4016 {
4017 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4018
4019 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4020
4021 if (eqIndex < m_numEquations)
4022 {
4023 if (IsXor(pIn->swizzleMode))
4024 {
4025 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4026 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
4027 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
4028 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
4029 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4030 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
4031
4032 ADDR_ASSERT(maxYCoordBlock256 ==
4033 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4034
4035 const UINT_32 maxYCoordInBaseEquation =
4036 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4037
4038 ADDR_ASSERT(maxYCoordInBaseEquation ==
4039 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4040
4041 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4042
4043 ADDR_ASSERT(maxYCoordInPipeXor ==
4044 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4045
4046 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4047 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4048
4049 ADDR_ASSERT(maxYCoordInBankXor ==
4050 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4051
4052 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4053
4054 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4055 {
4056 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4057
4058 if (pOut->pStereoInfo != NULL)
4059 {
4060 pOut->pStereoInfo->rightSwizzle = 0;
4061
4062 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4063 {
4064 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4065 {
4066 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4067 }
4068
4069 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4070 {
4071 pOut->pStereoInfo->rightSwizzle |=
4072 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4073 }
4074
4075 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4076 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4077 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4078 }
4079 }
4080 }
4081 }
4082 }
4083 else
4084 {
4085 ADDR_ASSERT_ALWAYS();
4086 returnCode = ADDR_ERROR;
4087 }
4088
4089 return returnCode;
4090 }
4091
4092 /**
4093 ************************************************************************************************************************
4094 * Gfx9Lib::HwlComputeSurfaceInfoTiled
4095 *
4096 * @brief
4097 * Internal function to calculate alignment for tiled surface
4098 *
4099 * @return
4100 * ADDR_E_RETURNCODE
4101 ************************************************************************************************************************
4102 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4103 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4104 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4105 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4106 ) const
4107 {
4108 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4109 &pOut->blockHeight,
4110 &pOut->blockSlices,
4111 pIn->bpp,
4112 pIn->numFrags,
4113 pIn->resourceType,
4114 pIn->swizzleMode);
4115
4116 if (returnCode == ADDR_OK)
4117 {
4118 UINT_32 pitchAlignInElement = pOut->blockWidth;
4119
4120 if ((IsTex2d(pIn->resourceType) == TRUE) &&
4121 (pIn->flags.display || pIn->flags.rotated) &&
4122 (pIn->numMipLevels <= 1) &&
4123 (pIn->numSamples <= 1) &&
4124 (pIn->numFrags <= 1))
4125 {
4126 // Display engine needs pitch align to be at least 32 pixels.
4127 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4128 }
4129
4130 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4131
4132 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4133 {
4134 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4135 {
4136 returnCode = ADDR_INVALIDPARAMS;
4137 }
4138 else if (pIn->pitchInElement < pOut->pitch)
4139 {
4140 returnCode = ADDR_INVALIDPARAMS;
4141 }
4142 else
4143 {
4144 pOut->pitch = pIn->pitchInElement;
4145 }
4146 }
4147
4148 UINT_32 heightAlign = 0;
4149
4150 if (pIn->flags.qbStereo)
4151 {
4152 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4153 }
4154
4155 if (returnCode == ADDR_OK)
4156 {
4157 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4158
4159 if (heightAlign > 1)
4160 {
4161 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4162 }
4163
4164 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4165
4166 pOut->epitchIsHeight = FALSE;
4167 pOut->mipChainInTail = FALSE;
4168 pOut->firstMipIdInTail = pIn->numMipLevels;
4169
4170 pOut->mipChainPitch = pOut->pitch;
4171 pOut->mipChainHeight = pOut->height;
4172 pOut->mipChainSlice = pOut->numSlices;
4173
4174 if (pIn->numMipLevels > 1)
4175 {
4176 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4177 pIn->swizzleMode,
4178 pIn->bpp,
4179 pIn->width,
4180 pIn->height,
4181 pIn->numSlices,
4182 pOut->blockWidth,
4183 pOut->blockHeight,
4184 pOut->blockSlices,
4185 pIn->numMipLevels,
4186 pOut->pMipInfo);
4187
4188 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4189
4190 if (endingMipId == 0)
4191 {
4192 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4193 pIn->swizzleMode,
4194 pOut->blockWidth,
4195 pOut->blockHeight,
4196 pOut->blockSlices);
4197
4198 pOut->epitchIsHeight = TRUE;
4199 pOut->pitch = tailMaxDim.w;
4200 pOut->height = tailMaxDim.h;
4201 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4202 tailMaxDim.d : pIn->numSlices;
4203 pOut->mipChainInTail = TRUE;
4204 }
4205 else
4206 {
4207 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4208 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4209
4210 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4211 pIn->swizzleMode,
4212 mip0WidthInBlk,
4213 mip0HeightInBlk,
4214 pOut->numSlices / pOut->blockSlices);
4215 if (majorMode == ADDR_MAJOR_Y)
4216 {
4217 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4218
4219 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4220 {
4221 mip1WidthInBlk++;
4222 }
4223
4224 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4225
4226 pOut->epitchIsHeight = FALSE;
4227 }
4228 else
4229 {
4230 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4231
4232 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4233 {
4234 mip1HeightInBlk++;
4235 }
4236
4237 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4238
4239 pOut->epitchIsHeight = TRUE;
4240 }
4241 }
4242
4243 if (pOut->pMipInfo != NULL)
4244 {
4245 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4246
4247 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4248 {
4249 Dim3d mipStartPos = {0};
4250 UINT_32 mipTailOffsetInBytes = 0;
4251
4252 mipStartPos = GetMipStartPos(pIn->resourceType,
4253 pIn->swizzleMode,
4254 pOut->pitch,
4255 pOut->height,
4256 pOut->numSlices,
4257 pOut->blockWidth,
4258 pOut->blockHeight,
4259 pOut->blockSlices,
4260 i,
4261 elementBytesLog2,
4262 &mipTailOffsetInBytes);
4263
4264 UINT_32 pitchInBlock =
4265 pOut->mipChainPitch / pOut->blockWidth;
4266 UINT_32 sliceInBlock =
4267 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4268 UINT_64 blockIndex =
4269 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4270 UINT_64 macroBlockOffset =
4271 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4272
4273 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4274 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4275 }
4276 }
4277 }
4278 else if (pOut->pMipInfo != NULL)
4279 {
4280 pOut->pMipInfo[0].pitch = pOut->pitch;
4281 pOut->pMipInfo[0].height = pOut->height;
4282 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4283 pOut->pMipInfo[0].offset = 0;
4284 }
4285
4286 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4287 (pIn->bpp >> 3) * pIn->numFrags;
4288 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4289 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4290
4291 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4292 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4293 (pIn->flags.texture == TRUE) &&
4294 (pIn->flags.noMetadata == FALSE) &&
4295 (pIn->flags.metaPipeUnaligned == FALSE))
4296 {
4297 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4298 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4299 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4300 // them, which may cause invalid metadata to be fetched.
4301 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4302 }
4303
4304 if (pIn->flags.prt)
4305 {
4306 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4307 }
4308 }
4309 }
4310
4311 return returnCode;
4312 }
4313
4314 /**
4315 ************************************************************************************************************************
4316 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4317 *
4318 * @brief
4319 * Internal function to calculate alignment for linear surface
4320 *
4321 * @return
4322 * ADDR_E_RETURNCODE
4323 ************************************************************************************************************************
4324 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4325 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4326 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4327 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4328 ) const
4329 {
4330 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4331 UINT_32 pitch = 0;
4332 UINT_32 actualHeight = 0;
4333 UINT_32 elementBytes = pIn->bpp >> 3;
4334 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4335
4336 if (IsTex1d(pIn->resourceType))
4337 {
4338 if (pIn->height > 1)
4339 {
4340 returnCode = ADDR_INVALIDPARAMS;
4341 }
4342 else
4343 {
4344 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4345
4346 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4347 actualHeight = pIn->numMipLevels;
4348
4349 if (pIn->flags.prt == FALSE)
4350 {
4351 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4352 &pitch, &actualHeight);
4353 }
4354
4355 if (returnCode == ADDR_OK)
4356 {
4357 if (pOut->pMipInfo != NULL)
4358 {
4359 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4360 {
4361 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4362 pOut->pMipInfo[i].pitch = pitch;
4363 pOut->pMipInfo[i].height = 1;
4364 pOut->pMipInfo[i].depth = 1;
4365 }
4366 }
4367 }
4368 }
4369 }
4370 else
4371 {
4372 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4373 }
4374
4375 if ((pitch == 0) || (actualHeight == 0))
4376 {
4377 returnCode = ADDR_INVALIDPARAMS;
4378 }
4379
4380 if (returnCode == ADDR_OK)
4381 {
4382 pOut->pitch = pitch;
4383 pOut->height = pIn->height;
4384 pOut->numSlices = pIn->numSlices;
4385 pOut->mipChainPitch = pitch;
4386 pOut->mipChainHeight = actualHeight;
4387 pOut->mipChainSlice = pOut->numSlices;
4388 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4389 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4390 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4391 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4392 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4393 pOut->blockHeight = 1;
4394 pOut->blockSlices = 1;
4395 }
4396
4397 // Post calculation validate
4398 ADDR_ASSERT(pOut->sliceSize > 0);
4399
4400 return returnCode;
4401 }
4402
4403 /**
4404 ************************************************************************************************************************
4405 * Gfx9Lib::GetMipChainInfo
4406 *
4407 * @brief
4408 * Internal function to get out information about mip chain
4409 *
4410 * @return
4411 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4412 ************************************************************************************************************************
4413 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4414 UINT_32 Gfx9Lib::GetMipChainInfo(
4415 AddrResourceType resourceType,
4416 AddrSwizzleMode swizzleMode,
4417 UINT_32 bpp,
4418 UINT_32 mip0Width,
4419 UINT_32 mip0Height,
4420 UINT_32 mip0Depth,
4421 UINT_32 blockWidth,
4422 UINT_32 blockHeight,
4423 UINT_32 blockDepth,
4424 UINT_32 numMipLevel,
4425 ADDR2_MIP_INFO* pMipInfo) const
4426 {
4427 const Dim3d tailMaxDim =
4428 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4429
4430 UINT_32 mipPitch = mip0Width;
4431 UINT_32 mipHeight = mip0Height;
4432 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4433 UINT_32 offset = 0;
4434 UINT_32 firstMipIdInTail = numMipLevel;
4435 BOOL_32 inTail = FALSE;
4436 BOOL_32 finalDim = FALSE;
4437 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4438 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4439
4440 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4441 {
4442 if (inTail)
4443 {
4444 if (finalDim == FALSE)
4445 {
4446 UINT_32 mipSize;
4447
4448 if (is3dThick)
4449 {
4450 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4451 }
4452 else
4453 {
4454 mipSize = mipPitch * mipHeight * (bpp >> 3);
4455 }
4456
4457 if (mipSize <= 256)
4458 {
4459 UINT_32 index = Log2(bpp >> 3);
4460
4461 if (is3dThick)
4462 {
4463 mipPitch = Block256_3dZ[index].w;
4464 mipHeight = Block256_3dZ[index].h;
4465 mipDepth = Block256_3dZ[index].d;
4466 }
4467 else
4468 {
4469 mipPitch = Block256_2d[index].w;
4470 mipHeight = Block256_2d[index].h;
4471 }
4472
4473 finalDim = TRUE;
4474 }
4475 }
4476 }
4477 else
4478 {
4479 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4480 mipPitch, mipHeight, mipDepth);
4481
4482 if (inTail)
4483 {
4484 firstMipIdInTail = mipId;
4485 mipPitch = tailMaxDim.w;
4486 mipHeight = tailMaxDim.h;
4487
4488 if (is3dThick)
4489 {
4490 mipDepth = tailMaxDim.d;
4491 }
4492 }
4493 else
4494 {
4495 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4496 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4497
4498 if (is3dThick)
4499 {
4500 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4501 }
4502 }
4503 }
4504
4505 if (pMipInfo != NULL)
4506 {
4507 pMipInfo[mipId].pitch = mipPitch;
4508 pMipInfo[mipId].height = mipHeight;
4509 pMipInfo[mipId].depth = mipDepth;
4510 pMipInfo[mipId].offset = offset;
4511 }
4512
4513 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4514
4515 if (finalDim)
4516 {
4517 if (is3dThin)
4518 {
4519 mipDepth = Max(mipDepth >> 1, 1u);
4520 }
4521 }
4522 else
4523 {
4524 mipPitch = Max(mipPitch >> 1, 1u);
4525 mipHeight = Max(mipHeight >> 1, 1u);
4526
4527 if (is3dThick || is3dThin)
4528 {
4529 mipDepth = Max(mipDepth >> 1, 1u);
4530 }
4531 }
4532 }
4533
4534 return firstMipIdInTail;
4535 }
4536
4537 /**
4538 ************************************************************************************************************************
4539 * Gfx9Lib::GetMetaMiptailInfo
4540 *
4541 * @brief
4542 * Get mip tail coordinate information.
4543 *
4544 * @return
4545 * N/A
4546 ************************************************************************************************************************
4547 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4548 VOID Gfx9Lib::GetMetaMiptailInfo(
4549 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4550 Dim3d mipCoord, ///< [in] mip tail base coord
4551 UINT_32 numMipInTail, ///< [in] number of mips in tail
4552 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4553 ) const
4554 {
4555 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4556 UINT_32 mipWidth = pMetaBlkDim->w;
4557 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4558 UINT_32 mipDepth = pMetaBlkDim->d;
4559 UINT_32 minInc;
4560
4561 if (isThick)
4562 {
4563 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4564 }
4565 else if (pMetaBlkDim->h >= 1024)
4566 {
4567 minInc = 256;
4568 }
4569 else if (pMetaBlkDim->h == 512)
4570 {
4571 minInc = 128;
4572 }
4573 else
4574 {
4575 minInc = 64;
4576 }
4577
4578 UINT_32 blk32MipId = 0xFFFFFFFF;
4579
4580 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4581 {
4582 pInfo[mip].inMiptail = TRUE;
4583 pInfo[mip].startX = mipCoord.w;
4584 pInfo[mip].startY = mipCoord.h;
4585 pInfo[mip].startZ = mipCoord.d;
4586 pInfo[mip].width = mipWidth;
4587 pInfo[mip].height = mipHeight;
4588 pInfo[mip].depth = mipDepth;
4589
4590 if (mipWidth <= 32)
4591 {
4592 if (blk32MipId == 0xFFFFFFFF)
4593 {
4594 blk32MipId = mip;
4595 }
4596
4597 mipCoord.w = pInfo[blk32MipId].startX;
4598 mipCoord.h = pInfo[blk32MipId].startY;
4599 mipCoord.d = pInfo[blk32MipId].startZ;
4600
4601 switch (mip - blk32MipId)
4602 {
4603 case 0:
4604 mipCoord.w += 32; // 16x16
4605 break;
4606 case 1:
4607 mipCoord.h += 32; // 8x8
4608 break;
4609 case 2:
4610 mipCoord.h += 32; // 4x4
4611 mipCoord.w += 16;
4612 break;
4613 case 3:
4614 mipCoord.h += 32; // 2x2
4615 mipCoord.w += 32;
4616 break;
4617 case 4:
4618 mipCoord.h += 32; // 1x1
4619 mipCoord.w += 48;
4620 break;
4621 // The following are for BC/ASTC formats
4622 case 5:
4623 mipCoord.h += 48; // 1/2 x 1/2
4624 break;
4625 case 6:
4626 mipCoord.h += 48; // 1/4 x 1/4
4627 mipCoord.w += 16;
4628 break;
4629 case 7:
4630 mipCoord.h += 48; // 1/8 x 1/8
4631 mipCoord.w += 32;
4632 break;
4633 case 8:
4634 mipCoord.h += 48; // 1/16 x 1/16
4635 mipCoord.w += 48;
4636 break;
4637 default:
4638 ADDR_ASSERT_ALWAYS();
4639 break;
4640 }
4641
4642 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4643 mipHeight = mipWidth;
4644
4645 if (isThick)
4646 {
4647 mipDepth = mipWidth;
4648 }
4649 }
4650 else
4651 {
4652 if (mipWidth <= minInc)
4653 {
4654 // if we're below the minimal increment...
4655 if (isThick)
4656 {
4657 // For 3d, just go in z direction
4658 mipCoord.d += mipDepth;
4659 }
4660 else
4661 {
4662 // For 2d, first go across, then down
4663 if ((mipWidth * 2) == minInc)
4664 {
4665 // if we're 2 mips below, that's when we go back in x, and down in y
4666 mipCoord.w -= minInc;
4667 mipCoord.h += minInc;
4668 }
4669 else
4670 {
4671 // otherwise, just go across in x
4672 mipCoord.w += minInc;
4673 }
4674 }
4675 }
4676 else
4677 {
4678 // On even mip, go down, otherwise, go across
4679 if (mip & 1)
4680 {
4681 mipCoord.w += mipWidth;
4682 }
4683 else
4684 {
4685 mipCoord.h += mipHeight;
4686 }
4687 }
4688 // Divide the width by 2
4689 mipWidth >>= 1;
4690 // After the first mip in tail, the mip is always a square
4691 mipHeight = mipWidth;
4692 // ...or for 3d, a cube
4693 if (isThick)
4694 {
4695 mipDepth = mipWidth;
4696 }
4697 }
4698 }
4699 }
4700
4701 /**
4702 ************************************************************************************************************************
4703 * Gfx9Lib::GetMipStartPos
4704 *
4705 * @brief
4706 * Internal function to get out information about mip logical start position
4707 *
4708 * @return
4709 * logical start position in macro block width/heith/depth of one mip level within one slice
4710 ************************************************************************************************************************
4711 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4712 Dim3d Gfx9Lib::GetMipStartPos(
4713 AddrResourceType resourceType,
4714 AddrSwizzleMode swizzleMode,
4715 UINT_32 width,
4716 UINT_32 height,
4717 UINT_32 depth,
4718 UINT_32 blockWidth,
4719 UINT_32 blockHeight,
4720 UINT_32 blockDepth,
4721 UINT_32 mipId,
4722 UINT_32 log2ElementBytes,
4723 UINT_32* pMipTailBytesOffset) const
4724 {
4725 Dim3d mipStartPos = {0};
4726 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4727
4728 // Report mip in tail if Mip0 is already in mip tail
4729 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4730 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4731 UINT_32 mipIndexInTail = mipId;
4732
4733 if (inMipTail == FALSE)
4734 {
4735 // Mip 0 dimension, unit in block
4736 UINT_32 mipWidthInBlk = width / blockWidth;
4737 UINT_32 mipHeightInBlk = height / blockHeight;
4738 UINT_32 mipDepthInBlk = depth / blockDepth;
4739 AddrMajorMode majorMode = GetMajorMode(resourceType,
4740 swizzleMode,
4741 mipWidthInBlk,
4742 mipHeightInBlk,
4743 mipDepthInBlk);
4744
4745 UINT_32 endingMip = mipId + 1;
4746
4747 for (UINT_32 i = 1; i <= mipId; i++)
4748 {
4749 if ((i == 1) || (i == 3))
4750 {
4751 if (majorMode == ADDR_MAJOR_Y)
4752 {
4753 mipStartPos.w += mipWidthInBlk;
4754 }
4755 else
4756 {
4757 mipStartPos.h += mipHeightInBlk;
4758 }
4759 }
4760 else
4761 {
4762 if (majorMode == ADDR_MAJOR_X)
4763 {
4764 mipStartPos.w += mipWidthInBlk;
4765 }
4766 else if (majorMode == ADDR_MAJOR_Y)
4767 {
4768 mipStartPos.h += mipHeightInBlk;
4769 }
4770 else
4771 {
4772 mipStartPos.d += mipDepthInBlk;
4773 }
4774 }
4775
4776 BOOL_32 inTail = FALSE;
4777
4778 if (IsThick(resourceType, swizzleMode))
4779 {
4780 UINT_32 dim = log2BlkSize % 3;
4781
4782 if (dim == 0)
4783 {
4784 inTail =
4785 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4786 }
4787 else if (dim == 1)
4788 {
4789 inTail =
4790 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4791 }
4792 else
4793 {
4794 inTail =
4795 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4796 }
4797 }
4798 else
4799 {
4800 if (log2BlkSize & 1)
4801 {
4802 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4803 }
4804 else
4805 {
4806 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4807 }
4808 }
4809
4810 if (inTail)
4811 {
4812 endingMip = i;
4813 break;
4814 }
4815
4816 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4817 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4818 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4819 }
4820
4821 if (mipId >= endingMip)
4822 {
4823 inMipTail = TRUE;
4824 mipIndexInTail = mipId - endingMip;
4825 }
4826 }
4827
4828 if (inMipTail)
4829 {
4830 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4831 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4832 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4833 }
4834
4835 return mipStartPos;
4836 }
4837
4838 /**
4839 ************************************************************************************************************************
4840 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4841 *
4842 * @brief
4843 * Internal function to calculate address from coord for tiled swizzle surface
4844 *
4845 * @return
4846 * ADDR_E_RETURNCODE
4847 ************************************************************************************************************************
4848 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4849 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4850 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4851 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4852 ) const
4853 {
4854 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4855 localIn.swizzleMode = pIn->swizzleMode;
4856 localIn.flags = pIn->flags;
4857 localIn.resourceType = pIn->resourceType;
4858 localIn.bpp = pIn->bpp;
4859 localIn.width = Max(pIn->unalignedWidth, 1u);
4860 localIn.height = Max(pIn->unalignedHeight, 1u);
4861 localIn.numSlices = Max(pIn->numSlices, 1u);
4862 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4863 localIn.numSamples = Max(pIn->numSamples, 1u);
4864 localIn.numFrags = Max(pIn->numFrags, 1u);
4865 if (localIn.numMipLevels <= 1)
4866 {
4867 localIn.pitchInElement = pIn->pitchInElement;
4868 }
4869
4870 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4871 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4872
4873 BOOL_32 valid = (returnCode == ADDR_OK) &&
4874 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4875 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4876 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4877
4878 if (valid)
4879 {
4880 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4881 Dim3d mipStartPos = {0};
4882 UINT_32 mipTailBytesOffset = 0;
4883
4884 if (pIn->numMipLevels > 1)
4885 {
4886 // Mip-map chain cannot be MSAA surface
4887 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4888
4889 mipStartPos = GetMipStartPos(pIn->resourceType,
4890 pIn->swizzleMode,
4891 localOut.pitch,
4892 localOut.height,
4893 localOut.numSlices,
4894 localOut.blockWidth,
4895 localOut.blockHeight,
4896 localOut.blockSlices,
4897 pIn->mipId,
4898 log2ElementBytes,
4899 &mipTailBytesOffset);
4900 }
4901
4902 UINT_32 interleaveOffset = 0;
4903 UINT_32 pipeBits = 0;
4904 UINT_32 pipeXor = 0;
4905 UINT_32 bankBits = 0;
4906 UINT_32 bankXor = 0;
4907
4908 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4909 {
4910 UINT_32 blockOffset = 0;
4911 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4912
4913 if (IsZOrderSwizzle(pIn->swizzleMode))
4914 {
4915 // Morton generation
4916 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4917 {
4918 UINT_32 totalLowBits = 6 - log2ElementBytes;
4919 UINT_32 mortBits = totalLowBits / 2;
4920 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4921 // Are 9 bits enough?
4922 UINT_32 highBitsValue =
4923 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4924 blockOffset = lowBitsValue | highBitsValue;
4925 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4926 }
4927 else
4928 {
4929 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4930 }
4931
4932 // Fill LSBs with sample bits
4933 if (pIn->numSamples > 1)
4934 {
4935 blockOffset *= pIn->numSamples;
4936 blockOffset |= pIn->sample;
4937 }
4938
4939 // Shift according to BytesPP
4940 blockOffset <<= log2ElementBytes;
4941 }
4942 else
4943 {
4944 // Micro block offset
4945 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4946 blockOffset = microBlockOffset;
4947
4948 // Micro block dimension
4949 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4950 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4951 // Morton generation, does 12 bit enough?
4952 blockOffset |=
4953 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4954
4955 // Sample bits start location
4956 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4957 // Join sample bits information to the highest Macro block bits
4958 if (IsNonPrtXor(pIn->swizzleMode))
4959 {
4960 // Non-prt-Xor : xor highest Macro block bits with sample bits
4961 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4962 }
4963 else
4964 {
4965 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4966 // after this op, the blockOffset only contains log2 Macro block size bits
4967 blockOffset %= (1 << sampleStart);
4968 blockOffset |= (pIn->sample << sampleStart);
4969 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4970 }
4971 }
4972
4973 if (IsXor(pIn->swizzleMode))
4974 {
4975 // Mask off bits above Macro block bits to keep page synonyms working for prt
4976 if (IsPrt(pIn->swizzleMode))
4977 {
4978 blockOffset &= ((1 << log2BlkSize) - 1);
4979 }
4980
4981 // Preserve offset inside pipe interleave
4982 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4983 blockOffset >>= m_pipeInterleaveLog2;
4984
4985 // Pipe/Se xor bits
4986 pipeBits = GetPipeXorBits(log2BlkSize);
4987 // Pipe xor
4988 pipeXor = FoldXor2d(blockOffset, pipeBits);
4989 blockOffset >>= pipeBits;
4990
4991 // Bank xor bits
4992 bankBits = GetBankXorBits(log2BlkSize);
4993 // Bank Xor
4994 bankXor = FoldXor2d(blockOffset, bankBits);
4995 blockOffset >>= bankBits;
4996
4997 // Put all the part back together
4998 blockOffset <<= bankBits;
4999 blockOffset |= bankXor;
5000 blockOffset <<= pipeBits;
5001 blockOffset |= pipeXor;
5002 blockOffset <<= m_pipeInterleaveLog2;
5003 blockOffset |= interleaveOffset;
5004 }
5005
5006 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5007 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5008
5009 blockOffset |= mipTailBytesOffset;
5010
5011 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
5012 {
5013 // Apply slice xor if not MSAA/PRT
5014 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5015 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5016 (m_pipeInterleaveLog2 + pipeBits));
5017 }
5018
5019 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5020 bankBits, pipeBits, &blockOffset);
5021
5022 blockOffset %= (1 << log2BlkSize);
5023
5024 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5025 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5026 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5027 UINT_64 macroBlockIndex =
5028 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5029 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5030 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5031
5032 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5033 }
5034 else
5035 {
5036 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5037
5038 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5039
5040 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5041 (pIn->y / microBlockDim.h),
5042 (pIn->slice / microBlockDim.d),
5043 8);
5044
5045 blockOffset <<= 10;
5046 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5047
5048 if (IsXor(pIn->swizzleMode))
5049 {
5050 // Mask off bits above Macro block bits to keep page synonyms working for prt
5051 if (IsPrt(pIn->swizzleMode))
5052 {
5053 blockOffset &= ((1 << log2BlkSize) - 1);
5054 }
5055
5056 // Preserve offset inside pipe interleave
5057 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5058 blockOffset >>= m_pipeInterleaveLog2;
5059
5060 // Pipe/Se xor bits
5061 pipeBits = GetPipeXorBits(log2BlkSize);
5062 // Pipe xor
5063 pipeXor = FoldXor3d(blockOffset, pipeBits);
5064 blockOffset >>= pipeBits;
5065
5066 // Bank xor bits
5067 bankBits = GetBankXorBits(log2BlkSize);
5068 // Bank Xor
5069 bankXor = FoldXor3d(blockOffset, bankBits);
5070 blockOffset >>= bankBits;
5071
5072 // Put all the part back together
5073 blockOffset <<= bankBits;
5074 blockOffset |= bankXor;
5075 blockOffset <<= pipeBits;
5076 blockOffset |= pipeXor;
5077 blockOffset <<= m_pipeInterleaveLog2;
5078 blockOffset |= interleaveOffset;
5079 }
5080
5081 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5082 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5083 blockOffset |= mipTailBytesOffset;
5084
5085 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5086 bankBits, pipeBits, &blockOffset);
5087
5088 blockOffset %= (1 << log2BlkSize);
5089
5090 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
5091 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5092 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5093
5094 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5095 UINT_32 sliceSizeInBlock =
5096 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5097 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5098
5099 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5100 }
5101 }
5102 else
5103 {
5104 returnCode = ADDR_INVALIDPARAMS;
5105 }
5106
5107 return returnCode;
5108 }
5109
5110 /**
5111 ************************************************************************************************************************
5112 * Gfx9Lib::ComputeSurfaceInfoLinear
5113 *
5114 * @brief
5115 * Internal function to calculate padding for linear swizzle 2D/3D surface
5116 *
5117 * @return
5118 * N/A
5119 ************************************************************************************************************************
5120 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5121 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5122 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
5123 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
5124 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
5125 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
5126 ) const
5127 {
5128 ADDR_E_RETURNCODE returnCode = ADDR_OK;
5129
5130 UINT_32 elementBytes = pIn->bpp >> 3;
5131 UINT_32 pitchAlignInElement = 0;
5132
5133 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5134 {
5135 ADDR_ASSERT(pIn->numMipLevels <= 1);
5136 ADDR_ASSERT(pIn->numSlices <= 1);
5137 pitchAlignInElement = 1;
5138 }
5139 else
5140 {
5141 pitchAlignInElement = (256 / elementBytes);
5142 }
5143
5144 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
5145 UINT_32 slice0PaddedHeight = pIn->height;
5146
5147 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5148 &mipChainWidth, &slice0PaddedHeight);
5149
5150 if (returnCode == ADDR_OK)
5151 {
5152 UINT_32 mipChainHeight = 0;
5153 UINT_32 mipHeight = pIn->height;
5154 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5155
5156 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5157 {
5158 if (pMipInfo != NULL)
5159 {
5160 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5161 pMipInfo[i].pitch = mipChainWidth;
5162 pMipInfo[i].height = mipHeight;
5163 pMipInfo[i].depth = mipDepth;
5164 }
5165
5166 mipChainHeight += mipHeight;
5167 mipHeight = RoundHalf(mipHeight);
5168 mipHeight = Max(mipHeight, 1u);
5169 }
5170
5171 *pMipmap0PaddedWidth = mipChainWidth;
5172 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5173 }
5174
5175 return returnCode;
5176 }
5177
5178 /**
5179 ************************************************************************************************************************
5180 * Gfx9Lib::ComputeThinBlockDimension
5181 *
5182 * @brief
5183 * Internal function to get thin block width/height/depth in element from surface input params.
5184 *
5185 * @return
5186 * N/A
5187 ************************************************************************************************************************
5188 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5189 VOID Gfx9Lib::ComputeThinBlockDimension(
5190 UINT_32* pWidth,
5191 UINT_32* pHeight,
5192 UINT_32* pDepth,
5193 UINT_32 bpp,
5194 UINT_32 numSamples,
5195 AddrResourceType resourceType,
5196 AddrSwizzleMode swizzleMode) const
5197 {
5198 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5199
5200 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5201 const UINT_32 eleBytes = bpp >> 3;
5202 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5203 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5204 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5205 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5206
5207 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5208
5209 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5210 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5211 *pDepth = 1;
5212
5213 if (numSamples > 1)
5214 {
5215 const UINT_32 log2sample = Log2(numSamples);
5216 const UINT_32 q = log2sample >> 1;
5217 const UINT_32 r = log2sample & 1;
5218
5219 if (log2BlkSize & 1)
5220 {
5221 *pWidth >>= q;
5222 *pHeight >>= (q + r);
5223 }
5224 else
5225 {
5226 *pWidth >>= (q + r);
5227 *pHeight >>= q;
5228 }
5229 }
5230 }
5231
5232 } // V2
5233 } // Addr
5234