1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
/**
************************************************************************************************************************
* @file  gfx10addrlib.cpp
* @brief Contains the implementation of the Gfx10Lib class.
************************************************************************************************************************
*/
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates an Gfx10Lib object.
50 *
51 * @return
52 * Returns an Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
Gfx10HwlInit(const Client * pClient)55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
73
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
78
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
83
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
98
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
103
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
109 };
110
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx10Lib(const Client * pClient)125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_numPkrLog2(0),
129 m_numSaLog2(0),
130 m_colorBaseIndex(0),
131 m_xmaskBaseIndex(0),
132 m_dccBaseIndex(0)
133 {
134 memset(&m_settings, 0, sizeof(m_settings));
135 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137
138 /**
139 ************************************************************************************************************************
140 * Gfx10Lib::~Gfx10Lib
141 *
142 * @brief
143 * Destructor
144 ************************************************************************************************************************
145 */
~Gfx10Lib()146 Gfx10Lib::~Gfx10Lib()
147 {
148 }
149
150 /**
151 ************************************************************************************************************************
152 * Gfx10Lib::HwlComputeHtileInfo
153 *
154 * @brief
155 * Interface function stub of AddrComputeHtilenfo
156 *
157 * @return
158 * ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
164 ) const
165 {
166 ADDR_E_RETURNCODE ret = ADDR_OK;
167
168 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
170 (pIn->hTileFlags.pipeAligned != TRUE))
171 {
172 ret = ADDR_INVALIDPARAMS;
173 }
174 else
175 {
176 Dim3d metaBlk = {};
177 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
178 ADDR_RSRC_TEX_2D,
179 pIn->swizzleMode,
180 0,
181 0,
182 TRUE,
183 &metaBlk);
184
185 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
186 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188 pOut->metaBlkWidth = metaBlk.w;
189 pOut->metaBlkHeight = metaBlk.h;
190
191 if (pIn->numMipLevels > 1)
192 {
193 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194
195 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196
197 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198 {
199 UINT_32 mipWidth, mipHeight;
200
201 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202
203 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
204 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205
206 const UINT_32 pitchInM = mipWidth / metaBlk.w;
207 const UINT_32 heightInM = mipHeight / metaBlk.h;
208 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209
210 if (pOut->pMipInfo != NULL)
211 {
212 pOut->pMipInfo[i].inMiptail = FALSE;
213 pOut->pMipInfo[i].offset = offset;
214 pOut->pMipInfo[i].sliceSize = mipSliceSize;
215 }
216
217 offset += mipSliceSize;
218 }
219
220 pOut->sliceSize = offset;
221 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
223
224 if (pOut->pMipInfo != NULL)
225 {
226 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227 {
228 pOut->pMipInfo[i].inMiptail = TRUE;
229 pOut->pMipInfo[i].offset = 0;
230 pOut->pMipInfo[i].sliceSize = 0;
231 }
232
233 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234 {
235 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236 }
237 }
238 }
239 else
240 {
241 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
242 const UINT_32 heightInM = pOut->height / metaBlk.h;
243
244 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
245 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
246 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
247
248 if (pOut->pMipInfo != NULL)
249 {
250 pOut->pMipInfo[0].inMiptail = FALSE;
251 pOut->pMipInfo[0].offset = 0;
252 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253 }
254 }
255
256 // Get the HTILE address equation (copied from HtileAddrFromCoord).
257 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
258 const UINT_32 index = m_xmaskBaseIndex;
259 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
260
261 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
262 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
263 }
264
265 return ret;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx10Lib::HwlComputeCmaskInfo
271 *
272 * @brief
273 * Interface function stub of AddrComputeCmaskInfo
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const279 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 ADDR_E_RETURNCODE ret = ADDR_OK;
285
286 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
287 (pIn->cMaskFlags.pipeAligned != TRUE) ||
288 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
289 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
290 {
291 ret = ADDR_INVALIDPARAMS;
292 }
293 else
294 {
295 Dim3d metaBlk = {};
296 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
297 ADDR_RSRC_TEX_2D,
298 pIn->swizzleMode,
299 0,
300 0,
301 TRUE,
302 &metaBlk);
303
304 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
305 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
306 pOut->baseAlign = metaBlkSize;
307 pOut->metaBlkWidth = metaBlk.w;
308 pOut->metaBlkHeight = metaBlk.h;
309
310 if (pIn->numMipLevels > 1)
311 {
312 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
313
314 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
315
316 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
317 {
318 UINT_32 mipWidth, mipHeight;
319
320 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
321
322 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
323 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
324
325 const UINT_32 pitchInM = mipWidth / metaBlk.w;
326 const UINT_32 heightInM = mipHeight / metaBlk.h;
327
328 if (pOut->pMipInfo != NULL)
329 {
330 pOut->pMipInfo[i].inMiptail = FALSE;
331 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
332 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
333 }
334
335 metaBlkPerSlice += pitchInM * heightInM;
336 }
337
338 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
339
340 if (pOut->pMipInfo != NULL)
341 {
342 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
343 {
344 pOut->pMipInfo[i].inMiptail = TRUE;
345 pOut->pMipInfo[i].offset = 0;
346 pOut->pMipInfo[i].sliceSize = 0;
347 }
348
349 if (pIn->firstMipIdInTail != pIn->numMipLevels)
350 {
351 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
352 }
353 }
354 }
355 else
356 {
357 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
358 const UINT_32 heightInM = pOut->height / metaBlk.h;
359
360 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
361
362 if (pOut->pMipInfo != NULL)
363 {
364 pOut->pMipInfo[0].inMiptail = FALSE;
365 pOut->pMipInfo[0].offset = 0;
366 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
367 }
368 }
369
370 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
371 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
372
373 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
374 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
375 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
376 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
377 const UINT_8* patIdxTable =
378 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
379 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
380
381 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
382 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
383 }
384
385 return ret;
386 }
387
388 /**
389 ************************************************************************************************************************
390 * Gfx10Lib::HwlComputeDccInfo
391 *
392 * @brief
393 * Interface function to compute DCC key info
394 *
395 * @return
396 * ADDR_E_RETURNCODE
397 ************************************************************************************************************************
398 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const399 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
400 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
401 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
402 ) const
403 {
404 ADDR_E_RETURNCODE ret = ADDR_OK;
405
406 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
407 {
408 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
409 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
410 ret = ADDR_INVALIDPARAMS;
411 }
412 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
413 {
414 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
415 ret = ADDR_INVALIDPARAMS;
416 }
417 else
418 {
419 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
420
421 {
422 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
423 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
424
425 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
426
427 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
428 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
429 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
430 }
431
432 if (ret == ADDR_OK)
433 {
434 Dim3d metaBlk = {};
435 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
436 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
437 pIn->resourceType,
438 pIn->swizzleMode,
439 elemLog2,
440 numFragLog2,
441 pIn->dccKeyFlags.pipeAligned,
442 &metaBlk);
443
444 pOut->dccRamBaseAlign = metaBlkSize;
445 pOut->metaBlkWidth = metaBlk.w;
446 pOut->metaBlkHeight = metaBlk.h;
447 pOut->metaBlkDepth = metaBlk.d;
448 pOut->metaBlkSize = metaBlkSize;
449
450 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
451 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
452 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
453
454 if (pIn->numMipLevels > 1)
455 {
456 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
457
458 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
459
460 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
461 {
462 UINT_32 mipWidth, mipHeight;
463
464 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
465
466 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
467 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
468
469 const UINT_32 pitchInM = mipWidth / metaBlk.w;
470 const UINT_32 heightInM = mipHeight / metaBlk.h;
471 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
472
473 if (pOut->pMipInfo != NULL)
474 {
475 pOut->pMipInfo[i].inMiptail = FALSE;
476 pOut->pMipInfo[i].offset = offset;
477 pOut->pMipInfo[i].sliceSize = mipSliceSize;
478 }
479
480 offset += mipSliceSize;
481 }
482
483 pOut->dccRamSliceSize = offset;
484 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
485 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
486
487 if (pOut->pMipInfo != NULL)
488 {
489 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
490 {
491 pOut->pMipInfo[i].inMiptail = TRUE;
492 pOut->pMipInfo[i].offset = 0;
493 pOut->pMipInfo[i].sliceSize = 0;
494 }
495
496 if (pIn->firstMipIdInTail != pIn->numMipLevels)
497 {
498 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
499 }
500 }
501 }
502 else
503 {
504 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
505 const UINT_32 heightInM = pOut->height / metaBlk.h;
506
507 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
508 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
509 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
510
511 if (pOut->pMipInfo != NULL)
512 {
513 pOut->pMipInfo[0].inMiptail = FALSE;
514 pOut->pMipInfo[0].offset = 0;
515 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
516 }
517 }
518
519 // Get the DCC address equation (copied from DccAddrFromCoord)
520 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
521 const UINT_32 numPipeLog2 = m_pipesLog2;
522 UINT_32 index = m_dccBaseIndex + elemLog2;
523 const UINT_8* patIdxTable;
524
525 if (m_settings.supportRbPlus)
526 {
527 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
528
529 if (pIn->dccKeyFlags.pipeAligned)
530 {
531 index += MaxNumOfBpp;
532
533 if (m_numPkrLog2 < 2)
534 {
535 index += m_pipesLog2 * MaxNumOfBpp;
536 }
537 else
538 {
539 // 4 groups for "m_numPkrLog2 < 2" case
540 index += 4 * MaxNumOfBpp;
541
542 const UINT_32 dccPipePerPkr = 3;
543
544 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
545 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
546 }
547 }
548 }
549 else
550 {
551 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
552
553 if (pIn->dccKeyFlags.pipeAligned)
554 {
555 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
556 }
557 else
558 {
559 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
560 }
561 }
562
563 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
564 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
565 }
566 }
567
568 return ret;
569 }
570
571 /**
572 ************************************************************************************************************************
573 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
574 *
575 * @brief
576 * Interface function stub of AddrComputeCmaskAddrFromCoord
577 *
578 * @return
579 * ADDR_E_RETURNCODE
580 ************************************************************************************************************************
581 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)582 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
583 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
584 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
585 {
586 // Only support pipe aligned CMask
587 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
588
589 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
590 input.size = sizeof(input);
591 input.cMaskFlags = pIn->cMaskFlags;
592 input.colorFlags = pIn->colorFlags;
593 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
594 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
595 input.numSlices = Max(pIn->numSlices, 1u);
596 input.swizzleMode = pIn->swizzleMode;
597 input.resourceType = pIn->resourceType;
598
599 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
600 output.size = sizeof(output);
601
602 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
603
604 if (returnCode == ADDR_OK)
605 {
606 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
607 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
608 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
609 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
610 const UINT_8* patIdxTable =
611 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
612 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
613
614
615 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
616 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
617 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
618 blkSizeLog2 + 1, // +1 for nibble offset
619 pIn->x,
620 pIn->y,
621 pIn->slice,
622 0);
623 const UINT_32 xb = pIn->x / output.metaBlkWidth;
624 const UINT_32 yb = pIn->y / output.metaBlkHeight;
625 const UINT_32 pb = output.pitch / output.metaBlkWidth;
626 const UINT_32 blkIndex = (yb * pb) + xb;
627 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
628
629 pOut->addr = (output.sliceSize * pIn->slice) +
630 (blkIndex * (1 << blkSizeLog2)) +
631 ((blkOffset >> 1) ^ pipeXor);
632 pOut->bitPosition = (blkOffset & 1) << 2;
633 }
634
635 return returnCode;
636 }
637
638 /**
639 ************************************************************************************************************************
640 * Gfx10Lib::HwlComputeHtileAddrFromCoord
641 *
642 * @brief
643 * Interface function stub of AddrComputeHtileAddrFromCoord
644 *
645 * @return
646 * ADDR_E_RETURNCODE
647 ************************************************************************************************************************
648 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)649 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
650 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
651 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
652 {
653 ADDR_E_RETURNCODE returnCode = ADDR_OK;
654
655 if (pIn->numMipLevels > 1)
656 {
657 returnCode = ADDR_NOTIMPLEMENTED;
658 }
659 else
660 {
661 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
662 input.size = sizeof(input);
663 input.hTileFlags = pIn->hTileFlags;
664 input.depthFlags = pIn->depthflags;
665 input.swizzleMode = pIn->swizzleMode;
666 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
667 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
668 input.numSlices = Max(pIn->numSlices, 1u);
669 input.numMipLevels = 1;
670
671 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
672 output.size = sizeof(output);
673
674 returnCode = ComputeHtileInfo(&input, &output);
675
676 if (returnCode == ADDR_OK)
677 {
678 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
679 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
680 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
681 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
682
683
684 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
685 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
686 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
687 blkSizeLog2 + 1, // +1 for nibble offset
688 pIn->x,
689 pIn->y,
690 pIn->slice,
691 0);
692 const UINT_32 xb = pIn->x / output.metaBlkWidth;
693 const UINT_32 yb = pIn->y / output.metaBlkHeight;
694 const UINT_32 pb = output.pitch / output.metaBlkWidth;
695 const UINT_32 blkIndex = (yb * pb) + xb;
696 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
697
698 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
699 (blkIndex * (1 << blkSizeLog2)) +
700 ((blkOffset >> 1) ^ pipeXor);
701 }
702 }
703
704 return returnCode;
705 }
706
707 /**
708 ************************************************************************************************************************
709 * Gfx10Lib::HwlComputeHtileCoordFromAddr
710 *
711 * @brief
712 * Interface function stub of AddrComputeHtileCoordFromAddr
713 *
714 * @return
715 * ADDR_E_RETURNCODE
716 ************************************************************************************************************************
717 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)718 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
719 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
720 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
721 {
722 ADDR_NOT_IMPLEMENTED();
723
724 return ADDR_OK;
725 }
726
727 /**
728 ************************************************************************************************************************
729 * Gfx10Lib::HwlSupportComputeDccAddrFromCoord
730 *
731 * @brief
732 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
733 *
734 * @return
735 * ADDR_E_RETURNCODE
736 ************************************************************************************************************************
737 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)738 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
739 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
740 {
741 ADDR_E_RETURNCODE returnCode = ADDR_OK;
742
743 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
744 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
745 (pIn->dccKeyFlags.linear == TRUE) ||
746 (pIn->numFrags > 1) ||
747 (pIn->numMipLevels > 1) ||
748 (pIn->mipId > 0))
749 {
750 returnCode = ADDR_NOTSUPPORTED;
751 }
752 else if ((pIn->pitch == 0) ||
753 (pIn->metaBlkWidth == 0) ||
754 (pIn->metaBlkHeight == 0) ||
755 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
756 {
757 returnCode = ADDR_NOTSUPPORTED;
758 }
759
760 return returnCode;
761 }
762
763 /**
764 ************************************************************************************************************************
765 * Gfx10Lib::HwlComputeDccAddrFromCoord
766 *
767 * @brief
768 * Interface function stub of AddrComputeDccAddrFromCoord
769 *
770 * @return
771 * N/A
772 ************************************************************************************************************************
773 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)774 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
775 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
776 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
777 {
778 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
779 const UINT_32 numPipeLog2 = m_pipesLog2;
780 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
781 UINT_32 index = m_dccBaseIndex + elemLog2;
782 const UINT_8* patIdxTable;
783
784 if (m_settings.supportRbPlus)
785 {
786 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
787
788 if (pIn->dccKeyFlags.pipeAligned)
789 {
790 index += MaxNumOfBpp;
791
792 if (m_numPkrLog2 < 2)
793 {
794 index += m_pipesLog2 * MaxNumOfBpp;
795 }
796 else
797 {
798 // 4 groups for "m_numPkrLog2 < 2" case
799 index += 4 * MaxNumOfBpp;
800
801 const UINT_32 dccPipePerPkr = 3;
802
803 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
804 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
805 }
806 }
807 }
808 else
809 {
810 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
811
812 if (pIn->dccKeyFlags.pipeAligned)
813 {
814 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
815 }
816 else
817 {
818 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
819 }
820 }
821
822 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
823 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
824 const UINT_32 blkOffset =
825 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
826 blkSizeLog2 + 1, // +1 for nibble offset
827 pIn->x,
828 pIn->y,
829 pIn->slice,
830 0);
831 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
832 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
833 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
834 const UINT_32 blkIndex = (yb * pb) + xb;
835 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
836
837 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
838 (blkIndex * (1 << blkSizeLog2)) +
839 ((blkOffset >> 1) ^ pipeXor);
840 }
841
842 /**
843 ************************************************************************************************************************
844 * Gfx10Lib::HwlInitGlobalParams
845 *
846 * @brief
847 * Initializes global parameters
848 *
849 * @return
850 * TRUE if all settings are valid
851 *
852 ************************************************************************************************************************
853 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)854 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
855 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
856 {
857 BOOL_32 valid = TRUE;
858 GB_ADDR_CONFIG_GFX10 gbAddrConfig;
859
860 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
861
862 // These values are copied from CModel code
863 switch (gbAddrConfig.bits.NUM_PIPES)
864 {
865 case ADDR_CONFIG_1_PIPE:
866 m_pipes = 1;
867 m_pipesLog2 = 0;
868 break;
869 case ADDR_CONFIG_2_PIPE:
870 m_pipes = 2;
871 m_pipesLog2 = 1;
872 break;
873 case ADDR_CONFIG_4_PIPE:
874 m_pipes = 4;
875 m_pipesLog2 = 2;
876 break;
877 case ADDR_CONFIG_8_PIPE:
878 m_pipes = 8;
879 m_pipesLog2 = 3;
880 break;
881 case ADDR_CONFIG_16_PIPE:
882 m_pipes = 16;
883 m_pipesLog2 = 4;
884 break;
885 case ADDR_CONFIG_32_PIPE:
886 m_pipes = 32;
887 m_pipesLog2 = 5;
888 break;
889 case ADDR_CONFIG_64_PIPE:
890 m_pipes = 64;
891 m_pipesLog2 = 6;
892 break;
893 default:
894 ADDR_ASSERT_ALWAYS();
895 valid = FALSE;
896 break;
897 }
898
899 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
900 {
901 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
902 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
903 m_pipeInterleaveLog2 = 8;
904 break;
905 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
906 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
907 m_pipeInterleaveLog2 = 9;
908 break;
909 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
910 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
911 m_pipeInterleaveLog2 = 10;
912 break;
913 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
914 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
915 m_pipeInterleaveLog2 = 11;
916 break;
917 default:
918 ADDR_ASSERT_ALWAYS();
919 valid = FALSE;
920 break;
921 }
922
923 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
924 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
925 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
926 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
927
928 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
929 {
930 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
931 m_maxCompFrag = 1;
932 m_maxCompFragLog2 = 0;
933 break;
934 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
935 m_maxCompFrag = 2;
936 m_maxCompFragLog2 = 1;
937 break;
938 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
939 m_maxCompFrag = 4;
940 m_maxCompFragLog2 = 2;
941 break;
942 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
943 m_maxCompFrag = 8;
944 m_maxCompFragLog2 = 3;
945 break;
946 default:
947 ADDR_ASSERT_ALWAYS();
948 valid = FALSE;
949 break;
950 }
951
952 {
953 // Skip unaligned case
954 m_xmaskBaseIndex += MaxNumOfAA;
955
956 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
957 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
958
959 if (m_settings.supportRbPlus)
960 {
961 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
962 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
963
964 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
965
966 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
967 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
968
969 if (m_numPkrLog2 >= 2)
970 {
971 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
972 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
973 }
974 }
975 else
976 {
977 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
978 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
979 1;
980
981 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
982
983 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
984 sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
985 }
986 }
987
988 if (m_settings.supportRbPlus)
989 {
990 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
991 // corresponding SW_64KB_* mode
992 m_blockVarSizeLog2 = m_pipesLog2 + 14;
993 }
994
995
996 if (valid)
997 {
998 InitEquationTable();
999 }
1000
1001 return valid;
1002 }
1003
1004 /**
1005 ************************************************************************************************************************
1006 * Gfx10Lib::HwlConvertChipFamily
1007 *
1008 * @brief
1009 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1010 * @return
1011 * ChipFamily
1012 ************************************************************************************************************************
1013 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1014 ChipFamily Gfx10Lib::HwlConvertChipFamily(
1015 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
1016 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1017 {
1018 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1019
1020 m_settings.dccUnsup3DSwDis = 1;
1021 m_settings.dsMipmapHtileFix = 1;
1022
1023 switch (chipFamily)
1024 {
1025 case FAMILY_NV:
1026 if (ASICREV_IS_NAVI10_P(chipRevision))
1027 {
1028 m_settings.dsMipmapHtileFix = 0;
1029 m_settings.isDcn20 = 1;
1030 }
1031
1032 if (ASICREV_IS_NAVI12_P(chipRevision))
1033 {
1034 m_settings.isDcn20 = 1;
1035 }
1036
1037 if (ASICREV_IS_NAVI14_M(chipRevision))
1038 {
1039 m_settings.isDcn20 = 1;
1040 }
1041
1042 if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
1043 {
1044 m_settings.supportRbPlus = 1;
1045 m_settings.dccUnsup3DSwDis = 0;
1046 }
1047
1048 if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
1049 {
1050 m_settings.supportRbPlus = 1;
1051 m_settings.dccUnsup3DSwDis = 0;
1052 }
1053
1054 if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
1055 {
1056 m_settings.supportRbPlus = 1;
1057 m_settings.dccUnsup3DSwDis = 0;
1058 }
1059
1060 if (ASICREV_IS_BEIGE_GOBY(chipRevision))
1061 {
1062 m_settings.supportRbPlus = 1;
1063 m_settings.dccUnsup3DSwDis = 0;
1064 }
1065 break;
1066
1067 case FAMILY_VGH:
1068 if (ASICREV_IS_VANGOGH(chipRevision))
1069 {
1070 m_settings.supportRbPlus = 1;
1071 m_settings.dccUnsup3DSwDis = 0;
1072 }
1073 else
1074 {
1075 ADDR_ASSERT(!"Unknown chip revision");
1076 }
1077
1078 break;
1079
1080 case FAMILY_YC:
1081 if (ASICREV_IS_YELLOW_CARP(chipRevision))
1082 {
1083 m_settings.supportRbPlus = 1;
1084 m_settings.dccUnsup3DSwDis = 0;
1085 }
1086 else
1087 {
1088 ADDR_ASSERT(!"Unknown chip revision");
1089 }
1090
1091 break;
1092
1093 default:
1094 ADDR_ASSERT(!"Unknown chip family");
1095 break;
1096 }
1097
1098 m_configFlags.use32bppFor422Fmt = TRUE;
1099
1100 return family;
1101 }
1102
1103 /**
1104 ************************************************************************************************************************
1105 * Gfx10Lib::GetBlk256SizeLog2
1106 *
1107 * @brief
1108 * Get block 256 size
1109 *
1110 * @return
1111 * N/A
1112 ************************************************************************************************************************
1113 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1114 void Gfx10Lib::GetBlk256SizeLog2(
1115 AddrResourceType resourceType, ///< [in] Resource type
1116 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1117 UINT_32 elemLog2, ///< [in] element size log2
1118 UINT_32 numSamplesLog2, ///< [in] number of samples
1119 Dim3d* pBlock ///< [out] block size
1120 ) const
1121 {
1122 if (IsThin(resourceType, swizzleMode))
1123 {
1124 UINT_32 blockBits = 8 - elemLog2;
1125
1126 if (IsZOrderSwizzle(swizzleMode))
1127 {
1128 blockBits -= numSamplesLog2;
1129 }
1130
1131 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1132 pBlock->h = (blockBits >> 1);
1133 pBlock->d = 0;
1134 }
1135 else
1136 {
1137 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1138
1139 UINT_32 blockBits = 8 - elemLog2;
1140
1141 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1142 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1143 pBlock->h = (blockBits / 3);
1144 }
1145 }
1146
1147 /**
1148 ************************************************************************************************************************
1149 * Gfx10Lib::GetCompressedBlockSizeLog2
1150 *
1151 * @brief
1152 * Get compress block size
1153 *
1154 * @return
1155 * N/A
1156 ************************************************************************************************************************
1157 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1158 void Gfx10Lib::GetCompressedBlockSizeLog2(
1159 Gfx10DataType dataType, ///< [in] Data type
1160 AddrResourceType resourceType, ///< [in] Resource type
1161 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1162 UINT_32 elemLog2, ///< [in] element size log2
1163 UINT_32 numSamplesLog2, ///< [in] number of samples
1164 Dim3d* pBlock ///< [out] block size
1165 ) const
1166 {
1167 if (dataType == Gfx10DataColor)
1168 {
1169 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1170 }
1171 else
1172 {
1173 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1174 pBlock->w = 3;
1175 pBlock->h = 3;
1176 pBlock->d = 0;
1177 }
1178 }
1179
1180 /**
1181 ************************************************************************************************************************
1182 * Gfx10Lib::GetMetaOverlapLog2
1183 *
1184 * @brief
1185 * Get meta block overlap
1186 *
1187 * @return
1188 * N/A
1189 ************************************************************************************************************************
1190 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1191 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1192 Gfx10DataType dataType, ///< [in] Data type
1193 AddrResourceType resourceType, ///< [in] Resource type
1194 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1195 UINT_32 elemLog2, ///< [in] element size log2
1196 UINT_32 numSamplesLog2 ///< [in] number of samples
1197 ) const
1198 {
1199 Dim3d compBlock;
1200 Dim3d microBlock;
1201
1202 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1203 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1204
1205 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1206 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1207 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1208 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1209 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1210
1211 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1212 {
1213 overlap++;
1214 }
1215
1216 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1217 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1218 {
1219 overlap--;
1220 }
1221 overlap = Max(overlap, 0);
1222 return overlap;
1223 }
1224
1225 /**
1226 ************************************************************************************************************************
1227 * Gfx10Lib::Get3DMetaOverlapLog2
1228 *
1229 * @brief
1230 * Get 3d meta block overlap
1231 *
1232 * @return
1233 * N/A
1234 ************************************************************************************************************************
1235 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1236 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1237 AddrResourceType resourceType, ///< [in] Resource type
1238 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1239 UINT_32 elemLog2 ///< [in] element size log2
1240 ) const
1241 {
1242 Dim3d microBlock;
1243 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1244
1245 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1246
1247 if (m_settings.supportRbPlus)
1248 {
1249 overlap++;
1250 }
1251
1252 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1253 {
1254 overlap = 0;
1255 }
1256 return overlap;
1257 }
1258
1259 /**
1260 ************************************************************************************************************************
1261 * Gfx10Lib::GetPipeRotateAmount
1262 *
1263 * @brief
1264 * Get pipe rotate amount
1265 *
1266 * @return
1267 * Pipe rotate amount
1268 ************************************************************************************************************************
1269 */
1270
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1271 INT_32 Gfx10Lib::GetPipeRotateAmount(
1272 AddrResourceType resourceType, ///< [in] Resource type
1273 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1274 ) const
1275 {
1276 INT_32 amount = 0;
1277
1278 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1279 {
1280 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1281 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1282 }
1283
1284 return amount;
1285 }
1286
1287 /**
1288 ************************************************************************************************************************
1289 * Gfx10Lib::GetMetaBlkSize
1290 *
1291 * @brief
1292 * Get metadata block size
1293 *
1294 * @return
1295 * Meta block size
1296 ************************************************************************************************************************
1297 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1298 UINT_32 Gfx10Lib::GetMetaBlkSize(
1299 Gfx10DataType dataType, ///< [in] Data type
1300 AddrResourceType resourceType, ///< [in] Resource type
1301 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1302 UINT_32 elemLog2, ///< [in] element size log2
1303 UINT_32 numSamplesLog2, ///< [in] number of samples
1304 BOOL_32 pipeAlign, ///< [in] pipe align
1305 Dim3d* pBlock ///< [out] block size
1306 ) const
1307 {
1308 INT_32 metablkSizeLog2;
1309
1310 {
1311 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1312 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1313 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1314 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1315 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1316 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1317 INT_32 numPipesLog2 = m_pipesLog2;
1318
1319 if (IsThin(resourceType, swizzleMode))
1320 {
1321 if ((pipeAlign == FALSE) ||
1322 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1323 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1324 {
1325 if (pipeAlign)
1326 {
1327 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1328 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1329 }
1330 else
1331 {
1332 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1333 }
1334 }
1335 else
1336 {
1337 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1338 {
1339 numPipesLog2++;
1340 }
1341
1342 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1343
1344 if (numPipesLog2 >= 4)
1345 {
1346 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1347
1348 // In 16Bpe 8xaa, we have an extra overlap bit
1349 if ((pipeRotateLog2 > 0) &&
1350 (elemLog2 == 4) &&
1351 (numSamplesLog2 == 3) &&
1352 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1353 {
1354 overlapLog2++;
1355 }
1356
1357 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1358 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1359
1360 if (m_settings.supportRbPlus &&
1361 IsRtOptSwizzle(swizzleMode) &&
1362 (numPipesLog2 == 6) &&
1363 (numSamplesLog2 == 3) &&
1364 (m_maxCompFragLog2 == 3) &&
1365 (metablkSizeLog2 < 15))
1366 {
1367 metablkSizeLog2 = 15;
1368 }
1369 }
1370 else
1371 {
1372 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1373 }
1374
1375 if (dataType == Gfx10DataDepthStencil)
1376 {
1377 // For htile surfaces, pad meta block size to 2K * num_pipes
1378 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1379 }
1380
1381 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1382
1383 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1384 {
1385 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1386
1387 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1388 }
1389 }
1390
1391 const INT_32 metablkBitsLog2 =
1392 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1393 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1394 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1395 pBlock->d = 1;
1396 }
1397 else
1398 {
1399 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1400
1401 if (pipeAlign)
1402 {
1403 if (m_settings.supportRbPlus &&
1404 (m_pipesLog2 == m_numSaLog2 + 1) &&
1405 (m_pipesLog2 > 1) &&
1406 IsRbAligned(resourceType, swizzleMode))
1407 {
1408 numPipesLog2++;
1409 }
1410
1411 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1412
1413 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1414 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1415 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1416 }
1417 else
1418 {
1419 metablkSizeLog2 = 12;
1420 }
1421
1422 const INT_32 metablkBitsLog2 =
1423 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1424 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1425 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1426 pBlock->d = 1 << (metablkBitsLog2 / 3);
1427 }
1428 }
1429
1430 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1431 }
1432
1433 /**
1434 ************************************************************************************************************************
1435 * Gfx10Lib::ConvertSwizzlePatternToEquation
1436 *
1437 * @brief
1438 * Convert swizzle pattern to equation.
1439 *
1440 * @return
1441 * N/A
1442 ************************************************************************************************************************
1443 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1444 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1445 UINT_32 elemLog2, ///< [in] element bytes log2
1446 AddrResourceType rsrcType, ///< [in] resource type
1447 AddrSwizzleMode swMode, ///< [in] swizzle mode
1448 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1449 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1450 const
1451 {
1452 ADDR_BIT_SETTING fullSwizzlePattern[20];
1453 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1454
1455 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1456 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1457
1458 pEquation->numBits = blockSizeLog2;
1459 pEquation->stackedDepthSlices = FALSE;
1460
1461 for (UINT_32 i = 0; i < elemLog2; i++)
1462 {
1463 pEquation->addr[i].channel = 0;
1464 pEquation->addr[i].valid = 1;
1465 pEquation->addr[i].index = i;
1466 }
1467
1468 if (IsXor(swMode) == FALSE)
1469 {
1470 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1471 {
1472 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1473
1474 if (pSwizzle[i].x != 0)
1475 {
1476 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1477
1478 pEquation->addr[i].channel = 0;
1479 pEquation->addr[i].valid = 1;
1480 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1481 }
1482 else if (pSwizzle[i].y != 0)
1483 {
1484 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1485
1486 pEquation->addr[i].channel = 1;
1487 pEquation->addr[i].valid = 1;
1488 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1489 }
1490 else
1491 {
1492 ADDR_ASSERT(pSwizzle[i].z != 0);
1493 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1494
1495 pEquation->addr[i].channel = 2;
1496 pEquation->addr[i].valid = 1;
1497 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1498 }
1499
1500 pEquation->xor1[i].value = 0;
1501 pEquation->xor2[i].value = 0;
1502 }
1503 }
1504 else if (IsThin(rsrcType, swMode))
1505 {
1506 Dim3d dim;
1507 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1508
1509 const UINT_32 blkXLog2 = Log2(dim.w);
1510 const UINT_32 blkYLog2 = Log2(dim.h);
1511 const UINT_32 blkXMask = dim.w - 1;
1512 const UINT_32 blkYMask = dim.h - 1;
1513
1514 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1515 UINT_32 xMask = 0;
1516 UINT_32 yMask = 0;
1517 UINT_32 bMask = (1 << elemLog2) - 1;
1518
1519 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1520 {
1521 if (IsPow2(pSwizzle[i].value))
1522 {
1523 if (pSwizzle[i].x != 0)
1524 {
1525 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1526 xMask |= pSwizzle[i].x;
1527
1528 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1529
1530 ADDR_ASSERT(xLog2 < blkXLog2);
1531
1532 pEquation->addr[i].channel = 0;
1533 pEquation->addr[i].valid = 1;
1534 pEquation->addr[i].index = xLog2 + elemLog2;
1535 }
1536 else
1537 {
1538 ADDR_ASSERT(pSwizzle[i].y != 0);
1539 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1540 yMask |= pSwizzle[i].y;
1541
1542 pEquation->addr[i].channel = 1;
1543 pEquation->addr[i].valid = 1;
1544 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1545
1546 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1547 }
1548
1549 swizzle[i].value = 0;
1550 bMask |= 1 << i;
1551 }
1552 else
1553 {
1554 if (pSwizzle[i].z != 0)
1555 {
1556 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1557
1558 pEquation->xor2[i].channel = 2;
1559 pEquation->xor2[i].valid = 1;
1560 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1561 }
1562
1563 swizzle[i].x = pSwizzle[i].x;
1564 swizzle[i].y = pSwizzle[i].y;
1565 swizzle[i].z = swizzle[i].s = 0;
1566
1567 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1568
1569 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1570
1571 if (xHi != 0)
1572 {
1573 ADDR_ASSERT(IsPow2(xHi));
1574 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1575
1576 pEquation->xor1[i].channel = 0;
1577 pEquation->xor1[i].valid = 1;
1578 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1579
1580 swizzle[i].x &= blkXMask;
1581 }
1582
1583 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1584
1585 if (yHi != 0)
1586 {
1587 ADDR_ASSERT(IsPow2(yHi));
1588
1589 if (xHi == 0)
1590 {
1591 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1592 pEquation->xor1[i].channel = 1;
1593 pEquation->xor1[i].valid = 1;
1594 pEquation->xor1[i].index = Log2(yHi);
1595 }
1596 else
1597 {
1598 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1599 pEquation->xor2[i].channel = 1;
1600 pEquation->xor2[i].valid = 1;
1601 pEquation->xor2[i].index = Log2(yHi);
1602 }
1603
1604 swizzle[i].y &= blkYMask;
1605 }
1606
1607 if (swizzle[i].value == 0)
1608 {
1609 bMask |= 1 << i;
1610 }
1611 }
1612 }
1613
1614 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1615 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1616
1617 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1618
1619 while (bMask != blockMask)
1620 {
1621 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1622 {
1623 if ((bMask & (1 << i)) == 0)
1624 {
1625 if (IsPow2(swizzle[i].value))
1626 {
1627 if (swizzle[i].x != 0)
1628 {
1629 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1630 xMask |= swizzle[i].x;
1631
1632 const UINT_32 xLog2 = Log2(swizzle[i].x);
1633
1634 ADDR_ASSERT(xLog2 < blkXLog2);
1635
1636 pEquation->addr[i].channel = 0;
1637 pEquation->addr[i].valid = 1;
1638 pEquation->addr[i].index = xLog2 + elemLog2;
1639 }
1640 else
1641 {
1642 ADDR_ASSERT(swizzle[i].y != 0);
1643 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1644 yMask |= swizzle[i].y;
1645
1646 pEquation->addr[i].channel = 1;
1647 pEquation->addr[i].valid = 1;
1648 pEquation->addr[i].index = Log2(swizzle[i].y);
1649
1650 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1651 }
1652
1653 swizzle[i].value = 0;
1654 bMask |= 1 << i;
1655 }
1656 else
1657 {
1658 const UINT_32 x = swizzle[i].x & xMask;
1659 const UINT_32 y = swizzle[i].y & yMask;
1660
1661 if (x != 0)
1662 {
1663 ADDR_ASSERT(IsPow2(x));
1664
1665 if (pEquation->xor1[i].value == 0)
1666 {
1667 pEquation->xor1[i].channel = 0;
1668 pEquation->xor1[i].valid = 1;
1669 pEquation->xor1[i].index = Log2(x) + elemLog2;
1670 }
1671 else
1672 {
1673 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1674 pEquation->xor2[i].channel = 0;
1675 pEquation->xor2[i].valid = 1;
1676 pEquation->xor2[i].index = Log2(x) + elemLog2;
1677 }
1678 }
1679
1680 if (y != 0)
1681 {
1682 ADDR_ASSERT(IsPow2(y));
1683
1684 if (pEquation->xor1[i].value == 0)
1685 {
1686 pEquation->xor1[i].channel = 1;
1687 pEquation->xor1[i].valid = 1;
1688 pEquation->xor1[i].index = Log2(y);
1689 }
1690 else
1691 {
1692 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1693 pEquation->xor2[i].channel = 1;
1694 pEquation->xor2[i].valid = 1;
1695 pEquation->xor2[i].index = Log2(y);
1696 }
1697 }
1698
1699 swizzle[i].x &= ~x;
1700 swizzle[i].y &= ~y;
1701 }
1702 }
1703 }
1704 }
1705
1706 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1707 }
1708 else
1709 {
1710 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1711 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1712 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1713 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1714 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1715 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1716
1717 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1718 UINT_32 xMask = 0;
1719 UINT_32 yMask = 0;
1720 UINT_32 zMask = 0;
1721 UINT_32 bMask = (1 << elemLog2) - 1;
1722
1723 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1724 {
1725 if (IsPow2(pSwizzle[i].value))
1726 {
1727 if (pSwizzle[i].x != 0)
1728 {
1729 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1730 xMask |= pSwizzle[i].x;
1731
1732 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1733
1734 ADDR_ASSERT(xLog2 < blkXLog2);
1735
1736 pEquation->addr[i].channel = 0;
1737 pEquation->addr[i].valid = 1;
1738 pEquation->addr[i].index = xLog2 + elemLog2;
1739 }
1740 else if (pSwizzle[i].y != 0)
1741 {
1742 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1743 yMask |= pSwizzle[i].y;
1744
1745 pEquation->addr[i].channel = 1;
1746 pEquation->addr[i].valid = 1;
1747 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1748
1749 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1750 }
1751 else
1752 {
1753 ADDR_ASSERT(pSwizzle[i].z != 0);
1754 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1755 zMask |= pSwizzle[i].z;
1756
1757 pEquation->addr[i].channel = 2;
1758 pEquation->addr[i].valid = 1;
1759 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1760
1761 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1762 }
1763
1764 swizzle[i].value = 0;
1765 bMask |= 1 << i;
1766 }
1767 else
1768 {
1769 swizzle[i].x = pSwizzle[i].x;
1770 swizzle[i].y = pSwizzle[i].y;
1771 swizzle[i].z = pSwizzle[i].z;
1772 swizzle[i].s = 0;
1773
1774 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1775
1776 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1777 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1778 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1779
1780 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1781
1782 if (xHi != 0)
1783 {
1784 ADDR_ASSERT(IsPow2(xHi));
1785 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1786
1787 pEquation->xor1[i].channel = 0;
1788 pEquation->xor1[i].valid = 1;
1789 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1790
1791 swizzle[i].x &= blkXMask;
1792 }
1793
1794 if (yHi != 0)
1795 {
1796 ADDR_ASSERT(IsPow2(yHi));
1797
1798 if (pEquation->xor1[i].value == 0)
1799 {
1800 pEquation->xor1[i].channel = 1;
1801 pEquation->xor1[i].valid = 1;
1802 pEquation->xor1[i].index = Log2(yHi);
1803 }
1804 else
1805 {
1806 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1807 pEquation->xor2[i].channel = 1;
1808 pEquation->xor2[i].valid = 1;
1809 pEquation->xor2[i].index = Log2(yHi);
1810 }
1811
1812 swizzle[i].y &= blkYMask;
1813 }
1814
1815 if (zHi != 0)
1816 {
1817 ADDR_ASSERT(IsPow2(zHi));
1818
1819 if (pEquation->xor1[i].value == 0)
1820 {
1821 pEquation->xor1[i].channel = 2;
1822 pEquation->xor1[i].valid = 1;
1823 pEquation->xor1[i].index = Log2(zHi);
1824 }
1825 else
1826 {
1827 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1828 pEquation->xor2[i].channel = 2;
1829 pEquation->xor2[i].valid = 1;
1830 pEquation->xor2[i].index = Log2(zHi);
1831 }
1832
1833 swizzle[i].z &= blkZMask;
1834 }
1835
1836 if (swizzle[i].value == 0)
1837 {
1838 bMask |= 1 << i;
1839 }
1840 }
1841 }
1842
1843 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1844 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1845
1846 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1847
1848 while (bMask != blockMask)
1849 {
1850 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1851 {
1852 if ((bMask & (1 << i)) == 0)
1853 {
1854 if (IsPow2(swizzle[i].value))
1855 {
1856 if (swizzle[i].x != 0)
1857 {
1858 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1859 xMask |= swizzle[i].x;
1860
1861 const UINT_32 xLog2 = Log2(swizzle[i].x);
1862
1863 ADDR_ASSERT(xLog2 < blkXLog2);
1864
1865 pEquation->addr[i].channel = 0;
1866 pEquation->addr[i].valid = 1;
1867 pEquation->addr[i].index = xLog2 + elemLog2;
1868 }
1869 else if (swizzle[i].y != 0)
1870 {
1871 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1872 yMask |= swizzle[i].y;
1873
1874 pEquation->addr[i].channel = 1;
1875 pEquation->addr[i].valid = 1;
1876 pEquation->addr[i].index = Log2(swizzle[i].y);
1877
1878 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1879 }
1880 else
1881 {
1882 ADDR_ASSERT(swizzle[i].z != 0);
1883 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1884 zMask |= swizzle[i].z;
1885
1886 pEquation->addr[i].channel = 2;
1887 pEquation->addr[i].valid = 1;
1888 pEquation->addr[i].index = Log2(swizzle[i].z);
1889
1890 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1891 }
1892
1893 swizzle[i].value = 0;
1894 bMask |= 1 << i;
1895 }
1896 else
1897 {
1898 const UINT_32 x = swizzle[i].x & xMask;
1899 const UINT_32 y = swizzle[i].y & yMask;
1900 const UINT_32 z = swizzle[i].z & zMask;
1901
1902 if (x != 0)
1903 {
1904 ADDR_ASSERT(IsPow2(x));
1905
1906 if (pEquation->xor1[i].value == 0)
1907 {
1908 pEquation->xor1[i].channel = 0;
1909 pEquation->xor1[i].valid = 1;
1910 pEquation->xor1[i].index = Log2(x) + elemLog2;
1911 }
1912 else
1913 {
1914 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1915 pEquation->xor2[i].channel = 0;
1916 pEquation->xor2[i].valid = 1;
1917 pEquation->xor2[i].index = Log2(x) + elemLog2;
1918 }
1919 }
1920
1921 if (y != 0)
1922 {
1923 ADDR_ASSERT(IsPow2(y));
1924
1925 if (pEquation->xor1[i].value == 0)
1926 {
1927 pEquation->xor1[i].channel = 1;
1928 pEquation->xor1[i].valid = 1;
1929 pEquation->xor1[i].index = Log2(y);
1930 }
1931 else
1932 {
1933 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1934 pEquation->xor2[i].channel = 1;
1935 pEquation->xor2[i].valid = 1;
1936 pEquation->xor2[i].index = Log2(y);
1937 }
1938 }
1939
1940 if (z != 0)
1941 {
1942 ADDR_ASSERT(IsPow2(z));
1943
1944 if (pEquation->xor1[i].value == 0)
1945 {
1946 pEquation->xor1[i].channel = 2;
1947 pEquation->xor1[i].valid = 1;
1948 pEquation->xor1[i].index = Log2(z);
1949 }
1950 else
1951 {
1952 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1953 pEquation->xor2[i].channel = 2;
1954 pEquation->xor2[i].valid = 1;
1955 pEquation->xor2[i].index = Log2(z);
1956 }
1957 }
1958
1959 swizzle[i].x &= ~x;
1960 swizzle[i].y &= ~y;
1961 swizzle[i].z &= ~z;
1962 }
1963 }
1964 }
1965 }
1966
1967 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1968 }
1969 }
1970
1971 /**
1972 ************************************************************************************************************************
1973 * Gfx10Lib::InitEquationTable
1974 *
1975 * @brief
1976 * Initialize Equation table.
1977 *
1978 * @return
1979 * N/A
1980 ************************************************************************************************************************
1981 */
InitEquationTable()1982 VOID Gfx10Lib::InitEquationTable()
1983 {
1984 memset(m_equationTable, 0, sizeof(m_equationTable));
1985
1986 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1987 {
1988 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1989
1990 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1991 {
1992 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1993
1994 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1995 {
1996 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1997 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1998
1999 if (pPatInfo != NULL)
2000 {
2001 ADDR_ASSERT(IsValidSwMode(swMode));
2002
2003 if (pPatInfo->maxItemCount <= 3)
2004 {
2005 ADDR_EQUATION equation = {};
2006
2007 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2008
2009 equationIndex = m_numEquations;
2010 ADDR_ASSERT(equationIndex < EquationTableSize);
2011
2012 m_equationTable[equationIndex] = equation;
2013
2014 m_numEquations++;
2015 }
2016 else
2017 {
2018 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2019 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2020 ADDR_ASSERT(rsrcTypeIdx == 1);
2021 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2022 ADDR_ASSERT(m_settings.supportRbPlus == 1);
2023 }
2024 }
2025
2026 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2027 }
2028 }
2029 }
2030 }
2031
2032 /**
2033 ************************************************************************************************************************
2034 * Gfx10Lib::HwlGetEquationIndex
2035 *
2036 * @brief
2037 * Interface function stub of GetEquationIndex
2038 *
2039 * @return
2040 * ADDR_E_RETURNCODE
2041 ************************************************************************************************************************
2042 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2043 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2044 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2045 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2046 ) const
2047 {
2048 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2049
2050 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2051 (pIn->resourceType == ADDR_RSRC_TEX_3D))
2052 {
2053 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2054 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
2055 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2056
2057 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2058 }
2059
2060 if (pOut->pMipInfo != NULL)
2061 {
2062 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2063 {
2064 pOut->pMipInfo[i].equationIndex = equationIdx;
2065 }
2066 }
2067
2068 return equationIdx;
2069 }
2070
2071 /**
2072 ************************************************************************************************************************
2073 * Gfx10Lib::GetValidDisplaySwizzleModes
2074 *
2075 * @brief
2076 * Get valid swizzle modes mask for displayable surface
2077 *
2078 * @return
2079 * Valid swizzle modes mask for displayable surface
2080 ************************************************************************************************************************
2081 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2082 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2083 UINT_32 bpp
2084 ) const
2085 {
2086 UINT_32 swModeMask = 0;
2087
2088 if (bpp <= 64)
2089 {
2090 if (m_settings.isDcn20)
2091 {
2092 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2093 }
2094 else
2095 {
2096 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2097 }
2098 }
2099
2100 return swModeMask;
2101 }
2102
2103 /**
2104 ************************************************************************************************************************
2105 * Gfx10Lib::IsValidDisplaySwizzleMode
2106 *
2107 * @brief
2108 * Check if a swizzle mode is supported by display engine
2109 *
2110 * @return
2111 * TRUE is swizzle mode is supported by display engine
2112 ************************************************************************************************************************
2113 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2114 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2115 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2116 ) const
2117 {
2118 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2119
2120 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2121 }
2122
2123 /**
2124 ************************************************************************************************************************
2125 * Gfx10Lib::GetMaxNumMipsInTail
2126 *
2127 * @brief
2128 * Return max number of mips in tails
2129 *
2130 * @return
2131 * Max number of mips in tails
2132 ************************************************************************************************************************
2133 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2134 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2135 UINT_32 blockSizeLog2, ///< block size log2
2136 BOOL_32 isThin ///< is thin or thick
2137 ) const
2138 {
2139 UINT_32 effectiveLog2 = blockSizeLog2;
2140
2141 if (isThin == FALSE)
2142 {
2143 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2144 }
2145
2146 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2147 }
2148
2149 /**
2150 ************************************************************************************************************************
2151 * Gfx10Lib::HwlComputePipeBankXor
2152 *
2153 * @brief
2154 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2155 *
2156 * @return
2157 * PipeBankXor value
2158 ************************************************************************************************************************
2159 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2161 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2162 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2163 ) const
2164 {
2165 if (IsNonPrtXor(pIn->swizzleMode))
2166 {
2167 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2168
2169 // No pipe xor...
2170 const UINT_32 pipeXor = 0;
2171 UINT_32 bankXor = 0;
2172
2173 const UINT_32 XorPatternLen = 8;
2174 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
2175 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
2176 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
2177 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
2178 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2179
2180 switch (bankBits)
2181 {
2182 case 1:
2183 case 2:
2184 case 3:
2185 case 4:
2186 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2187 break;
2188 default:
2189 // valid bank bits should be 0~4
2190 ADDR_ASSERT_ALWAYS();
2191 case 0:
2192 break;
2193 }
2194
2195 pOut->pipeBankXor = bankXor | pipeXor;
2196 }
2197 else
2198 {
2199 pOut->pipeBankXor = 0;
2200 }
2201
2202 return ADDR_OK;
2203 }
2204
2205 /**
2206 ************************************************************************************************************************
2207 * Gfx10Lib::HwlComputeSlicePipeBankXor
2208 *
2209 * @brief
2210 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2211 *
2212 * @return
2213 * PipeBankXor value
2214 ************************************************************************************************************************
2215 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2216 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2217 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2218 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2219 ) const
2220 {
2221 if (IsNonPrtXor(pIn->swizzleMode))
2222 {
2223 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2224 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2225 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2226
2227 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2228
2229 if (pIn->bpe != 0)
2230 {
2231 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2232 pIn->resourceType,
2233 Log2(pIn->bpe >> 3),
2234 1);
2235
2236 if (pPatInfo != NULL)
2237 {
2238 ADDR_BIT_SETTING fullSwizzlePattern[20];
2239 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2240
2241 const UINT_32 pipeBankXorOffset =
2242 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2243 blockBits,
2244 0,
2245 0,
2246 pIn->slice,
2247 0);
2248
2249 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2250
2251 // Should have no bit set under pipe interleave
2252 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2253
2254 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2255 ADDR_ASSERT(pipeBankXor == pipeXor);
2256
2257 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2258 }
2259 }
2260 }
2261 else
2262 {
2263 pOut->pipeBankXor = 0;
2264 }
2265
2266 return ADDR_OK;
2267 }
2268
2269 /**
2270 ************************************************************************************************************************
2271 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2272 *
2273 * @brief
2274 * Compute sub resource offset to support swizzle pattern
2275 *
2276 * @return
2277 * Offset
2278 ************************************************************************************************************************
2279 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2280 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2281 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2282 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2283 ) const
2284 {
2285 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2286
2287 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2288
2289 return ADDR_OK;
2290 }
2291
2292 /**
2293 ************************************************************************************************************************
2294 * Gfx10Lib::HwlComputeNonBlockCompressedView
2295 *
2296 * @brief
2297 * Compute non-block-compressed view for a given mipmap level/slice.
2298 *
2299 * @return
2300 * ADDR_E_RETURNCODE
2301 ************************************************************************************************************************
2302 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2303 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2304 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
2305 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
2306 ) const
2307 {
2308 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2309
2310 if (pIn->resourceType != ADDR_RSRC_TEX_2D)
2311 {
2312 // Only 2D resource can have a NonBC view...
2313 returnCode = ADDR_INVALIDPARAMS;
2314 }
2315 else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
2316 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2317 {
2318 // Only support BC1~BC7 or ASTC_8x8 for now...
2319 returnCode = ADDR_NOTSUPPORTED;
2320 }
2321 else
2322 {
2323 UINT_32 bcWidth, bcHeight;
2324 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2325
2326 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2327 infoIn.flags = pIn->flags;
2328 infoIn.swizzleMode = pIn->swizzleMode;
2329 infoIn.resourceType = pIn->resourceType;
2330 infoIn.bpp = bpp;
2331 infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
2332 infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
2333 infoIn.numSlices = pIn->numSlices;
2334 infoIn.numMipLevels = pIn->numMipLevels;
2335 infoIn.numSamples = 1;
2336 infoIn.numFrags = 1;
2337
2338 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2339
2340 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2341 infoOut.pMipInfo = mipInfo;
2342
2343 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2344
2345 if (tiled)
2346 {
2347 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2348 }
2349 else
2350 {
2351 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2352 }
2353
2354 if (returnCode == ADDR_OK)
2355 {
2356 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2357 subOffIn.swizzleMode = infoIn.swizzleMode;
2358 subOffIn.resourceType = infoIn.resourceType;
2359 subOffIn.slice = pIn->slice;
2360 subOffIn.sliceSize = infoOut.sliceSize;
2361 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2362 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
2363
2364 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2365
2366 // For any mipmap level, move nonBc view base address by offset
2367 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2368 pOut->offset = subOffOut.offset;
2369
2370 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2371 slicePbXorIn.bpe = infoIn.bpp;
2372 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
2373 slicePbXorIn.resourceType = infoIn.resourceType;
2374 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2375 slicePbXorIn.slice = pIn->slice;
2376
2377 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2378
2379 // For any mipmap level, nonBc view should use computed pbXor
2380 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2381 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2382
2383 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2384 const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
2385 const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;
2386
2387 if (inTail)
2388 {
2389 // For mipmap level that is in mip tail block, hack a lot of things...
2390 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2391 // are fit in tail block:
2392
2393 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2394 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2395
2396 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2397 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2398
2399 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2400 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2401
2402 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2403 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2404 }
2405 // This check should cover at least mipId == 0
2406 else if (requestMipWidth << pIn->mipId == infoIn.width)
2407 {
2408 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2409 // - only one mipmap level and mipId = 0
2410 pOut->mipId = 0;
2411 pOut->numMipLevels = 1;
2412
2413 // (mip0) width = requestMipWidth
2414 pOut->unalignedWidth = requestMipWidth;
2415
2416 // (mip0) height = requestMipHeight
2417 pOut->unalignedHeight = requestMipHeight;
2418 }
2419 else
2420 {
2421 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2422 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2423 // because single mip view may have different pitch value than original (multiple) mip view...
2424 // A simple case would be:
2425 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2426 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2427 // mip0 width = 0x101/mip1 width = 0x80
2428 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2429 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2430
2431 // - 2 levels and mipId = 1
2432 pOut->mipId = 1;
2433 pOut->numMipLevels = 2;
2434
2435 const UINT_32 upperMipWidth =
2436 PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
2437 const UINT_32 upperMipHeight =
2438 PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;
2439
2440 const BOOL_32 needToAvoidInTail =
2441 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2442 TRUE : FALSE;
2443
2444 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2445 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2446
2447 const BOOL_32 needExtraWidth =
2448 ((upperMipWidth < requestMipWidth * 2) ||
2449 ((upperMipWidth == requestMipWidth * 2) &&
2450 ((needToAvoidInTail == TRUE) ||
2451 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2452
2453 const BOOL_32 needExtraHeight =
2454 ((upperMipHeight < requestMipHeight * 2) ||
2455 ((upperMipHeight == requestMipHeight * 2) &&
2456 ((needToAvoidInTail == TRUE) ||
2457 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2458
2459 // (mip0) width = requestLastMipLevelWidth
2460 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2461
2462 // (mip0) height = requestLastMipLevelHeight
2463 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2464 }
2465
2466 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2467 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2468 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2469 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2470 }
2471 }
2472
2473 return returnCode;
2474 }
2475
2476 /**
2477 ************************************************************************************************************************
2478 * Gfx10Lib::ValidateNonSwModeParams
2479 *
2480 * @brief
2481 * Validate compute surface info params except swizzle mode
2482 *
2483 * @return
2484 * TRUE if parameters are valid, FALSE otherwise
2485 ************************************************************************************************************************
2486 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2487 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2488 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2489 {
2490 BOOL_32 valid = TRUE;
2491
2492 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2493 {
2494 ADDR_ASSERT_ALWAYS();
2495 valid = FALSE;
2496 }
2497
2498 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2499 {
2500 ADDR_ASSERT_ALWAYS();
2501 valid = FALSE;
2502 }
2503
2504 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2505 const AddrResourceType rsrcType = pIn->resourceType;
2506 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2507 const BOOL_32 msaa = (pIn->numFrags > 1);
2508 const BOOL_32 display = flags.display;
2509 const BOOL_32 tex3d = IsTex3d(rsrcType);
2510 const BOOL_32 tex2d = IsTex2d(rsrcType);
2511 const BOOL_32 tex1d = IsTex1d(rsrcType);
2512 const BOOL_32 stereo = flags.qbStereo;
2513
2514
2515 // Resource type check
2516 if (tex1d)
2517 {
2518 if (msaa || display || stereo)
2519 {
2520 ADDR_ASSERT_ALWAYS();
2521 valid = FALSE;
2522 }
2523 }
2524 else if (tex2d)
2525 {
2526 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2527 {
2528 ADDR_ASSERT_ALWAYS();
2529 valid = FALSE;
2530 }
2531 }
2532 else if (tex3d)
2533 {
2534 if (msaa || display || stereo)
2535 {
2536 ADDR_ASSERT_ALWAYS();
2537 valid = FALSE;
2538 }
2539 }
2540 else
2541 {
2542 ADDR_ASSERT_ALWAYS();
2543 valid = FALSE;
2544 }
2545
2546 return valid;
2547 }
2548
2549 /**
2550 ************************************************************************************************************************
2551 * Gfx10Lib::ValidateSwModeParams
2552 *
2553 * @brief
2554 * Validate compute surface info related to swizzle mode
2555 *
2556 * @return
2557 * TRUE if parameters are valid, FALSE otherwise
2558 ************************************************************************************************************************
2559 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2560 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2561 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2562 {
2563 BOOL_32 valid = TRUE;
2564
2565 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2566 {
2567 ADDR_ASSERT_ALWAYS();
2568 valid = FALSE;
2569 }
2570 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2571 {
2572 {
2573 ADDR_ASSERT_ALWAYS();
2574 valid = FALSE;
2575 }
2576 }
2577
2578 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2579 const AddrResourceType rsrcType = pIn->resourceType;
2580 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2581 const BOOL_32 msaa = (pIn->numFrags > 1);
2582 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2583 const BOOL_32 color = flags.color;
2584 const BOOL_32 display = flags.display;
2585 const BOOL_32 tex3d = IsTex3d(rsrcType);
2586 const BOOL_32 tex2d = IsTex2d(rsrcType);
2587 const BOOL_32 tex1d = IsTex1d(rsrcType);
2588 const BOOL_32 thin3d = flags.view3dAs2dArray;
2589 const BOOL_32 linear = IsLinear(swizzle);
2590 const BOOL_32 blk256B = IsBlock256b(swizzle);
2591 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2592 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2593 const BOOL_32 prt = flags.prt;
2594 const BOOL_32 fmask = flags.fmask;
2595
2596 // Misc check
2597 if ((pIn->numFrags > 1) &&
2598 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2599 {
2600 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2601 ADDR_ASSERT_ALWAYS();
2602 valid = FALSE;
2603 }
2604
2605 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2606 {
2607 ADDR_ASSERT_ALWAYS();
2608 valid = FALSE;
2609 }
2610
2611 if ((pIn->bpp == 96) && (linear == FALSE))
2612 {
2613 ADDR_ASSERT_ALWAYS();
2614 valid = FALSE;
2615 }
2616
2617 const UINT_32 swizzleMask = 1 << swizzle;
2618
2619 // Resource type check
2620 if (tex1d)
2621 {
2622 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2623 {
2624 ADDR_ASSERT_ALWAYS();
2625 valid = FALSE;
2626 }
2627 }
2628 else if (tex2d)
2629 {
2630 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2631 {
2632 {
2633 ADDR_ASSERT_ALWAYS();
2634 valid = FALSE;
2635 }
2636 }
2637 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2638 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2639 {
2640 ADDR_ASSERT_ALWAYS();
2641 valid = FALSE;
2642 }
2643
2644 }
2645 else if (tex3d)
2646 {
2647 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2648 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2649 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2650 {
2651 ADDR_ASSERT_ALWAYS();
2652 valid = FALSE;
2653 }
2654 }
2655
2656 // Swizzle type check
2657 if (linear)
2658 {
2659 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2660 {
2661 ADDR_ASSERT_ALWAYS();
2662 valid = FALSE;
2663 }
2664 }
2665 else if (IsZOrderSwizzle(swizzle))
2666 {
2667 if ((pIn->bpp > 64) ||
2668 (msaa && (color || (pIn->bpp > 32))) ||
2669 ElemLib::IsBlockCompressed(pIn->format) ||
2670 ElemLib::IsMacroPixelPacked(pIn->format))
2671 {
2672 ADDR_ASSERT_ALWAYS();
2673 valid = FALSE;
2674 }
2675 }
2676 else if (IsStandardSwizzle(rsrcType, swizzle))
2677 {
2678 if (zbuffer || msaa)
2679 {
2680 ADDR_ASSERT_ALWAYS();
2681 valid = FALSE;
2682 }
2683 }
2684 else if (IsDisplaySwizzle(rsrcType, swizzle))
2685 {
2686 if (zbuffer || msaa)
2687 {
2688 ADDR_ASSERT_ALWAYS();
2689 valid = FALSE;
2690 }
2691 }
2692 else if (IsRtOptSwizzle(swizzle))
2693 {
2694 if (zbuffer)
2695 {
2696 ADDR_ASSERT_ALWAYS();
2697 valid = FALSE;
2698 }
2699 }
2700 else
2701 {
2702 {
2703 ADDR_ASSERT_ALWAYS();
2704 valid = FALSE;
2705 }
2706 }
2707
2708 // Block type check
2709 if (blk256B)
2710 {
2711 if (zbuffer || tex3d || msaa)
2712 {
2713 ADDR_ASSERT_ALWAYS();
2714 valid = FALSE;
2715 }
2716 }
2717 else if (blkVar)
2718 {
2719 if (m_blockVarSizeLog2 == 0)
2720 {
2721 ADDR_ASSERT_ALWAYS();
2722 valid = FALSE;
2723 }
2724 }
2725
2726 return valid;
2727 }
2728
2729 /**
2730 ************************************************************************************************************************
2731 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2732 *
2733 * @brief
2734 * Compute surface info sanity check
2735 *
2736 * @return
2737 * Offset
2738 ************************************************************************************************************************
2739 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2740 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2741 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2742 ) const
2743 {
2744 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2745 }
2746
2747 /**
2748 ************************************************************************************************************************
2749 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2750 *
2751 * @brief
2752 * Internal function to get suggested surface information for client to use
2753 *
2754 * @return
2755 * ADDR_E_RETURNCODE
2756 ************************************************************************************************************************
2757 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2758 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2759 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2760 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2761 ) const
2762 {
2763 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2764
2765 if (pIn->flags.fmask)
2766 {
2767 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2768 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2769
2770 if (forbid64KbBlockType && forbidVarBlockType)
2771 {
2772 // Invalid combination...
2773 ADDR_ASSERT_ALWAYS();
2774 returnCode = ADDR_INVALIDPARAMS;
2775 }
2776 else
2777 {
2778 pOut->resourceType = ADDR_RSRC_TEX_2D;
2779 pOut->validBlockSet.value = 0;
2780 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2781 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2782 pOut->validSwModeSet.value = 0;
2783 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2784 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2785 pOut->canXor = TRUE;
2786 pOut->validSwTypeSet.value = AddrSwSetZ;
2787 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2788
2789 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2790
2791 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2792 {
2793 const UINT_8 maxFmaskSwizzleModeType = 2;
2794 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2795 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2796 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2797 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2798 const UINT_32 width = Max(pIn->width, 1u);
2799 const UINT_32 height = Max(pIn->height, 1u);
2800 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2801
2802 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2803 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2804 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2805 UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2806
2807 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2808 {
2809 ComputeBlockDimensionForSurf(&blkDim[i].w,
2810 &blkDim[i].h,
2811 &blkDim[i].d,
2812 fmaskBpp,
2813 1,
2814 pOut->resourceType,
2815 swMode[i]);
2816
2817 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2818 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2819 }
2820
2821 if (BlockTypeWithinMemoryBudget(padSize[0],
2822 padSize[1],
2823 ratioLow,
2824 ratioHi,
2825 pIn->memoryBudget,
2826 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2827 {
2828 use64KbBlockType = FALSE;
2829 }
2830 }
2831 else if (forbidVarBlockType)
2832 {
2833 use64KbBlockType = TRUE;
2834 }
2835
2836 if (use64KbBlockType)
2837 {
2838 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2839 }
2840 else
2841 {
2842 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2843 }
2844 }
2845 }
2846 else
2847 {
2848 UINT_32 bpp = pIn->bpp;
2849 UINT_32 width = Max(pIn->width, 1u);
2850 UINT_32 height = Max(pIn->height, 1u);
2851
2852 // Set format to INVALID will skip this conversion
2853 if (pIn->format != ADDR_FMT_INVALID)
2854 {
2855 ElemMode elemMode = ADDR_UNCOMPRESSED;
2856 UINT_32 expandX, expandY;
2857
2858 // Get compression/expansion factors and element mode which indicates compression/expansion
2859 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2860 &elemMode,
2861 &expandX,
2862 &expandY);
2863
2864 UINT_32 basePitch = 0;
2865 GetElemLib()->AdjustSurfaceInfo(elemMode,
2866 expandX,
2867 expandY,
2868 &bpp,
2869 &basePitch,
2870 &width,
2871 &height);
2872 }
2873
2874 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2875 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2876 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2877 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2878 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2879
2880 // Pre sanity check on non swizzle mode parameters
2881 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2882 localIn.flags = pIn->flags;
2883 localIn.resourceType = pIn->resourceType;
2884 localIn.format = pIn->format;
2885 localIn.bpp = bpp;
2886 localIn.width = width;
2887 localIn.height = height;
2888 localIn.numSlices = numSlices;
2889 localIn.numMipLevels = numMipLevels;
2890 localIn.numSamples = numSamples;
2891 localIn.numFrags = numFrags;
2892
2893 if (ValidateNonSwModeParams(&localIn))
2894 {
2895 // Forbid swizzle mode(s) by client setting
2896 ADDR2_SWMODE_SET allowedSwModeSet = {};
2897 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2898 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2899 allowedSwModeSet.value |=
2900 pIn->forbiddenBlock.macroThin4KB ? 0 :
2901 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2902 allowedSwModeSet.value |=
2903 pIn->forbiddenBlock.macroThick4KB ? 0 :
2904 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2905 allowedSwModeSet.value |=
2906 pIn->forbiddenBlock.macroThin64KB ? 0 :
2907 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2908 allowedSwModeSet.value |=
2909 pIn->forbiddenBlock.macroThick64KB ? 0 :
2910 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2911 allowedSwModeSet.value |=
2912 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2913
2914 if (pIn->preferredSwSet.value != 0)
2915 {
2916 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2917 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2918 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2919 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2920 }
2921
2922 if (pIn->noXor)
2923 {
2924 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2925 }
2926
2927 if (pIn->maxAlign > 0)
2928 {
2929 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2930 {
2931 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2932 }
2933
2934 if (pIn->maxAlign < Size64K)
2935 {
2936 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2937 }
2938
2939 if (pIn->maxAlign < Size4K)
2940 {
2941 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2942 }
2943
2944 if (pIn->maxAlign < Size256)
2945 {
2946 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2947 }
2948 }
2949
2950 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2951 switch (pIn->resourceType)
2952 {
2953 case ADDR_RSRC_TEX_1D:
2954 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2955 break;
2956
2957 case ADDR_RSRC_TEX_2D:
2958 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2959
2960 break;
2961
2962 case ADDR_RSRC_TEX_3D:
2963 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2964
2965 if (pIn->flags.view3dAs2dArray)
2966 {
2967 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2968 }
2969 break;
2970
2971 default:
2972 ADDR_ASSERT_ALWAYS();
2973 allowedSwModeSet.value = 0;
2974 break;
2975 }
2976
2977 if (ElemLib::IsBlockCompressed(pIn->format) ||
2978 ElemLib::IsMacroPixelPacked(pIn->format) ||
2979 (bpp > 64) ||
2980 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2981 {
2982 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2983 }
2984
2985 if (pIn->format == ADDR_FMT_32_32_32)
2986 {
2987 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2988 }
2989
2990 if (msaa)
2991 {
2992 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2993 }
2994
2995 if (pIn->flags.depth || pIn->flags.stencil)
2996 {
2997 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2998 }
2999
3000 if (pIn->flags.display)
3001 {
3002 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3003 }
3004
3005 if (allowedSwModeSet.value != 0)
3006 {
3007 #if DEBUG
3008 // Post sanity check, at least AddrLib should accept the output generated by its own
3009 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3010
3011 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3012 {
3013 if (validateSwModeSet & 1)
3014 {
3015 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3016 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3017 }
3018
3019 validateSwModeSet >>= 1;
3020 }
3021 #endif
3022
3023 pOut->resourceType = pIn->resourceType;
3024 pOut->validSwModeSet = allowedSwModeSet;
3025 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3026 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3027 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3028
3029 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3030
3031 if (pOut->clientPreferredSwSet.value == 0)
3032 {
3033 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3034 }
3035
3036 // Apply optional restrictions
3037 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3038 {
3039 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3040 {
3041 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3042 // the GL2 in VAR mode, so it should be avoided.
3043 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3044 }
3045 else
3046 {
3047 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3048 // But we have to suffer from low performance because there is no other choice...
3049 ADDR_ASSERT_ALWAYS();
3050 }
3051 }
3052
3053 if (pIn->flags.needEquation)
3054 {
3055 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3056 }
3057
3058 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3059 {
3060 pOut->swizzleMode = ADDR_SW_LINEAR;
3061 }
3062 else
3063 {
3064 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3065
3066 if ((height > 1) && (computeMinSize == FALSE))
3067 {
3068 // Always ignore linear swizzle mode if:
3069 // 1. This is a (2D/3D) resource with height > 1
3070 // 2. Client doesn't require computing minimize size
3071 allowedSwModeSet.swLinear = 0;
3072 }
3073
3074 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3075
3076 // Determine block size if there are 2 or more block type candidates
3077 if (IsPow2(allowedBlockSet.value) == FALSE)
3078 {
3079 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3080
3081 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3082
3083 if (m_blockVarSizeLog2 != 0)
3084 {
3085 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3086 }
3087
3088 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3089 {
3090 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3091 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
3092 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3093 }
3094 else
3095 {
3096 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
3097 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
3098 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3099 }
3100
3101 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3102
3103 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3104 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3105 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3106 UINT_32 minSizeBlk = AddrBlockMicro;
3107 UINT_64 minSize = 0;
3108
3109 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3110
3111 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3112 {
3113 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3114 {
3115 localIn.swizzleMode = swMode[i];
3116
3117 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3118 {
3119 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3120 }
3121 else
3122 {
3123 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3124 }
3125
3126 if (returnCode == ADDR_OK)
3127 {
3128 padSize[i] = localOut.surfSize;
3129
3130 if (minSize == 0)
3131 {
3132 minSize = padSize[i];
3133 minSizeBlk = i;
3134 }
3135 else
3136 {
3137 if (BlockTypeWithinMemoryBudget(
3138 minSize,
3139 padSize[i],
3140 ratioLow,
3141 ratioHi,
3142 0.0,
3143 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3144 {
3145 minSize = padSize[i];
3146 minSizeBlk = i;
3147 }
3148 }
3149 }
3150 else
3151 {
3152 ADDR_ASSERT_ALWAYS();
3153 break;
3154 }
3155 }
3156 }
3157
3158 if (pIn->memoryBudget > 1.0)
3159 {
3160 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3161 // smaller-block type again in coming loop
3162 switch (minSizeBlk)
3163 {
3164 case AddrBlockThick64KB:
3165 allowedBlockSet.macroThin64KB = 0;
3166 case AddrBlockThinVar:
3167 case AddrBlockThin64KB:
3168 allowedBlockSet.macroThick4KB = 0;
3169 case AddrBlockThick4KB:
3170 allowedBlockSet.macroThin4KB = 0;
3171 case AddrBlockThin4KB:
3172 allowedBlockSet.micro = 0;
3173 case AddrBlockMicro:
3174 allowedBlockSet.linear = 0;
3175 case AddrBlockLinear:
3176 break;
3177
3178 default:
3179 ADDR_ASSERT_ALWAYS();
3180 break;
3181 }
3182
3183 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3184 {
3185 if ((i != minSizeBlk) &&
3186 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3187 {
3188 if (BlockTypeWithinMemoryBudget(
3189 minSize,
3190 padSize[i],
3191 0,
3192 0,
3193 pIn->memoryBudget,
3194 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3195 {
3196 // Clear the block type if the memory waste is unacceptable
3197 allowedBlockSet.value &= ~(1u << (i - 1));
3198 }
3199 }
3200 }
3201
3202 // Remove VAR block type if bigger block type is allowed
3203 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3204 {
3205 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3206 {
3207 allowedBlockSet.var = 0;
3208 }
3209 }
3210
3211 // Remove linear block type if 2 or more block types are allowed
3212 if (IsPow2(allowedBlockSet.value) == FALSE)
3213 {
3214 allowedBlockSet.linear = 0;
3215 }
3216
3217 // Select the biggest allowed block type
3218 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3219
3220 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3221 {
3222 minSizeBlk = AddrBlockLinear;
3223 }
3224 }
3225
3226 switch (minSizeBlk)
3227 {
3228 case AddrBlockLinear:
3229 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3230 break;
3231
3232 case AddrBlockMicro:
3233 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3234 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3235 break;
3236
3237 case AddrBlockThin4KB:
3238 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3239 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3240 break;
3241
3242 case AddrBlockThick4KB:
3243 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3244 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3245 break;
3246
3247 case AddrBlockThin64KB:
3248 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3249 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3250 break;
3251
3252 case AddrBlockThick64KB:
3253 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3254 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3255 break;
3256
3257 case AddrBlockThinVar:
3258 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3259 break;
3260
3261 default:
3262 ADDR_ASSERT_ALWAYS();
3263 allowedSwModeSet.value = 0;
3264 break;
3265 }
3266 }
3267
3268 // Block type should be determined.
3269 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3270
3271 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3272
3273 // Determine swizzle type if there are 2 or more swizzle type candidates
3274 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3275 {
3276 if (ElemLib::IsBlockCompressed(pIn->format))
3277 {
3278 if (allowedSwSet.sw_D)
3279 {
3280 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3281 }
3282 else if (allowedSwSet.sw_S)
3283 {
3284 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3285 }
3286 else
3287 {
3288 ADDR_ASSERT(allowedSwSet.sw_R);
3289 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3290 }
3291 }
3292 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3293 {
3294 if (allowedSwSet.sw_S)
3295 {
3296 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3297 }
3298 else if (allowedSwSet.sw_D)
3299 {
3300 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3301 }
3302 else
3303 {
3304 ADDR_ASSERT(allowedSwSet.sw_R);
3305 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3306 }
3307 }
3308 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3309 {
3310 if (pIn->flags.color &&
3311 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3312 allowedSwSet.sw_D)
3313 {
3314 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3315 }
3316 else if (allowedSwSet.sw_S)
3317 {
3318 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3319 }
3320 else if (allowedSwSet.sw_R)
3321 {
3322 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3323 }
3324 else
3325 {
3326 ADDR_ASSERT(allowedSwSet.sw_Z);
3327 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3328 }
3329 }
3330 else
3331 {
3332 if (allowedSwSet.sw_R)
3333 {
3334 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3335 }
3336 else if (allowedSwSet.sw_D)
3337 {
3338 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3339 }
3340 else if (allowedSwSet.sw_S)
3341 {
3342 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3343 }
3344 else
3345 {
3346 ADDR_ASSERT(allowedSwSet.sw_Z);
3347 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3348 }
3349 }
3350
3351 // Swizzle type should be determined.
3352 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3353 }
3354
3355 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3356 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3357 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3358 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3359 }
3360 }
3361 else
3362 {
3363 // Invalid combination...
3364 ADDR_ASSERT_ALWAYS();
3365 returnCode = ADDR_INVALIDPARAMS;
3366 }
3367 }
3368 else
3369 {
3370 // Invalid combination...
3371 ADDR_ASSERT_ALWAYS();
3372 returnCode = ADDR_INVALIDPARAMS;
3373 }
3374 }
3375
3376 return returnCode;
3377 }
3378
3379 /**
3380 ************************************************************************************************************************
3381 * Gfx10Lib::ComputeStereoInfo
3382 *
3383 * @brief
3384 * Compute height alignment and right eye pipeBankXor for stereo surface
3385 *
3386 * @return
3387 * Error code
3388 *
3389 ************************************************************************************************************************
3390 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3391 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3392 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3393 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3394 UINT_32* pRightXor ///< Right eye xor
3395 ) const
3396 {
3397 ADDR_E_RETURNCODE ret = ADDR_OK;
3398
3399 *pRightXor = 0;
3400
3401 if (IsNonPrtXor(pIn->swizzleMode))
3402 {
3403 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3404 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3405 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3406 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3407 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3408
3409 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3410 {
3411 UINT_32 yMax = 0;
3412 UINT_32 yPosMask = 0;
3413
3414 // First get "max y bit"
3415 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3416 {
3417 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3418
3419 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3420 (m_equationTable[eqIndex].addr[i].index > yMax))
3421 {
3422 yMax = m_equationTable[eqIndex].addr[i].index;
3423 }
3424
3425 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3426 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3427 (m_equationTable[eqIndex].xor1[i].index > yMax))
3428 {
3429 yMax = m_equationTable[eqIndex].xor1[i].index;
3430 }
3431
3432 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3433 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3434 (m_equationTable[eqIndex].xor2[i].index > yMax))
3435 {
3436 yMax = m_equationTable[eqIndex].xor2[i].index;
3437 }
3438 }
3439
3440 // Then loop again for populating a position mask of "max Y bit"
3441 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3442 {
3443 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3444 (m_equationTable[eqIndex].addr[i].index == yMax))
3445 {
3446 yPosMask |= 1u << i;
3447 }
3448 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3449 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3450 (m_equationTable[eqIndex].xor1[i].index == yMax))
3451 {
3452 yPosMask |= 1u << i;
3453 }
3454 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3455 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3456 (m_equationTable[eqIndex].xor2[i].index == yMax))
3457 {
3458 yPosMask |= 1u << i;
3459 }
3460 }
3461
3462 const UINT_32 additionalAlign = 1 << yMax;
3463
3464 if (additionalAlign >= *pAlignY)
3465 {
3466 *pAlignY = additionalAlign;
3467
3468 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3469
3470 if ((alignedHeight >> yMax) & 1)
3471 {
3472 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3473 }
3474 }
3475 }
3476 else
3477 {
3478 ret = ADDR_INVALIDPARAMS;
3479 }
3480 }
3481
3482 return ret;
3483 }
3484
3485 /**
3486 ************************************************************************************************************************
3487 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3488 *
3489 * @brief
3490 * Internal function to calculate alignment for tiled surface
3491 *
3492 * @return
3493 * ADDR_E_RETURNCODE
3494 ************************************************************************************************************************
3495 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3496 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3497 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3498 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3499 ) const
3500 {
3501 ADDR_E_RETURNCODE ret;
3502
3503 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3504 pOut->mipChainPitch = 0;
3505 pOut->mipChainHeight = 0;
3506 pOut->mipChainSlice = 0;
3507 pOut->epitchIsHeight = FALSE;
3508
3509 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3510 pOut->mipChainInTail = FALSE;
3511 pOut->firstMipIdInTail = pIn->numMipLevels;
3512
3513 if (IsBlock256b(pIn->swizzleMode))
3514 {
3515 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3516 }
3517 else
3518 {
3519 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3520 }
3521
3522 return ret;
3523 }
3524
3525
3526 /**
3527 ************************************************************************************************************************
3528 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3529 *
3530 * @brief
3531 * Internal function to calculate alignment for micro tiled surface
3532 *
3533 * @return
3534 * ADDR_E_RETURNCODE
3535 ************************************************************************************************************************
3536 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3537 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3538 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3539 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3540 ) const
3541 {
3542 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3543 &pOut->blockHeight,
3544 &pOut->blockSlices,
3545 pIn->bpp,
3546 pIn->numFrags,
3547 pIn->resourceType,
3548 pIn->swizzleMode);
3549
3550 if (ret == ADDR_OK)
3551 {
3552 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3553
3554 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3555 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3556 pOut->numSlices = pIn->numSlices;
3557 pOut->baseAlign = blockSize;
3558
3559 if (pIn->numMipLevels > 1)
3560 {
3561 const UINT_32 mip0Width = pIn->width;
3562 const UINT_32 mip0Height = pIn->height;
3563 UINT_64 mipSliceSize = 0;
3564
3565 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3566 {
3567 UINT_32 mipWidth, mipHeight;
3568
3569 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3570
3571 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3572 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3573
3574 if (pOut->pMipInfo != NULL)
3575 {
3576 pOut->pMipInfo[i].pitch = mipActualWidth;
3577 pOut->pMipInfo[i].height = mipActualHeight;
3578 pOut->pMipInfo[i].depth = 1;
3579 pOut->pMipInfo[i].offset = mipSliceSize;
3580 pOut->pMipInfo[i].mipTailOffset = 0;
3581 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3582 }
3583
3584 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3585 }
3586
3587 pOut->sliceSize = mipSliceSize;
3588 pOut->surfSize = mipSliceSize * pOut->numSlices;
3589 }
3590 else
3591 {
3592 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3593 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3594
3595 if (pOut->pMipInfo != NULL)
3596 {
3597 pOut->pMipInfo[0].pitch = pOut->pitch;
3598 pOut->pMipInfo[0].height = pOut->height;
3599 pOut->pMipInfo[0].depth = 1;
3600 pOut->pMipInfo[0].offset = 0;
3601 pOut->pMipInfo[0].mipTailOffset = 0;
3602 pOut->pMipInfo[0].macroBlockOffset = 0;
3603 }
3604 }
3605
3606 }
3607
3608 return ret;
3609 }
3610
3611 /**
3612 ************************************************************************************************************************
3613 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3614 *
3615 * @brief
3616 * Internal function to calculate alignment for macro tiled surface
3617 *
3618 * @return
3619 * ADDR_E_RETURNCODE
3620 ************************************************************************************************************************
3621 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3622 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3623 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3624 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3625 ) const
3626 {
3627 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3628 &pOut->blockHeight,
3629 &pOut->blockSlices,
3630 pIn->bpp,
3631 pIn->numFrags,
3632 pIn->resourceType,
3633 pIn->swizzleMode);
3634
3635 if (returnCode == ADDR_OK)
3636 {
3637 UINT_32 heightAlign = pOut->blockHeight;
3638
3639 if (pIn->flags.qbStereo)
3640 {
3641 UINT_32 rightXor = 0;
3642
3643 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3644
3645 if (returnCode == ADDR_OK)
3646 {
3647 pOut->pStereoInfo->rightSwizzle = rightXor;
3648 }
3649 }
3650
3651 if (returnCode == ADDR_OK)
3652 {
3653 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3654 const UINT_32 blockSize = 1 << blockSizeLog2;
3655
3656 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3657 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3658 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3659 pOut->baseAlign = blockSize;
3660
3661 if (pIn->numMipLevels > 1)
3662 {
3663 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3664 pIn->swizzleMode,
3665 pOut->blockWidth,
3666 pOut->blockHeight,
3667 pOut->blockSlices);
3668 const UINT_32 mip0Width = pIn->width;
3669 const UINT_32 mip0Height = pIn->height;
3670 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3671 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3672 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3673 const UINT_32 index = Log2(pIn->bpp >> 3);
3674 UINT_32 firstMipInTail = pIn->numMipLevels;
3675 UINT_64 mipChainSliceSize = 0;
3676 UINT_64 mipSize[MaxMipLevels];
3677 UINT_64 mipSliceSize[MaxMipLevels];
3678
3679 Dim3d fixedTailMaxDim = tailMaxDim;
3680
3681 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3682 {
3683 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3684 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3685 }
3686
3687 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3688 {
3689 UINT_32 mipWidth, mipHeight, mipDepth;
3690
3691 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3692
3693 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3694 {
3695 firstMipInTail = i;
3696 mipChainSliceSize += blockSize / pOut->blockSlices;
3697 break;
3698 }
3699 else
3700 {
3701 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3702 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3703 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3704 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3705
3706 mipSize[i] = sliceSize * depth;
3707 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3708 mipChainSliceSize += sliceSize;
3709
3710 if (pOut->pMipInfo != NULL)
3711 {
3712 pOut->pMipInfo[i].pitch = pitch;
3713 pOut->pMipInfo[i].height = height;
3714 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3715 }
3716 }
3717 }
3718
3719 pOut->sliceSize = mipChainSliceSize;
3720 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3721 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3722 pOut->firstMipIdInTail = firstMipInTail;
3723
3724 if (pOut->pMipInfo != NULL)
3725 {
3726 UINT_64 offset = 0;
3727 UINT_64 macroBlkOffset = 0;
3728 UINT_32 tailMaxDepth = 0;
3729
3730 if (firstMipInTail != pIn->numMipLevels)
3731 {
3732 UINT_32 mipWidth, mipHeight;
3733
3734 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3735 &mipWidth, &mipHeight, &tailMaxDepth);
3736
3737 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3738 macroBlkOffset = blockSize;
3739 }
3740
3741 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3742 {
3743 pOut->pMipInfo[i].offset = offset;
3744 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3745 pOut->pMipInfo[i].mipTailOffset = 0;
3746
3747 offset += mipSize[i];
3748 macroBlkOffset += mipSliceSize[i];
3749 }
3750
3751 UINT_32 pitch = tailMaxDim.w;
3752 UINT_32 height = tailMaxDim.h;
3753 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3754
3755 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3756
3757 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3758 {
3759 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3760 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3761
3762 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3763 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3764 pOut->pMipInfo[i].macroBlockOffset = 0;
3765
3766 pOut->pMipInfo[i].pitch = pitch;
3767 pOut->pMipInfo[i].height = height;
3768 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3769
3770 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3771 ((mipOffset >> 10) & 2) |
3772 ((mipOffset >> 11) & 4) |
3773 ((mipOffset >> 12) & 8) |
3774 ((mipOffset >> 13) & 16) |
3775 ((mipOffset >> 14) & 32);
3776 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3777 ((mipOffset >> 9) & 2) |
3778 ((mipOffset >> 10) & 4) |
3779 ((mipOffset >> 11) & 8) |
3780 ((mipOffset >> 12) & 16) |
3781 ((mipOffset >> 13) & 32);
3782
3783 if (blockSizeLog2 & 1)
3784 {
3785 const UINT_32 temp = mipX;
3786 mipX = mipY;
3787 mipY = temp;
3788
3789 if (index & 1)
3790 {
3791 mipY = (mipY << 1) | (mipX & 1);
3792 mipX = mipX >> 1;
3793 }
3794 }
3795
3796 if (isThin)
3797 {
3798 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3799 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3800 pOut->pMipInfo[i].mipTailCoordZ = 0;
3801
3802 pitch = Max(pitch >> 1, Block256_2d[index].w);
3803 height = Max(height >> 1, Block256_2d[index].h);
3804 }
3805 else
3806 {
3807 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3808 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3809 pOut->pMipInfo[i].mipTailCoordZ = 0;
3810
3811 pitch = Max(pitch >> 1, Block256_3d[index].w);
3812 height = Max(height >> 1, Block256_3d[index].h);
3813 }
3814 }
3815 }
3816 }
3817 else
3818 {
3819 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3820 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3821
3822 if (pOut->pMipInfo != NULL)
3823 {
3824 pOut->pMipInfo[0].pitch = pOut->pitch;
3825 pOut->pMipInfo[0].height = pOut->height;
3826 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3827 pOut->pMipInfo[0].offset = 0;
3828 pOut->pMipInfo[0].mipTailOffset = 0;
3829 pOut->pMipInfo[0].macroBlockOffset = 0;
3830 pOut->pMipInfo[0].mipTailCoordX = 0;
3831 pOut->pMipInfo[0].mipTailCoordY = 0;
3832 pOut->pMipInfo[0].mipTailCoordZ = 0;
3833 }
3834 }
3835 }
3836 }
3837
3838 return returnCode;
3839 }
3840
3841 /**
3842 ************************************************************************************************************************
3843 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3844 *
3845 * @brief
3846 * Internal function to calculate address from coord for tiled swizzle surface
3847 *
3848 * @return
3849 * ADDR_E_RETURNCODE
3850 ************************************************************************************************************************
3851 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3852 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3853 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3854 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3855 ) const
3856 {
3857 ADDR_E_RETURNCODE ret;
3858
3859 if (IsBlock256b(pIn->swizzleMode))
3860 {
3861 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3862 }
3863 else
3864 {
3865 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3866 }
3867
3868 return ret;
3869 }
3870
3871 /**
3872 ************************************************************************************************************************
3873 * Gfx10Lib::ComputeOffsetFromEquation
3874 *
3875 * @brief
3876 * Compute offset from equation
3877 *
3878 * @return
3879 * Offset
3880 ************************************************************************************************************************
3881 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3882 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3883 const ADDR_EQUATION* pEq, ///< Equation
3884 UINT_32 x, ///< x coord in bytes
3885 UINT_32 y, ///< y coord in pixel
3886 UINT_32 z ///< z coord in slice
3887 ) const
3888 {
3889 UINT_32 offset = 0;
3890
3891 for (UINT_32 i = 0; i < pEq->numBits; i++)
3892 {
3893 UINT_32 v = 0;
3894
3895 if (pEq->addr[i].valid)
3896 {
3897 if (pEq->addr[i].channel == 0)
3898 {
3899 v ^= (x >> pEq->addr[i].index) & 1;
3900 }
3901 else if (pEq->addr[i].channel == 1)
3902 {
3903 v ^= (y >> pEq->addr[i].index) & 1;
3904 }
3905 else
3906 {
3907 ADDR_ASSERT(pEq->addr[i].channel == 2);
3908 v ^= (z >> pEq->addr[i].index) & 1;
3909 }
3910 }
3911
3912 if (pEq->xor1[i].valid)
3913 {
3914 if (pEq->xor1[i].channel == 0)
3915 {
3916 v ^= (x >> pEq->xor1[i].index) & 1;
3917 }
3918 else if (pEq->xor1[i].channel == 1)
3919 {
3920 v ^= (y >> pEq->xor1[i].index) & 1;
3921 }
3922 else
3923 {
3924 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3925 v ^= (z >> pEq->xor1[i].index) & 1;
3926 }
3927 }
3928
3929 if (pEq->xor2[i].valid)
3930 {
3931 if (pEq->xor2[i].channel == 0)
3932 {
3933 v ^= (x >> pEq->xor2[i].index) & 1;
3934 }
3935 else if (pEq->xor2[i].channel == 1)
3936 {
3937 v ^= (y >> pEq->xor2[i].index) & 1;
3938 }
3939 else
3940 {
3941 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3942 v ^= (z >> pEq->xor2[i].index) & 1;
3943 }
3944 }
3945
3946 offset |= (v << i);
3947 }
3948
3949 return offset;
3950 }
3951
3952 /**
3953 ************************************************************************************************************************
3954 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3955 *
3956 * @brief
3957 * Compute offset from swizzle pattern
3958 *
3959 * @return
3960 * Offset
3961 ************************************************************************************************************************
3962 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3963 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3964 const UINT_64* pPattern, ///< Swizzle pattern
3965 UINT_32 numBits, ///< Number of bits in pattern
3966 UINT_32 x, ///< x coord in pixel
3967 UINT_32 y, ///< y coord in pixel
3968 UINT_32 z, ///< z coord in slice
3969 UINT_32 s ///< sample id
3970 ) const
3971 {
3972 UINT_32 offset = 0;
3973 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3974
3975 for (UINT_32 i = 0; i < numBits; i++)
3976 {
3977 UINT_32 v = 0;
3978
3979 if (pSwizzlePattern[i].x != 0)
3980 {
3981 UINT_16 mask = pSwizzlePattern[i].x;
3982 UINT_32 xBits = x;
3983
3984 while (mask != 0)
3985 {
3986 if (mask & 1)
3987 {
3988 v ^= xBits & 1;
3989 }
3990
3991 xBits >>= 1;
3992 mask >>= 1;
3993 }
3994 }
3995
3996 if (pSwizzlePattern[i].y != 0)
3997 {
3998 UINT_16 mask = pSwizzlePattern[i].y;
3999 UINT_32 yBits = y;
4000
4001 while (mask != 0)
4002 {
4003 if (mask & 1)
4004 {
4005 v ^= yBits & 1;
4006 }
4007
4008 yBits >>= 1;
4009 mask >>= 1;
4010 }
4011 }
4012
4013 if (pSwizzlePattern[i].z != 0)
4014 {
4015 UINT_16 mask = pSwizzlePattern[i].z;
4016 UINT_32 zBits = z;
4017
4018 while (mask != 0)
4019 {
4020 if (mask & 1)
4021 {
4022 v ^= zBits & 1;
4023 }
4024
4025 zBits >>= 1;
4026 mask >>= 1;
4027 }
4028 }
4029
4030 if (pSwizzlePattern[i].s != 0)
4031 {
4032 UINT_16 mask = pSwizzlePattern[i].s;
4033 UINT_32 sBits = s;
4034
4035 while (mask != 0)
4036 {
4037 if (mask & 1)
4038 {
4039 v ^= sBits & 1;
4040 }
4041
4042 sBits >>= 1;
4043 mask >>= 1;
4044 }
4045 }
4046
4047 offset |= (v << i);
4048 }
4049
4050 return offset;
4051 }
4052
4053 /**
4054 ************************************************************************************************************************
4055 * Gfx10Lib::GetSwizzlePatternInfo
4056 *
4057 * @brief
4058 * Get swizzle pattern
4059 *
4060 * @return
4061 * Swizzle pattern information
4062 ************************************************************************************************************************
4063 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4064 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4065 AddrSwizzleMode swizzleMode, ///< Swizzle mode
4066 AddrResourceType resourceType, ///< Resource type
4067 UINT_32 elemLog2, ///< Element size in bytes log2
4068 UINT_32 numFrag ///< Number of fragment
4069 ) const
4070 {
4071 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4072 const ADDR_SW_PATINFO* patInfo = NULL;
4073 const UINT_32 swizzleMask = 1 << swizzleMode;
4074
4075 if (IsBlockVariable(swizzleMode))
4076 {
4077 if (m_blockVarSizeLog2 != 0)
4078 {
4079 ADDR_ASSERT(m_settings.supportRbPlus);
4080
4081 if (IsRtOptSwizzle(swizzleMode))
4082 {
4083 if (numFrag == 1)
4084 {
4085 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4086 }
4087 else if (numFrag == 2)
4088 {
4089 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4090 }
4091 else if (numFrag == 4)
4092 {
4093 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4094 }
4095 else
4096 {
4097 ADDR_ASSERT(numFrag == 8);
4098 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4099 }
4100 }
4101 else if (IsZOrderSwizzle(swizzleMode))
4102 {
4103 if (numFrag == 1)
4104 {
4105 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4106 }
4107 else if (numFrag == 2)
4108 {
4109 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4110 }
4111 else if (numFrag == 4)
4112 {
4113 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4114 }
4115 else
4116 {
4117 ADDR_ASSERT(numFrag == 8);
4118 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4119 }
4120 }
4121 }
4122 }
4123 else if (IsLinear(swizzleMode) == FALSE)
4124 {
4125 if (resourceType == ADDR_RSRC_TEX_3D)
4126 {
4127 ADDR_ASSERT(numFrag == 1);
4128
4129 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4130 {
4131 if (IsRtOptSwizzle(swizzleMode))
4132 {
4133 patInfo = m_settings.supportRbPlus ?
4134 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4135 }
4136 else if (IsZOrderSwizzle(swizzleMode))
4137 {
4138 patInfo = m_settings.supportRbPlus ?
4139 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4140 }
4141 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4142 {
4143 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4144 patInfo = m_settings.supportRbPlus ?
4145 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4146 }
4147 else
4148 {
4149 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4150
4151 if (IsBlock4kb(swizzleMode))
4152 {
4153 if (swizzleMode == ADDR_SW_4KB_S)
4154 {
4155 patInfo = m_settings.supportRbPlus ?
4156 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4157 }
4158 else
4159 {
4160 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4161 patInfo = m_settings.supportRbPlus ?
4162 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4163 }
4164 }
4165 else
4166 {
4167 if (swizzleMode == ADDR_SW_64KB_S)
4168 {
4169 patInfo = m_settings.supportRbPlus ?
4170 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4171 }
4172 else if (swizzleMode == ADDR_SW_64KB_S_X)
4173 {
4174 patInfo = m_settings.supportRbPlus ?
4175 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4176 }
4177 else
4178 {
4179 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4180 patInfo = m_settings.supportRbPlus ?
4181 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4182 }
4183 }
4184 }
4185 }
4186 }
4187 else
4188 {
4189 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4190 {
4191 if (IsBlock256b(swizzleMode))
4192 {
4193 if (swizzleMode == ADDR_SW_256B_S)
4194 {
4195 patInfo = m_settings.supportRbPlus ?
4196 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4197 }
4198 else
4199 {
4200 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4201 patInfo = m_settings.supportRbPlus ?
4202 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4203 }
4204 }
4205 else if (IsBlock4kb(swizzleMode))
4206 {
4207 if (IsStandardSwizzle(resourceType, swizzleMode))
4208 {
4209 if (swizzleMode == ADDR_SW_4KB_S)
4210 {
4211 patInfo = m_settings.supportRbPlus ?
4212 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4213 }
4214 else
4215 {
4216 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4217 patInfo = m_settings.supportRbPlus ?
4218 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4219 }
4220 }
4221 else
4222 {
4223 if (swizzleMode == ADDR_SW_4KB_D)
4224 {
4225 patInfo = m_settings.supportRbPlus ?
4226 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4227 }
4228 else
4229 {
4230 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4231 patInfo = m_settings.supportRbPlus ?
4232 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4233 }
4234 }
4235 }
4236 else
4237 {
4238 if (IsRtOptSwizzle(swizzleMode))
4239 {
4240 if (numFrag == 1)
4241 {
4242 patInfo = m_settings.supportRbPlus ?
4243 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4244 }
4245 else if (numFrag == 2)
4246 {
4247 patInfo = m_settings.supportRbPlus ?
4248 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4249 }
4250 else if (numFrag == 4)
4251 {
4252 patInfo = m_settings.supportRbPlus ?
4253 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4254 }
4255 else
4256 {
4257 ADDR_ASSERT(numFrag == 8);
4258 patInfo = m_settings.supportRbPlus ?
4259 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4260 }
4261 }
4262 else if (IsZOrderSwizzle(swizzleMode))
4263 {
4264 if (numFrag == 1)
4265 {
4266 patInfo = m_settings.supportRbPlus ?
4267 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4268 }
4269 else if (numFrag == 2)
4270 {
4271 patInfo = m_settings.supportRbPlus ?
4272 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4273 }
4274 else if (numFrag == 4)
4275 {
4276 patInfo = m_settings.supportRbPlus ?
4277 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4278 }
4279 else
4280 {
4281 ADDR_ASSERT(numFrag == 8);
4282 patInfo = m_settings.supportRbPlus ?
4283 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4284 }
4285 }
4286 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4287 {
4288 if (swizzleMode == ADDR_SW_64KB_D)
4289 {
4290 patInfo = m_settings.supportRbPlus ?
4291 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4292 }
4293 else if (swizzleMode == ADDR_SW_64KB_D_X)
4294 {
4295 patInfo = m_settings.supportRbPlus ?
4296 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4297 }
4298 else
4299 {
4300 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4301 patInfo = m_settings.supportRbPlus ?
4302 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4303 }
4304 }
4305 else
4306 {
4307 if (swizzleMode == ADDR_SW_64KB_S)
4308 {
4309 patInfo = m_settings.supportRbPlus ?
4310 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4311 }
4312 else if (swizzleMode == ADDR_SW_64KB_S_X)
4313 {
4314 patInfo = m_settings.supportRbPlus ?
4315 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4316 }
4317 else
4318 {
4319 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4320 patInfo = m_settings.supportRbPlus ?
4321 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4322 }
4323 }
4324 }
4325 }
4326 }
4327 }
4328
4329 return (patInfo != NULL) ? &patInfo[index] : NULL;
4330 }
4331
4332
4333 /**
4334 ************************************************************************************************************************
4335 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4336 *
4337 * @brief
4338 * Internal function to calculate address from coord for micro tiled swizzle surface
4339 *
4340 * @return
4341 * ADDR_E_RETURNCODE
4342 ************************************************************************************************************************
4343 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4344 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4345 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4346 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4347 ) const
4348 {
4349 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4350 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4351 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4352
4353 localIn.swizzleMode = pIn->swizzleMode;
4354 localIn.flags = pIn->flags;
4355 localIn.resourceType = pIn->resourceType;
4356 localIn.bpp = pIn->bpp;
4357 localIn.width = Max(pIn->unalignedWidth, 1u);
4358 localIn.height = Max(pIn->unalignedHeight, 1u);
4359 localIn.numSlices = Max(pIn->numSlices, 1u);
4360 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4361 localIn.numSamples = Max(pIn->numSamples, 1u);
4362 localIn.numFrags = Max(pIn->numFrags, 1u);
4363 localOut.pMipInfo = mipInfo;
4364
4365 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4366
4367 if (ret == ADDR_OK)
4368 {
4369 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4370 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4371 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4372 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4373
4374 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4375 {
4376 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4377 const UINT_32 yb = pIn->y / localOut.blockHeight;
4378 const UINT_32 xb = pIn->x / localOut.blockWidth;
4379 const UINT_32 blockIndex = yb * pb + xb;
4380 const UINT_32 blockSize = 256;
4381 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4382 pIn->x << elemLog2,
4383 pIn->y,
4384 0);
4385 pOut->addr = localOut.sliceSize * pIn->slice +
4386 mipInfo[pIn->mipId].macroBlockOffset +
4387 (blockIndex * blockSize) +
4388 blk256Offset;
4389 }
4390 else
4391 {
4392 ret = ADDR_INVALIDPARAMS;
4393 }
4394 }
4395
4396 return ret;
4397 }
4398
4399 /**
4400 ************************************************************************************************************************
4401 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4402 *
4403 * @brief
4404 * Internal function to calculate address from coord for macro tiled swizzle surface
4405 *
4406 * @return
4407 * ADDR_E_RETURNCODE
4408 ************************************************************************************************************************
4409 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4410 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4411 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4412 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4413 ) const
4414 {
4415 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4416 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4417 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4418
4419 localIn.swizzleMode = pIn->swizzleMode;
4420 localIn.flags = pIn->flags;
4421 localIn.resourceType = pIn->resourceType;
4422 localIn.bpp = pIn->bpp;
4423 localIn.width = Max(pIn->unalignedWidth, 1u);
4424 localIn.height = Max(pIn->unalignedHeight, 1u);
4425 localIn.numSlices = Max(pIn->numSlices, 1u);
4426 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4427 localIn.numSamples = Max(pIn->numSamples, 1u);
4428 localIn.numFrags = Max(pIn->numFrags, 1u);
4429 localOut.pMipInfo = mipInfo;
4430
4431 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4432
4433 if (ret == ADDR_OK)
4434 {
4435 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4436 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4437 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4438 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4439 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4440 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4441 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4442
4443 if (localIn.numFrags > 1)
4444 {
4445 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4446 pIn->resourceType,
4447 elemLog2,
4448 localIn.numFrags);
4449
4450 if (pPatInfo != NULL)
4451 {
4452 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4453 const UINT_32 yb = pIn->y / localOut.blockHeight;
4454 const UINT_32 xb = pIn->x / localOut.blockWidth;
4455 const UINT_64 blkIdx = yb * pb + xb;
4456
4457 ADDR_BIT_SETTING fullSwizzlePattern[20];
4458 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4459
4460 const UINT_32 blkOffset =
4461 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4462 blkSizeLog2,
4463 pIn->x,
4464 pIn->y,
4465 pIn->slice,
4466 pIn->sample);
4467
4468 pOut->addr = (localOut.sliceSize * pIn->slice) +
4469 (blkIdx << blkSizeLog2) +
4470 (blkOffset ^ pipeBankXor);
4471 }
4472 else
4473 {
4474 ret = ADDR_INVALIDPARAMS;
4475 }
4476 }
4477 else
4478 {
4479 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4480 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4481 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4482
4483 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4484 {
4485 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4486 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4487 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4488 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4489 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4490 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4491 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4492 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4493 const UINT_32 yb = pIn->y / localOut.blockHeight;
4494 const UINT_32 xb = pIn->x / localOut.blockWidth;
4495 const UINT_64 blkIdx = yb * pb + xb;
4496 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4497 x << elemLog2,
4498 y,
4499 z);
4500 pOut->addr = sliceSize * sliceId +
4501 mipInfo[pIn->mipId].macroBlockOffset +
4502 (blkIdx << blkSizeLog2) +
4503 (blkOffset ^ pipeBankXor);
4504 }
4505 else
4506 {
4507 ret = ADDR_INVALIDPARAMS;
4508 }
4509 }
4510 }
4511
4512 return ret;
4513 }
4514
4515 /**
4516 ************************************************************************************************************************
4517 * Gfx10Lib::HwlComputeMaxBaseAlignments
4518 *
4519 * @brief
4520 * Gets maximum alignments
4521 * @return
4522 * maximum alignments
4523 ************************************************************************************************************************
4524 */
HwlComputeMaxBaseAlignments() const4525 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4526 {
4527 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4528 }
4529
4530 /**
4531 ************************************************************************************************************************
4532 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4533 *
4534 * @brief
4535 * Gets maximum alignments for metadata
4536 * @return
4537 * maximum alignments for metadata
4538 ************************************************************************************************************************
4539 */
HwlComputeMaxMetaBaseAlignments() const4540 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4541 {
4542 Dim3d metaBlk;
4543
4544 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4545 {
4546 ADDR_SW_64KB_Z_X,
4547 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4548 };
4549
4550 UINT_32 maxBaseAlignHtile = 0;
4551 UINT_32 maxBaseAlignCmask = 0;
4552
4553 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4554 {
4555 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4556 {
4557 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4558 {
4559 // Max base alignment for Htile
4560 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4561 ADDR_RSRC_TEX_2D,
4562 ValidSwizzleModeForXmask[swIdx],
4563 bppLog2,
4564 numFragLog2,
4565 TRUE,
4566 &metaBlk);
4567
4568 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4569 }
4570 }
4571
4572 // Max base alignment for Cmask
4573 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4574 ADDR_RSRC_TEX_2D,
4575 ValidSwizzleModeForXmask[swIdx],
4576 0,
4577 0,
4578 TRUE,
4579 &metaBlk);
4580
4581 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4582 }
4583
4584 // Max base alignment for 2D Dcc
4585 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4586 {
4587 ADDR_SW_64KB_S_X,
4588 ADDR_SW_64KB_D_X,
4589 ADDR_SW_64KB_R_X,
4590 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4591 };
4592
4593 UINT_32 maxBaseAlignDcc2D = 0;
4594
4595 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4596 {
4597 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4598 {
4599 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4600 {
4601 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4602 ADDR_RSRC_TEX_2D,
4603 ValidSwizzleModeForDcc2D[swIdx],
4604 bppLog2,
4605 numFragLog2,
4606 TRUE,
4607 &metaBlk);
4608
4609 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4610 }
4611 }
4612 }
4613
4614 // Max base alignment for 3D Dcc
4615 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4616 {
4617 ADDR_SW_64KB_Z_X,
4618 ADDR_SW_64KB_S_X,
4619 ADDR_SW_64KB_D_X,
4620 ADDR_SW_64KB_R_X,
4621 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4622 };
4623
4624 UINT_32 maxBaseAlignDcc3D = 0;
4625
4626 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4627 {
4628 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4629 {
4630 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4631 ADDR_RSRC_TEX_3D,
4632 ValidSwizzleModeForDcc3D[swIdx],
4633 bppLog2,
4634 0,
4635 TRUE,
4636 &metaBlk);
4637
4638 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4639 }
4640 }
4641
4642 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4643 }
4644
4645 /**
4646 ************************************************************************************************************************
4647 * Gfx10Lib::GetMetaElementSizeLog2
4648 *
4649 * @brief
4650 * Gets meta data element size log2
4651 * @return
4652 * Meta data element size log2
4653 ************************************************************************************************************************
4654 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4655 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4656 Gfx10DataType dataType) ///< Data surface type
4657 {
4658 INT_32 elemSizeLog2 = 0;
4659
4660 if (dataType == Gfx10DataColor)
4661 {
4662 elemSizeLog2 = 0;
4663 }
4664 else if (dataType == Gfx10DataDepthStencil)
4665 {
4666 elemSizeLog2 = 2;
4667 }
4668 else
4669 {
4670 ADDR_ASSERT(dataType == Gfx10DataFmask);
4671 elemSizeLog2 = -1;
4672 }
4673
4674 return elemSizeLog2;
4675 }
4676
4677 /**
4678 ************************************************************************************************************************
4679 * Gfx10Lib::GetMetaCacheSizeLog2
4680 *
4681 * @brief
4682 * Gets meta data cache line size log2
4683 * @return
4684 * Meta data cache line size log2
4685 ************************************************************************************************************************
4686 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4687 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4688 Gfx10DataType dataType) ///< Data surface type
4689 {
4690 INT_32 cacheSizeLog2 = 0;
4691
4692 if (dataType == Gfx10DataColor)
4693 {
4694 cacheSizeLog2 = 6;
4695 }
4696 else if (dataType == Gfx10DataDepthStencil)
4697 {
4698 cacheSizeLog2 = 8;
4699 }
4700 else
4701 {
4702 ADDR_ASSERT(dataType == Gfx10DataFmask);
4703 cacheSizeLog2 = 8;
4704 }
4705 return cacheSizeLog2;
4706 }
4707
4708 /**
4709 ************************************************************************************************************************
4710 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4711 *
4712 * @brief
4713 * Internal function to calculate alignment for linear surface
4714 *
4715 * @return
4716 * ADDR_E_RETURNCODE
4717 ************************************************************************************************************************
4718 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4719 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4720 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4721 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4722 ) const
4723 {
4724 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4725
4726 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4727 {
4728 returnCode = ADDR_INVALIDPARAMS;
4729 }
4730 else
4731 {
4732 const UINT_32 elementBytes = pIn->bpp >> 3;
4733 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4734 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4735 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4736 UINT_32 actualHeight = pIn->height;
4737 UINT_64 sliceSize = 0;
4738
4739 if (pIn->numMipLevels > 1)
4740 {
4741 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4742 {
4743 UINT_32 mipWidth, mipHeight;
4744
4745 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4746
4747 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4748
4749 if (pOut->pMipInfo != NULL)
4750 {
4751 pOut->pMipInfo[i].pitch = mipActualWidth;
4752 pOut->pMipInfo[i].height = mipHeight;
4753 pOut->pMipInfo[i].depth = mipDepth;
4754 pOut->pMipInfo[i].offset = sliceSize;
4755 pOut->pMipInfo[i].mipTailOffset = 0;
4756 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4757 }
4758
4759 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4760 }
4761 }
4762 else
4763 {
4764 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4765
4766 if (returnCode == ADDR_OK)
4767 {
4768 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4769
4770 if (pOut->pMipInfo != NULL)
4771 {
4772 pOut->pMipInfo[0].pitch = pitch;
4773 pOut->pMipInfo[0].height = actualHeight;
4774 pOut->pMipInfo[0].depth = mipDepth;
4775 pOut->pMipInfo[0].offset = 0;
4776 pOut->pMipInfo[0].mipTailOffset = 0;
4777 pOut->pMipInfo[0].macroBlockOffset = 0;
4778 }
4779 }
4780 }
4781
4782 if (returnCode == ADDR_OK)
4783 {
4784 pOut->pitch = pitch;
4785 pOut->height = actualHeight;
4786 pOut->numSlices = pIn->numSlices;
4787 pOut->sliceSize = sliceSize;
4788 pOut->surfSize = sliceSize * pOut->numSlices;
4789 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4790 pOut->blockWidth = pitchAlign;
4791 pOut->blockHeight = 1;
4792 pOut->blockSlices = 1;
4793
4794 // Following members are useless on GFX10
4795 pOut->mipChainPitch = 0;
4796 pOut->mipChainHeight = 0;
4797 pOut->mipChainSlice = 0;
4798 pOut->epitchIsHeight = FALSE;
4799
4800 // Post calculation validate
4801 ADDR_ASSERT(pOut->sliceSize > 0);
4802 }
4803 }
4804
4805 return returnCode;
4806 }
4807
4808 } // V2
4809 } // Addr
4810