1 /*
2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ************************************************************************************************************************
29 * @file  gfx10addrlib.cpp
30 * @brief Contain the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33 
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36 
37 #include "amdgpu_asic_addr.h"
38 
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41 
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 *   Gfx10HwlInit
47 *
48 *   @brief
*       Creates a Gfx10Lib object.
50 *
51 *   @return
*       Returns a Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
Gfx10HwlInit(const Client * pClient)55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57     return V2::Gfx10Lib::CreateObj(pClient);
58 }
59 
60 namespace V2
61 {
62 
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 //                               Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
69     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
70     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
71     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
72     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
73 
74     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
75     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
76     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
77     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
78 
79     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
80     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
81     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
82     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
83 
84     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
85     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
86     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
87     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
88 
89     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
90     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
91     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
92     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
93 
94     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
95     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
96     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
97     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
98 
99     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
100     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
101     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
102     {0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_64KB_R_X
103 
104     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_Z_X
105     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
106     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
107     {0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_VAR_R_X
108     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
109 };
110 
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112 
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115 
116 /**
117 ************************************************************************************************************************
118 *   Gfx10Lib::Gfx10Lib
119 *
120 *   @brief
121 *       Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx10Lib(const Client * pClient)125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126     :
127     Lib(pClient),
128     m_numPkrLog2(0),
129     m_numSaLog2(0),
130     m_colorBaseIndex(0),
131     m_xmaskBaseIndex(0),
132     m_dccBaseIndex(0)
133 {
134     memset(&m_settings, 0, sizeof(m_settings));
135     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137 
138 /**
139 ************************************************************************************************************************
140 *   Gfx10Lib::~Gfx10Lib
141 *
142 *   @brief
143 *       Destructor
144 ************************************************************************************************************************
145 */
~Gfx10Lib()146 Gfx10Lib::~Gfx10Lib()
147 {
148 }
149 
150 /**
151 ************************************************************************************************************************
152 *   Gfx10Lib::HwlComputeHtileInfo
153 *
154 *   @brief
*       Interface function stub of AddrComputeHtileInfo
156 *
157 *   @return
158 *       ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
162     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
163     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
164     ) const
165 {
166     ADDR_E_RETURNCODE ret = ADDR_OK;
167 
168     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
170         (pIn->hTileFlags.pipeAligned != TRUE))
171     {
172         ret = ADDR_INVALIDPARAMS;
173     }
174     else
175     {
176         Dim3d         metaBlk     = {};
177         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
178                                                    ADDR_RSRC_TEX_2D,
179                                                    pIn->swizzleMode,
180                                                    0,
181                                                    0,
182                                                    TRUE,
183                                                    &metaBlk);
184 
185         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
186         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188         pOut->metaBlkWidth  = metaBlk.w;
189         pOut->metaBlkHeight = metaBlk.h;
190 
191         if (pIn->numMipLevels > 1)
192         {
193             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194 
195             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196 
197             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198             {
199                 UINT_32 mipWidth, mipHeight;
200 
201                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202 
203                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
204                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205 
206                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
207                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
208                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209 
210                 if (pOut->pMipInfo != NULL)
211                 {
212                     pOut->pMipInfo[i].inMiptail = FALSE;
213                     pOut->pMipInfo[i].offset    = offset;
214                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
215                 }
216 
217                 offset += mipSliceSize;
218             }
219 
220             pOut->sliceSize          = offset;
221             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
223 
224             if (pOut->pMipInfo != NULL)
225             {
226                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227                 {
228                     pOut->pMipInfo[i].inMiptail = TRUE;
229                     pOut->pMipInfo[i].offset    = 0;
230                     pOut->pMipInfo[i].sliceSize = 0;
231                 }
232 
233                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234                 {
235                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236                 }
237             }
238         }
239         else
240         {
241             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
242             const UINT_32 heightInM = pOut->height / metaBlk.h;
243 
244             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
245             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
246             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
247 
248             if (pOut->pMipInfo != NULL)
249             {
250                 pOut->pMipInfo[0].inMiptail = FALSE;
251                 pOut->pMipInfo[0].offset    = 0;
252                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253             }
254         }
255 
256         // Get the HTILE address equation (copied from HtileAddrFromCoord).
257         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
258         const UINT_32 index = m_xmaskBaseIndex;
259         const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
260 
261         ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
262         pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
263     }
264 
265     return ret;
266 }
267 
268 /**
269 ************************************************************************************************************************
270 *   Gfx10Lib::HwlComputeCmaskInfo
271 *
272 *   @brief
273 *       Interface function stub of AddrComputeCmaskInfo
274 *
275 *   @return
276 *       ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const279 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
280     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
281     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
282     ) const
283 {
284     ADDR_E_RETURNCODE ret = ADDR_OK;
285 
286     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
287         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
288         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
289          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
290     {
291         ret = ADDR_INVALIDPARAMS;
292     }
293     else
294     {
295         Dim3d         metaBlk     = {};
296         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
297                                                    ADDR_RSRC_TEX_2D,
298                                                    pIn->swizzleMode,
299                                                    0,
300                                                    0,
301                                                    TRUE,
302                                                    &metaBlk);
303 
304         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
305         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
306         pOut->baseAlign     = metaBlkSize;
307         pOut->metaBlkWidth  = metaBlk.w;
308         pOut->metaBlkHeight = metaBlk.h;
309 
310         if (pIn->numMipLevels > 1)
311         {
312             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
313 
314             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
315 
316             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
317             {
318                 UINT_32 mipWidth, mipHeight;
319 
320                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
321 
322                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
323                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
324 
325                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
326                 const UINT_32 heightInM = mipHeight / metaBlk.h;
327 
328                 if (pOut->pMipInfo != NULL)
329                 {
330                     pOut->pMipInfo[i].inMiptail = FALSE;
331                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
332                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
333                 }
334 
335                 metaBlkPerSlice += pitchInM * heightInM;
336             }
337 
338             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
339 
340             if (pOut->pMipInfo != NULL)
341             {
342                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
343                 {
344                     pOut->pMipInfo[i].inMiptail = TRUE;
345                     pOut->pMipInfo[i].offset    = 0;
346                     pOut->pMipInfo[i].sliceSize = 0;
347                 }
348 
349                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
350                 {
351                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
352                 }
353             }
354         }
355         else
356         {
357             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
358             const UINT_32 heightInM = pOut->height / metaBlk.h;
359 
360             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
361 
362             if (pOut->pMipInfo != NULL)
363             {
364                 pOut->pMipInfo[0].inMiptail = FALSE;
365                 pOut->pMipInfo[0].offset    = 0;
366                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
367             }
368         }
369 
370         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
371         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
372 
373         // Get the CMASK address equation (copied from CmaskAddrFromCoord)
374         const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
375         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
376         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
377         const UINT_8*  patIdxTable   =
378             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
379             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
380 
381         ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
382         pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
383     }
384 
385     return ret;
386 }
387 
388 /**
389 ************************************************************************************************************************
390 *   Gfx10Lib::HwlComputeDccInfo
391 *
392 *   @brief
393 *       Interface function to compute DCC key info
394 *
395 *   @return
396 *       ADDR_E_RETURNCODE
397 ************************************************************************************************************************
398 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const399 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
400     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
401     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
402     ) const
403 {
404     ADDR_E_RETURNCODE ret = ADDR_OK;
405 
406     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
407     {
408         // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
409         // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
410         ret = ADDR_INVALIDPARAMS;
411     }
412     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
413     {
414         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
415         ret = ADDR_INVALIDPARAMS;
416     }
417     else
418     {
419         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
420 
421         {
422             // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
423             ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
424 
425             const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
426 
427             pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
428             pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
429             pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
430         }
431 
432         if (ret == ADDR_OK)
433         {
434             Dim3d         metaBlk     = {};
435             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
436             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
437                                                        pIn->resourceType,
438                                                        pIn->swizzleMode,
439                                                        elemLog2,
440                                                        numFragLog2,
441                                                        pIn->dccKeyFlags.pipeAligned,
442                                                        &metaBlk);
443 
444             pOut->dccRamBaseAlign   = metaBlkSize;
445             pOut->metaBlkWidth      = metaBlk.w;
446             pOut->metaBlkHeight     = metaBlk.h;
447             pOut->metaBlkDepth      = metaBlk.d;
448             pOut->metaBlkSize       = metaBlkSize;
449 
450             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
451             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
452             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
453 
454             if (pIn->numMipLevels > 1)
455             {
456                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
457 
458                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
459 
460                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
461                 {
462                     UINT_32 mipWidth, mipHeight;
463 
464                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
465 
466                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
467                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
468 
469                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
470                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
471                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
472 
473                     if (pOut->pMipInfo != NULL)
474                     {
475                         pOut->pMipInfo[i].inMiptail = FALSE;
476                         pOut->pMipInfo[i].offset    = offset;
477                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
478                     }
479 
480                     offset += mipSliceSize;
481                 }
482 
483                 pOut->dccRamSliceSize    = offset;
484                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
485                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
486 
487                 if (pOut->pMipInfo != NULL)
488                 {
489                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
490                     {
491                         pOut->pMipInfo[i].inMiptail = TRUE;
492                         pOut->pMipInfo[i].offset    = 0;
493                         pOut->pMipInfo[i].sliceSize = 0;
494                     }
495 
496                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
497                     {
498                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
499                     }
500                 }
501             }
502             else
503             {
504                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
505                 const UINT_32 heightInM = pOut->height / metaBlk.h;
506 
507                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
508                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
509                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
510 
511                 if (pOut->pMipInfo != NULL)
512                 {
513                     pOut->pMipInfo[0].inMiptail = FALSE;
514                     pOut->pMipInfo[0].offset    = 0;
515                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
516                 }
517             }
518 
519             // Get the DCC address equation (copied from DccAddrFromCoord)
520             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
521             const UINT_32 numPipeLog2 = m_pipesLog2;
522             UINT_32       index       = m_dccBaseIndex + elemLog2;
523             const UINT_8* patIdxTable;
524 
525             if (m_settings.supportRbPlus)
526             {
527                 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
528 
529                 if (pIn->dccKeyFlags.pipeAligned)
530                 {
531                     index += MaxNumOfBpp;
532 
533                     if (m_numPkrLog2 < 2)
534                     {
535                         index += m_pipesLog2 * MaxNumOfBpp;
536                     }
537                     else
538                     {
539                         // 4 groups for "m_numPkrLog2 < 2" case
540                         index += 4 * MaxNumOfBpp;
541 
542                         const UINT_32 dccPipePerPkr = 3;
543 
544                         index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
545                                  (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
546                     }
547                 }
548             }
549             else
550             {
551                 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
552 
553                 if (pIn->dccKeyFlags.pipeAligned)
554                 {
555                     index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
556                 }
557                 else
558                 {
559                     index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
560                 }
561             }
562 
563             ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
564             pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
565         }
566     }
567 
568     return ret;
569 }
570 
571 /**
572 ************************************************************************************************************************
573 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
574 *
575 *   @brief
576 *       Interface function stub of AddrComputeCmaskAddrFromCoord
577 *
578 *   @return
579 *       ADDR_E_RETURNCODE
580 ************************************************************************************************************************
581 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)582 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
583     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
584     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
585 {
586     // Only support pipe aligned CMask
587     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
588 
589     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
590     input.size            = sizeof(input);
591     input.cMaskFlags      = pIn->cMaskFlags;
592     input.colorFlags      = pIn->colorFlags;
593     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
594     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
595     input.numSlices       = Max(pIn->numSlices,       1u);
596     input.swizzleMode     = pIn->swizzleMode;
597     input.resourceType    = pIn->resourceType;
598 
599     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
600     output.size = sizeof(output);
601 
602     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
603 
604     if (returnCode == ADDR_OK)
605     {
606         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
607         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
608         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
609         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
610         const UINT_8*  patIdxTable   =
611             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
612             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
613 
614 
615         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
616         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
617         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
618                                                                       blkSizeLog2 + 1, // +1 for nibble offset
619                                                                       pIn->x,
620                                                                       pIn->y,
621                                                                       pIn->slice,
622                                                                       0);
623         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
624         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
625         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
626         const UINT_32 blkIndex = (yb * pb) + xb;
627         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
628 
629         pOut->addr = (output.sliceSize * pIn->slice) +
630                      (blkIndex * (1 << blkSizeLog2)) +
631                      ((blkOffset >> 1) ^ pipeXor);
632         pOut->bitPosition = (blkOffset & 1) << 2;
633     }
634 
635     return returnCode;
636 }
637 
638 /**
639 ************************************************************************************************************************
640 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
641 *
642 *   @brief
643 *       Interface function stub of AddrComputeHtileAddrFromCoord
644 *
645 *   @return
646 *       ADDR_E_RETURNCODE
647 ************************************************************************************************************************
648 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)649 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
650     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
651     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
652 {
653     ADDR_E_RETURNCODE returnCode = ADDR_OK;
654 
655     if (pIn->numMipLevels > 1)
656     {
657         returnCode = ADDR_NOTIMPLEMENTED;
658     }
659     else
660     {
661         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
662         input.size            = sizeof(input);
663         input.hTileFlags      = pIn->hTileFlags;
664         input.depthFlags      = pIn->depthflags;
665         input.swizzleMode     = pIn->swizzleMode;
666         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
667         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
668         input.numSlices       = Max(pIn->numSlices,       1u);
669         input.numMipLevels    = 1;
670 
671         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
672         output.size = sizeof(output);
673 
674         returnCode = ComputeHtileInfo(&input, &output);
675 
676         if (returnCode == ADDR_OK)
677         {
678             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
679             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
680             const UINT_32  index         = m_xmaskBaseIndex + numSampleLog2;
681             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
682 
683 
684             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
685             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
686             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
687                                                                            blkSizeLog2 + 1, // +1 for nibble offset
688                                                                            pIn->x,
689                                                                            pIn->y,
690                                                                            pIn->slice,
691                                                                            0);
692             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
693             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
694             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
695             const UINT_32 blkIndex = (yb * pb) + xb;
696             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
697 
698             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
699                          (blkIndex * (1 << blkSizeLog2)) +
700                          ((blkOffset >> 1) ^ pipeXor);
701         }
702     }
703 
704     return returnCode;
705 }
706 
707 /**
708 ************************************************************************************************************************
709 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
710 *
711 *   @brief
712 *       Interface function stub of AddrComputeHtileCoordFromAddr
713 *
714 *   @return
715 *       ADDR_E_RETURNCODE
716 ************************************************************************************************************************
717 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)718 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
719     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
720     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
721 {
722     ADDR_NOT_IMPLEMENTED();
723 
724     return ADDR_OK;
725 }
726 
727 /**
728 ************************************************************************************************************************
729 *   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
730 *
731 *   @brief
732 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
733 *
734 *   @return
735 *       ADDR_E_RETURNCODE
736 ************************************************************************************************************************
737 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)738 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
739     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
740 {
741     ADDR_E_RETURNCODE returnCode = ADDR_OK;
742 
743     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
744         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
745         (pIn->dccKeyFlags.linear == TRUE)             ||
746         (pIn->numFrags           >  1)                ||
747         (pIn->numMipLevels       >  1)                ||
748         (pIn->mipId              >  0))
749     {
750         returnCode = ADDR_NOTSUPPORTED;
751     }
752     else if ((pIn->pitch == 0)         ||
753              (pIn->metaBlkWidth == 0)  ||
754              (pIn->metaBlkHeight == 0) ||
755              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
756     {
757         returnCode = ADDR_NOTSUPPORTED;
758     }
759 
760     return returnCode;
761 }
762 
763 /**
764 ************************************************************************************************************************
765 *   Gfx10Lib::HwlComputeDccAddrFromCoord
766 *
767 *   @brief
768 *       Interface function stub of AddrComputeDccAddrFromCoord
769 *
770 *   @return
771 *       N/A
772 ************************************************************************************************************************
773 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)774 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
775     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
776     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
777 {
778     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
779     const UINT_32 numPipeLog2 = m_pipesLog2;
780     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
781     UINT_32       index       = m_dccBaseIndex + elemLog2;
782     const UINT_8* patIdxTable;
783 
784     if (m_settings.supportRbPlus)
785     {
786         patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
787 
788         if (pIn->dccKeyFlags.pipeAligned)
789         {
790             index += MaxNumOfBpp;
791 
792             if (m_numPkrLog2 < 2)
793             {
794                 index += m_pipesLog2 * MaxNumOfBpp;
795             }
796             else
797             {
798                 // 4 groups for "m_numPkrLog2 < 2" case
799                 index += 4 * MaxNumOfBpp;
800 
801                 const UINT_32 dccPipePerPkr = 3;
802 
803                 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
804                          (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
805             }
806         }
807     }
808     else
809     {
810         patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
811 
812         if (pIn->dccKeyFlags.pipeAligned)
813         {
814             index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
815         }
816         else
817         {
818             index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
819         }
820     }
821 
822     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
823     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
824     const UINT_32  blkOffset   =
825         ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
826                                         blkSizeLog2 + 1, // +1 for nibble offset
827                                         pIn->x,
828                                         pIn->y,
829                                         pIn->slice,
830                                         0);
831     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
832     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
833     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
834     const UINT_32 blkIndex = (yb * pb) + xb;
835     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
836 
837     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
838                  (blkIndex * (1 << blkSizeLog2)) +
839                  ((blkOffset >> 1) ^ pipeXor);
840 }
841 
842 /**
843 ************************************************************************************************************************
844 *   Gfx10Lib::HwlInitGlobalParams
845 *
846 *   @brief
847 *       Initializes global parameters
848 *
849 *   @return
850 *       TRUE if all settings are valid
851 *
852 ************************************************************************************************************************
853 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)854 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
855     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
856 {
857     BOOL_32              valid = TRUE;
858     GB_ADDR_CONFIG_GFX10 gbAddrConfig;
859 
860     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
861 
862     // These values are copied from CModel code
863     switch (gbAddrConfig.bits.NUM_PIPES)
864     {
865         case ADDR_CONFIG_1_PIPE:
866             m_pipes     = 1;
867             m_pipesLog2 = 0;
868             break;
869         case ADDR_CONFIG_2_PIPE:
870             m_pipes     = 2;
871             m_pipesLog2 = 1;
872             break;
873         case ADDR_CONFIG_4_PIPE:
874             m_pipes     = 4;
875             m_pipesLog2 = 2;
876             break;
877         case ADDR_CONFIG_8_PIPE:
878             m_pipes     = 8;
879             m_pipesLog2 = 3;
880             break;
881         case ADDR_CONFIG_16_PIPE:
882             m_pipes     = 16;
883             m_pipesLog2 = 4;
884             break;
885         case ADDR_CONFIG_32_PIPE:
886             m_pipes     = 32;
887             m_pipesLog2 = 5;
888             break;
889         case ADDR_CONFIG_64_PIPE:
890             m_pipes     = 64;
891             m_pipesLog2 = 6;
892             break;
893         default:
894             ADDR_ASSERT_ALWAYS();
895             valid = FALSE;
896             break;
897     }
898 
899     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
900     {
901         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
902             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
903             m_pipeInterleaveLog2  = 8;
904             break;
905         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
906             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
907             m_pipeInterleaveLog2  = 9;
908             break;
909         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
910             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
911             m_pipeInterleaveLog2  = 10;
912             break;
913         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
914             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
915             m_pipeInterleaveLog2  = 11;
916             break;
917         default:
918             ADDR_ASSERT_ALWAYS();
919             valid = FALSE;
920             break;
921     }
922 
923     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
924     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
925     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
926     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
927 
928     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
929     {
930         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
931             m_maxCompFrag     = 1;
932             m_maxCompFragLog2 = 0;
933             break;
934         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
935             m_maxCompFrag     = 2;
936             m_maxCompFragLog2 = 1;
937             break;
938         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
939             m_maxCompFrag     = 4;
940             m_maxCompFragLog2 = 2;
941             break;
942         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
943             m_maxCompFrag     = 8;
944             m_maxCompFragLog2 = 3;
945             break;
946         default:
947             ADDR_ASSERT_ALWAYS();
948             valid = FALSE;
949             break;
950     }
951 
952     {
953         // Skip unaligned case
954         m_xmaskBaseIndex += MaxNumOfAA;
955 
956         m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
957         m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
958 
959         if (m_settings.supportRbPlus)
960         {
961             m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
962             m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
963 
964             ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
965 
966             ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
967                           sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
968 
969             if (m_numPkrLog2 >= 2)
970             {
971                 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
972                 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
973             }
974         }
975         else
976         {
977             const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
978                                         static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
979                                         1;
980 
981             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
982 
983             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
984                           sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
985         }
986     }
987 
988     if (m_settings.supportRbPlus)
989     {
990         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
991         // corresponding SW_64KB_* mode
992         m_blockVarSizeLog2 = m_pipesLog2 + 14;
993     }
994 
995 
996     if (valid)
997     {
998         InitEquationTable();
999     }
1000 
1001     return valid;
1002 }
1003 
1004 /**
1005 ************************************************************************************************************************
1006 *   Gfx10Lib::HwlConvertChipFamily
1007 *
1008 *   @brief
1009 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1010 *   @return
1011 *       ChipFamily
1012 ************************************************************************************************************************
1013 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1014 ChipFamily Gfx10Lib::HwlConvertChipFamily(
1015     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
1016     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1017 {
1018     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1019 
1020     m_settings.dccUnsup3DSwDis  = 1;
1021     m_settings.dsMipmapHtileFix = 1;
1022 
1023     switch (chipFamily)
1024     {
1025         case FAMILY_NV:
1026             if (ASICREV_IS_NAVI10_P(chipRevision))
1027             {
1028                 m_settings.dsMipmapHtileFix = 0;
1029                 m_settings.isDcn20          = 1;
1030             }
1031 
1032             if (ASICREV_IS_NAVI12_P(chipRevision))
1033             {
1034                 m_settings.isDcn20 = 1;
1035             }
1036 
1037             if (ASICREV_IS_NAVI14_M(chipRevision))
1038             {
1039                 m_settings.isDcn20 = 1;
1040             }
1041 
1042             if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
1043             {
1044                 m_settings.supportRbPlus   = 1;
1045                 m_settings.dccUnsup3DSwDis = 0;
1046             }
1047 
1048             if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
1049             {
1050                 m_settings.supportRbPlus   = 1;
1051                 m_settings.dccUnsup3DSwDis = 0;
1052             }
1053 
1054             if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
1055             {
1056                 m_settings.supportRbPlus   = 1;
1057                 m_settings.dccUnsup3DSwDis = 0;
1058             }
1059 
1060             if (ASICREV_IS_BEIGE_GOBY(chipRevision))
1061             {
1062                 m_settings.supportRbPlus   = 1;
1063                 m_settings.dccUnsup3DSwDis = 0;
1064             }
1065             break;
1066 
1067         case FAMILY_VGH:
1068             if (ASICREV_IS_VANGOGH(chipRevision))
1069             {
1070                 m_settings.supportRbPlus   = 1;
1071                 m_settings.dccUnsup3DSwDis = 0;
1072             }
1073             else
1074             {
1075                 ADDR_ASSERT(!"Unknown chip revision");
1076             }
1077 
1078             break;
1079 
1080         case FAMILY_YC:
1081             if (ASICREV_IS_YELLOW_CARP(chipRevision))
1082             {
1083                 m_settings.supportRbPlus   = 1;
1084                 m_settings.dccUnsup3DSwDis = 0;
1085             }
1086             else
1087             {
1088                 ADDR_ASSERT(!"Unknown chip revision");
1089             }
1090 
1091             break;
1092 
1093         default:
1094             ADDR_ASSERT(!"Unknown chip family");
1095             break;
1096     }
1097 
1098     m_configFlags.use32bppFor422Fmt = TRUE;
1099 
1100     return family;
1101 }
1102 
1103 /**
1104 ************************************************************************************************************************
1105 *   Gfx10Lib::GetBlk256SizeLog2
1106 *
1107 *   @brief
1108 *       Get block 256 size
1109 *
1110 *   @return
1111 *       N/A
1112 ************************************************************************************************************************
1113 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1114 void Gfx10Lib::GetBlk256SizeLog2(
1115     AddrResourceType resourceType,      ///< [in] Resource type
1116     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1117     UINT_32          elemLog2,          ///< [in] element size log2
1118     UINT_32          numSamplesLog2,    ///< [in] number of samples
1119     Dim3d*           pBlock             ///< [out] block size
1120     ) const
1121 {
1122     if (IsThin(resourceType, swizzleMode))
1123     {
1124         UINT_32 blockBits = 8 - elemLog2;
1125 
1126         if (IsZOrderSwizzle(swizzleMode))
1127         {
1128             blockBits -= numSamplesLog2;
1129         }
1130 
1131         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1132         pBlock->h = (blockBits >> 1);
1133         pBlock->d = 0;
1134     }
1135     else
1136     {
1137         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1138 
1139         UINT_32 blockBits = 8 - elemLog2;
1140 
1141         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1142         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1143         pBlock->h = (blockBits / 3);
1144     }
1145 }
1146 
1147 /**
1148 ************************************************************************************************************************
1149 *   Gfx10Lib::GetCompressedBlockSizeLog2
1150 *
1151 *   @brief
1152 *       Get compress block size
1153 *
1154 *   @return
1155 *       N/A
1156 ************************************************************************************************************************
1157 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1158 void Gfx10Lib::GetCompressedBlockSizeLog2(
1159     Gfx10DataType    dataType,          ///< [in] Data type
1160     AddrResourceType resourceType,      ///< [in] Resource type
1161     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1162     UINT_32          elemLog2,          ///< [in] element size log2
1163     UINT_32          numSamplesLog2,    ///< [in] number of samples
1164     Dim3d*           pBlock             ///< [out] block size
1165     ) const
1166 {
1167     if (dataType == Gfx10DataColor)
1168     {
1169         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1170     }
1171     else
1172     {
1173         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1174         pBlock->w = 3;
1175         pBlock->h = 3;
1176         pBlock->d = 0;
1177     }
1178 }
1179 
1180 /**
1181 ************************************************************************************************************************
1182 *   Gfx10Lib::GetMetaOverlapLog2
1183 *
1184 *   @brief
1185 *       Get meta block overlap
1186 *
1187 *   @return
1188 *       N/A
1189 ************************************************************************************************************************
1190 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1191 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1192     Gfx10DataType    dataType,          ///< [in] Data type
1193     AddrResourceType resourceType,      ///< [in] Resource type
1194     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1195     UINT_32          elemLog2,          ///< [in] element size log2
1196     UINT_32          numSamplesLog2     ///< [in] number of samples
1197     ) const
1198 {
1199     Dim3d compBlock;
1200     Dim3d microBlock;
1201 
1202     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1203     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1204 
1205     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1206     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1207     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1208     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1209     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1210 
1211     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1212     {
1213         overlap++;
1214     }
1215 
1216     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1217     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1218     {
1219         overlap--;
1220     }
1221     overlap = Max(overlap, 0);
1222     return overlap;
1223 }
1224 
1225 /**
1226 ************************************************************************************************************************
1227 *   Gfx10Lib::Get3DMetaOverlapLog2
1228 *
1229 *   @brief
1230 *       Get 3d meta block overlap
1231 *
1232 *   @return
1233 *       N/A
1234 ************************************************************************************************************************
1235 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1236 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1237     AddrResourceType resourceType,      ///< [in] Resource type
1238     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1239     UINT_32          elemLog2           ///< [in] element size log2
1240     ) const
1241 {
1242     Dim3d microBlock;
1243     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1244 
1245     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1246 
1247     if (m_settings.supportRbPlus)
1248     {
1249         overlap++;
1250     }
1251 
1252     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1253     {
1254         overlap = 0;
1255     }
1256     return overlap;
1257 }
1258 
1259 /**
1260 ************************************************************************************************************************
1261 *   Gfx10Lib::GetPipeRotateAmount
1262 *
1263 *   @brief
1264 *       Get pipe rotate amount
1265 *
1266 *   @return
1267 *       Pipe rotate amount
1268 ************************************************************************************************************************
1269 */
1270 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1271 INT_32 Gfx10Lib::GetPipeRotateAmount(
1272     AddrResourceType resourceType,      ///< [in] Resource type
1273     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1274     ) const
1275 {
1276     INT_32 amount = 0;
1277 
1278     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1279     {
1280         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1281                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1282     }
1283 
1284     return amount;
1285 }
1286 
1287 /**
1288 ************************************************************************************************************************
1289 *   Gfx10Lib::GetMetaBlkSize
1290 *
1291 *   @brief
1292 *       Get metadata block size
1293 *
1294 *   @return
1295 *       Meta block size
1296 ************************************************************************************************************************
1297 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1298 UINT_32 Gfx10Lib::GetMetaBlkSize(
1299     Gfx10DataType    dataType,          ///< [in] Data type
1300     AddrResourceType resourceType,      ///< [in] Resource type
1301     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1302     UINT_32          elemLog2,          ///< [in] element size log2
1303     UINT_32          numSamplesLog2,    ///< [in] number of samples
1304     BOOL_32          pipeAlign,         ///< [in] pipe align
1305     Dim3d*           pBlock             ///< [out] block size
1306     ) const
1307 {
1308     INT_32 metablkSizeLog2;
1309 
1310     {
1311         const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1312         const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1313         const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1314         const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1315                                           numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1316         const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1317         INT_32       numPipesLog2       = m_pipesLog2;
1318 
1319         if (IsThin(resourceType, swizzleMode))
1320         {
1321             if ((pipeAlign == FALSE) ||
1322                 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1323                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1324             {
1325                 if (pipeAlign)
1326                 {
1327                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1328                     metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1329                 }
1330                 else
1331                 {
1332                     metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1333                 }
1334             }
1335             else
1336             {
1337                 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1338                 {
1339                     numPipesLog2++;
1340                 }
1341 
1342                 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1343 
1344                 if (numPipesLog2 >= 4)
1345                 {
1346                     INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1347 
1348                     // In 16Bpe 8xaa, we have an extra overlap bit
1349                     if ((pipeRotateLog2 > 0)  &&
1350                         (elemLog2 == 4)       &&
1351                         (numSamplesLog2 == 3) &&
1352                         (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1353                     {
1354                         overlapLog2++;
1355                     }
1356 
1357                     metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1358                     metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1359 
1360                     if (m_settings.supportRbPlus    &&
1361                         IsRtOptSwizzle(swizzleMode) &&
1362                         (numPipesLog2 == 6)         &&
1363                         (numSamplesLog2 == 3)       &&
1364                         (m_maxCompFragLog2 == 3)    &&
1365                         (metablkSizeLog2 < 15))
1366                     {
1367                         metablkSizeLog2 = 15;
1368                     }
1369                 }
1370                 else
1371                 {
1372                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1373                 }
1374 
1375                 if (dataType == Gfx10DataDepthStencil)
1376                 {
1377                     // For htile surfaces, pad meta block size to 2K * num_pipes
1378                     metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1379                 }
1380 
1381                 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1382 
1383                 if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1384                 {
1385                     const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1386 
1387                     metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1388                 }
1389             }
1390 
1391             const INT_32 metablkBitsLog2 =
1392                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1393             pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1394             pBlock->h = 1 << (metablkBitsLog2 >> 1);
1395             pBlock->d = 1;
1396         }
1397         else
1398         {
1399             ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1400 
1401             if (pipeAlign)
1402             {
1403                 if (m_settings.supportRbPlus         &&
1404                     (m_pipesLog2 == m_numSaLog2 + 1) &&
1405                     (m_pipesLog2 > 1)                &&
1406                     IsRbAligned(resourceType, swizzleMode))
1407                 {
1408                     numPipesLog2++;
1409                 }
1410 
1411                 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1412 
1413                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1414                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1415                 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1416             }
1417             else
1418             {
1419                 metablkSizeLog2 = 12;
1420             }
1421 
1422             const INT_32 metablkBitsLog2 =
1423                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1424             pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1425             pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1426             pBlock->d = 1 << (metablkBitsLog2 / 3);
1427         }
1428     }
1429 
1430     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1431 }
1432 
1433 /**
1434 ************************************************************************************************************************
1435 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1436 *
1437 *   @brief
1438 *       Convert swizzle pattern to equation.
1439 *
1440 *   @return
1441 *       N/A
1442 ************************************************************************************************************************
1443 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1444 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1445     UINT_32                elemLog2,  ///< [in] element bytes log2
1446     AddrResourceType       rsrcType,  ///< [in] resource type
1447     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1448     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1449     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1450     const
1451 {
1452     ADDR_BIT_SETTING fullSwizzlePattern[20];
1453     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1454 
1455     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1456     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1457 
1458     pEquation->numBits            = blockSizeLog2;
1459     pEquation->stackedDepthSlices = FALSE;
1460 
1461     for (UINT_32 i = 0; i < elemLog2; i++)
1462     {
1463         pEquation->addr[i].channel = 0;
1464         pEquation->addr[i].valid   = 1;
1465         pEquation->addr[i].index   = i;
1466     }
1467 
1468     if (IsXor(swMode) == FALSE)
1469     {
1470         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1471         {
1472             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1473 
1474             if (pSwizzle[i].x != 0)
1475             {
1476                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1477 
1478                 pEquation->addr[i].channel = 0;
1479                 pEquation->addr[i].valid   = 1;
1480                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1481             }
1482             else if (pSwizzle[i].y != 0)
1483             {
1484                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1485 
1486                 pEquation->addr[i].channel = 1;
1487                 pEquation->addr[i].valid   = 1;
1488                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1489             }
1490             else
1491             {
1492                 ADDR_ASSERT(pSwizzle[i].z != 0);
1493                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1494 
1495                 pEquation->addr[i].channel = 2;
1496                 pEquation->addr[i].valid   = 1;
1497                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1498             }
1499 
1500             pEquation->xor1[i].value = 0;
1501             pEquation->xor2[i].value = 0;
1502         }
1503     }
1504     else if (IsThin(rsrcType, swMode))
1505     {
1506         Dim3d dim;
1507         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1508 
1509         const UINT_32 blkXLog2 = Log2(dim.w);
1510         const UINT_32 blkYLog2 = Log2(dim.h);
1511         const UINT_32 blkXMask = dim.w - 1;
1512         const UINT_32 blkYMask = dim.h - 1;
1513 
1514         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1515         UINT_32          xMask = 0;
1516         UINT_32          yMask = 0;
1517         UINT_32          bMask = (1 << elemLog2) - 1;
1518 
1519         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1520         {
1521             if (IsPow2(pSwizzle[i].value))
1522             {
1523                 if (pSwizzle[i].x != 0)
1524                 {
1525                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1526                     xMask |= pSwizzle[i].x;
1527 
1528                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1529 
1530                     ADDR_ASSERT(xLog2 < blkXLog2);
1531 
1532                     pEquation->addr[i].channel = 0;
1533                     pEquation->addr[i].valid   = 1;
1534                     pEquation->addr[i].index   = xLog2 + elemLog2;
1535                 }
1536                 else
1537                 {
1538                     ADDR_ASSERT(pSwizzle[i].y != 0);
1539                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1540                     yMask |= pSwizzle[i].y;
1541 
1542                     pEquation->addr[i].channel = 1;
1543                     pEquation->addr[i].valid   = 1;
1544                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1545 
1546                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1547                 }
1548 
1549                 swizzle[i].value = 0;
1550                 bMask |= 1 << i;
1551             }
1552             else
1553             {
1554                 if (pSwizzle[i].z != 0)
1555                 {
1556                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1557 
1558                     pEquation->xor2[i].channel = 2;
1559                     pEquation->xor2[i].valid   = 1;
1560                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1561                 }
1562 
1563                 swizzle[i].x = pSwizzle[i].x;
1564                 swizzle[i].y = pSwizzle[i].y;
1565                 swizzle[i].z = swizzle[i].s = 0;
1566 
1567                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1568 
1569                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1570 
1571                 if (xHi != 0)
1572                 {
1573                     ADDR_ASSERT(IsPow2(xHi));
1574                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1575 
1576                     pEquation->xor1[i].channel = 0;
1577                     pEquation->xor1[i].valid   = 1;
1578                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1579 
1580                     swizzle[i].x &= blkXMask;
1581                 }
1582 
1583                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1584 
1585                 if (yHi != 0)
1586                 {
1587                     ADDR_ASSERT(IsPow2(yHi));
1588 
1589                     if (xHi == 0)
1590                     {
1591                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1592                         pEquation->xor1[i].channel = 1;
1593                         pEquation->xor1[i].valid   = 1;
1594                         pEquation->xor1[i].index   = Log2(yHi);
1595                     }
1596                     else
1597                     {
1598                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1599                         pEquation->xor2[i].channel = 1;
1600                         pEquation->xor2[i].valid   = 1;
1601                         pEquation->xor2[i].index   = Log2(yHi);
1602                     }
1603 
1604                     swizzle[i].y &= blkYMask;
1605                 }
1606 
1607                 if (swizzle[i].value == 0)
1608                 {
1609                     bMask |= 1 << i;
1610                 }
1611             }
1612         }
1613 
1614         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1615         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1616 
1617         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1618 
1619         while (bMask != blockMask)
1620         {
1621             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1622             {
1623                 if ((bMask & (1 << i)) == 0)
1624                 {
1625                     if (IsPow2(swizzle[i].value))
1626                     {
1627                         if (swizzle[i].x != 0)
1628                         {
1629                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1630                             xMask |= swizzle[i].x;
1631 
1632                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1633 
1634                             ADDR_ASSERT(xLog2 < blkXLog2);
1635 
1636                             pEquation->addr[i].channel = 0;
1637                             pEquation->addr[i].valid   = 1;
1638                             pEquation->addr[i].index   = xLog2 + elemLog2;
1639                         }
1640                         else
1641                         {
1642                             ADDR_ASSERT(swizzle[i].y != 0);
1643                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1644                             yMask |= swizzle[i].y;
1645 
1646                             pEquation->addr[i].channel = 1;
1647                             pEquation->addr[i].valid   = 1;
1648                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1649 
1650                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1651                         }
1652 
1653                         swizzle[i].value = 0;
1654                         bMask |= 1 << i;
1655                     }
1656                     else
1657                     {
1658                         const UINT_32 x = swizzle[i].x & xMask;
1659                         const UINT_32 y = swizzle[i].y & yMask;
1660 
1661                         if (x != 0)
1662                         {
1663                             ADDR_ASSERT(IsPow2(x));
1664 
1665                             if (pEquation->xor1[i].value == 0)
1666                             {
1667                                 pEquation->xor1[i].channel = 0;
1668                                 pEquation->xor1[i].valid   = 1;
1669                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1670                             }
1671                             else
1672                             {
1673                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1674                                 pEquation->xor2[i].channel = 0;
1675                                 pEquation->xor2[i].valid   = 1;
1676                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1677                             }
1678                         }
1679 
1680                         if (y != 0)
1681                         {
1682                             ADDR_ASSERT(IsPow2(y));
1683 
1684                             if (pEquation->xor1[i].value == 0)
1685                             {
1686                                 pEquation->xor1[i].channel = 1;
1687                                 pEquation->xor1[i].valid   = 1;
1688                                 pEquation->xor1[i].index   = Log2(y);
1689                             }
1690                             else
1691                             {
1692                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1693                                 pEquation->xor2[i].channel = 1;
1694                                 pEquation->xor2[i].valid   = 1;
1695                                 pEquation->xor2[i].index   = Log2(y);
1696                             }
1697                         }
1698 
1699                         swizzle[i].x &= ~x;
1700                         swizzle[i].y &= ~y;
1701                     }
1702                 }
1703             }
1704         }
1705 
1706         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1707     }
1708     else
1709     {
1710         const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1711         const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1712         const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1713         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1714         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1715         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1716 
1717         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1718         UINT_32          xMask = 0;
1719         UINT_32          yMask = 0;
1720         UINT_32          zMask = 0;
1721         UINT_32          bMask = (1 << elemLog2) - 1;
1722 
1723         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1724         {
1725             if (IsPow2(pSwizzle[i].value))
1726             {
1727                 if (pSwizzle[i].x != 0)
1728                 {
1729                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1730                     xMask |= pSwizzle[i].x;
1731 
1732                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1733 
1734                     ADDR_ASSERT(xLog2 < blkXLog2);
1735 
1736                     pEquation->addr[i].channel = 0;
1737                     pEquation->addr[i].valid   = 1;
1738                     pEquation->addr[i].index   = xLog2 + elemLog2;
1739                 }
1740                 else if (pSwizzle[i].y != 0)
1741                 {
1742                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1743                     yMask |= pSwizzle[i].y;
1744 
1745                     pEquation->addr[i].channel = 1;
1746                     pEquation->addr[i].valid   = 1;
1747                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1748 
1749                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1750                 }
1751                 else
1752                 {
1753                     ADDR_ASSERT(pSwizzle[i].z != 0);
1754                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1755                     zMask |= pSwizzle[i].z;
1756 
1757                     pEquation->addr[i].channel = 2;
1758                     pEquation->addr[i].valid   = 1;
1759                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1760 
1761                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1762                 }
1763 
1764                 swizzle[i].value = 0;
1765                 bMask |= 1 << i;
1766             }
1767             else
1768             {
1769                 swizzle[i].x = pSwizzle[i].x;
1770                 swizzle[i].y = pSwizzle[i].y;
1771                 swizzle[i].z = pSwizzle[i].z;
1772                 swizzle[i].s = 0;
1773 
1774                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1775 
1776                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1777                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1778                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1779 
1780                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1781 
1782                 if (xHi != 0)
1783                 {
1784                     ADDR_ASSERT(IsPow2(xHi));
1785                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1786 
1787                     pEquation->xor1[i].channel = 0;
1788                     pEquation->xor1[i].valid   = 1;
1789                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1790 
1791                     swizzle[i].x &= blkXMask;
1792                 }
1793 
1794                 if (yHi != 0)
1795                 {
1796                     ADDR_ASSERT(IsPow2(yHi));
1797 
1798                     if (pEquation->xor1[i].value == 0)
1799                     {
1800                         pEquation->xor1[i].channel = 1;
1801                         pEquation->xor1[i].valid   = 1;
1802                         pEquation->xor1[i].index   = Log2(yHi);
1803                     }
1804                     else
1805                     {
1806                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1807                         pEquation->xor2[i].channel = 1;
1808                         pEquation->xor2[i].valid   = 1;
1809                         pEquation->xor2[i].index   = Log2(yHi);
1810                     }
1811 
1812                     swizzle[i].y &= blkYMask;
1813                 }
1814 
1815                 if (zHi != 0)
1816                 {
1817                     ADDR_ASSERT(IsPow2(zHi));
1818 
1819                     if (pEquation->xor1[i].value == 0)
1820                     {
1821                         pEquation->xor1[i].channel = 2;
1822                         pEquation->xor1[i].valid   = 1;
1823                         pEquation->xor1[i].index   = Log2(zHi);
1824                     }
1825                     else
1826                     {
1827                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1828                         pEquation->xor2[i].channel = 2;
1829                         pEquation->xor2[i].valid   = 1;
1830                         pEquation->xor2[i].index   = Log2(zHi);
1831                     }
1832 
1833                     swizzle[i].z &= blkZMask;
1834                 }
1835 
1836                 if (swizzle[i].value == 0)
1837                 {
1838                     bMask |= 1 << i;
1839                 }
1840             }
1841         }
1842 
1843         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1844         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1845 
1846         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1847 
1848         while (bMask != blockMask)
1849         {
1850             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1851             {
1852                 if ((bMask & (1 << i)) == 0)
1853                 {
1854                     if (IsPow2(swizzle[i].value))
1855                     {
1856                         if (swizzle[i].x != 0)
1857                         {
1858                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1859                             xMask |= swizzle[i].x;
1860 
1861                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1862 
1863                             ADDR_ASSERT(xLog2 < blkXLog2);
1864 
1865                             pEquation->addr[i].channel = 0;
1866                             pEquation->addr[i].valid   = 1;
1867                             pEquation->addr[i].index   = xLog2 + elemLog2;
1868                         }
1869                         else if (swizzle[i].y != 0)
1870                         {
1871                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1872                             yMask |= swizzle[i].y;
1873 
1874                             pEquation->addr[i].channel = 1;
1875                             pEquation->addr[i].valid   = 1;
1876                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1877 
1878                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1879                         }
1880                         else
1881                         {
1882                             ADDR_ASSERT(swizzle[i].z != 0);
1883                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1884                             zMask |= swizzle[i].z;
1885 
1886                             pEquation->addr[i].channel = 2;
1887                             pEquation->addr[i].valid   = 1;
1888                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1889 
1890                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1891                         }
1892 
1893                         swizzle[i].value = 0;
1894                         bMask |= 1 << i;
1895                     }
1896                     else
1897                     {
1898                         const UINT_32 x = swizzle[i].x & xMask;
1899                         const UINT_32 y = swizzle[i].y & yMask;
1900                         const UINT_32 z = swizzle[i].z & zMask;
1901 
1902                         if (x != 0)
1903                         {
1904                             ADDR_ASSERT(IsPow2(x));
1905 
1906                             if (pEquation->xor1[i].value == 0)
1907                             {
1908                                 pEquation->xor1[i].channel = 0;
1909                                 pEquation->xor1[i].valid   = 1;
1910                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1911                             }
1912                             else
1913                             {
1914                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1915                                 pEquation->xor2[i].channel = 0;
1916                                 pEquation->xor2[i].valid   = 1;
1917                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1918                             }
1919                         }
1920 
1921                         if (y != 0)
1922                         {
1923                             ADDR_ASSERT(IsPow2(y));
1924 
1925                             if (pEquation->xor1[i].value == 0)
1926                             {
1927                                 pEquation->xor1[i].channel = 1;
1928                                 pEquation->xor1[i].valid   = 1;
1929                                 pEquation->xor1[i].index   = Log2(y);
1930                             }
1931                             else
1932                             {
1933                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1934                                 pEquation->xor2[i].channel = 1;
1935                                 pEquation->xor2[i].valid   = 1;
1936                                 pEquation->xor2[i].index   = Log2(y);
1937                             }
1938                         }
1939 
1940                         if (z != 0)
1941                         {
1942                             ADDR_ASSERT(IsPow2(z));
1943 
1944                             if (pEquation->xor1[i].value == 0)
1945                             {
1946                                 pEquation->xor1[i].channel = 2;
1947                                 pEquation->xor1[i].valid   = 1;
1948                                 pEquation->xor1[i].index   = Log2(z);
1949                             }
1950                             else
1951                             {
1952                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1953                                 pEquation->xor2[i].channel = 2;
1954                                 pEquation->xor2[i].valid   = 1;
1955                                 pEquation->xor2[i].index   = Log2(z);
1956                             }
1957                         }
1958 
1959                         swizzle[i].x &= ~x;
1960                         swizzle[i].y &= ~y;
1961                         swizzle[i].z &= ~z;
1962                     }
1963                 }
1964             }
1965         }
1966 
1967         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1968     }
1969 }
1970 
1971 /**
1972 ************************************************************************************************************************
1973 *   Gfx10Lib::InitEquationTable
1974 *
1975 *   @brief
1976 *       Initialize Equation table.
1977 *
1978 *   @return
1979 *       N/A
1980 ************************************************************************************************************************
1981 */
InitEquationTable()1982 VOID Gfx10Lib::InitEquationTable()
1983 {
1984     memset(m_equationTable, 0, sizeof(m_equationTable));
1985 
1986     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1987     {
1988         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1989 
1990         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1991         {
1992             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1993 
1994             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1995             {
1996                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1997                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1998 
1999                 if (pPatInfo != NULL)
2000                 {
2001                     ADDR_ASSERT(IsValidSwMode(swMode));
2002 
2003                     if (pPatInfo->maxItemCount <= 3)
2004                     {
2005                         ADDR_EQUATION equation = {};
2006 
2007                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2008 
2009                         equationIndex = m_numEquations;
2010                         ADDR_ASSERT(equationIndex < EquationTableSize);
2011 
2012                         m_equationTable[equationIndex] = equation;
2013 
2014                         m_numEquations++;
2015                     }
2016                     else
2017                     {
2018                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2019                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2020                         ADDR_ASSERT(rsrcTypeIdx == 1);
2021                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2022                         ADDR_ASSERT(m_settings.supportRbPlus == 1);
2023                     }
2024                 }
2025 
2026                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2027             }
2028         }
2029     }
2030 }
2031 
2032 /**
2033 ************************************************************************************************************************
2034 *   Gfx10Lib::HwlGetEquationIndex
2035 *
2036 *   @brief
2037 *       Interface function stub of GetEquationIndex
2038 *
2039 *   @return
2040 *       ADDR_E_RETURNCODE
2041 ************************************************************************************************************************
2042 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2043 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2044     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
2045     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
2046     ) const
2047 {
2048     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2049 
2050     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2051         (pIn->resourceType == ADDR_RSRC_TEX_3D))
2052     {
2053         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2054         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
2055         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2056 
2057         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2058     }
2059 
2060     if (pOut->pMipInfo != NULL)
2061     {
2062         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2063         {
2064             pOut->pMipInfo[i].equationIndex = equationIdx;
2065         }
2066     }
2067 
2068     return equationIdx;
2069 }
2070 
2071 /**
2072 ************************************************************************************************************************
2073 *   Gfx10Lib::GetValidDisplaySwizzleModes
2074 *
2075 *   @brief
2076 *       Get valid swizzle modes mask for displayable surface
2077 *
2078 *   @return
2079 *       Valid swizzle modes mask for displayable surface
2080 ************************************************************************************************************************
2081 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2082 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2083     UINT_32 bpp
2084     ) const
2085 {
2086     UINT_32 swModeMask = 0;
2087 
2088     if (bpp <= 64)
2089     {
2090         if (m_settings.isDcn20)
2091         {
2092             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2093         }
2094         else
2095         {
2096             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2097         }
2098     }
2099 
2100     return swModeMask;
2101 }
2102 
2103 /**
2104 ************************************************************************************************************************
2105 *   Gfx10Lib::IsValidDisplaySwizzleMode
2106 *
2107 *   @brief
2108 *       Check if a swizzle mode is supported by display engine
2109 *
2110 *   @return
2111 *       TRUE is swizzle mode is supported by display engine
2112 ************************************************************************************************************************
2113 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2114 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2115     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2116     ) const
2117 {
2118     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2119 
2120     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2121 }
2122 
2123 /**
2124 ************************************************************************************************************************
2125 *   Gfx10Lib::GetMaxNumMipsInTail
2126 *
2127 *   @brief
2128 *       Return max number of mips in tails
2129 *
2130 *   @return
2131 *       Max number of mips in tails
2132 ************************************************************************************************************************
2133 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2134 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2135     UINT_32 blockSizeLog2,     ///< block size log2
2136     BOOL_32 isThin             ///< is thin or thick
2137     ) const
2138 {
2139     UINT_32 effectiveLog2 = blockSizeLog2;
2140 
2141     if (isThin == FALSE)
2142     {
2143         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2144     }
2145 
2146     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2147 }
2148 
2149 /**
2150 ************************************************************************************************************************
2151 *   Gfx10Lib::HwlComputePipeBankXor
2152 *
2153 *   @brief
2154 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2155 *
2156 *   @return
2157 *       PipeBankXor value
2158 ************************************************************************************************************************
2159 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2161     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2162     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2163     ) const
2164 {
2165     if (IsNonPrtXor(pIn->swizzleMode))
2166     {
2167         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2168 
2169         // No pipe xor...
2170         const UINT_32 pipeXor = 0;
2171         UINT_32       bankXor = 0;
2172 
2173         const UINT_32         XorPatternLen = 8;
2174         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2175         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2176         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2177         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2178         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2179 
2180         switch (bankBits)
2181         {
2182             case 1:
2183             case 2:
2184             case 3:
2185             case 4:
2186                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2187                 break;
2188             default:
2189                 // valid bank bits should be 0~4
2190                 ADDR_ASSERT_ALWAYS();
2191             case 0:
2192                 break;
2193         }
2194 
2195         pOut->pipeBankXor = bankXor | pipeXor;
2196     }
2197     else
2198     {
2199         pOut->pipeBankXor = 0;
2200     }
2201 
2202     return ADDR_OK;
2203 }
2204 
2205 /**
2206 ************************************************************************************************************************
2207 *   Gfx10Lib::HwlComputeSlicePipeBankXor
2208 *
2209 *   @brief
2210 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2211 *
2212 *   @return
2213 *       PipeBankXor value
2214 ************************************************************************************************************************
2215 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2216 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2217     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2218     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2219     ) const
2220 {
2221     if (IsNonPrtXor(pIn->swizzleMode))
2222     {
2223         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2224         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2225         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2226 
2227         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2228 
2229         if (pIn->bpe != 0)
2230         {
2231             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2232                                                                     pIn->resourceType,
2233                                                                     Log2(pIn->bpe >> 3),
2234                                                                     1);
2235 
2236             if (pPatInfo != NULL)
2237             {
2238                 ADDR_BIT_SETTING fullSwizzlePattern[20];
2239                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2240 
2241                 const UINT_32 pipeBankXorOffset =
2242                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2243                                                     blockBits,
2244                                                     0,
2245                                                     0,
2246                                                     pIn->slice,
2247                                                     0);
2248 
2249                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2250 
2251                 // Should have no bit set under pipe interleave
2252                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2253 
2254                 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2255                 ADDR_ASSERT(pipeBankXor == pipeXor);
2256 
2257                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2258             }
2259         }
2260     }
2261     else
2262     {
2263         pOut->pipeBankXor = 0;
2264     }
2265 
2266     return ADDR_OK;
2267 }
2268 
2269 /**
2270 ************************************************************************************************************************
2271 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2272 *
2273 *   @brief
2274 *       Compute sub resource offset to support swizzle pattern
2275 *
2276 *   @return
2277 *       Offset
2278 ************************************************************************************************************************
2279 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2280 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2281     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2282     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2283     ) const
2284 {
2285     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2286 
2287     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2288 
2289     return ADDR_OK;
2290 }
2291 
2292 /**
2293 ************************************************************************************************************************
2294 *   Gfx10Lib::HwlComputeNonBlockCompressedView
2295 *
2296 *   @brief
2297 *       Compute non-block-compressed view for a given mipmap level/slice.
2298 *
2299 *   @return
2300 *       ADDR_E_RETURNCODE
2301 ************************************************************************************************************************
2302 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2303 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2304     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
2305     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
2306     ) const
2307 {
2308     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2309 
2310     if (pIn->resourceType != ADDR_RSRC_TEX_2D)
2311     {
2312         // Only 2D resource can have a NonBC view...
2313         returnCode = ADDR_INVALIDPARAMS;
2314     }
2315     else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
2316              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2317     {
2318         // Only support BC1~BC7 or ASTC_8x8 for now...
2319         returnCode = ADDR_NOTSUPPORTED;
2320     }
2321     else
2322     {
2323         UINT_32 bcWidth, bcHeight;
2324         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2325 
2326         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2327         infoIn.flags        = pIn->flags;
2328         infoIn.swizzleMode  = pIn->swizzleMode;
2329         infoIn.resourceType = pIn->resourceType;
2330         infoIn.bpp          = bpp;
2331         infoIn.width        = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
2332         infoIn.height       = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
2333         infoIn.numSlices    = pIn->numSlices;
2334         infoIn.numMipLevels = pIn->numMipLevels;
2335         infoIn.numSamples   = 1;
2336         infoIn.numFrags     = 1;
2337 
2338         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2339 
2340         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2341         infoOut.pMipInfo = mipInfo;
2342 
2343         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2344 
2345         if (tiled)
2346         {
2347             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2348         }
2349         else
2350         {
2351             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2352         }
2353 
2354         if (returnCode == ADDR_OK)
2355         {
2356             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2357             subOffIn.swizzleMode      = infoIn.swizzleMode;
2358             subOffIn.resourceType     = infoIn.resourceType;
2359             subOffIn.slice            = pIn->slice;
2360             subOffIn.sliceSize        = infoOut.sliceSize;
2361             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2362             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2363 
2364             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2365 
2366             // For any mipmap level, move nonBc view base address by offset
2367             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2368             pOut->offset = subOffOut.offset;
2369 
2370             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2371             slicePbXorIn.bpe             = infoIn.bpp;
2372             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2373             slicePbXorIn.resourceType    = infoIn.resourceType;
2374             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2375             slicePbXorIn.slice           = pIn->slice;
2376 
2377             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2378 
2379             // For any mipmap level, nonBc view should use computed pbXor
2380             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2381             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2382 
2383             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2384             const UINT_32 requestMipWidth  = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
2385             const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;
2386 
2387             if (inTail)
2388             {
2389                 // For mipmap level that is in mip tail block, hack a lot of things...
2390                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2391                 // are fit in tail block:
2392 
2393                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2394                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2395 
2396                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2397                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2398 
2399                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2400                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2401 
2402                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2403                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2404             }
2405             // This check should cover at least mipId == 0
2406             else if (requestMipWidth << pIn->mipId == infoIn.width)
2407             {
2408                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2409                 // - only one mipmap level and mipId = 0
2410                 pOut->mipId        = 0;
2411                 pOut->numMipLevels = 1;
2412 
2413                 // (mip0) width = requestMipWidth
2414                 pOut->unalignedWidth = requestMipWidth;
2415 
2416                 // (mip0) height = requestMipHeight
2417                 pOut->unalignedHeight = requestMipHeight;
2418             }
2419             else
2420             {
2421                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2422                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2423                 // because single mip view may have different pitch value than original (multiple) mip view...
2424                 // A simple case would be:
2425                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2426                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2427                 //   mip0 width = 0x101/mip1 width = 0x80
2428                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2429                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2430 
2431                 // - 2 levels and mipId = 1
2432                 pOut->mipId        = 1;
2433                 pOut->numMipLevels = 2;
2434 
2435                 const UINT_32 upperMipWidth  =
2436                     PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
2437                 const UINT_32 upperMipHeight =
2438                     PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;
2439 
2440                 const BOOL_32 needToAvoidInTail =
2441                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2442                     TRUE : FALSE;
2443 
2444                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2445                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2446 
2447                 const BOOL_32 needExtraWidth =
2448                     ((upperMipWidth < requestMipWidth * 2) ||
2449                      ((upperMipWidth == requestMipWidth * 2) &&
2450                       ((needToAvoidInTail == TRUE) ||
2451                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2452 
2453                 const BOOL_32 needExtraHeight =
2454                     ((upperMipHeight < requestMipHeight * 2) ||
2455                      ((upperMipHeight == requestMipHeight * 2) &&
2456                       ((needToAvoidInTail == TRUE) ||
2457                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2458 
2459                 // (mip0) width = requestLastMipLevelWidth
2460                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2461 
2462                 // (mip0) height = requestLastMipLevelHeight
2463                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2464             }
2465 
2466             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2467             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2468             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2469             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2470         }
2471     }
2472 
2473     return returnCode;
2474 }
2475 
2476 /**
2477 ************************************************************************************************************************
2478 *   Gfx10Lib::ValidateNonSwModeParams
2479 *
2480 *   @brief
2481 *       Validate compute surface info params except swizzle mode
2482 *
2483 *   @return
2484 *       TRUE if parameters are valid, FALSE otherwise
2485 ************************************************************************************************************************
2486 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2487 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2488     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2489 {
2490     BOOL_32 valid = TRUE;
2491 
2492     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2493     {
2494         ADDR_ASSERT_ALWAYS();
2495         valid = FALSE;
2496     }
2497 
2498     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2499     {
2500         ADDR_ASSERT_ALWAYS();
2501         valid = FALSE;
2502     }
2503 
2504     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2505     const AddrResourceType    rsrcType = pIn->resourceType;
2506     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2507     const BOOL_32             msaa     = (pIn->numFrags > 1);
2508     const BOOL_32             display  = flags.display;
2509     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2510     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2511     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2512     const BOOL_32             stereo   = flags.qbStereo;
2513 
2514 
2515     // Resource type check
2516     if (tex1d)
2517     {
2518         if (msaa || display || stereo)
2519         {
2520             ADDR_ASSERT_ALWAYS();
2521             valid = FALSE;
2522         }
2523     }
2524     else if (tex2d)
2525     {
2526         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2527         {
2528             ADDR_ASSERT_ALWAYS();
2529             valid = FALSE;
2530         }
2531     }
2532     else if (tex3d)
2533     {
2534         if (msaa || display || stereo)
2535         {
2536             ADDR_ASSERT_ALWAYS();
2537             valid = FALSE;
2538         }
2539     }
2540     else
2541     {
2542         ADDR_ASSERT_ALWAYS();
2543         valid = FALSE;
2544     }
2545 
2546     return valid;
2547 }
2548 
2549 /**
2550 ************************************************************************************************************************
2551 *   Gfx10Lib::ValidateSwModeParams
2552 *
2553 *   @brief
2554 *       Validate compute surface info related to swizzle mode
2555 *
2556 *   @return
2557 *       TRUE if parameters are valid, FALSE otherwise
2558 ************************************************************************************************************************
2559 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2560 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2561     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2562 {
2563     BOOL_32 valid = TRUE;
2564 
2565     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2566     {
2567         ADDR_ASSERT_ALWAYS();
2568         valid = FALSE;
2569     }
2570     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2571     {
2572         {
2573             ADDR_ASSERT_ALWAYS();
2574             valid = FALSE;
2575         }
2576     }
2577 
2578     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2579     const AddrResourceType    rsrcType    = pIn->resourceType;
2580     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2581     const BOOL_32             msaa        = (pIn->numFrags > 1);
2582     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2583     const BOOL_32             color       = flags.color;
2584     const BOOL_32             display     = flags.display;
2585     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2586     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2587     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2588     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2589     const BOOL_32             linear      = IsLinear(swizzle);
2590     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2591     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2592     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2593     const BOOL_32             prt         = flags.prt;
2594     const BOOL_32             fmask       = flags.fmask;
2595 
2596     // Misc check
2597     if ((pIn->numFrags > 1) &&
2598         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2599     {
2600         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2601         ADDR_ASSERT_ALWAYS();
2602         valid = FALSE;
2603     }
2604 
2605     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2606     {
2607         ADDR_ASSERT_ALWAYS();
2608         valid = FALSE;
2609     }
2610 
2611     if ((pIn->bpp == 96) && (linear == FALSE))
2612     {
2613         ADDR_ASSERT_ALWAYS();
2614         valid = FALSE;
2615     }
2616 
2617     const UINT_32 swizzleMask = 1 << swizzle;
2618 
2619     // Resource type check
2620     if (tex1d)
2621     {
2622         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2623         {
2624             ADDR_ASSERT_ALWAYS();
2625             valid = FALSE;
2626         }
2627     }
2628     else if (tex2d)
2629     {
2630         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2631         {
2632             {
2633                 ADDR_ASSERT_ALWAYS();
2634                 valid = FALSE;
2635             }
2636         }
2637         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2638                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2639         {
2640             ADDR_ASSERT_ALWAYS();
2641             valid = FALSE;
2642         }
2643 
2644     }
2645     else if (tex3d)
2646     {
2647         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2648             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2649             (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2650         {
2651             ADDR_ASSERT_ALWAYS();
2652             valid = FALSE;
2653         }
2654     }
2655 
2656     // Swizzle type check
2657     if (linear)
2658     {
2659         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2660         {
2661             ADDR_ASSERT_ALWAYS();
2662             valid = FALSE;
2663         }
2664     }
2665     else if (IsZOrderSwizzle(swizzle))
2666     {
2667         if ((pIn->bpp > 64)                         ||
2668             (msaa && (color || (pIn->bpp > 32)))    ||
2669             ElemLib::IsBlockCompressed(pIn->format) ||
2670             ElemLib::IsMacroPixelPacked(pIn->format))
2671         {
2672             ADDR_ASSERT_ALWAYS();
2673             valid = FALSE;
2674         }
2675     }
2676     else if (IsStandardSwizzle(rsrcType, swizzle))
2677     {
2678         if (zbuffer || msaa)
2679         {
2680             ADDR_ASSERT_ALWAYS();
2681             valid = FALSE;
2682         }
2683     }
2684     else if (IsDisplaySwizzle(rsrcType, swizzle))
2685     {
2686         if (zbuffer || msaa)
2687         {
2688             ADDR_ASSERT_ALWAYS();
2689             valid = FALSE;
2690         }
2691     }
2692     else if (IsRtOptSwizzle(swizzle))
2693     {
2694         if (zbuffer)
2695         {
2696             ADDR_ASSERT_ALWAYS();
2697             valid = FALSE;
2698         }
2699     }
2700     else
2701     {
2702         {
2703             ADDR_ASSERT_ALWAYS();
2704             valid = FALSE;
2705         }
2706     }
2707 
2708     // Block type check
2709     if (blk256B)
2710     {
2711         if (zbuffer || tex3d || msaa)
2712         {
2713             ADDR_ASSERT_ALWAYS();
2714             valid = FALSE;
2715         }
2716     }
2717     else if (blkVar)
2718     {
2719         if (m_blockVarSizeLog2 == 0)
2720         {
2721             ADDR_ASSERT_ALWAYS();
2722             valid = FALSE;
2723         }
2724     }
2725 
2726     return valid;
2727 }
2728 
2729 /**
2730 ************************************************************************************************************************
2731 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2732 *
2733 *   @brief
2734 *       Compute surface info sanity check
2735 *
2736 *   @return
2737 *       Offset
2738 ************************************************************************************************************************
2739 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2740 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2741     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2742     ) const
2743 {
2744     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2745 }
2746 
2747 /**
2748 ************************************************************************************************************************
2749 *   Gfx10Lib::HwlGetPreferredSurfaceSetting
2750 *
2751 *   @brief
2752 *       Internal function to get suggested surface information for client to use
2753 *
2754 *   @return
2755 *       ADDR_E_RETURNCODE
2756 ************************************************************************************************************************
2757 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2758 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2759     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2760     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2761     ) const
2762 {
2763     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2764 
2765     if (pIn->flags.fmask)
2766     {
2767         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2768         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2769 
2770         if (forbid64KbBlockType && forbidVarBlockType)
2771         {
2772             // Invalid combination...
2773             ADDR_ASSERT_ALWAYS();
2774             returnCode = ADDR_INVALIDPARAMS;
2775         }
2776         else
2777         {
2778             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2779             pOut->validBlockSet.value            = 0;
2780             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2781             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2782             pOut->validSwModeSet.value           = 0;
2783             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2784             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2785             pOut->canXor                         = TRUE;
2786             pOut->validSwTypeSet.value           = AddrSwSetZ;
2787             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2788 
2789             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2790 
2791             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2792             {
2793                 const UINT_8  maxFmaskSwizzleModeType = 2;
2794                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2795                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2796                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2797                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2798                 const UINT_32 width                   = Max(pIn->width, 1u);
2799                 const UINT_32 height                  = Max(pIn->height, 1u);
2800                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2801 
2802                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2803                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2804                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2805                 UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2806 
2807                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2808                 {
2809                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2810                                                  &blkDim[i].h,
2811                                                  &blkDim[i].d,
2812                                                  fmaskBpp,
2813                                                  1,
2814                                                  pOut->resourceType,
2815                                                  swMode[i]);
2816 
2817                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2818                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2819                 }
2820 
2821                 if (BlockTypeWithinMemoryBudget(padSize[0],
2822                                                 padSize[1],
2823                                                 ratioLow,
2824                                                 ratioHi,
2825                                                 pIn->memoryBudget,
2826                                                 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2827                 {
2828                     use64KbBlockType = FALSE;
2829                 }
2830             }
2831             else if (forbidVarBlockType)
2832             {
2833                 use64KbBlockType = TRUE;
2834             }
2835 
2836             if (use64KbBlockType)
2837             {
2838                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2839             }
2840             else
2841             {
2842                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2843             }
2844         }
2845     }
2846     else
2847     {
2848         UINT_32 bpp    = pIn->bpp;
2849         UINT_32 width  = Max(pIn->width, 1u);
2850         UINT_32 height = Max(pIn->height, 1u);
2851 
2852         // Set format to INVALID will skip this conversion
2853         if (pIn->format != ADDR_FMT_INVALID)
2854         {
2855             ElemMode elemMode = ADDR_UNCOMPRESSED;
2856             UINT_32 expandX, expandY;
2857 
2858             // Get compression/expansion factors and element mode which indicates compression/expansion
2859             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2860                                                 &elemMode,
2861                                                 &expandX,
2862                                                 &expandY);
2863 
2864             UINT_32 basePitch = 0;
2865             GetElemLib()->AdjustSurfaceInfo(elemMode,
2866                                             expandX,
2867                                             expandY,
2868                                             &bpp,
2869                                             &basePitch,
2870                                             &width,
2871                                             &height);
2872         }
2873 
2874         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2875         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2876         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2877         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2878         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2879 
2880         // Pre sanity check on non swizzle mode parameters
2881         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2882         localIn.flags        = pIn->flags;
2883         localIn.resourceType = pIn->resourceType;
2884         localIn.format       = pIn->format;
2885         localIn.bpp          = bpp;
2886         localIn.width        = width;
2887         localIn.height       = height;
2888         localIn.numSlices    = numSlices;
2889         localIn.numMipLevels = numMipLevels;
2890         localIn.numSamples   = numSamples;
2891         localIn.numFrags     = numFrags;
2892 
2893         if (ValidateNonSwModeParams(&localIn))
2894         {
2895             // Forbid swizzle mode(s) by client setting
2896             ADDR2_SWMODE_SET allowedSwModeSet = {};
2897             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2898             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2899             allowedSwModeSet.value |=
2900                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2901                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2902             allowedSwModeSet.value |=
2903                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2904                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2905             allowedSwModeSet.value |=
2906                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2907                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2908             allowedSwModeSet.value |=
2909                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2910                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2911             allowedSwModeSet.value |=
2912                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2913 
2914             if (pIn->preferredSwSet.value != 0)
2915             {
2916                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2917                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2918                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2919                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2920             }
2921 
2922             if (pIn->noXor)
2923             {
2924                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2925             }
2926 
2927             if (pIn->maxAlign > 0)
2928             {
2929                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2930                 {
2931                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2932                 }
2933 
2934                 if (pIn->maxAlign < Size64K)
2935                 {
2936                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2937                 }
2938 
2939                 if (pIn->maxAlign < Size4K)
2940                 {
2941                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2942                 }
2943 
2944                 if (pIn->maxAlign < Size256)
2945                 {
2946                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2947                 }
2948             }
2949 
2950             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2951             switch (pIn->resourceType)
2952             {
2953                 case ADDR_RSRC_TEX_1D:
2954                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2955                     break;
2956 
2957                 case ADDR_RSRC_TEX_2D:
2958                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2959 
2960                     break;
2961 
2962                 case ADDR_RSRC_TEX_3D:
2963                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2964 
2965                     if (pIn->flags.view3dAs2dArray)
2966                     {
2967                         allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2968                     }
2969                     break;
2970 
2971                 default:
2972                     ADDR_ASSERT_ALWAYS();
2973                     allowedSwModeSet.value = 0;
2974                     break;
2975             }
2976 
2977             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2978                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2979                 (bpp > 64)                               ||
2980                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2981             {
2982                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2983             }
2984 
2985             if (pIn->format == ADDR_FMT_32_32_32)
2986             {
2987                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2988             }
2989 
2990             if (msaa)
2991             {
2992                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2993             }
2994 
2995             if (pIn->flags.depth || pIn->flags.stencil)
2996             {
2997                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2998             }
2999 
3000             if (pIn->flags.display)
3001             {
3002                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3003             }
3004 
3005             if (allowedSwModeSet.value != 0)
3006             {
3007 #if DEBUG
3008                 // Post sanity check, at least AddrLib should accept the output generated by its own
3009                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3010 
3011                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3012                 {
3013                     if (validateSwModeSet & 1)
3014                     {
3015                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3016                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3017                     }
3018 
3019                     validateSwModeSet >>= 1;
3020                 }
3021 #endif
3022 
3023                 pOut->resourceType   = pIn->resourceType;
3024                 pOut->validSwModeSet = allowedSwModeSet;
3025                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3026                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3027                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3028 
3029                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3030 
3031                 if (pOut->clientPreferredSwSet.value == 0)
3032                 {
3033                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3034                 }
3035 
3036                 // Apply optional restrictions
3037                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3038                 {
3039                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3040                     {
3041                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3042                         // the GL2 in VAR mode, so it should be avoided.
3043                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3044                     }
3045                     else
3046                     {
3047                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3048                         // But we have to suffer from low performance because there is no other choice...
3049                         ADDR_ASSERT_ALWAYS();
3050                     }
3051                 }
3052 
3053                 if (pIn->flags.needEquation)
3054                 {
3055                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3056                 }
3057 
3058                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3059                 {
3060                     pOut->swizzleMode = ADDR_SW_LINEAR;
3061                 }
3062                 else
3063                 {
3064                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3065 
3066                     if ((height > 1) && (computeMinSize == FALSE))
3067                     {
3068                         // Always ignore linear swizzle mode if:
3069                         // 1. This is a (2D/3D) resource with height > 1
3070                         // 2. Client doesn't require computing minimize size
3071                         allowedSwModeSet.swLinear = 0;
3072                     }
3073 
3074                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3075 
3076                     // Determine block size if there are 2 or more block type candidates
3077                     if (IsPow2(allowedBlockSet.value) == FALSE)
3078                     {
3079                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3080 
3081                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3082 
3083                         if (m_blockVarSizeLog2 != 0)
3084                         {
3085                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3086                         }
3087 
3088                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3089                         {
3090                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3091                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
3092                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3093                         }
3094                         else
3095                         {
3096                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
3097                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
3098                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3099                         }
3100 
3101                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
3102 
3103                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3104                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3105                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3106                         UINT_32       minSizeBlk         = AddrBlockMicro;
3107                         UINT_64       minSize            = 0;
3108 
3109                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3110 
3111                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3112                         {
3113                             if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3114                             {
3115                                 localIn.swizzleMode = swMode[i];
3116 
3117                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3118                                 {
3119                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3120                                 }
3121                                 else
3122                                 {
3123                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3124                                 }
3125 
3126                                 if (returnCode == ADDR_OK)
3127                                 {
3128                                     padSize[i] = localOut.surfSize;
3129 
3130                                     if (minSize == 0)
3131                                     {
3132                                         minSize    = padSize[i];
3133                                         minSizeBlk = i;
3134                                     }
3135                                     else
3136                                     {
3137                                         if (BlockTypeWithinMemoryBudget(
3138                                                 minSize,
3139                                                 padSize[i],
3140                                                 ratioLow,
3141                                                 ratioHi,
3142                                                 0.0,
3143                                                 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3144                                         {
3145                                             minSize    = padSize[i];
3146                                             minSizeBlk = i;
3147                                         }
3148                                     }
3149                                 }
3150                                 else
3151                                 {
3152                                     ADDR_ASSERT_ALWAYS();
3153                                     break;
3154                                 }
3155                             }
3156                         }
3157 
3158                         if (pIn->memoryBudget > 1.0)
3159                         {
3160                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3161                             // smaller-block type again in coming loop
3162                             switch (minSizeBlk)
3163                             {
3164                                 case AddrBlockThick64KB:
3165                                     allowedBlockSet.macroThin64KB = 0;
3166                                 case AddrBlockThinVar:
3167                                 case AddrBlockThin64KB:
3168                                     allowedBlockSet.macroThick4KB = 0;
3169                                 case AddrBlockThick4KB:
3170                                     allowedBlockSet.macroThin4KB = 0;
3171                                 case AddrBlockThin4KB:
3172                                     allowedBlockSet.micro  = 0;
3173                                 case AddrBlockMicro:
3174                                     allowedBlockSet.linear = 0;
3175                                 case AddrBlockLinear:
3176                                     break;
3177 
3178                                 default:
3179                                     ADDR_ASSERT_ALWAYS();
3180                                     break;
3181                             }
3182 
3183                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3184                             {
3185                                 if ((i != minSizeBlk) &&
3186                                     IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3187                                 {
3188                                     if (BlockTypeWithinMemoryBudget(
3189                                             minSize,
3190                                             padSize[i],
3191                                             0,
3192                                             0,
3193                                             pIn->memoryBudget,
3194                                             GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3195                                     {
3196                                         // Clear the block type if the memory waste is unacceptable
3197                                         allowedBlockSet.value &= ~(1u << (i - 1));
3198                                     }
3199                                 }
3200                             }
3201 
3202                             // Remove VAR block type if bigger block type is allowed
3203                             if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3204                             {
3205                                 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3206                                 {
3207                                     allowedBlockSet.var = 0;
3208                                 }
3209                             }
3210 
3211                             // Remove linear block type if 2 or more block types are allowed
3212                             if (IsPow2(allowedBlockSet.value) == FALSE)
3213                             {
3214                                 allowedBlockSet.linear = 0;
3215                             }
3216 
3217                             // Select the biggest allowed block type
3218                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3219 
3220                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3221                             {
3222                                 minSizeBlk = AddrBlockLinear;
3223                             }
3224                         }
3225 
3226                         switch (minSizeBlk)
3227                         {
3228                             case AddrBlockLinear:
3229                                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3230                                 break;
3231 
3232                             case AddrBlockMicro:
3233                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3234                                 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3235                                 break;
3236 
3237                             case AddrBlockThin4KB:
3238                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3239                                 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3240                                 break;
3241 
3242                             case AddrBlockThick4KB:
3243                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3244                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3245                                 break;
3246 
3247                             case AddrBlockThin64KB:
3248                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3249                                                           Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3250                                 break;
3251 
3252                             case AddrBlockThick64KB:
3253                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3254                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3255                                 break;
3256 
3257                             case AddrBlockThinVar:
3258                                 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3259                                 break;
3260 
3261                             default:
3262                                 ADDR_ASSERT_ALWAYS();
3263                                 allowedSwModeSet.value = 0;
3264                                 break;
3265                         }
3266                     }
3267 
3268                     // Block type should be determined.
3269                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3270 
3271                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3272 
3273                     // Determine swizzle type if there are 2 or more swizzle type candidates
3274                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3275                     {
3276                         if (ElemLib::IsBlockCompressed(pIn->format))
3277                         {
3278                             if (allowedSwSet.sw_D)
3279                             {
3280                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3281                             }
3282                             else if (allowedSwSet.sw_S)
3283                             {
3284                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3285                             }
3286                             else
3287                             {
3288                                 ADDR_ASSERT(allowedSwSet.sw_R);
3289                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3290                             }
3291                         }
3292                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3293                         {
3294                             if (allowedSwSet.sw_S)
3295                             {
3296                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3297                             }
3298                             else if (allowedSwSet.sw_D)
3299                             {
3300                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3301                             }
3302                             else
3303                             {
3304                                 ADDR_ASSERT(allowedSwSet.sw_R);
3305                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3306                             }
3307                         }
3308                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3309                         {
3310                             if (pIn->flags.color &&
3311                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3312                                 allowedSwSet.sw_D)
3313                             {
3314                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3315                             }
3316                             else if (allowedSwSet.sw_S)
3317                             {
3318                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3319                             }
3320                             else if (allowedSwSet.sw_R)
3321                             {
3322                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3323                             }
3324                             else
3325                             {
3326                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3327                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3328                             }
3329                         }
3330                         else
3331                         {
3332                             if (allowedSwSet.sw_R)
3333                             {
3334                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3335                             }
3336                             else if (allowedSwSet.sw_D)
3337                             {
3338                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3339                             }
3340                             else if (allowedSwSet.sw_S)
3341                             {
3342                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3343                             }
3344                             else
3345                             {
3346                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3347                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3348                             }
3349                         }
3350 
3351                         // Swizzle type should be determined.
3352                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3353                     }
3354 
3355                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3356                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3357                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3358                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3359                 }
3360             }
3361             else
3362             {
3363                 // Invalid combination...
3364                 ADDR_ASSERT_ALWAYS();
3365                 returnCode = ADDR_INVALIDPARAMS;
3366             }
3367         }
3368         else
3369         {
3370             // Invalid combination...
3371             ADDR_ASSERT_ALWAYS();
3372             returnCode = ADDR_INVALIDPARAMS;
3373         }
3374     }
3375 
3376     return returnCode;
3377 }
3378 
3379 /**
3380 ************************************************************************************************************************
3381 *   Gfx10Lib::ComputeStereoInfo
3382 *
3383 *   @brief
3384 *       Compute height alignment and right eye pipeBankXor for stereo surface
3385 *
3386 *   @return
3387 *       Error code
3388 *
3389 ************************************************************************************************************************
3390 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3391 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3392     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3393     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3394     UINT_32*                                pRightXor   ///< Right eye xor
3395     ) const
3396 {
3397     ADDR_E_RETURNCODE ret = ADDR_OK;
3398 
3399     *pRightXor = 0;
3400 
3401     if (IsNonPrtXor(pIn->swizzleMode))
3402     {
3403         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3404         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3405         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3406         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3407         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3408 
3409         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3410         {
3411             UINT_32 yMax     = 0;
3412             UINT_32 yPosMask = 0;
3413 
3414             // First get "max y bit"
3415             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3416             {
3417                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3418 
3419                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3420                     (m_equationTable[eqIndex].addr[i].index > yMax))
3421                 {
3422                     yMax = m_equationTable[eqIndex].addr[i].index;
3423                 }
3424 
3425                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3426                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3427                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3428                 {
3429                     yMax = m_equationTable[eqIndex].xor1[i].index;
3430                 }
3431 
3432                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3433                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3434                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3435                 {
3436                     yMax = m_equationTable[eqIndex].xor2[i].index;
3437                 }
3438             }
3439 
3440             // Then loop again for populating a position mask of "max Y bit"
3441             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3442             {
3443                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3444                     (m_equationTable[eqIndex].addr[i].index == yMax))
3445                 {
3446                     yPosMask |= 1u << i;
3447                 }
3448                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3449                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3450                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3451                 {
3452                     yPosMask |= 1u << i;
3453                 }
3454                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3455                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3456                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3457                 {
3458                     yPosMask |= 1u << i;
3459                 }
3460             }
3461 
3462             const UINT_32 additionalAlign = 1 << yMax;
3463 
3464             if (additionalAlign >= *pAlignY)
3465             {
3466                 *pAlignY = additionalAlign;
3467 
3468                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3469 
3470                 if ((alignedHeight >> yMax) & 1)
3471                 {
3472                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3473                 }
3474             }
3475         }
3476         else
3477         {
3478             ret = ADDR_INVALIDPARAMS;
3479         }
3480     }
3481 
3482     return ret;
3483 }
3484 
3485 /**
3486 ************************************************************************************************************************
3487 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3488 *
3489 *   @brief
3490 *       Internal function to calculate alignment for tiled surface
3491 *
3492 *   @return
3493 *       ADDR_E_RETURNCODE
3494 ************************************************************************************************************************
3495 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3496 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3497      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3498      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3499      ) const
3500 {
3501     ADDR_E_RETURNCODE ret;
3502 
3503     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3504     pOut->mipChainPitch    = 0;
3505     pOut->mipChainHeight   = 0;
3506     pOut->mipChainSlice    = 0;
3507     pOut->epitchIsHeight   = FALSE;
3508 
3509     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3510     pOut->mipChainInTail   = FALSE;
3511     pOut->firstMipIdInTail = pIn->numMipLevels;
3512 
3513     if (IsBlock256b(pIn->swizzleMode))
3514     {
3515         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3516     }
3517     else
3518     {
3519         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3520     }
3521 
3522     return ret;
3523 }
3524 
3525 
3526 /**
3527 ************************************************************************************************************************
3528 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3529 *
3530 *   @brief
3531 *       Internal function to calculate alignment for micro tiled surface
3532 *
3533 *   @return
3534 *       ADDR_E_RETURNCODE
3535 ************************************************************************************************************************
3536 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3537 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3538      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3539      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3540      ) const
3541 {
3542     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3543                                                          &pOut->blockHeight,
3544                                                          &pOut->blockSlices,
3545                                                          pIn->bpp,
3546                                                          pIn->numFrags,
3547                                                          pIn->resourceType,
3548                                                          pIn->swizzleMode);
3549 
3550     if (ret == ADDR_OK)
3551     {
3552         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3553 
3554         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3555         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3556         pOut->numSlices = pIn->numSlices;
3557         pOut->baseAlign = blockSize;
3558 
3559         if (pIn->numMipLevels > 1)
3560         {
3561             const UINT_32 mip0Width    = pIn->width;
3562             const UINT_32 mip0Height   = pIn->height;
3563             UINT_64       mipSliceSize = 0;
3564 
3565             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3566             {
3567                 UINT_32 mipWidth, mipHeight;
3568 
3569                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3570 
3571                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3572                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3573 
3574                 if (pOut->pMipInfo != NULL)
3575                 {
3576                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3577                     pOut->pMipInfo[i].height           = mipActualHeight;
3578                     pOut->pMipInfo[i].depth            = 1;
3579                     pOut->pMipInfo[i].offset           = mipSliceSize;
3580                     pOut->pMipInfo[i].mipTailOffset    = 0;
3581                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3582                 }
3583 
3584                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3585             }
3586 
3587             pOut->sliceSize = mipSliceSize;
3588             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3589         }
3590         else
3591         {
3592             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3593             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3594 
3595             if (pOut->pMipInfo != NULL)
3596             {
3597                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3598                 pOut->pMipInfo[0].height           = pOut->height;
3599                 pOut->pMipInfo[0].depth            = 1;
3600                 pOut->pMipInfo[0].offset           = 0;
3601                 pOut->pMipInfo[0].mipTailOffset    = 0;
3602                 pOut->pMipInfo[0].macroBlockOffset = 0;
3603             }
3604         }
3605 
3606     }
3607 
3608     return ret;
3609 }
3610 
3611 /**
3612 ************************************************************************************************************************
3613 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3614 *
3615 *   @brief
3616 *       Internal function to calculate alignment for macro tiled surface
3617 *
3618 *   @return
3619 *       ADDR_E_RETURNCODE
3620 ************************************************************************************************************************
3621 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3622 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3623      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3624      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3625      ) const
3626 {
3627     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3628                                                                 &pOut->blockHeight,
3629                                                                 &pOut->blockSlices,
3630                                                                 pIn->bpp,
3631                                                                 pIn->numFrags,
3632                                                                 pIn->resourceType,
3633                                                                 pIn->swizzleMode);
3634 
3635     if (returnCode == ADDR_OK)
3636     {
3637         UINT_32 heightAlign = pOut->blockHeight;
3638 
3639         if (pIn->flags.qbStereo)
3640         {
3641             UINT_32 rightXor = 0;
3642 
3643             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3644 
3645             if (returnCode == ADDR_OK)
3646             {
3647                 pOut->pStereoInfo->rightSwizzle = rightXor;
3648             }
3649         }
3650 
3651         if (returnCode == ADDR_OK)
3652         {
3653             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3654             const UINT_32 blockSize     = 1 << blockSizeLog2;
3655 
3656             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3657             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3658             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3659             pOut->baseAlign = blockSize;
3660 
3661             if (pIn->numMipLevels > 1)
3662             {
3663                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3664                                                                 pIn->swizzleMode,
3665                                                                 pOut->blockWidth,
3666                                                                 pOut->blockHeight,
3667                                                                 pOut->blockSlices);
3668                 const UINT_32 mip0Width         = pIn->width;
3669                 const UINT_32 mip0Height        = pIn->height;
3670                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3671                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3672                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3673                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3674                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3675                 UINT_64       mipChainSliceSize = 0;
3676                 UINT_64       mipSize[MaxMipLevels];
3677                 UINT_64       mipSliceSize[MaxMipLevels];
3678 
3679                 Dim3d fixedTailMaxDim = tailMaxDim;
3680 
3681                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3682                 {
3683                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3684                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3685                 }
3686 
3687                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3688                 {
3689                     UINT_32 mipWidth, mipHeight, mipDepth;
3690 
3691                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3692 
3693                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3694                     {
3695                         firstMipInTail     = i;
3696                         mipChainSliceSize += blockSize / pOut->blockSlices;
3697                         break;
3698                     }
3699                     else
3700                     {
3701                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3702                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3703                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3704                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3705 
3706                         mipSize[i]         = sliceSize * depth;
3707                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3708                         mipChainSliceSize += sliceSize;
3709 
3710                         if (pOut->pMipInfo != NULL)
3711                         {
3712                             pOut->pMipInfo[i].pitch  = pitch;
3713                             pOut->pMipInfo[i].height = height;
3714                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3715                         }
3716                     }
3717                 }
3718 
3719                 pOut->sliceSize        = mipChainSliceSize;
3720                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3721                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3722                 pOut->firstMipIdInTail = firstMipInTail;
3723 
3724                 if (pOut->pMipInfo != NULL)
3725                 {
3726                     UINT_64 offset         = 0;
3727                     UINT_64 macroBlkOffset = 0;
3728                     UINT_32 tailMaxDepth   = 0;
3729 
3730                     if (firstMipInTail != pIn->numMipLevels)
3731                     {
3732                         UINT_32 mipWidth, mipHeight;
3733 
3734                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3735                                    &mipWidth, &mipHeight, &tailMaxDepth);
3736 
3737                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3738                         macroBlkOffset = blockSize;
3739                     }
3740 
3741                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3742                     {
3743                         pOut->pMipInfo[i].offset           = offset;
3744                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3745                         pOut->pMipInfo[i].mipTailOffset    = 0;
3746 
3747                         offset         += mipSize[i];
3748                         macroBlkOffset += mipSliceSize[i];
3749                     }
3750 
3751                     UINT_32 pitch  = tailMaxDim.w;
3752                     UINT_32 height = tailMaxDim.h;
3753                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3754 
3755                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3756 
3757                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3758                     {
3759                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3760                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3761 
3762                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3763                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3764                         pOut->pMipInfo[i].macroBlockOffset = 0;
3765 
3766                         pOut->pMipInfo[i].pitch  = pitch;
3767                         pOut->pMipInfo[i].height = height;
3768                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3769 
3770                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3771                                        ((mipOffset >> 10) & 2)  |
3772                                        ((mipOffset >> 11) & 4)  |
3773                                        ((mipOffset >> 12) & 8)  |
3774                                        ((mipOffset >> 13) & 16) |
3775                                        ((mipOffset >> 14) & 32);
3776                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3777                                        ((mipOffset >> 9)  & 2)  |
3778                                        ((mipOffset >> 10) & 4)  |
3779                                        ((mipOffset >> 11) & 8)  |
3780                                        ((mipOffset >> 12) & 16) |
3781                                        ((mipOffset >> 13) & 32);
3782 
3783                         if (blockSizeLog2 & 1)
3784                         {
3785                             const UINT_32 temp = mipX;
3786                             mipX = mipY;
3787                             mipY = temp;
3788 
3789                             if (index & 1)
3790                             {
3791                                 mipY = (mipY << 1) | (mipX & 1);
3792                                 mipX = mipX >> 1;
3793                             }
3794                         }
3795 
3796                         if (isThin)
3797                         {
3798                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3799                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3800                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3801 
3802                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3803                             height = Max(height >> 1, Block256_2d[index].h);
3804                         }
3805                         else
3806                         {
3807                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3808                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3809                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3810 
3811                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3812                             height = Max(height >> 1, Block256_3d[index].h);
3813                         }
3814                     }
3815                 }
3816             }
3817             else
3818             {
3819                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3820                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3821 
3822                 if (pOut->pMipInfo != NULL)
3823                 {
3824                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3825                     pOut->pMipInfo[0].height           = pOut->height;
3826                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3827                     pOut->pMipInfo[0].offset           = 0;
3828                     pOut->pMipInfo[0].mipTailOffset    = 0;
3829                     pOut->pMipInfo[0].macroBlockOffset = 0;
3830                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3831                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3832                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3833                 }
3834             }
3835         }
3836     }
3837 
3838     return returnCode;
3839 }
3840 
3841 /**
3842 ************************************************************************************************************************
3843 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3844 *
3845 *   @brief
3846 *       Internal function to calculate address from coord for tiled swizzle surface
3847 *
3848 *   @return
3849 *       ADDR_E_RETURNCODE
3850 ************************************************************************************************************************
3851 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3852 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3853      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3854      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3855      ) const
3856 {
3857     ADDR_E_RETURNCODE ret;
3858 
3859     if (IsBlock256b(pIn->swizzleMode))
3860     {
3861         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3862     }
3863     else
3864     {
3865         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3866     }
3867 
3868     return ret;
3869 }
3870 
3871 /**
3872 ************************************************************************************************************************
3873 *   Gfx10Lib::ComputeOffsetFromEquation
3874 *
3875 *   @brief
3876 *       Compute offset from equation
3877 *
3878 *   @return
3879 *       Offset
3880 ************************************************************************************************************************
3881 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3882 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3883     const ADDR_EQUATION* pEq,   ///< Equation
3884     UINT_32              x,     ///< x coord in bytes
3885     UINT_32              y,     ///< y coord in pixel
3886     UINT_32              z      ///< z coord in slice
3887     ) const
3888 {
3889     UINT_32 offset = 0;
3890 
3891     for (UINT_32 i = 0; i < pEq->numBits; i++)
3892     {
3893         UINT_32 v = 0;
3894 
3895         if (pEq->addr[i].valid)
3896         {
3897             if (pEq->addr[i].channel == 0)
3898             {
3899                 v ^= (x >> pEq->addr[i].index) & 1;
3900             }
3901             else if (pEq->addr[i].channel == 1)
3902             {
3903                 v ^= (y >> pEq->addr[i].index) & 1;
3904             }
3905             else
3906             {
3907                 ADDR_ASSERT(pEq->addr[i].channel == 2);
3908                 v ^= (z >> pEq->addr[i].index) & 1;
3909             }
3910         }
3911 
3912         if (pEq->xor1[i].valid)
3913         {
3914             if (pEq->xor1[i].channel == 0)
3915             {
3916                 v ^= (x >> pEq->xor1[i].index) & 1;
3917             }
3918             else if (pEq->xor1[i].channel == 1)
3919             {
3920                 v ^= (y >> pEq->xor1[i].index) & 1;
3921             }
3922             else
3923             {
3924                 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3925                 v ^= (z >> pEq->xor1[i].index) & 1;
3926             }
3927         }
3928 
3929         if (pEq->xor2[i].valid)
3930         {
3931             if (pEq->xor2[i].channel == 0)
3932             {
3933                 v ^= (x >> pEq->xor2[i].index) & 1;
3934             }
3935             else if (pEq->xor2[i].channel == 1)
3936             {
3937                 v ^= (y >> pEq->xor2[i].index) & 1;
3938             }
3939             else
3940             {
3941                 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3942                 v ^= (z >> pEq->xor2[i].index) & 1;
3943             }
3944         }
3945 
3946         offset |= (v << i);
3947     }
3948 
3949     return offset;
3950 }
3951 
3952 /**
3953 ************************************************************************************************************************
3954 *   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3955 *
3956 *   @brief
3957 *       Compute offset from swizzle pattern
3958 *
3959 *   @return
3960 *       Offset
3961 ************************************************************************************************************************
3962 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3963 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3964     const UINT_64* pPattern,    ///< Swizzle pattern
3965     UINT_32        numBits,     ///< Number of bits in pattern
3966     UINT_32        x,           ///< x coord in pixel
3967     UINT_32        y,           ///< y coord in pixel
3968     UINT_32        z,           ///< z coord in slice
3969     UINT_32        s            ///< sample id
3970     ) const
3971 {
3972     UINT_32                 offset          = 0;
3973     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3974 
3975     for (UINT_32 i = 0; i < numBits; i++)
3976     {
3977         UINT_32 v = 0;
3978 
3979         if (pSwizzlePattern[i].x != 0)
3980         {
3981             UINT_16 mask  = pSwizzlePattern[i].x;
3982             UINT_32 xBits = x;
3983 
3984             while (mask != 0)
3985             {
3986                 if (mask & 1)
3987                 {
3988                     v ^= xBits & 1;
3989                 }
3990 
3991                 xBits >>= 1;
3992                 mask  >>= 1;
3993             }
3994         }
3995 
3996         if (pSwizzlePattern[i].y != 0)
3997         {
3998             UINT_16 mask  = pSwizzlePattern[i].y;
3999             UINT_32 yBits = y;
4000 
4001             while (mask != 0)
4002             {
4003                 if (mask & 1)
4004                 {
4005                     v ^= yBits & 1;
4006                 }
4007 
4008                 yBits >>= 1;
4009                 mask  >>= 1;
4010             }
4011         }
4012 
4013         if (pSwizzlePattern[i].z != 0)
4014         {
4015             UINT_16 mask  = pSwizzlePattern[i].z;
4016             UINT_32 zBits = z;
4017 
4018             while (mask != 0)
4019             {
4020                 if (mask & 1)
4021                 {
4022                     v ^= zBits & 1;
4023                 }
4024 
4025                 zBits >>= 1;
4026                 mask  >>= 1;
4027             }
4028         }
4029 
4030         if (pSwizzlePattern[i].s != 0)
4031         {
4032             UINT_16 mask  = pSwizzlePattern[i].s;
4033             UINT_32 sBits = s;
4034 
4035             while (mask != 0)
4036             {
4037                 if (mask & 1)
4038                 {
4039                     v ^= sBits & 1;
4040                 }
4041 
4042                 sBits >>= 1;
4043                 mask  >>= 1;
4044             }
4045         }
4046 
4047         offset |= (v << i);
4048     }
4049 
4050     return offset;
4051 }
4052 
4053 /**
4054 ************************************************************************************************************************
4055 *   Gfx10Lib::GetSwizzlePatternInfo
4056 *
4057 *   @brief
4058 *       Get swizzle pattern
4059 *
4060 *   @return
4061 *       Swizzle pattern information
4062 ************************************************************************************************************************
4063 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4064 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4065     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
4066     AddrResourceType resourceType,      ///< Resource type
4067     UINT_32          elemLog2,          ///< Element size in bytes log2
4068     UINT_32          numFrag            ///< Number of fragment
4069     ) const
4070 {
4071     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4072     const ADDR_SW_PATINFO* patInfo     = NULL;
4073     const UINT_32          swizzleMask = 1 << swizzleMode;
4074 
4075     if (IsBlockVariable(swizzleMode))
4076     {
4077         if (m_blockVarSizeLog2 != 0)
4078         {
4079             ADDR_ASSERT(m_settings.supportRbPlus);
4080 
4081             if (IsRtOptSwizzle(swizzleMode))
4082             {
4083                 if (numFrag == 1)
4084                 {
4085                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4086                 }
4087                 else if (numFrag == 2)
4088                 {
4089                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4090                 }
4091                 else if (numFrag == 4)
4092                 {
4093                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4094                 }
4095                 else
4096                 {
4097                     ADDR_ASSERT(numFrag == 8);
4098                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4099                 }
4100             }
4101             else if (IsZOrderSwizzle(swizzleMode))
4102             {
4103                 if (numFrag == 1)
4104                 {
4105                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4106                 }
4107                 else if (numFrag == 2)
4108                 {
4109                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4110                 }
4111                 else if (numFrag == 4)
4112                 {
4113                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4114                 }
4115                 else
4116                 {
4117                     ADDR_ASSERT(numFrag == 8);
4118                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4119                 }
4120             }
4121         }
4122     }
4123     else if (IsLinear(swizzleMode) == FALSE)
4124     {
4125         if (resourceType == ADDR_RSRC_TEX_3D)
4126         {
4127             ADDR_ASSERT(numFrag == 1);
4128 
4129             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4130             {
4131                 if (IsRtOptSwizzle(swizzleMode))
4132                 {
4133                     patInfo = m_settings.supportRbPlus ?
4134                               GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4135                 }
4136                 else if (IsZOrderSwizzle(swizzleMode))
4137                 {
4138                     patInfo = m_settings.supportRbPlus ?
4139                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4140                 }
4141                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4142                 {
4143                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4144                     patInfo = m_settings.supportRbPlus ?
4145                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4146                 }
4147                 else
4148                 {
4149                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4150 
4151                     if (IsBlock4kb(swizzleMode))
4152                     {
4153                         if (swizzleMode == ADDR_SW_4KB_S)
4154                         {
4155                             patInfo = m_settings.supportRbPlus ?
4156                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4157                         }
4158                         else
4159                         {
4160                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4161                             patInfo = m_settings.supportRbPlus ?
4162                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4163                         }
4164                     }
4165                     else
4166                     {
4167                         if (swizzleMode == ADDR_SW_64KB_S)
4168                         {
4169                             patInfo = m_settings.supportRbPlus ?
4170                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4171                         }
4172                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4173                         {
4174                             patInfo = m_settings.supportRbPlus ?
4175                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4176                         }
4177                         else
4178                         {
4179                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4180                             patInfo = m_settings.supportRbPlus ?
4181                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4182                         }
4183                     }
4184                 }
4185             }
4186         }
4187         else
4188         {
4189             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4190             {
4191                 if (IsBlock256b(swizzleMode))
4192                 {
4193                     if (swizzleMode == ADDR_SW_256B_S)
4194                     {
4195                         patInfo = m_settings.supportRbPlus ?
4196                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4197                     }
4198                     else
4199                     {
4200                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4201                         patInfo = m_settings.supportRbPlus ?
4202                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4203                     }
4204                 }
4205                 else if (IsBlock4kb(swizzleMode))
4206                 {
4207                     if (IsStandardSwizzle(resourceType, swizzleMode))
4208                     {
4209                         if (swizzleMode == ADDR_SW_4KB_S)
4210                         {
4211                             patInfo = m_settings.supportRbPlus ?
4212                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4213                         }
4214                         else
4215                         {
4216                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4217                             patInfo = m_settings.supportRbPlus ?
4218                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4219                         }
4220                     }
4221                     else
4222                     {
4223                         if (swizzleMode == ADDR_SW_4KB_D)
4224                         {
4225                             patInfo = m_settings.supportRbPlus ?
4226                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4227                         }
4228                         else
4229                         {
4230                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4231                             patInfo = m_settings.supportRbPlus ?
4232                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4233                         }
4234                     }
4235                 }
4236                 else
4237                 {
4238                     if (IsRtOptSwizzle(swizzleMode))
4239                     {
4240                         if (numFrag == 1)
4241                         {
4242                             patInfo = m_settings.supportRbPlus ?
4243                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4244                         }
4245                         else if (numFrag == 2)
4246                         {
4247                             patInfo = m_settings.supportRbPlus ?
4248                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4249                         }
4250                         else if (numFrag == 4)
4251                         {
4252                             patInfo = m_settings.supportRbPlus ?
4253                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4254                         }
4255                         else
4256                         {
4257                             ADDR_ASSERT(numFrag == 8);
4258                             patInfo = m_settings.supportRbPlus ?
4259                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4260                         }
4261                     }
4262                     else if (IsZOrderSwizzle(swizzleMode))
4263                     {
4264                         if (numFrag == 1)
4265                         {
4266                             patInfo = m_settings.supportRbPlus ?
4267                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4268                         }
4269                         else if (numFrag == 2)
4270                         {
4271                             patInfo = m_settings.supportRbPlus ?
4272                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4273                         }
4274                         else if (numFrag == 4)
4275                         {
4276                             patInfo = m_settings.supportRbPlus ?
4277                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4278                         }
4279                         else
4280                         {
4281                             ADDR_ASSERT(numFrag == 8);
4282                             patInfo = m_settings.supportRbPlus ?
4283                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4284                         }
4285                     }
4286                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4287                     {
4288                         if (swizzleMode == ADDR_SW_64KB_D)
4289                         {
4290                             patInfo = m_settings.supportRbPlus ?
4291                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4292                         }
4293                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4294                         {
4295                             patInfo = m_settings.supportRbPlus ?
4296                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4297                         }
4298                         else
4299                         {
4300                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4301                             patInfo = m_settings.supportRbPlus ?
4302                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4303                         }
4304                     }
4305                     else
4306                     {
4307                         if (swizzleMode == ADDR_SW_64KB_S)
4308                         {
4309                             patInfo = m_settings.supportRbPlus ?
4310                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4311                         }
4312                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4313                         {
4314                             patInfo = m_settings.supportRbPlus ?
4315                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4316                         }
4317                         else
4318                         {
4319                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4320                             patInfo = m_settings.supportRbPlus ?
4321                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4322                         }
4323                     }
4324                 }
4325             }
4326         }
4327     }
4328 
4329     return (patInfo != NULL) ? &patInfo[index] : NULL;
4330 }
4331 
4332 
4333 /**
4334 ************************************************************************************************************************
4335 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4336 *
4337 *   @brief
4338 *       Internal function to calculate address from coord for micro tiled swizzle surface
4339 *
4340 *   @return
4341 *       ADDR_E_RETURNCODE
4342 ************************************************************************************************************************
4343 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4344 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4345      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4346      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4347      ) const
4348 {
4349     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4350     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4351     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4352 
4353     localIn.swizzleMode  = pIn->swizzleMode;
4354     localIn.flags        = pIn->flags;
4355     localIn.resourceType = pIn->resourceType;
4356     localIn.bpp          = pIn->bpp;
4357     localIn.width        = Max(pIn->unalignedWidth,  1u);
4358     localIn.height       = Max(pIn->unalignedHeight, 1u);
4359     localIn.numSlices    = Max(pIn->numSlices,       1u);
4360     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4361     localIn.numSamples   = Max(pIn->numSamples,      1u);
4362     localIn.numFrags     = Max(pIn->numFrags,        1u);
4363     localOut.pMipInfo    = mipInfo;
4364 
4365     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4366 
4367     if (ret == ADDR_OK)
4368     {
4369         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4370         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4371         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4372         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4373 
4374         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4375         {
4376             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4377             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4378             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4379             const UINT_32 blockIndex   = yb * pb + xb;
4380             const UINT_32 blockSize    = 256;
4381             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4382                                                                    pIn->x << elemLog2,
4383                                                                    pIn->y,
4384                                                                    0);
4385             pOut->addr = localOut.sliceSize * pIn->slice +
4386                          mipInfo[pIn->mipId].macroBlockOffset +
4387                          (blockIndex * blockSize) +
4388                          blk256Offset;
4389         }
4390         else
4391         {
4392             ret = ADDR_INVALIDPARAMS;
4393         }
4394     }
4395 
4396     return ret;
4397 }
4398 
4399 /**
4400 ************************************************************************************************************************
4401 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4402 *
4403 *   @brief
4404 *       Internal function to calculate address from coord for macro tiled swizzle surface
4405 *
4406 *   @return
4407 *       ADDR_E_RETURNCODE
4408 ************************************************************************************************************************
4409 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4410 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4411      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4412      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4413      ) const
4414 {
4415     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4416     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4417     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4418 
4419     localIn.swizzleMode  = pIn->swizzleMode;
4420     localIn.flags        = pIn->flags;
4421     localIn.resourceType = pIn->resourceType;
4422     localIn.bpp          = pIn->bpp;
4423     localIn.width        = Max(pIn->unalignedWidth,  1u);
4424     localIn.height       = Max(pIn->unalignedHeight, 1u);
4425     localIn.numSlices    = Max(pIn->numSlices,       1u);
4426     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4427     localIn.numSamples   = Max(pIn->numSamples,      1u);
4428     localIn.numFrags     = Max(pIn->numFrags,        1u);
4429     localOut.pMipInfo    = mipInfo;
4430 
4431     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4432 
4433     if (ret == ADDR_OK)
4434     {
4435         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4436         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4437         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4438         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4439         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4440         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4441                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4442 
4443         if (localIn.numFrags > 1)
4444         {
4445             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4446                                                                     pIn->resourceType,
4447                                                                     elemLog2,
4448                                                                     localIn.numFrags);
4449 
4450             if (pPatInfo != NULL)
4451             {
4452                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4453                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4454                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4455                 const UINT_64 blkIdx    = yb * pb + xb;
4456 
4457                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4458                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4459 
4460                 const UINT_32 blkOffset =
4461                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4462                                                     blkSizeLog2,
4463                                                     pIn->x,
4464                                                     pIn->y,
4465                                                     pIn->slice,
4466                                                     pIn->sample);
4467 
4468                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4469                              (blkIdx << blkSizeLog2) +
4470                              (blkOffset ^ pipeBankXor);
4471             }
4472             else
4473             {
4474                 ret = ADDR_INVALIDPARAMS;
4475             }
4476         }
4477         else
4478         {
4479             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4480             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4481             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4482 
4483             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4484             {
4485                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4486                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4487                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4488                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4489                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4490                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4491                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4492                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4493                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4494                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4495                 const UINT_64 blkIdx    = yb * pb + xb;
4496                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4497                                                                     x << elemLog2,
4498                                                                     y,
4499                                                                     z);
4500                 pOut->addr = sliceSize * sliceId +
4501                              mipInfo[pIn->mipId].macroBlockOffset +
4502                              (blkIdx << blkSizeLog2) +
4503                              (blkOffset ^ pipeBankXor);
4504             }
4505             else
4506             {
4507                 ret = ADDR_INVALIDPARAMS;
4508             }
4509         }
4510     }
4511 
4512     return ret;
4513 }
4514 
4515 /**
4516 ************************************************************************************************************************
4517 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4518 *
4519 *   @brief
4520 *       Gets maximum alignments
4521 *   @return
4522 *       maximum alignments
4523 ************************************************************************************************************************
4524 */
HwlComputeMaxBaseAlignments() const4525 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4526 {
4527     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4528 }
4529 
4530 /**
4531 ************************************************************************************************************************
4532 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4533 *
4534 *   @brief
4535 *       Gets maximum alignments for metadata
4536 *   @return
4537 *       maximum alignments for metadata
4538 ************************************************************************************************************************
4539 */
HwlComputeMaxMetaBaseAlignments() const4540 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4541 {
4542     Dim3d metaBlk;
4543 
4544     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4545     {
4546         ADDR_SW_64KB_Z_X,
4547         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4548     };
4549 
4550     UINT_32 maxBaseAlignHtile = 0;
4551     UINT_32 maxBaseAlignCmask = 0;
4552 
4553     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4554     {
4555         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4556         {
4557             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4558             {
4559                 // Max base alignment for Htile
4560                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4561                                                                 ADDR_RSRC_TEX_2D,
4562                                                                 ValidSwizzleModeForXmask[swIdx],
4563                                                                 bppLog2,
4564                                                                 numFragLog2,
4565                                                                 TRUE,
4566                                                                 &metaBlk);
4567 
4568                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4569             }
4570         }
4571 
4572         // Max base alignment for Cmask
4573         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4574                                                         ADDR_RSRC_TEX_2D,
4575                                                         ValidSwizzleModeForXmask[swIdx],
4576                                                         0,
4577                                                         0,
4578                                                         TRUE,
4579                                                         &metaBlk);
4580 
4581         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4582     }
4583 
4584     // Max base alignment for 2D Dcc
4585     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4586     {
4587         ADDR_SW_64KB_S_X,
4588         ADDR_SW_64KB_D_X,
4589         ADDR_SW_64KB_R_X,
4590         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4591     };
4592 
4593     UINT_32 maxBaseAlignDcc2D = 0;
4594 
4595     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4596     {
4597         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4598         {
4599             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4600             {
4601                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4602                                                              ADDR_RSRC_TEX_2D,
4603                                                              ValidSwizzleModeForDcc2D[swIdx],
4604                                                              bppLog2,
4605                                                              numFragLog2,
4606                                                              TRUE,
4607                                                              &metaBlk);
4608 
4609                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4610             }
4611         }
4612     }
4613 
4614     // Max base alignment for 3D Dcc
4615     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4616     {
4617         ADDR_SW_64KB_Z_X,
4618         ADDR_SW_64KB_S_X,
4619         ADDR_SW_64KB_D_X,
4620         ADDR_SW_64KB_R_X,
4621         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4622     };
4623 
4624     UINT_32 maxBaseAlignDcc3D = 0;
4625 
4626     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4627     {
4628         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4629         {
4630             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4631                                                          ADDR_RSRC_TEX_3D,
4632                                                          ValidSwizzleModeForDcc3D[swIdx],
4633                                                          bppLog2,
4634                                                          0,
4635                                                          TRUE,
4636                                                          &metaBlk);
4637 
4638             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4639         }
4640     }
4641 
4642     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4643 }
4644 
4645 /**
4646 ************************************************************************************************************************
4647 *   Gfx10Lib::GetMetaElementSizeLog2
4648 *
4649 *   @brief
4650 *       Gets meta data element size log2
4651 *   @return
4652 *       Meta data element size log2
4653 ************************************************************************************************************************
4654 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4655 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4656     Gfx10DataType dataType) ///< Data surface type
4657 {
4658     INT_32 elemSizeLog2 = 0;
4659 
4660     if (dataType == Gfx10DataColor)
4661     {
4662         elemSizeLog2 = 0;
4663     }
4664     else if (dataType == Gfx10DataDepthStencil)
4665     {
4666         elemSizeLog2 = 2;
4667     }
4668     else
4669     {
4670         ADDR_ASSERT(dataType == Gfx10DataFmask);
4671         elemSizeLog2 = -1;
4672     }
4673 
4674     return elemSizeLog2;
4675 }
4676 
4677 /**
4678 ************************************************************************************************************************
4679 *   Gfx10Lib::GetMetaCacheSizeLog2
4680 *
4681 *   @brief
4682 *       Gets meta data cache line size log2
4683 *   @return
4684 *       Meta data cache line size log2
4685 ************************************************************************************************************************
4686 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4687 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4688     Gfx10DataType dataType) ///< Data surface type
4689 {
4690     INT_32 cacheSizeLog2 = 0;
4691 
4692     if (dataType == Gfx10DataColor)
4693     {
4694         cacheSizeLog2 = 6;
4695     }
4696     else if (dataType == Gfx10DataDepthStencil)
4697     {
4698         cacheSizeLog2 = 8;
4699     }
4700     else
4701     {
4702         ADDR_ASSERT(dataType == Gfx10DataFmask);
4703         cacheSizeLog2 = 8;
4704     }
4705     return cacheSizeLog2;
4706 }
4707 
4708 /**
4709 ************************************************************************************************************************
4710 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
4711 *
4712 *   @brief
4713 *       Internal function to calculate alignment for linear surface
4714 *
4715 *   @return
4716 *       ADDR_E_RETURNCODE
4717 ************************************************************************************************************************
4718 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4719 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4720      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4721      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4722      ) const
4723 {
4724     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4725 
4726     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4727     {
4728         returnCode = ADDR_INVALIDPARAMS;
4729     }
4730     else
4731     {
4732         const UINT_32 elementBytes = pIn->bpp >> 3;
4733         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4734         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4735         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4736         UINT_32       actualHeight = pIn->height;
4737         UINT_64       sliceSize    = 0;
4738 
4739         if (pIn->numMipLevels > 1)
4740         {
4741             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4742             {
4743                 UINT_32 mipWidth, mipHeight;
4744 
4745                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4746 
4747                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4748 
4749                 if (pOut->pMipInfo != NULL)
4750                 {
4751                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4752                     pOut->pMipInfo[i].height           = mipHeight;
4753                     pOut->pMipInfo[i].depth            = mipDepth;
4754                     pOut->pMipInfo[i].offset           = sliceSize;
4755                     pOut->pMipInfo[i].mipTailOffset    = 0;
4756                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4757                 }
4758 
4759                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4760             }
4761         }
4762         else
4763         {
4764             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4765 
4766             if (returnCode == ADDR_OK)
4767             {
4768                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4769 
4770                 if (pOut->pMipInfo != NULL)
4771                 {
4772                     pOut->pMipInfo[0].pitch            = pitch;
4773                     pOut->pMipInfo[0].height           = actualHeight;
4774                     pOut->pMipInfo[0].depth            = mipDepth;
4775                     pOut->pMipInfo[0].offset           = 0;
4776                     pOut->pMipInfo[0].mipTailOffset    = 0;
4777                     pOut->pMipInfo[0].macroBlockOffset = 0;
4778                 }
4779             }
4780         }
4781 
4782         if (returnCode == ADDR_OK)
4783         {
4784             pOut->pitch          = pitch;
4785             pOut->height         = actualHeight;
4786             pOut->numSlices      = pIn->numSlices;
4787             pOut->sliceSize      = sliceSize;
4788             pOut->surfSize       = sliceSize * pOut->numSlices;
4789             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4790             pOut->blockWidth     = pitchAlign;
4791             pOut->blockHeight    = 1;
4792             pOut->blockSlices    = 1;
4793 
4794             // Following members are useless on GFX10
4795             pOut->mipChainPitch  = 0;
4796             pOut->mipChainHeight = 0;
4797             pOut->mipChainSlice  = 0;
4798             pOut->epitchIsHeight = FALSE;
4799 
4800             // Post calculation validate
4801             ADDR_ASSERT(pOut->sliceSize > 0);
4802         }
4803     }
4804 
4805     return returnCode;
4806 }
4807 
4808 } // V2
4809 } // Addr
4810