1 /*
2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ************************************************************************************************************************
29 * @file  gfx9addrlib.cpp
* @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33 
34 #include "gfx9addrlib.h"
35 
36 #include "gfx9_gb_reg.h"
37 
38 #include "amdgpu_asic_addr.h"
39 
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42 
43 namespace Addr
44 {
45 
46 /**
47 ************************************************************************************************************************
48 *   Gfx9HwlInit
49 *
50 *   @brief
51 *       Creates an Gfx9Lib object.
52 *
53 *   @return
54 *       Returns an Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59     return V2::Gfx9Lib::CreateObj(pClient);
60 }
61 
62 namespace V2
63 {
64 
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 //                               Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 
// Per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows. Each row is labelled with the swizzle mode it describes
// and each column is one flag, in the order given by the header comment on the opening brace line.
// The Gfx9Lib constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//  Lin   256B  4KB   64KB  Var   Z     Std   Disp  Rot   XOR   T     RtOpt Reserved
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
    {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
    {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
    {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_R

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_R

    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved

    // The "_T" modes set both the XOR and the T columns.
    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_Z_T
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}, // ADDR_SW_64KB_R_T

    // The "_X" modes set the XOR column only.
    {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_Z_X
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_4KB_R_X

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_64KB_R_X

    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
};
112 
// Sixteen-entry constant table.
// NOTE(review): presumably the offsets of successive mip-tail levels expressed in 256-byte units -- confirm against
// the code that reads this table (not visible in this part of the file).
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};

// Five Dim3d entries whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the 256-byte block dimensions for 1/2/4/8/16-byte elements in 3D "S" swizzle modes,
// indexed by log2(bytes per element) -- confirm against the users of this table.
const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

// Five Dim3d entries whose element counts are 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the "Z" swizzle counterpart of Block256_3dS with the same indexing -- confirm.
const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
118 
119 /**
120 ************************************************************************************************************************
121 *   Gfx9Lib::Gfx9Lib
122 *
123 *   @brief
124 *       Constructor
125 *
126 ************************************************************************************************************************
127 */
Gfx9Lib(const Client * pClient)128 Gfx9Lib::Gfx9Lib(const Client* pClient)
129     :
130     Lib(pClient)
131 {
132     memset(&m_settings, 0, sizeof(m_settings));
133     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
134     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
135     m_metaEqOverrideIndex = 0;
136 }
137 
/**
************************************************************************************************************************
*   Gfx9Lib::~Gfx9Lib
*
*   @brief
*       Destructor. The body is empty: this class performs no cleanup of its own here.
************************************************************************************************************************
*/
Gfx9Lib::~Gfx9Lib()
{
}
149 
150 /**
151 ************************************************************************************************************************
152 *   Gfx9Lib::HwlComputeHtileInfo
153 *
154 *   @brief
155 *       Interface function stub of AddrComputeHtilenfo
156 *
157 *   @return
158 *       ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
162     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
163     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
164     ) const
165 {
166     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
167                                                        pIn->swizzleMode);
168 
169     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
170 
171     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
172 
173     if ((numPipeTotal == 1) && (numRbTotal == 1))
174     {
175         numCompressBlkPerMetaBlkLog2 = 10;
176     }
177     else
178     {
179         if (m_settings.applyAliasFix)
180         {
181             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
182         }
183         else
184         {
185             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
186         }
187     }
188 
189     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190 
191     Dim3d   metaBlkDim   = {8, 8, 1};
192     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194     UINT_32 heightAmp    = totalAmpBits - widthAmp;
195     metaBlkDim.w <<= widthAmp;
196     metaBlkDim.h <<= heightAmp;
197 
198 #if DEBUG
199     Dim3d metaBlkDimDbg = {8, 8, 1};
200     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201     {
202         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204         {
205             metaBlkDimDbg.h <<= 1;
206         }
207         else
208         {
209             metaBlkDimDbg.w <<= 1;
210         }
211     }
212     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214 
215     UINT_32 numMetaBlkX;
216     UINT_32 numMetaBlkY;
217     UINT_32 numMetaBlkZ;
218 
219     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222 
223     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
224     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225 
226     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
227     {
228         align *= (numPipeTotal >> 1);
229     }
230 
231     align = Max(align, metaBlkSize);
232 
233     if (m_settings.metaBaseAlignFix)
234     {
235         align = Max(align, GetBlockSize(pIn->swizzleMode));
236     }
237 
238     if (m_settings.htileAlignFix)
239     {
240         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
241         const INT_32 htileCachelineSizeLog2 = 11;
242         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
243 
244         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
245 
246         align <<= rbMaskPadding;
247     }
248 
249     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
250     pOut->height     = numMetaBlkY * metaBlkDim.h;
251     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
252 
253     pOut->metaBlkWidth       = metaBlkDim.w;
254     pOut->metaBlkHeight      = metaBlkDim.h;
255     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
256 
257     pOut->baseAlign  = align;
258     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
259 
260     return ADDR_OK;
261 }
262 
263 /**
264 ************************************************************************************************************************
265 *   Gfx9Lib::HwlComputeCmaskInfo
266 *
267 *   @brief
268 *       Interface function stub of AddrComputeCmaskInfo
269 *
270 *   @return
271 *       ADDR_E_RETURNCODE
272 ************************************************************************************************************************
273 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const274 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
275     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
276     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
277     ) const
278 {
279     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
280 
281     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
282                                                        pIn->swizzleMode);
283 
284     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
285 
286     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
287 
288     if ((numPipeTotal == 1) && (numRbTotal == 1))
289     {
290         numCompressBlkPerMetaBlkLog2 = 13;
291     }
292     else
293     {
294         if (m_settings.applyAliasFix)
295         {
296             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
297         }
298         else
299         {
300             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
301         }
302 
303         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
304     }
305 
306     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
307 
308     Dim2d metaBlkDim = {8, 8};
309     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
310     UINT_32 heightAmp = totalAmpBits >> 1;
311     UINT_32 widthAmp = totalAmpBits - heightAmp;
312     metaBlkDim.w <<= widthAmp;
313     metaBlkDim.h <<= heightAmp;
314 
315 #if DEBUG
316     Dim2d metaBlkDimDbg = {8, 8};
317     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
318     {
319         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
320         {
321             metaBlkDimDbg.h <<= 1;
322         }
323         else
324         {
325             metaBlkDimDbg.w <<= 1;
326         }
327     }
328     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
329 #endif
330 
331     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
332     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
333     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
334 
335     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
336 
337     if (m_settings.metaBaseAlignFix)
338     {
339         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
340     }
341 
342     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
343     pOut->height     = numMetaBlkY * metaBlkDim.h;
344     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
345     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
346     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
347 
348     pOut->metaBlkWidth = metaBlkDim.w;
349     pOut->metaBlkHeight = metaBlkDim.h;
350 
351     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
352 
353     // Get the CMASK address equation (copied from CmaskAddrFromCoord)
354     UINT_32 fmaskBpp              = GetFmaskBpp(1, 1);
355     UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
356     UINT_32 metaBlkWidthLog2      = Log2(pOut->metaBlkWidth);
357     UINT_32 metaBlkHeightLog2     = Log2(pOut->metaBlkHeight);
358 
359     MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
360                                 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
361                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
362 
363     CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
364 
365     // Generate the CMASK address equation.
366     pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
367     bool checked = false;
368     for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
369        CoordTerm &bit = (*eq)[b];
370 
371        unsigned c;
372        for (c = 0; c < bit.getsize(); c++) {
373           Coordinate &coord = bit[c];
374           pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
375           pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
376        }
377        for (; c < 5; c++)
378           pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
379     }
380 
381     // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
382     for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
383        CoordTerm &prev = (*eq)[b - 1];
384        CoordTerm &cur = (*eq)[b];
385 
386        if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
387           prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
388           prev[0].getord() + 1 == cur[0].getord())
389           pOut->equation.gfx9.num_bits = b;
390        else
391           break;
392     }
393 
394     pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
395                                                                    pIn->swizzleMode);
396 
397     return ADDR_OK;
398 }
399 
400 /**
401 ************************************************************************************************************************
402 *   Gfx9Lib::GetMetaMipInfo
403 *
404 *   @brief
405 *       Get meta mip info
406 *
407 *   @return
408 *       N/A
409 ************************************************************************************************************************
410 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const411 VOID Gfx9Lib::GetMetaMipInfo(
412     UINT_32 numMipLevels,           ///< [in]  number of mip levels
413     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
414     BOOL_32 dataThick,              ///< [in]  data surface is thick
415     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
416     UINT_32 mip0Width,              ///< [in]  mip0 width
417     UINT_32 mip0Height,             ///< [in]  mip0 height
418     UINT_32 mip0Depth,              ///< [in]  mip0 depth
419     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
420     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
421     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
422     const
423 {
424     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
425     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
426     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
427     UINT_32 tailWidth   = pMetaBlkDim->w;
428     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
429     UINT_32 tailDepth   = pMetaBlkDim->d;
430     BOOL_32 inTail      = FALSE;
431     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
432 
433     if (numMipLevels > 1)
434     {
435         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
436         {
437             // Z major
438             major = ADDR_MAJOR_Z;
439         }
440         else if (numMetaBlkX >= numMetaBlkY)
441         {
442             // X major
443             major = ADDR_MAJOR_X;
444         }
445         else
446         {
447             // Y major
448             major = ADDR_MAJOR_Y;
449         }
450 
451         inTail = ((mip0Width <= tailWidth) &&
452                   (mip0Height <= tailHeight) &&
453                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
454 
455         if (inTail == FALSE)
456         {
457             UINT_32 orderLimit;
458             UINT_32 *pMipDim;
459             UINT_32 *pOrderDim;
460 
461             if (major == ADDR_MAJOR_Z)
462             {
463                 // Z major
464                 pMipDim = &numMetaBlkY;
465                 pOrderDim = &numMetaBlkZ;
466                 orderLimit = 4;
467             }
468             else if (major == ADDR_MAJOR_X)
469             {
470                 // X major
471                 pMipDim = &numMetaBlkY;
472                 pOrderDim = &numMetaBlkX;
473                 orderLimit = 4;
474             }
475             else
476             {
477                 // Y major
478                 pMipDim = &numMetaBlkX;
479                 pOrderDim = &numMetaBlkY;
480                 orderLimit = 2;
481             }
482 
483             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
484             {
485                 *pMipDim += 2;
486             }
487             else
488             {
489                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
490             }
491         }
492     }
493 
494     if (pInfo != NULL)
495     {
496         UINT_32 mipWidth  = mip0Width;
497         UINT_32 mipHeight = mip0Height;
498         UINT_32 mipDepth  = mip0Depth;
499         Dim3d   mipCoord  = {0};
500 
501         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
502         {
503             if (inTail)
504             {
505                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
506                                    pMetaBlkDim);
507                 break;
508             }
509             else
510             {
511                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
512                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
513                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
514 
515                 pInfo[mip].inMiptail = FALSE;
516                 pInfo[mip].startX = mipCoord.w;
517                 pInfo[mip].startY = mipCoord.h;
518                 pInfo[mip].startZ = mipCoord.d;
519                 pInfo[mip].width  = mipWidth;
520                 pInfo[mip].height = mipHeight;
521                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
522 
523                 if ((mip >= 3) || (mip & 1))
524                 {
525                     switch (major)
526                     {
527                         case ADDR_MAJOR_X:
528                             mipCoord.w += mipWidth;
529                             break;
530                         case ADDR_MAJOR_Y:
531                             mipCoord.h += mipHeight;
532                             break;
533                         case ADDR_MAJOR_Z:
534                             mipCoord.d += mipDepth;
535                             break;
536                         default:
537                             break;
538                     }
539                 }
540                 else
541                 {
542                     switch (major)
543                     {
544                         case ADDR_MAJOR_X:
545                             mipCoord.h += mipHeight;
546                             break;
547                         case ADDR_MAJOR_Y:
548                             mipCoord.w += mipWidth;
549                             break;
550                         case ADDR_MAJOR_Z:
551                             mipCoord.h += mipHeight;
552                             break;
553                         default:
554                             break;
555                     }
556                 }
557 
558                 mipWidth  = Max(mipWidth >> 1, 1u);
559                 mipHeight = Max(mipHeight >> 1, 1u);
560                 mipDepth = Max(mipDepth >> 1, 1u);
561 
562                 inTail = ((mipWidth <= tailWidth) &&
563                           (mipHeight <= tailHeight) &&
564                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
565             }
566         }
567     }
568 
569     *pNumMetaBlkX = numMetaBlkX;
570     *pNumMetaBlkY = numMetaBlkY;
571     *pNumMetaBlkZ = numMetaBlkZ;
572 }
573 
574 /**
575 ************************************************************************************************************************
576 *   Gfx9Lib::HwlComputeDccInfo
577 *
578 *   @brief
579 *       Interface function to compute DCC key info
580 *
581 *   @return
582 *       ADDR_E_RETURNCODE
583 ************************************************************************************************************************
584 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const585 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
586     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
587     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
588     ) const
589 {
590     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
591     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
592     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
593 
594     if (dataLinear)
595     {
596         metaLinear = TRUE;
597     }
598     else if (metaLinear == TRUE)
599     {
600         pipeAligned = FALSE;
601     }
602 
603     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
604 
605     if (metaLinear)
606     {
607         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
608         ADDR_ASSERT_ALWAYS();
609 
610         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
611         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
612     }
613     else
614     {
615         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
616 
617         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
618 
619         UINT_32 numFrags = Max(pIn->numFrags, 1u);
620         UINT_32 numSlices = Max(pIn->numSlices, 1u);
621 
622         minMetaBlkSize /= numFrags;
623 
624         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
625 
626         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
627 
628         if ((numPipeTotal > 1) || (numRbTotal > 1))
629         {
630             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
631 
632             numCompressBlkPerMetaBlk =
633                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
634 
635             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
636             {
637                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
638             }
639         }
640 
641         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
642         Dim3d metaBlkDim = compressBlkDim;
643 
644         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
645         {
646             if ((metaBlkDim.h < metaBlkDim.w) ||
647                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
648             {
649                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
650                 {
651                     metaBlkDim.h <<= 1;
652                 }
653                 else
654                 {
655                     metaBlkDim.d <<= 1;
656                 }
657             }
658             else
659             {
660                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
661                 {
662                     metaBlkDim.w <<= 1;
663                 }
664                 else
665                 {
666                     metaBlkDim.d <<= 1;
667                 }
668             }
669         }
670 
671         UINT_32 numMetaBlkX;
672         UINT_32 numMetaBlkY;
673         UINT_32 numMetaBlkZ;
674 
675         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
676                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
677                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
678 
679         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
680 
681         if (numFrags > m_maxCompFrag)
682         {
683             sizeAlign *= (numFrags / m_maxCompFrag);
684         }
685 
686         if (m_settings.metaBaseAlignFix)
687         {
688             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
689         }
690 
691         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
692                            numCompressBlkPerMetaBlk * numFrags;
693         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
694         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
695 
696         pOut->pitch = numMetaBlkX * metaBlkDim.w;
697         pOut->height = numMetaBlkY * metaBlkDim.h;
698         pOut->depth = numMetaBlkZ * metaBlkDim.d;
699 
700         pOut->compressBlkWidth = compressBlkDim.w;
701         pOut->compressBlkHeight = compressBlkDim.h;
702         pOut->compressBlkDepth = compressBlkDim.d;
703 
704         pOut->metaBlkWidth = metaBlkDim.w;
705         pOut->metaBlkHeight = metaBlkDim.h;
706         pOut->metaBlkDepth = metaBlkDim.d;
707         pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
708 
709         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
710         pOut->fastClearSizePerSlice =
711             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
712 
713         // Get the DCC address equation (copied from DccAddrFromCoord)
714         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
715         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
716         UINT_32 metaBlkWidthLog2  = Log2(pOut->metaBlkWidth);
717         UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
718         UINT_32 metaBlkDepthLog2  = Log2(pOut->metaBlkDepth);
719         UINT_32 compBlkWidthLog2  = Log2(pOut->compressBlkWidth);
720         UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
721         UINT_32 compBlkDepthLog2  = Log2(pOut->compressBlkDepth);
722 
723         MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
724                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
725                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
726                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
727 
728         CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
729 
730         // Generate the DCC address equation.
731         pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
732         bool checked = false;
733         for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
734            CoordTerm &bit = (*eq)[b];
735 
736            unsigned c;
737            for (c = 0; c < bit.getsize(); c++) {
738               Coordinate &coord = bit[c];
739               pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
740               pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
741            }
742            for (; c < 5; c++)
743               pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
744         }
745 
746         // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
747         for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
748            CoordTerm &prev = (*eq)[b - 1];
749            CoordTerm &cur = (*eq)[b];
750 
751            if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
752                prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
753                prev[0].getord() + 1 == cur[0].getord())
754               pOut->equation.gfx9.num_bits = b;
755            else
756               break;
757         }
758 
759         pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
760                                                                        pIn->swizzleMode);
761     }
762 
763     return ADDR_OK;
764 }
765 
766 /**
767 ************************************************************************************************************************
768 *   Gfx9Lib::HwlComputeMaxBaseAlignments
769 *
770 *   @brief
771 *       Gets maximum alignments
772 *   @return
773 *       maximum alignments
774 ************************************************************************************************************************
775 */
HwlComputeMaxBaseAlignments() const776 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
777 {
778     return Size64K;
779 }
780 
781 /**
782 ************************************************************************************************************************
783 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
784 *
785 *   @brief
786 *       Gets maximum alignments for metadata
787 *   @return
788 *       maximum alignments for metadata
789 ************************************************************************************************************************
790 */
HwlComputeMaxMetaBaseAlignments() const791 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
792 {
793     // Max base alignment for Htile
794     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
795     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
796 
797     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
798     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
799     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
800     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
801 
802     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
803 
804     if (maxNumPipeTotal > 2)
805     {
806         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
807     }
808 
809     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
810 
811     if (m_settings.metaBaseAlignFix)
812     {
813         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
814     }
815 
816     if (m_settings.htileAlignFix)
817     {
818         maxBaseAlignHtile *= maxNumPipeTotal;
819     }
820 
821     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
822 
823     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
824     UINT_32 maxBaseAlignDcc3D = 65536;
825 
826     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
827     {
828         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
829     }
830 
831     // Max base alignment for Msaa Dcc
832     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
833 
834     if (m_settings.metaBaseAlignFix)
835     {
836         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
837     }
838 
839     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
840 }
841 
842 /**
843 ************************************************************************************************************************
844 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
845 *
846 *   @brief
847 *       Interface function stub of AddrComputeCmaskAddrFromCoord
848 *
849 *   @return
850 *       ADDR_E_RETURNCODE
851 ************************************************************************************************************************
852 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)853 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
854     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
855     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
856 {
857     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
858     input.size            = sizeof(input);
859     input.cMaskFlags      = pIn->cMaskFlags;
860     input.colorFlags      = pIn->colorFlags;
861     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
862     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
863     input.numSlices       = Max(pIn->numSlices, 1u);
864     input.swizzleMode     = pIn->swizzleMode;
865     input.resourceType    = pIn->resourceType;
866 
867     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
868     output.size = sizeof(output);
869 
870     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
871 
872     if (returnCode == ADDR_OK)
873     {
874         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
875         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
876         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
877         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
878 
879         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
880                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
881                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
882 
883         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
884 
885         UINT_32 xb = pIn->x / output.metaBlkWidth;
886         UINT_32 yb = pIn->y / output.metaBlkHeight;
887         UINT_32 zb = pIn->slice;
888 
889         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
890         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
891         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
892 
893         UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
894         UINT_64 address  = pMetaEq->solve(coords);
895 
896         pOut->addr = address >> 1;
897         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
898 
899 
900         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
901                                                            pIn->swizzleMode);
902 
903         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
904 
905         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
906     }
907 
908     return returnCode;
909 }
910 
911 /**
912 ************************************************************************************************************************
913 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
914 *
915 *   @brief
916 *       Interface function stub of AddrComputeHtileAddrFromCoord
917 *
918 *   @return
919 *       ADDR_E_RETURNCODE
920 ************************************************************************************************************************
921 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)922 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
923     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
924     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
925 {
926     ADDR_E_RETURNCODE returnCode = ADDR_OK;
927 
928     if (pIn->numMipLevels > 1)
929     {
930         returnCode = ADDR_NOTIMPLEMENTED;
931     }
932     else
933     {
934         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
935         input.size            = sizeof(input);
936         input.hTileFlags      = pIn->hTileFlags;
937         input.depthFlags      = pIn->depthflags;
938         input.swizzleMode     = pIn->swizzleMode;
939         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
940         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
941         input.numSlices       = Max(pIn->numSlices, 1u);
942         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
943 
944         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
945         output.size = sizeof(output);
946 
947         returnCode = ComputeHtileInfo(&input, &output);
948 
949         if (returnCode == ADDR_OK)
950         {
951             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
952             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
953             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
954             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
955 
956             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
957                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
958                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
959 
960             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
961 
962             UINT_32 xb = pIn->x / output.metaBlkWidth;
963             UINT_32 yb = pIn->y / output.metaBlkHeight;
964             UINT_32 zb = pIn->slice;
965 
966             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
967             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
968             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
969 
970             UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
971             UINT_64 address  = pMetaEq->solve(coords);
972 
973             pOut->addr = address >> 1;
974 
975             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
976                                                                pIn->swizzleMode);
977 
978             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
979 
980             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
981         }
982     }
983 
984     return returnCode;
985 }
986 
987 /**
988 ************************************************************************************************************************
989 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
990 *
991 *   @brief
992 *       Interface function stub of AddrComputeHtileCoordFromAddr
993 *
994 *   @return
995 *       ADDR_E_RETURNCODE
996 ************************************************************************************************************************
997 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)998 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
999     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
1000     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
1001 {
1002     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1003 
1004     if (pIn->numMipLevels > 1)
1005     {
1006         returnCode = ADDR_NOTIMPLEMENTED;
1007     }
1008     else
1009     {
1010         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
1011         input.size            = sizeof(input);
1012         input.hTileFlags      = pIn->hTileFlags;
1013         input.swizzleMode     = pIn->swizzleMode;
1014         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
1015         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1016         input.numSlices       = Max(pIn->numSlices, 1u);
1017         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1018 
1019         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1020         output.size = sizeof(output);
1021 
1022         returnCode = ComputeHtileInfo(&input, &output);
1023 
1024         if (returnCode == ADDR_OK)
1025         {
1026             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1027             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1028             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1029             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
1030 
1031             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1032                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1033                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1034 
1035             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1036 
1037             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1038                                                                pIn->swizzleMode);
1039 
1040             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1041 
1042             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1043 
1044             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1045             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1046 
1047             UINT_32 coords[NUM_DIMS];
1048             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1049 
1050             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1051             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1052             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1053         }
1054     }
1055 
1056     return returnCode;
1057 }
1058 
1059 /**
1060 ************************************************************************************************************************
1061 *   Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1062 *
1063 *   @brief
1064 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1065 *
1066 *   @return
1067 *       ADDR_E_RETURNCODE
1068 ************************************************************************************************************************
1069 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1070 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1071     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1072 {
1073     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1074 
1075     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1076     {
1077         returnCode = ADDR_NOTSUPPORTED;
1078     }
1079     else if ((pIn->pitch == 0)             ||
1080              (pIn->height == 0)            ||
1081              (pIn->compressBlkWidth == 0)  ||
1082              (pIn->compressBlkHeight == 0) ||
1083              (pIn->compressBlkDepth == 0)  ||
1084              (pIn->metaBlkWidth == 0)      ||
1085              (pIn->metaBlkHeight == 0)     ||
1086              (pIn->metaBlkDepth == 0)      ||
1087              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1088     {
1089         returnCode = ADDR_NOTSUPPORTED;
1090     }
1091 
1092     return returnCode;
1093 }
1094 
1095 /**
1096 ************************************************************************************************************************
1097 *   Gfx9Lib::HwlComputeDccAddrFromCoord
1098 *
1099 *   @brief
1100 *       Interface function stub of AddrComputeDccAddrFromCoord
1101 *
1102 *   @return
1103 *       N/A
1104 ************************************************************************************************************************
1105 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1106 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1107     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
1108     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1109 {
1110     UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1111     UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1112     UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
1113     UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1114     UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
1115     UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
1116     UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1117     UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
1118 
1119     MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1120                                  Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1121                                  metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1122                                  compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1123 
1124     const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1125 
1126     UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1127     UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1128     UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1129 
1130     UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1131     UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1132     UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1133 
1134     UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1135     UINT_64 address  = pMetaEq->solve(coords);
1136 
1137     pOut->addr = address >> 1;
1138 
1139     UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1140                                                        pIn->swizzleMode);
1141 
1142     UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1143 
1144     pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1145 }
1146 
1147 /**
1148 ************************************************************************************************************************
1149 *   Gfx9Lib::HwlInitGlobalParams
1150 *
1151 *   @brief
1152 *       Initializes global parameters
1153 *
1154 *   @return
1155 *       TRUE if all settings are valid
1156 *
1157 ************************************************************************************************************************
1158 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1159 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1160     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1161 {
1162     BOOL_32 valid = TRUE;
1163 
1164     if (m_settings.isArcticIsland)
1165     {
1166         GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1167 
1168         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1169 
1170         // These values are copied from CModel code
1171         switch (gbAddrConfig.bits.NUM_PIPES)
1172         {
1173             case ADDR_CONFIG_1_PIPE:
1174                 m_pipes = 1;
1175                 m_pipesLog2 = 0;
1176                 break;
1177             case ADDR_CONFIG_2_PIPE:
1178                 m_pipes = 2;
1179                 m_pipesLog2 = 1;
1180                 break;
1181             case ADDR_CONFIG_4_PIPE:
1182                 m_pipes = 4;
1183                 m_pipesLog2 = 2;
1184                 break;
1185             case ADDR_CONFIG_8_PIPE:
1186                 m_pipes = 8;
1187                 m_pipesLog2 = 3;
1188                 break;
1189             case ADDR_CONFIG_16_PIPE:
1190                 m_pipes = 16;
1191                 m_pipesLog2 = 4;
1192                 break;
1193             case ADDR_CONFIG_32_PIPE:
1194                 m_pipes = 32;
1195                 m_pipesLog2 = 5;
1196                 break;
1197             default:
1198                 ADDR_ASSERT_ALWAYS();
1199                 break;
1200         }
1201 
1202         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1203         {
1204             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1205                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1206                 m_pipeInterleaveLog2 = 8;
1207                 break;
1208             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1209                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1210                 m_pipeInterleaveLog2 = 9;
1211                 break;
1212             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1213                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1214                 m_pipeInterleaveLog2 = 10;
1215                 break;
1216             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1217                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1218                 m_pipeInterleaveLog2 = 11;
1219                 break;
1220             default:
1221                 ADDR_ASSERT_ALWAYS();
1222                 break;
1223         }
1224 
1225         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1226         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1227         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1228 
1229         switch (gbAddrConfig.bits.NUM_BANKS)
1230         {
1231             case ADDR_CONFIG_1_BANK:
1232                 m_banks = 1;
1233                 m_banksLog2 = 0;
1234                 break;
1235             case ADDR_CONFIG_2_BANK:
1236                 m_banks = 2;
1237                 m_banksLog2 = 1;
1238                 break;
1239             case ADDR_CONFIG_4_BANK:
1240                 m_banks = 4;
1241                 m_banksLog2 = 2;
1242                 break;
1243             case ADDR_CONFIG_8_BANK:
1244                 m_banks = 8;
1245                 m_banksLog2 = 3;
1246                 break;
1247             case ADDR_CONFIG_16_BANK:
1248                 m_banks = 16;
1249                 m_banksLog2 = 4;
1250                 break;
1251             default:
1252                 ADDR_ASSERT_ALWAYS();
1253                 break;
1254         }
1255 
1256         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1257         {
1258             case ADDR_CONFIG_1_SHADER_ENGINE:
1259                 m_se = 1;
1260                 m_seLog2 = 0;
1261                 break;
1262             case ADDR_CONFIG_2_SHADER_ENGINE:
1263                 m_se = 2;
1264                 m_seLog2 = 1;
1265                 break;
1266             case ADDR_CONFIG_4_SHADER_ENGINE:
1267                 m_se = 4;
1268                 m_seLog2 = 2;
1269                 break;
1270             case ADDR_CONFIG_8_SHADER_ENGINE:
1271                 m_se = 8;
1272                 m_seLog2 = 3;
1273                 break;
1274             default:
1275                 ADDR_ASSERT_ALWAYS();
1276                 break;
1277         }
1278 
1279         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1280         {
1281             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1282                 m_rbPerSe = 1;
1283                 m_rbPerSeLog2 = 0;
1284                 break;
1285             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1286                 m_rbPerSe = 2;
1287                 m_rbPerSeLog2 = 1;
1288                 break;
1289             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1290                 m_rbPerSe = 4;
1291                 m_rbPerSeLog2 = 2;
1292                 break;
1293             default:
1294                 ADDR_ASSERT_ALWAYS();
1295                 break;
1296         }
1297 
1298         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1299         {
1300             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1301                 m_maxCompFrag = 1;
1302                 m_maxCompFragLog2 = 0;
1303                 break;
1304             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1305                 m_maxCompFrag = 2;
1306                 m_maxCompFragLog2 = 1;
1307                 break;
1308             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1309                 m_maxCompFrag = 4;
1310                 m_maxCompFragLog2 = 2;
1311                 break;
1312             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1313                 m_maxCompFrag = 8;
1314                 m_maxCompFragLog2 = 3;
1315                 break;
1316             default:
1317                 ADDR_ASSERT_ALWAYS();
1318                 break;
1319         }
1320 
1321         if ((m_rbPerSeLog2 == 1) &&
1322             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1323              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1324         {
1325             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1326 
1327             ADDR_ASSERT(m_settings.isRaven == FALSE);
1328 
1329             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1330 
1331             if (m_settings.isVega12)
1332             {
1333                 m_settings.htileCacheRbConflict = 1;
1334             }
1335         }
1336 
1337         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1338         m_blockVarSizeLog2 = 0;
1339     }
1340     else
1341     {
1342         valid = FALSE;
1343         ADDR_NOT_IMPLEMENTED();
1344     }
1345 
1346     if (valid)
1347     {
1348         InitEquationTable();
1349     }
1350 
1351     return valid;
1352 }
1353 
1354 /**
1355 ************************************************************************************************************************
1356 *   Gfx9Lib::HwlConvertChipFamily
1357 *
1358 *   @brief
1359 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1360 *   @return
1361 *       ChipFamily
1362 ************************************************************************************************************************
1363 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1364 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1365     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1366     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1367 {
1368     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1369 
1370     switch (uChipFamily)
1371     {
1372         case FAMILY_AI:
1373             m_settings.isArcticIsland = 1;
1374             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1375             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1376             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1377             m_settings.isDce12 = 1;
1378 
1379             if (m_settings.isVega10 == 0)
1380             {
1381                 m_settings.htileAlignFix = 1;
1382                 m_settings.applyAliasFix = 1;
1383             }
1384 
1385             m_settings.metaBaseAlignFix = 1;
1386 
1387             m_settings.depthPipeXorDisable = 1;
1388             break;
1389         case FAMILY_RV:
1390             m_settings.isArcticIsland = 1;
1391 
1392             if (ASICREV_IS_RAVEN(uChipRevision))
1393             {
1394                 m_settings.isRaven = 1;
1395 
1396                 m_settings.depthPipeXorDisable = 1;
1397             }
1398 
1399             if (ASICREV_IS_RAVEN2(uChipRevision))
1400             {
1401                 m_settings.isRaven = 1;
1402             }
1403 
1404             if (m_settings.isRaven == 0)
1405             {
1406                 m_settings.htileAlignFix = 1;
1407                 m_settings.applyAliasFix = 1;
1408             }
1409 
1410             m_settings.isDcn1 = m_settings.isRaven;
1411 
1412             if (ASICREV_IS_RENOIR(uChipRevision))
1413             {
1414                 m_settings.isRaven = 1;
1415                 m_settings.isDcn2  = 1;
1416             }
1417 
1418             m_settings.metaBaseAlignFix = 1;
1419             break;
1420 
1421         default:
1422             ADDR_ASSERT(!"No Chip found");
1423             break;
1424     }
1425 
1426     return family;
1427 }
1428 
1429 /**
1430 ************************************************************************************************************************
1431 *   Gfx9Lib::InitRbEquation
1432 *
1433 *   @brief
1434 *       Init RB equation
1435 *   @return
1436 *       N/A
1437 ************************************************************************************************************************
1438 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1439 VOID Gfx9Lib::GetRbEquation(
1440     CoordEq* pRbEq,             ///< [out] rb equation
1441     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1442     UINT_32  numSeLog2)         ///< [in] number of shader engine
1443     const
1444 {
1445     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1446     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1447     Coordinate cx(DIM_X, rbRegion);
1448     Coordinate cy(DIM_Y, rbRegion);
1449 
1450     UINT_32 start = 0;
1451     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1452 
1453     // Clear the rb equation
1454     pRbEq->resize(0);
1455     pRbEq->resize(numRbTotalLog2);
1456 
1457     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1458     {
1459         // Special case when more than 1 SE, and 2 RB per SE
1460         (*pRbEq)[0].add(cx);
1461         (*pRbEq)[0].add(cy);
1462         cx++;
1463         cy++;
1464 
1465         if (m_settings.applyAliasFix == false)
1466         {
1467             (*pRbEq)[0].add(cy);
1468         }
1469 
1470         (*pRbEq)[0].add(cy);
1471         start++;
1472     }
1473 
1474     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1475 
1476     for (UINT_32 i = 0; i < numBits; i++)
1477     {
1478         UINT_32 idx =
1479             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1480 
1481         if ((i % 2) == 1)
1482         {
1483             (*pRbEq)[idx].add(cx);
1484             cx++;
1485         }
1486         else
1487         {
1488             (*pRbEq)[idx].add(cy);
1489             cy++;
1490         }
1491     }
1492 }
1493 
1494 /**
1495 ************************************************************************************************************************
1496 *   Gfx9Lib::GetDataEquation
1497 *
1498 *   @brief
1499 *       Get data equation for fmask and Z
1500 *   @return
1501 *       N/A
1502 ************************************************************************************************************************
1503 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1504 VOID Gfx9Lib::GetDataEquation(
1505     CoordEq* pDataEq,               ///< [out] data surface equation
1506     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1507     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1508     AddrResourceType resourceType,  ///< [in] data surface resource type
1509     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1510     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1511     const
1512 {
1513     Coordinate cx(DIM_X, 0);
1514     Coordinate cy(DIM_Y, 0);
1515     Coordinate cz(DIM_Z, 0);
1516     Coordinate cs(DIM_S, 0);
1517 
1518     // Clear the equation
1519     pDataEq->resize(0);
1520     pDataEq->resize(27);
1521 
1522     if (dataSurfaceType == Gfx9DataColor)
1523     {
1524         if (IsLinear(swizzleMode))
1525         {
1526             Coordinate cm(DIM_M, 0);
1527 
1528             pDataEq->resize(49);
1529 
1530             for (UINT_32 i = 0; i < 49; i++)
1531             {
1532                 (*pDataEq)[i].add(cm);
1533                 cm++;
1534             }
1535         }
1536         else if (IsThick(resourceType, swizzleMode))
1537         {
1538             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1539             UINT_32 i;
1540             if (IsStandardSwizzle(resourceType, swizzleMode))
1541             {
1542                 // Standard 3d swizzle
1543                 // Fill in bottom x bits
1544                 for (i = elementBytesLog2; i < 4; i++)
1545                 {
1546                     (*pDataEq)[i].add(cx);
1547                     cx++;
1548                 }
1549                 // Fill in 2 bits of y and then z
1550                 for (i = 4; i < 6; i++)
1551                 {
1552                     (*pDataEq)[i].add(cy);
1553                     cy++;
1554                 }
1555                 for (i = 6; i < 8; i++)
1556                 {
1557                     (*pDataEq)[i].add(cz);
1558                     cz++;
1559                 }
1560                 if (elementBytesLog2 < 2)
1561                 {
1562                     // fill in z & y bit
1563                     (*pDataEq)[8].add(cz);
1564                     (*pDataEq)[9].add(cy);
1565                     cz++;
1566                     cy++;
1567                 }
1568                 else if (elementBytesLog2 == 2)
1569                 {
1570                     // fill in y and x bit
1571                     (*pDataEq)[8].add(cy);
1572                     (*pDataEq)[9].add(cx);
1573                     cy++;
1574                     cx++;
1575                 }
1576                 else
1577                 {
1578                     // fill in 2 x bits
1579                     (*pDataEq)[8].add(cx);
1580                     cx++;
1581                     (*pDataEq)[9].add(cx);
1582                     cx++;
1583                 }
1584             }
1585             else
1586             {
1587                 // Z 3d swizzle
1588                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1589                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1590                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1591                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1592                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1593                 {
1594                     (*pDataEq)[i].add(cz);
1595                     cz++;
1596                 }
1597                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1598                 {
1599                     // add an x and z
1600                     (*pDataEq)[6].add(cx);
1601                     (*pDataEq)[7].add(cz);
1602                     cx++;
1603                     cz++;
1604                 }
1605                 else if (elementBytesLog2 == 2)
1606                 {
1607                     // add a y and z
1608                     (*pDataEq)[6].add(cy);
1609                     (*pDataEq)[7].add(cz);
1610                     cy++;
1611                     cz++;
1612                 }
1613                 // add y and x
1614                 (*pDataEq)[8].add(cy);
1615                 (*pDataEq)[9].add(cx);
1616                 cy++;
1617                 cx++;
1618             }
1619             // Fill in bit 10 and up
1620             pDataEq->mort3d( cz, cy, cx, 10 );
1621         }
1622         else if (IsThin(resourceType, swizzleMode))
1623         {
1624             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1625             // Color 2D
1626             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1627             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1628             UINT_32 i;
1629             // Fill in bottom x bits
1630             for (i = elementBytesLog2; i < 4; i++)
1631             {
1632                 (*pDataEq)[i].add(cx);
1633                 cx++;
1634             }
1635             // Fill in bottom y bits
1636             for (i = 4; i < 4 + microYBits; i++)
1637             {
1638                 (*pDataEq)[i].add(cy);
1639                 cy++;
1640             }
1641             // Fill in last of the micro_x bits
1642             for (i = 4 + microYBits; i < 8; i++)
1643             {
1644                 (*pDataEq)[i].add(cx);
1645                 cx++;
1646             }
1647             // Fill in x/y bits below sample split
1648             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1649             // Fill in sample bits
1650             for (i = 0; i < numSamplesLog2; i++)
1651             {
1652                 cs.set(DIM_S, i);
1653                 (*pDataEq)[tileSplitStart + i].add(cs);
1654             }
1655             // Fill in x/y bits above sample split
1656             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1657             {
1658                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1659             }
1660             else
1661             {
1662                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1663             }
1664         }
1665         else
1666         {
1667             ADDR_ASSERT_ALWAYS();
1668         }
1669     }
1670     else
1671     {
1672         // Fmask or depth
1673         UINT_32 sampleStart = elementBytesLog2;
1674         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1675         UINT_32 ymajStart = 6 + numSamplesLog2;
1676 
1677         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1678         {
1679             cs.set(DIM_S, s);
1680             (*pDataEq)[sampleStart + s].add(cs);
1681         }
1682 
1683         // Put in the x-major order pixel bits
1684         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1685         // Put in the y-major order pixel bits
1686         pDataEq->mort2d(cy, cx, ymajStart);
1687     }
1688 }
1689 
1690 /**
1691 ************************************************************************************************************************
1692 *   Gfx9Lib::GetPipeEquation
1693 *
1694 *   @brief
1695 *       Get pipe equation
1696 *   @return
1697 *       N/A
1698 ************************************************************************************************************************
1699 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1700 VOID Gfx9Lib::GetPipeEquation(
1701     CoordEq*         pPipeEq,            ///< [out] pipe equation
1702     CoordEq*         pDataEq,            ///< [in] data equation
1703     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1704     UINT_32          numPipeLog2,        ///< [in] number of pipes
1705     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1706     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1707     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1708     AddrResourceType resourceType        ///< [in] data surface resource type
1709     ) const
1710 {
1711     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1712     CoordEq dataEq;
1713 
1714     pDataEq->copy(dataEq);
1715 
1716     if (dataSurfaceType == Gfx9DataColor)
1717     {
1718         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1719         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1720     }
1721 
1722     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1723 
1724     // This section should only apply to z/stencil, maybe fmask
1725     // If the pipe bit is below the comp block size,
1726     // then keep moving up the address until we find a bit that is above
1727     UINT_32 pipeStart = 0;
1728 
1729     if (dataSurfaceType != Gfx9DataColor)
1730     {
1731         Coordinate tileMin(DIM_X, 3);
1732 
1733         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1734         {
1735             pipeStart++;
1736         }
1737 
1738         // if pipe is 0, then the first pipe bit is above the comp block size,
1739         // so we don't need to do anything
1740         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1741         // we will get the same pipe equation
1742         if (pipeStart != 0)
1743         {
1744             for (UINT_32 i = 0; i < numPipeLog2; i++)
1745             {
1746                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1747                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1748             }
1749         }
1750     }
1751 
1752     if (IsPrt(swizzleMode))
1753     {
1754         // Clear out bits above the block size if prt's are enabled
1755         dataEq.resize(blockSizeLog2);
1756         dataEq.resize(48);
1757     }
1758 
1759     if (IsXor(swizzleMode))
1760     {
1761         CoordEq xorMask;
1762 
1763         if (IsThick(resourceType, swizzleMode))
1764         {
1765             CoordEq xorMask2;
1766 
1767             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1768 
1769             xorMask.resize(numPipeLog2);
1770 
1771             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1772             {
1773                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1774                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1775             }
1776         }
1777         else
1778         {
1779             // Xor in the bits above the pipe+gpu bits
1780             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1781 
1782             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1783             {
1784                 Coordinate co;
1785                 CoordEq xorMask2;
1786                 // if 1xaa and not prt, then xor in the z bits
1787                 xorMask2.resize(0);
1788                 xorMask2.resize(numPipeLog2);
1789                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1790                 {
1791                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1792                     xorMask2[pipeIdx].add(co);
1793                 }
1794 
1795                 pPipeEq->xorin(xorMask2);
1796             }
1797         }
1798 
1799         xorMask.reverse();
1800         pPipeEq->xorin(xorMask);
1801     }
1802 }
1803 /**
1804 ************************************************************************************************************************
1805 *   Gfx9Lib::GetMetaEquation
1806 *
1807 *   @brief
1808 *       Get meta equation for cmask/htile/DCC
1809 *   @return
1810 *       Pointer to a calculated meta equation
1811 ************************************************************************************************************************
1812 */
GetMetaEquation(const MetaEqParams & metaEqParams)1813 const CoordEq* Gfx9Lib::GetMetaEquation(
1814     const MetaEqParams& metaEqParams)
1815 {
1816     UINT_32 cachedMetaEqIndex;
1817 
1818     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1819     {
1820         if (memcmp(&metaEqParams,
1821                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1822                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1823         {
1824             break;
1825         }
1826     }
1827 
1828     CoordEq* pMetaEq = NULL;
1829 
1830     if (cachedMetaEqIndex < MaxCachedMetaEq)
1831     {
1832         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1833     }
1834     else
1835     {
1836         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1837 
1838         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1839 
1840         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1841 
1842         GenMetaEquation(pMetaEq,
1843                         metaEqParams.maxMip,
1844                         metaEqParams.elementBytesLog2,
1845                         metaEqParams.numSamplesLog2,
1846                         metaEqParams.metaFlag,
1847                         metaEqParams.dataSurfaceType,
1848                         metaEqParams.swizzleMode,
1849                         metaEqParams.resourceType,
1850                         metaEqParams.metaBlkWidthLog2,
1851                         metaEqParams.metaBlkHeightLog2,
1852                         metaEqParams.metaBlkDepthLog2,
1853                         metaEqParams.compBlkWidthLog2,
1854                         metaEqParams.compBlkHeightLog2,
1855                         metaEqParams.compBlkDepthLog2);
1856     }
1857 
1858     return pMetaEq;
1859 }
1860 
1861 /**
1862 ************************************************************************************************************************
1863 *   Gfx9Lib::GenMetaEquation
1864 *
1865 *   @brief
*       Generate meta equation for cmask/htile/DCC
1867 *   @return
1868 *       N/A
1869 ************************************************************************************************************************
1870 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1871 VOID Gfx9Lib::GenMetaEquation(
1872     CoordEq*         pMetaEq,               ///< [out] meta equation
1873     UINT_32          maxMip,                ///< [in] max mip Id
1874     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1875     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1876     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1877     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1878     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1879     AddrResourceType resourceType,          ///< [in] data surface resource type
1880     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1881     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1882     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1883     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1884     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1885     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1886     const
1887 {
1888     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1889     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1890 
1891     // Get the correct data address and rb equation
1892     CoordEq dataEq;
1893     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1894                     elementBytesLog2, numSamplesLog2);
1895 
1896     // Get pipe and rb equations
1897     CoordEq pipeEquation;
1898     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1899                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1900     numPipeTotalLog2 = pipeEquation.getsize();
1901 
1902     if (metaFlag.linear)
1903     {
1904         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1905         ADDR_ASSERT_ALWAYS();
1906 
1907         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1908 
1909         dataEq.copy(*pMetaEq);
1910 
1911         if (IsLinear(swizzleMode))
1912         {
1913             if (metaFlag.pipeAligned)
1914             {
1915                 // Remove the pipe bits
1916                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1917                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1918             }
1919             // Divide by comp block size, which for linear (which is always color) is 256 B
1920             pMetaEq->shift(-8);
1921 
1922             if (metaFlag.pipeAligned)
1923             {
1924                 // Put pipe bits back in
1925                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1926 
1927                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1928                 {
1929                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1930                 }
1931             }
1932         }
1933 
1934         pMetaEq->shift(1);
1935     }
1936     else
1937     {
1938         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1939         UINT_32 compFragLog2 =
1940             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1941             maxCompFragLog2 : numSamplesLog2;
1942 
1943         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1944 
1945         // Make sure the metaaddr is cleared
1946         pMetaEq->resize(0);
1947         pMetaEq->resize(27);
1948 
1949         if (IsThick(resourceType, swizzleMode))
1950         {
1951             Coordinate cx(DIM_X, 0);
1952             Coordinate cy(DIM_Y, 0);
1953             Coordinate cz(DIM_Z, 0);
1954 
1955             if (maxMip > 0)
1956             {
1957                 pMetaEq->mort3d(cy, cx, cz);
1958             }
1959             else
1960             {
1961                 pMetaEq->mort3d(cx, cy, cz);
1962             }
1963         }
1964         else
1965         {
1966             Coordinate cx(DIM_X, 0);
1967             Coordinate cy(DIM_Y, 0);
1968             Coordinate cs;
1969 
1970             if (maxMip > 0)
1971             {
1972                 pMetaEq->mort2d(cy, cx, compFragLog2);
1973             }
1974             else
1975             {
1976                 pMetaEq->mort2d(cx, cy, compFragLog2);
1977             }
1978 
1979             //------------------------------------------------------------------------------------------------------------------------
1980             // Put the compressible fragments at the lsb
1981             // the uncompressible frags will be at the msb of the micro address
1982             //------------------------------------------------------------------------------------------------------------------------
1983             for (UINT_32 s = 0; s < compFragLog2; s++)
1984             {
1985                 cs.set(DIM_S, s);
1986                 (*pMetaEq)[s].add(cs);
1987             }
1988         }
1989 
1990         // Keep a copy of the pipe equations
1991         CoordEq origPipeEquation;
1992         pipeEquation.copy(origPipeEquation);
1993 
1994         Coordinate co;
1995         // filter out everything under the compressed block size
1996         co.set(DIM_X, compBlkWidthLog2);
1997         pMetaEq->Filter('<', co, 0, DIM_X);
1998         co.set(DIM_Y, compBlkHeightLog2);
1999         pMetaEq->Filter('<', co, 0, DIM_Y);
2000         co.set(DIM_Z, compBlkDepthLog2);
2001         pMetaEq->Filter('<', co, 0, DIM_Z);
2002 
2003         // For non-color, filter out sample bits
2004         if (dataSurfaceType != Gfx9DataColor)
2005         {
2006             co.set(DIM_X, 0);
2007             pMetaEq->Filter('<', co, 0, DIM_S);
2008         }
2009 
2010         // filter out everything above the metablock size
2011         co.set(DIM_X, metaBlkWidthLog2 - 1);
2012         pMetaEq->Filter('>', co, 0, DIM_X);
2013         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2014         pMetaEq->Filter('>', co, 0, DIM_Y);
2015         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2016         pMetaEq->Filter('>', co, 0, DIM_Z);
2017 
2018         // filter out everything above the metablock size for the channel bits
2019         co.set(DIM_X, metaBlkWidthLog2 - 1);
2020         pipeEquation.Filter('>', co, 0, DIM_X);
2021         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2022         pipeEquation.Filter('>', co, 0, DIM_Y);
2023         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2024         pipeEquation.Filter('>', co, 0, DIM_Z);
2025 
2026         // Make sure we still have the same number of channel bits
2027         if (pipeEquation.getsize() != numPipeTotalLog2)
2028         {
2029             ADDR_ASSERT_ALWAYS();
2030         }
2031 
2032         // Loop through all channel and rb bits,
2033         // and make sure these components exist in the metadata address
2034         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2035         {
2036             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2037             {
2038                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2039                 {
2040                     ADDR_ASSERT_ALWAYS();
2041                 }
2042             }
2043         }
2044 
2045         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
2046         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2047         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2048         CoordEq       origRbEquation;
2049 
2050         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2051 
2052         CoordEq rbEquation = origRbEquation;
2053 
2054         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2055         {
2056             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2057             {
2058                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2059                 {
2060                     ADDR_ASSERT_ALWAYS();
2061                 }
2062             }
2063         }
2064 
2065         if (m_settings.applyAliasFix)
2066         {
2067             co.set(DIM_Z, -1);
2068         }
2069 
2070         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2071         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2072         {
2073             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2074             {
2075                 BOOL_32 isRbEquationInPipeEquation = FALSE;
2076 
2077                 if (m_settings.applyAliasFix)
2078                 {
2079                     CoordTerm filteredPipeEq;
2080                     filteredPipeEq = pipeEquation[j];
2081 
2082                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
2083 
2084                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2085                 }
2086                 else
2087                 {
2088                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2089                 }
2090 
2091                 if (isRbEquationInPipeEquation)
2092                 {
2093                     rbEquation[i].Clear();
2094                 }
2095             }
2096         }
2097 
2098          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2099 
2100         // Loop through each bit of the channel, get the smallest coordinate,
2101         // and remove it from the metaaddr, and rb_equation
2102         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2103         {
2104             pipeEquation[i].getsmallest(co);
2105 
2106             UINT_32 old_size = pMetaEq->getsize();
2107             pMetaEq->Filter('=', co);
2108             UINT_32 new_size = pMetaEq->getsize();
2109             if (new_size != old_size-1)
2110             {
2111                 ADDR_ASSERT_ALWAYS();
2112             }
2113             pipeEquation.remove(co);
2114             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2115             {
2116                 if (rbEquation[j].remove(co))
2117                 {
2118                     // if we actually removed something from this bit, then add the remaining
2119                     // channel bits, as these can be removed for this bit
2120                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2121                     {
2122                         if (pipeEquation[i][k] != co)
2123                         {
2124                             rbEquation[j].add(pipeEquation[i][k]);
2125                             rbAppendedWithPipeBits[j] = true;
2126                         }
2127                     }
2128                 }
2129             }
2130         }
2131 
2132         // Loop through the rb bits and see what remain;
2133         // filter out the smallest coordinate if it remains
2134         UINT_32 rbBitsLeft = 0;
2135         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2136         {
2137             BOOL_32 isRbEqAppended = FALSE;
2138 
2139             if (m_settings.applyAliasFix)
2140             {
2141                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2142             }
2143             else
2144             {
2145                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2146             }
2147 
2148             if (isRbEqAppended)
2149             {
2150                 rbBitsLeft++;
2151                 rbEquation[i].getsmallest(co);
2152                 UINT_32 old_size = pMetaEq->getsize();
2153                 pMetaEq->Filter('=', co);
2154                 UINT_32 new_size = pMetaEq->getsize();
2155                 if (new_size != old_size - 1)
2156                 {
2157                     // assert warning
2158                 }
2159                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2160                 {
2161                     if (rbEquation[j].remove(co))
2162                     {
2163                         // if we actually removed something from this bit, then add the remaining
2164                         // rb bits, as these can be removed for this bit
2165                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2166                         {
2167                             if (rbEquation[i][k] != co)
2168                             {
2169                                 rbEquation[j].add(rbEquation[i][k]);
2170                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2171                             }
2172                         }
2173                     }
2174                 }
2175             }
2176         }
2177 
2178         // capture the size of the metaaddr
2179         UINT_32 metaSize = pMetaEq->getsize();
2180         // resize to 49 bits...make this a nibble address
2181         pMetaEq->resize(49);
2182         // Concatenate the macro address above the current address
2183         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2184         {
2185             co.set(DIM_M, j);
2186             (*pMetaEq)[i].add(co);
2187         }
2188 
2189         // Multiply by meta element size (in nibbles)
2190         if (dataSurfaceType == Gfx9DataColor)
2191         {
2192             pMetaEq->shift(1);
2193         }
2194         else if (dataSurfaceType == Gfx9DataDepthStencil)
2195         {
2196             pMetaEq->shift(3);
2197         }
2198 
2199         //------------------------------------------------------------------------------------------
2200         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2201         // Shift up from pipe interleave number of channel
2202         // and rb bits left, and uncompressed fragments
2203         //------------------------------------------------------------------------------------------
2204 
2205         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2206 
2207         // Put in the channel bits
2208         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2209         {
2210             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2211         }
2212 
2213         // Put in remaining rb bits
2214         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2215         {
2216             BOOL_32 isRbEqAppended = FALSE;
2217 
2218             if (m_settings.applyAliasFix)
2219             {
2220                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2221             }
2222             else
2223             {
2224                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2225             }
2226 
2227             if (isRbEqAppended)
2228             {
2229                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2230                 // Mark any rb bit we add in to the rb mask
2231                 j++;
2232             }
2233         }
2234 
2235         //------------------------------------------------------------------------------------------
2236         // Put in the uncompressed fragment bits
2237         //------------------------------------------------------------------------------------------
2238         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2239         {
2240             co.set(DIM_S, compFragLog2 + i);
2241             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2242         }
2243     }
2244 }
2245 
2246 /**
2247 ************************************************************************************************************************
2248 *   Gfx9Lib::IsEquationSupported
2249 *
2250 *   @brief
2251 *       Check if equation is supported for given swizzle mode and resource type.
2252 *
2253 *   @return
2254 *       TRUE if supported
2255 ************************************************************************************************************************
2256 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2257 BOOL_32 Gfx9Lib::IsEquationSupported(
2258     AddrResourceType rsrcType,
2259     AddrSwizzleMode  swMode,
2260     UINT_32          elementBytesLog2) const
2261 {
2262     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2263                         (IsValidSwMode(swMode) == TRUE) &&
2264                         (IsLinear(swMode) == FALSE) &&
2265                         (((IsTex2d(rsrcType) == TRUE) &&
2266                           ((elementBytesLog2 < 4) ||
2267                            ((IsRotateSwizzle(swMode) == FALSE) &&
2268                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2269                          ((IsTex3d(rsrcType) == TRUE) &&
2270                           (IsRotateSwizzle(swMode) == FALSE) &&
2271                           (IsBlock256b(swMode) == FALSE)));
2272 
2273     return supported;
2274 }
2275 
2276 /**
2277 ************************************************************************************************************************
2278 *   Gfx9Lib::InitEquationTable
2279 *
2280 *   @brief
2281 *       Initialize Equation table.
2282 *
2283 *   @return
2284 *       N/A
2285 ************************************************************************************************************************
2286 */
InitEquationTable()2287 VOID Gfx9Lib::InitEquationTable()
2288 {
2289     memset(m_equationTable, 0, sizeof(m_equationTable));
2290 
2291     // Loop all possible resource type (2D/3D)
2292     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2293     {
2294         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2295 
2296         // Loop all possible swizzle mode
2297         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2298         {
2299             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2300 
2301             // Loop all possible bpp
2302             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2303             {
2304                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2305 
2306                 // Check if the input is supported
2307                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2308                 {
2309                     ADDR_EQUATION     equation;
2310                     ADDR_E_RETURNCODE retCode;
2311 
2312                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2313 
2314                     // Generate the equation
2315                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2316                     {
2317                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2318                     }
2319                     else if (IsThin(rsrcType, swMode))
2320                     {
2321                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2322                     }
2323                     else
2324                     {
2325                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2326                     }
2327 
2328                     // Only fill the equation into the table if the return code is ADDR_OK,
2329                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2330                     // a valid input, we do nothing but just fill invalid equation index
2331                     // into the lookup table.
2332                     if (retCode == ADDR_OK)
2333                     {
2334                         equationIndex = m_numEquations;
2335                         ADDR_ASSERT(equationIndex < EquationTableSize);
2336 
2337                         m_equationTable[equationIndex] = equation;
2338 
2339                         m_numEquations++;
2340                     }
2341                     else
2342                     {
2343                         ADDR_ASSERT_ALWAYS();
2344                     }
2345                 }
2346 
2347                 // Fill the index into the lookup table, if the combination is not supported
2348                 // fill the invalid equation index
2349                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2350             }
2351         }
2352     }
2353 }
2354 
2355 /**
2356 ************************************************************************************************************************
2357 *   Gfx9Lib::HwlGetEquationIndex
2358 *
2359 *   @brief
2360 *       Interface function stub of GetEquationIndex
2361 *
2362 *   @return
2363 *       ADDR_E_RETURNCODE
2364 ************************************************************************************************************************
2365 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2366 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2367     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2368     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2369     ) const
2370 {
2371     AddrResourceType rsrcType         = pIn->resourceType;
2372     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2373     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2374     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2375 
2376     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2377     {
2378         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2379         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2380 
2381         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2382     }
2383 
2384     if (pOut->pMipInfo != NULL)
2385     {
2386         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2387         {
2388             pOut->pMipInfo[i].equationIndex = index;
2389         }
2390     }
2391 
2392     return index;
2393 }
2394 
2395 /**
2396 ************************************************************************************************************************
2397 *   Gfx9Lib::HwlComputeBlock256Equation
2398 *
2399 *   @brief
2400 *       Interface function stub of ComputeBlock256Equation
2401 *
2402 *   @return
2403 *       ADDR_E_RETURNCODE
2404 ************************************************************************************************************************
2405 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2406 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2407     AddrResourceType rsrcType,
2408     AddrSwizzleMode  swMode,
2409     UINT_32          elementBytesLog2,
2410     ADDR_EQUATION*   pEquation) const
2411 {
2412     ADDR_E_RETURNCODE ret = ADDR_OK;
2413 
2414     pEquation->numBits = 8;
2415 
2416     UINT_32 i = 0;
2417     for (; i < elementBytesLog2; i++)
2418     {
2419         InitChannel(1, 0 , i, &pEquation->addr[i]);
2420     }
2421 
2422     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2423 
2424     const UINT_32 maxBitsUsed = 4;
2425     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2426     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2427 
2428     for (i = 0; i < maxBitsUsed; i++)
2429     {
2430         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2431         InitChannel(1, 1, i, &y[i]);
2432     }
2433 
2434     if (IsStandardSwizzle(rsrcType, swMode))
2435     {
2436         switch (elementBytesLog2)
2437         {
2438             case 0:
2439                 pixelBit[0] = x[0];
2440                 pixelBit[1] = x[1];
2441                 pixelBit[2] = x[2];
2442                 pixelBit[3] = x[3];
2443                 pixelBit[4] = y[0];
2444                 pixelBit[5] = y[1];
2445                 pixelBit[6] = y[2];
2446                 pixelBit[7] = y[3];
2447                 break;
2448             case 1:
2449                 pixelBit[0] = x[0];
2450                 pixelBit[1] = x[1];
2451                 pixelBit[2] = x[2];
2452                 pixelBit[3] = y[0];
2453                 pixelBit[4] = y[1];
2454                 pixelBit[5] = y[2];
2455                 pixelBit[6] = x[3];
2456                 break;
2457             case 2:
2458                 pixelBit[0] = x[0];
2459                 pixelBit[1] = x[1];
2460                 pixelBit[2] = y[0];
2461                 pixelBit[3] = y[1];
2462                 pixelBit[4] = y[2];
2463                 pixelBit[5] = x[2];
2464                 break;
2465             case 3:
2466                 pixelBit[0] = x[0];
2467                 pixelBit[1] = y[0];
2468                 pixelBit[2] = y[1];
2469                 pixelBit[3] = x[1];
2470                 pixelBit[4] = x[2];
2471                 break;
2472             case 4:
2473                 pixelBit[0] = y[0];
2474                 pixelBit[1] = y[1];
2475                 pixelBit[2] = x[0];
2476                 pixelBit[3] = x[1];
2477                 break;
2478             default:
2479                 ADDR_ASSERT_ALWAYS();
2480                 ret = ADDR_INVALIDPARAMS;
2481                 break;
2482         }
2483     }
2484     else if (IsDisplaySwizzle(rsrcType, swMode))
2485     {
2486         switch (elementBytesLog2)
2487         {
2488             case 0:
2489                 pixelBit[0] = x[0];
2490                 pixelBit[1] = x[1];
2491                 pixelBit[2] = x[2];
2492                 pixelBit[3] = y[1];
2493                 pixelBit[4] = y[0];
2494                 pixelBit[5] = y[2];
2495                 pixelBit[6] = x[3];
2496                 pixelBit[7] = y[3];
2497                 break;
2498             case 1:
2499                 pixelBit[0] = x[0];
2500                 pixelBit[1] = x[1];
2501                 pixelBit[2] = x[2];
2502                 pixelBit[3] = y[0];
2503                 pixelBit[4] = y[1];
2504                 pixelBit[5] = y[2];
2505                 pixelBit[6] = x[3];
2506                 break;
2507             case 2:
2508                 pixelBit[0] = x[0];
2509                 pixelBit[1] = x[1];
2510                 pixelBit[2] = y[0];
2511                 pixelBit[3] = x[2];
2512                 pixelBit[4] = y[1];
2513                 pixelBit[5] = y[2];
2514                 break;
2515             case 3:
2516                 pixelBit[0] = x[0];
2517                 pixelBit[1] = y[0];
2518                 pixelBit[2] = x[1];
2519                 pixelBit[3] = x[2];
2520                 pixelBit[4] = y[1];
2521                 break;
2522             case 4:
2523                 pixelBit[0] = x[0];
2524                 pixelBit[1] = y[0];
2525                 pixelBit[2] = x[1];
2526                 pixelBit[3] = y[1];
2527                 break;
2528             default:
2529                 ADDR_ASSERT_ALWAYS();
2530                 ret = ADDR_INVALIDPARAMS;
2531                 break;
2532         }
2533     }
2534     else if (IsRotateSwizzle(swMode))
2535     {
2536         switch (elementBytesLog2)
2537         {
2538             case 0:
2539                 pixelBit[0] = y[0];
2540                 pixelBit[1] = y[1];
2541                 pixelBit[2] = y[2];
2542                 pixelBit[3] = x[1];
2543                 pixelBit[4] = x[0];
2544                 pixelBit[5] = x[2];
2545                 pixelBit[6] = x[3];
2546                 pixelBit[7] = y[3];
2547                 break;
2548             case 1:
2549                 pixelBit[0] = y[0];
2550                 pixelBit[1] = y[1];
2551                 pixelBit[2] = y[2];
2552                 pixelBit[3] = x[0];
2553                 pixelBit[4] = x[1];
2554                 pixelBit[5] = x[2];
2555                 pixelBit[6] = x[3];
2556                 break;
2557             case 2:
2558                 pixelBit[0] = y[0];
2559                 pixelBit[1] = y[1];
2560                 pixelBit[2] = x[0];
2561                 pixelBit[3] = y[2];
2562                 pixelBit[4] = x[1];
2563                 pixelBit[5] = x[2];
2564                 break;
2565             case 3:
2566                 pixelBit[0] = y[0];
2567                 pixelBit[1] = x[0];
2568                 pixelBit[2] = y[1];
2569                 pixelBit[3] = x[1];
2570                 pixelBit[4] = x[2];
2571                 break;
2572             default:
2573                 ADDR_ASSERT_ALWAYS();
2574             case 4:
2575                 ret = ADDR_INVALIDPARAMS;
2576                 break;
2577         }
2578     }
2579     else
2580     {
2581         ADDR_ASSERT_ALWAYS();
2582         ret = ADDR_INVALIDPARAMS;
2583     }
2584 
2585     // Post validation
2586     if (ret == ADDR_OK)
2587     {
2588         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2589         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2590                     (microBlockDim.w * (1 << elementBytesLog2)));
2591         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2592     }
2593 
2594     return ret;
2595 }
2596 
2597 /**
2598 ************************************************************************************************************************
2599 *   Gfx9Lib::HwlComputeThinEquation
2600 *
2601 *   @brief
2602 *       Interface function stub of ComputeThinEquation
2603 *
2604 *   @return
2605 *       ADDR_E_RETURNCODE
2606 ************************************************************************************************************************
2607 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2608 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2609     AddrResourceType rsrcType,
2610     AddrSwizzleMode  swMode,
2611     UINT_32          elementBytesLog2,
2612     ADDR_EQUATION*   pEquation) const
2613 {
2614     ADDR_E_RETURNCODE ret = ADDR_OK;
2615 
2616     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2617 
2618     UINT_32 maxXorBits = blockSizeLog2;
2619     if (IsNonPrtXor(swMode))
2620     {
2621         // For non-prt-xor, maybe need to initialize some more bits for xor
2622         // The highest xor bit used in equation will be max the following 3 items:
2623         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2624         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2625         // 3. blockSizeLog2
2626 
2627         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2628         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2629                                      GetPipeXorBits(blockSizeLog2) +
2630                                      2 * GetBankXorBits(blockSizeLog2));
2631     }
2632 
2633     const UINT_32 maxBitsUsed = 14;
2634     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2635     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2636     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2637 
2638     const UINT_32 extraXorBits = 16;
2639     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2640     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2641 
2642     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2643     {
2644         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2645         InitChannel(1, 1, i, &y[i]);
2646     }
2647 
2648     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2649 
2650     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2651     {
2652         InitChannel(1, 0 , i, &pixelBit[i]);
2653     }
2654 
2655     UINT_32 xIdx = 0;
2656     UINT_32 yIdx = 0;
2657     UINT_32 lowBits = 0;
2658 
2659     if (IsZOrderSwizzle(swMode))
2660     {
2661         if (elementBytesLog2 <= 3)
2662         {
2663             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2664             {
2665                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2666             }
2667 
2668             lowBits = 6;
2669         }
2670         else
2671         {
2672             ret = ADDR_INVALIDPARAMS;
2673         }
2674     }
2675     else
2676     {
2677         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2678 
2679         if (ret == ADDR_OK)
2680         {
2681             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2682             xIdx = Log2(microBlockDim.w);
2683             yIdx = Log2(microBlockDim.h);
2684             lowBits = 8;
2685         }
2686     }
2687 
2688     if (ret == ADDR_OK)
2689     {
2690         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2691         {
2692             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2693         }
2694 
2695         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2696         {
2697             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2698         }
2699 
2700         if (IsXor(swMode))
2701         {
2702             // Fill XOR bits
2703             UINT_32 pipeStart = m_pipeInterleaveLog2;
2704             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2705 
2706             UINT_32 bankStart = pipeStart + pipeXorBits;
2707             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2708 
2709             for (UINT_32 i = 0; i < pipeXorBits; i++)
2710             {
2711                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2712                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2713                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2714 
2715                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2716             }
2717 
2718             for (UINT_32 i = 0; i < bankXorBits; i++)
2719             {
2720                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2721                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2722                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2723 
2724                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2725             }
2726 
2727             if (IsPrt(swMode) == FALSE)
2728             {
2729                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2730                 {
2731                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2732                 }
2733 
2734                 for (UINT_32 i = 0; i < bankXorBits; i++)
2735                 {
2736                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2737                 }
2738             }
2739         }
2740 
2741         pEquation->numBits = blockSizeLog2;
2742     }
2743 
2744     return ret;
2745 }
2746 
2747 /**
2748 ************************************************************************************************************************
2749 *   Gfx9Lib::HwlComputeThickEquation
2750 *
2751 *   @brief
2752 *       Interface function stub of ComputeThickEquation
2753 *
2754 *   @return
2755 *       ADDR_E_RETURNCODE
2756 ************************************************************************************************************************
2757 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2758 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2759     AddrResourceType rsrcType,
2760     AddrSwizzleMode  swMode,
2761     UINT_32          elementBytesLog2,
2762     ADDR_EQUATION*   pEquation) const
2763 {
2764     ADDR_E_RETURNCODE ret = ADDR_OK;
2765 
2766     ADDR_ASSERT(IsTex3d(rsrcType));
2767 
2768     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2769 
2770     UINT_32 maxXorBits = blockSizeLog2;
2771     if (IsNonPrtXor(swMode))
2772     {
2773         // For non-prt-xor, maybe need to initialize some more bits for xor
2774         // The highest xor bit used in equation will be max the following 3:
2775         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2776         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2777         // 3. blockSizeLog2
2778 
2779         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2780         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2781                                      GetPipeXorBits(blockSizeLog2) +
2782                                      3 * GetBankXorBits(blockSizeLog2));
2783     }
2784 
2785     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2786     {
2787         InitChannel(1, 0 , i, &pEquation->addr[i]);
2788     }
2789 
2790     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2791 
2792     const UINT_32 maxBitsUsed = 12;
2793     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2794     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2795     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2796     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2797 
2798     const UINT_32 extraXorBits = 24;
2799     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2800     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2801 
2802     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2803     {
2804         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2805         InitChannel(1, 1, i, &y[i]);
2806         InitChannel(1, 2, i, &z[i]);
2807     }
2808 
2809     if (IsZOrderSwizzle(swMode))
2810     {
2811         switch (elementBytesLog2)
2812         {
2813             case 0:
2814                 pixelBit[0]  = x[0];
2815                 pixelBit[1]  = y[0];
2816                 pixelBit[2]  = x[1];
2817                 pixelBit[3]  = y[1];
2818                 pixelBit[4]  = z[0];
2819                 pixelBit[5]  = z[1];
2820                 pixelBit[6]  = x[2];
2821                 pixelBit[7]  = z[2];
2822                 pixelBit[8]  = y[2];
2823                 pixelBit[9]  = x[3];
2824                 break;
2825             case 1:
2826                 pixelBit[0]  = x[0];
2827                 pixelBit[1]  = y[0];
2828                 pixelBit[2]  = x[1];
2829                 pixelBit[3]  = y[1];
2830                 pixelBit[4]  = z[0];
2831                 pixelBit[5]  = z[1];
2832                 pixelBit[6]  = z[2];
2833                 pixelBit[7]  = y[2];
2834                 pixelBit[8]  = x[2];
2835                 break;
2836             case 2:
2837                 pixelBit[0]  = x[0];
2838                 pixelBit[1]  = y[0];
2839                 pixelBit[2]  = x[1];
2840                 pixelBit[3]  = z[0];
2841                 pixelBit[4]  = y[1];
2842                 pixelBit[5]  = z[1];
2843                 pixelBit[6]  = y[2];
2844                 pixelBit[7]  = x[2];
2845                 break;
2846             case 3:
2847                 pixelBit[0]  = x[0];
2848                 pixelBit[1]  = y[0];
2849                 pixelBit[2]  = z[0];
2850                 pixelBit[3]  = x[1];
2851                 pixelBit[4]  = z[1];
2852                 pixelBit[5]  = y[1];
2853                 pixelBit[6]  = x[2];
2854                 break;
2855             case 4:
2856                 pixelBit[0]  = x[0];
2857                 pixelBit[1]  = y[0];
2858                 pixelBit[2]  = z[0];
2859                 pixelBit[3]  = z[1];
2860                 pixelBit[4]  = y[1];
2861                 pixelBit[5]  = x[1];
2862                 break;
2863             default:
2864                 ADDR_ASSERT_ALWAYS();
2865                 ret = ADDR_INVALIDPARAMS;
2866                 break;
2867         }
2868     }
2869     else if (IsStandardSwizzle(rsrcType, swMode))
2870     {
2871         switch (elementBytesLog2)
2872         {
2873             case 0:
2874                 pixelBit[0]  = x[0];
2875                 pixelBit[1]  = x[1];
2876                 pixelBit[2]  = x[2];
2877                 pixelBit[3]  = x[3];
2878                 pixelBit[4]  = y[0];
2879                 pixelBit[5]  = y[1];
2880                 pixelBit[6]  = z[0];
2881                 pixelBit[7]  = z[1];
2882                 pixelBit[8]  = z[2];
2883                 pixelBit[9]  = y[2];
2884                 break;
2885             case 1:
2886                 pixelBit[0]  = x[0];
2887                 pixelBit[1]  = x[1];
2888                 pixelBit[2]  = x[2];
2889                 pixelBit[3]  = y[0];
2890                 pixelBit[4]  = y[1];
2891                 pixelBit[5]  = z[0];
2892                 pixelBit[6]  = z[1];
2893                 pixelBit[7]  = z[2];
2894                 pixelBit[8]  = y[2];
2895                 break;
2896             case 2:
2897                 pixelBit[0]  = x[0];
2898                 pixelBit[1]  = x[1];
2899                 pixelBit[2]  = y[0];
2900                 pixelBit[3]  = y[1];
2901                 pixelBit[4]  = z[0];
2902                 pixelBit[5]  = z[1];
2903                 pixelBit[6]  = y[2];
2904                 pixelBit[7]  = x[2];
2905                 break;
2906             case 3:
2907                 pixelBit[0]  = x[0];
2908                 pixelBit[1]  = y[0];
2909                 pixelBit[2]  = y[1];
2910                 pixelBit[3]  = z[0];
2911                 pixelBit[4]  = z[1];
2912                 pixelBit[5]  = x[1];
2913                 pixelBit[6]  = x[2];
2914                 break;
2915             case 4:
2916                 pixelBit[0]  = y[0];
2917                 pixelBit[1]  = y[1];
2918                 pixelBit[2]  = z[0];
2919                 pixelBit[3]  = z[1];
2920                 pixelBit[4]  = x[0];
2921                 pixelBit[5]  = x[1];
2922                 break;
2923             default:
2924                 ADDR_ASSERT_ALWAYS();
2925                 ret = ADDR_INVALIDPARAMS;
2926                 break;
2927         }
2928     }
2929     else
2930     {
2931         ADDR_ASSERT_ALWAYS();
2932         ret = ADDR_INVALIDPARAMS;
2933     }
2934 
2935     if (ret == ADDR_OK)
2936     {
2937         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2938         UINT_32 xIdx = Log2(microBlockDim.w);
2939         UINT_32 yIdx = Log2(microBlockDim.h);
2940         UINT_32 zIdx = Log2(microBlockDim.d);
2941 
2942         pixelBit = pEquation->addr;
2943 
2944         const UINT_32 lowBits = 10;
2945         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2946         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2947 
2948         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2949         {
2950             if ((i % 3) == 0)
2951             {
2952                 pixelBit[i] = x[xIdx++];
2953             }
2954             else if ((i % 3) == 1)
2955             {
2956                 pixelBit[i] = z[zIdx++];
2957             }
2958             else
2959             {
2960                 pixelBit[i] = y[yIdx++];
2961             }
2962         }
2963 
2964         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2965         {
2966             if ((i % 3) == 0)
2967             {
2968                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2969             }
2970             else if ((i % 3) == 1)
2971             {
2972                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2973             }
2974             else
2975             {
2976                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2977             }
2978         }
2979 
2980         if (IsXor(swMode))
2981         {
2982             // Fill XOR bits
2983             UINT_32 pipeStart = m_pipeInterleaveLog2;
2984             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2985             for (UINT_32 i = 0; i < pipeXorBits; i++)
2986             {
2987                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2988                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2989                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2990 
2991                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2992 
2993                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2994                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2995                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2996 
2997                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2998             }
2999 
3000             UINT_32 bankStart = pipeStart + pipeXorBits;
3001             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
3002             for (UINT_32 i = 0; i < bankXorBits; i++)
3003             {
3004                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
3005                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
3006                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
3007 
3008                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
3009 
3010                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
3011                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
3012                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
3013 
3014                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
3015             }
3016         }
3017 
3018         pEquation->numBits = blockSizeLog2;
3019     }
3020 
3021     return ret;
3022 }
3023 
3024 /**
3025 ************************************************************************************************************************
3026 *   Gfx9Lib::IsValidDisplaySwizzleMode
3027 *
3028 *   @brief
3029 *       Check if a swizzle mode is supported by display engine
3030 *
3031 *   @return
3032 *       TRUE is swizzle mode is supported by display engine
3033 ************************************************************************************************************************
3034 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3035 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3036     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3037 {
3038     BOOL_32 support = FALSE;
3039 
3040     const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3041 
3042     if (m_settings.isDce12)
3043     {
3044         if (pIn->bpp == 32)
3045         {
3046             support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3047         }
3048         else if (pIn->bpp <= 64)
3049         {
3050             support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3051         }
3052     }
3053     else if (m_settings.isDcn1)
3054     {
3055         if (pIn->bpp < 64)
3056         {
3057             support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3058         }
3059         else if (pIn->bpp == 64)
3060         {
3061             support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3062         }
3063     }
3064     else if (m_settings.isDcn2)
3065     {
3066         if (pIn->bpp < 64)
3067         {
3068             support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3069         }
3070         else if (pIn->bpp == 64)
3071         {
3072             support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3073         }
3074     }
3075     else
3076     {
3077         ADDR_NOT_IMPLEMENTED();
3078     }
3079 
3080     return support;
3081 }
3082 
3083 /**
3084 ************************************************************************************************************************
3085 *   Gfx9Lib::HwlComputePipeBankXor
3086 *
3087 *   @brief
3088 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3089 *
3090 *   @return
3091 *       PipeBankXor value
3092 ************************************************************************************************************************
3093 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3094 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3095     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3096     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3097 {
3098     if (IsXor(pIn->swizzleMode))
3099     {
3100         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3101         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3102         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3103 
3104         UINT_32 pipeXor = 0;
3105         UINT_32 bankXor = 0;
3106 
3107         const UINT_32 bankMask = (1 << bankBits) - 1;
3108         const UINT_32 index    = pIn->surfIndex & bankMask;
3109 
3110         const UINT_32 bpp      = pIn->flags.fmask ?
3111                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3112         if (bankBits == 4)
3113         {
3114             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3115             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3116 
3117             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3118         }
3119         else if (bankBits > 0)
3120         {
3121             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3122             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3123             bankXor = (index * bankIncrease) & bankMask;
3124         }
3125 
3126         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3127     }
3128     else
3129     {
3130         pOut->pipeBankXor = 0;
3131     }
3132 
3133     return ADDR_OK;
3134 }
3135 
3136 /**
3137 ************************************************************************************************************************
3138 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3139 *
3140 *   @brief
3141 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3142 *
3143 *   @return
3144 *       PipeBankXor value
3145 ************************************************************************************************************************
3146 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3147 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3148     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3149     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3150 {
3151     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3152     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3153     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3154 
3155     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3156     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3157 
3158     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3159 
3160     return ADDR_OK;
3161 }
3162 
3163 /**
3164 ************************************************************************************************************************
3165 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3166 *
3167 *   @brief
3168 *       Compute sub resource offset to support swizzle pattern
3169 *
3170 *   @return
3171 *       Offset
3172 ************************************************************************************************************************
3173 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3174 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3175     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3176     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3177 {
3178     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3179 
3180     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3181     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3182     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3183     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3184     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3185     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3186 
3187     pOut->offset = pIn->slice * pIn->sliceSize +
3188                    pIn->macroBlockOffset +
3189                    (pIn->mipTailOffset ^ pipeBankXor) -
3190                    static_cast<UINT_64>(pipeBankXor);
3191     return ADDR_OK;
3192 }
3193 
3194 /**
3195 ************************************************************************************************************************
3196 *   Gfx9Lib::ValidateNonSwModeParams
3197 *
3198 *   @brief
3199 *       Validate compute surface info params except swizzle mode
3200 *
3201 *   @return
3202 *       TRUE if parameters are valid, FALSE otherwise
3203 ************************************************************************************************************************
3204 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3205 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3206     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3207 {
3208     BOOL_32 valid = TRUE;
3209 
3210     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3211     {
3212         ADDR_ASSERT_ALWAYS();
3213         valid = FALSE;
3214     }
3215 
3216     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3217     {
3218         ADDR_ASSERT_ALWAYS();
3219         valid = FALSE;
3220     }
3221 
3222     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3223     const BOOL_32 msaa   = (pIn->numFrags > 1);
3224     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3225 
3226     const AddrResourceType rsrcType = pIn->resourceType;
3227     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3228     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3229     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3230 
3231     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3232     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3233     const BOOL_32             display = flags.display || flags.rotated;
3234     const BOOL_32             stereo  = flags.qbStereo;
3235     const BOOL_32             fmask   = flags.fmask;
3236 
3237     // Resource type check
3238     if (tex1d)
3239     {
3240         if (msaa || zbuffer || display || stereo || isBc || fmask)
3241         {
3242             ADDR_ASSERT_ALWAYS();
3243             valid = FALSE;
3244         }
3245     }
3246     else if (tex2d)
3247     {
3248         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3249         {
3250             ADDR_ASSERT_ALWAYS();
3251             valid = FALSE;
3252         }
3253     }
3254     else if (tex3d)
3255     {
3256         if (msaa || zbuffer || display || stereo || fmask)
3257         {
3258             ADDR_ASSERT_ALWAYS();
3259             valid = FALSE;
3260         }
3261     }
3262     else
3263     {
3264         ADDR_ASSERT_ALWAYS();
3265         valid = FALSE;
3266     }
3267 
3268     return valid;
3269 }
3270 
3271 /**
3272 ************************************************************************************************************************
3273 *   Gfx9Lib::ValidateSwModeParams
3274 *
3275 *   @brief
3276 *       Validate compute surface info related to swizzle mode
3277 *
3278 *   @return
3279 *       TRUE if parameters are valid, FALSE otherwise
3280 ************************************************************************************************************************
3281 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3282 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3283     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3284 {
3285     BOOL_32 valid = TRUE;
3286 
3287     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3288     {
3289         ADDR_ASSERT_ALWAYS();
3290         valid = FALSE;
3291     }
3292 
3293     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3294     const BOOL_32 msaa   = (pIn->numFrags > 1);
3295     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3296     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3297 
3298     const AddrResourceType rsrcType = pIn->resourceType;
3299     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3300     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3301     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3302 
3303     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3304     const BOOL_32          linear      = IsLinear(swizzle);
3305     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3306     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3307 
3308     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3309     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3310     const BOOL_32             color   = flags.color;
3311     const BOOL_32             texture = flags.texture;
3312     const BOOL_32             display = flags.display || flags.rotated;
3313     const BOOL_32             prt     = flags.prt;
3314     const BOOL_32             fmask   = flags.fmask;
3315 
3316     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3317     const BOOL_32             zMaxMip = tex3d && mipmap &&
3318                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3319 
3320     // Misc check
3321     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3322     {
3323         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3324         ADDR_ASSERT_ALWAYS();
3325         valid = FALSE;
3326     }
3327 
3328     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3329     {
3330         ADDR_ASSERT_ALWAYS();
3331         valid = FALSE;
3332     }
3333 
3334     if ((pIn->bpp == 96) && (linear == FALSE))
3335     {
3336         ADDR_ASSERT_ALWAYS();
3337         valid = FALSE;
3338     }
3339 
3340     if (prt && isNonPrtXor)
3341     {
3342         ADDR_ASSERT_ALWAYS();
3343         valid = FALSE;
3344     }
3345 
3346     // Resource type check
3347     if (tex1d)
3348     {
3349         if (linear == FALSE)
3350         {
3351             ADDR_ASSERT_ALWAYS();
3352             valid = FALSE;
3353         }
3354     }
3355 
3356     // Swizzle type check
3357     if (linear)
3358     {
3359         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3360             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3361         {
3362             ADDR_ASSERT_ALWAYS();
3363             valid = FALSE;
3364         }
3365     }
3366     else if (IsZOrderSwizzle(swizzle))
3367     {
3368         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3369         {
3370             ADDR_ASSERT_ALWAYS();
3371             valid = FALSE;
3372         }
3373     }
3374     else if (IsStandardSwizzle(swizzle))
3375     {
3376         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3377         {
3378             ADDR_ASSERT_ALWAYS();
3379             valid = FALSE;
3380         }
3381     }
3382     else if (IsDisplaySwizzle(swizzle))
3383     {
3384         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3385         {
3386             ADDR_ASSERT_ALWAYS();
3387             valid = FALSE;
3388         }
3389     }
3390     else if (IsRotateSwizzle(swizzle))
3391     {
3392         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3393         {
3394             ADDR_ASSERT_ALWAYS();
3395             valid = FALSE;
3396         }
3397     }
3398     else
3399     {
3400         ADDR_ASSERT_ALWAYS();
3401         valid = FALSE;
3402     }
3403 
3404     // Block type check
3405     if (blk256B)
3406     {
3407         if (prt || zbuffer || tex3d || mipmap || msaa)
3408         {
3409             ADDR_ASSERT_ALWAYS();
3410             valid = FALSE;
3411         }
3412     }
3413 
3414     return valid;
3415 }
3416 
3417 /**
3418 ************************************************************************************************************************
3419 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3420 *
3421 *   @brief
3422 *       Compute surface info sanity check
3423 *
3424 *   @return
3425 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3426 ************************************************************************************************************************
3427 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3428 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3429     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3430 {
3431     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3432 }
3433 
3434 /**
3435 ************************************************************************************************************************
3436 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3437 *
3438 *   @brief
3439 *       Internal function to get suggested surface information for client to use
3440 *
3441 *   @return
3442 *       ADDR_E_RETURNCODE
3443 ************************************************************************************************************************
3444 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3445 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3446     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3447     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3448 {
3449     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3450     ElemLib*          pElemLib   = GetElemLib();
3451 
3452     UINT_32 bpp        = pIn->bpp;
3453     UINT_32 width      = Max(pIn->width, 1u);
3454     UINT_32 height     = Max(pIn->height, 1u);
3455     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3456     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3457 
3458     if (pIn->flags.fmask)
3459     {
3460         bpp                = GetFmaskBpp(numSamples, numFrags);
3461         numFrags           = 1;
3462         numSamples         = 1;
3463         pOut->resourceType = ADDR_RSRC_TEX_2D;
3464     }
3465     else
3466     {
3467         // Set format to INVALID will skip this conversion
3468         if (pIn->format != ADDR_FMT_INVALID)
3469         {
3470             UINT_32 expandX, expandY;
3471 
3472             // Don't care for this case
3473             ElemMode elemMode = ADDR_UNCOMPRESSED;
3474 
3475             // Get compression/expansion factors and element mode which indicates compression/expansion
3476             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3477                                             &elemMode,
3478                                             &expandX,
3479                                             &expandY);
3480 
3481             UINT_32 basePitch = 0;
3482             GetElemLib()->AdjustSurfaceInfo(elemMode,
3483                                             expandX,
3484                                             expandY,
3485                                             &bpp,
3486                                             &basePitch,
3487                                             &width,
3488                                             &height);
3489         }
3490 
3491         // The output may get changed for volume(3D) texture resource in future
3492         pOut->resourceType = pIn->resourceType;
3493     }
3494 
3495     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3496     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3497     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3498     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3499 
3500     // Pre sanity check on non swizzle mode parameters
3501     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3502     localIn.flags        = pIn->flags;
3503     localIn.resourceType = pOut->resourceType;
3504     localIn.format       = pIn->format;
3505     localIn.bpp          = bpp;
3506     localIn.width        = width;
3507     localIn.height       = height;
3508     localIn.numSlices    = numSlices;
3509     localIn.numMipLevels = numMipLevels;
3510     localIn.numSamples   = numSamples;
3511     localIn.numFrags     = numFrags;
3512 
3513     if (ValidateNonSwModeParams(&localIn))
3514     {
3515         // Forbid swizzle mode(s) by client setting
3516         ADDR2_SWMODE_SET allowedSwModeSet = {};
3517         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3518         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3519         allowedSwModeSet.value |=
3520             pIn->forbiddenBlock.macroThin4KB ? 0 :
3521             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3522         allowedSwModeSet.value |=
3523             pIn->forbiddenBlock.macroThick4KB ? 0 :
3524             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3525         allowedSwModeSet.value |=
3526             pIn->forbiddenBlock.macroThin64KB ? 0 :
3527             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3528         allowedSwModeSet.value |=
3529             pIn->forbiddenBlock.macroThick64KB ? 0 :
3530             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3531 
3532         if (pIn->preferredSwSet.value != 0)
3533         {
3534             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3535             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3536             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3537             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3538         }
3539 
3540         if (pIn->noXor)
3541         {
3542             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3543         }
3544 
3545         if (pIn->maxAlign > 0)
3546         {
3547             if (pIn->maxAlign < Size64K)
3548             {
3549                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3550             }
3551 
3552             if (pIn->maxAlign < Size4K)
3553             {
3554                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3555             }
3556 
3557             if (pIn->maxAlign < Size256)
3558             {
3559                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3560             }
3561         }
3562 
3563         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3564         switch (pOut->resourceType)
3565         {
3566             case ADDR_RSRC_TEX_1D:
3567                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3568                 break;
3569 
3570             case ADDR_RSRC_TEX_2D:
3571                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3572 
3573                 if (bpp > 64)
3574                 {
3575                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3576                 }
3577                 break;
3578 
3579             case ADDR_RSRC_TEX_3D:
3580                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3581 
3582                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3583                 {
3584                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3585                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3586                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3587                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3588                 }
3589 
3590                 if ((bpp == 128) && pIn->flags.color)
3591                 {
3592                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3593                 }
3594 
3595                 if (pIn->flags.view3dAs2dArray)
3596                 {
3597                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3598                 }
3599                 break;
3600 
3601             default:
3602                 ADDR_ASSERT_ALWAYS();
3603                 allowedSwModeSet.value = 0;
3604                 break;
3605         }
3606 
3607         if (pIn->format == ADDR_FMT_32_32_32)
3608         {
3609             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3610         }
3611 
3612         if (ElemLib::IsBlockCompressed(pIn->format))
3613         {
3614             if (pIn->flags.texture)
3615             {
3616                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3617             }
3618             else
3619             {
3620                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3621             }
3622         }
3623 
3624         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3625             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3626         {
3627             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3628         }
3629 
3630         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3631         {
3632             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3633 
3634             if (pIn->flags.noMetadata == FALSE)
3635             {
3636                 if (pIn->flags.depth &&
3637                     pIn->flags.texture &&
3638                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3639                 {
3640                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3641                     // equation from wrong address within memory range a tile covered and use the
3642                     // garbage data for compressed Z reading which finally leads to corruption.
3643                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3644                 }
3645 
3646                 if (m_settings.htileCacheRbConflict &&
3647                     (pIn->flags.depth || pIn->flags.stencil) &&
3648                     (numSlices > 1) &&
3649                     (pIn->flags.metaRbUnaligned == FALSE) &&
3650                     (pIn->flags.metaPipeUnaligned == FALSE))
3651                 {
3652                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3653                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3654                 }
3655             }
3656         }
3657 
3658         if (msaa)
3659         {
3660             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3661         }
3662 
3663         if ((numFrags > 1) &&
3664             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3665         {
3666             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3667             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3668         }
3669 
3670         if (numMipLevels > 1)
3671         {
3672             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3673         }
3674 
3675         if (displayRsrc)
3676         {
3677             if (m_settings.isDce12)
3678             {
3679                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3680             }
3681             else if (m_settings.isDcn1)
3682             {
3683                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3684             }
3685             else if (m_settings.isDcn2)
3686             {
3687                 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3688             }
3689             else
3690             {
3691                 ADDR_NOT_IMPLEMENTED();
3692             }
3693         }
3694 
3695         if (allowedSwModeSet.value != 0)
3696         {
3697 #if DEBUG
3698             // Post sanity check, at least AddrLib should accept the output generated by its own
3699             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3700 
3701             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3702             {
3703                 if (validateSwModeSet & 1)
3704                 {
3705                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3706                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3707                 }
3708 
3709                 validateSwModeSet >>= 1;
3710             }
3711 #endif
3712 
3713             pOut->validSwModeSet = allowedSwModeSet;
3714             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3715             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3716             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3717 
3718             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3719 
3720             if (pOut->clientPreferredSwSet.value == 0)
3721             {
3722                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3723             }
3724 
3725             // Apply optional restrictions
3726             if (pIn->flags.needEquation)
3727             {
3728                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3729             }
3730 
3731             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3732             {
3733                 pOut->swizzleMode = ADDR_SW_LINEAR;
3734             }
3735             else
3736             {
3737                 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3738 
3739                 if ((height > 1) && (computeMinSize == FALSE))
3740                 {
3741                     // Always ignore linear swizzle mode if:
3742                     // 1. This is a (2D/3D) resource with height > 1
3743                     // 2. Client doesn't require computing minimize size
3744                     allowedSwModeSet.swLinear = 0;
3745                 }
3746 
3747                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3748 
3749                 // Determine block size if there are 2 or more block type candidates
3750                 if (IsPow2(allowedBlockSet.value) == FALSE)
3751                 {
3752                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3753 
3754                     swMode[AddrBlockLinear]   = ADDR_SW_LINEAR;
3755                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3756                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3757                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3758 
3759                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3760                     {
3761                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3762                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3763                     }
3764 
3765                     UINT_64 padSize[AddrBlockMaxTiledType] = {};
3766 
3767                     const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3768                     const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3769                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3770                     UINT_32       minSizeBlk         = AddrBlockMicro;
3771                     UINT_64       minSize            = 0;
3772 
3773                     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3774 
3775                     for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3776                     {
3777                         if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3778                         {
3779                             localIn.swizzleMode = swMode[i];
3780 
3781                             if (localIn.swizzleMode == ADDR_SW_LINEAR)
3782                             {
3783                                 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3784                             }
3785                             else
3786                             {
3787                                 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3788                             }
3789 
3790                             if (returnCode == ADDR_OK)
3791                             {
3792                                 padSize[i] = localOut.surfSize;
3793 
3794                                 if ((minSize == 0) ||
3795                                     BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3796                                 {
3797                                     minSize    = padSize[i];
3798                                     minSizeBlk = i;
3799                                 }
3800                             }
3801                             else
3802                             {
3803                                 ADDR_ASSERT_ALWAYS();
3804                                 break;
3805                             }
3806                         }
3807                     }
3808 
3809                     if (pIn->memoryBudget > 1.0)
3810                     {
3811                         // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3812                         // smaller-block type again in coming loop
3813                         switch (minSizeBlk)
3814                         {
3815                             case AddrBlockThick64KB:
3816                                 allowedBlockSet.macroThin64KB = 0;
3817                             case AddrBlockThin64KB:
3818                                 allowedBlockSet.macroThick4KB = 0;
3819                             case AddrBlockThick4KB:
3820                                 allowedBlockSet.macroThin4KB = 0;
3821                             case AddrBlockThin4KB:
3822                                 allowedBlockSet.micro  = 0;
3823                             case AddrBlockMicro:
3824                                 allowedBlockSet.linear = 0;
3825                             case AddrBlockLinear:
3826                                 break;
3827 
3828                             default:
3829                                 ADDR_ASSERT_ALWAYS();
3830                                 break;
3831                         }
3832 
3833                         for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3834                         {
3835                             if ((i != minSizeBlk) &&
3836                                 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3837                             {
3838                                 if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3839                                 {
3840                                     // Clear the block type if the memory waste is unacceptable
3841                                     allowedBlockSet.value &= ~(1u << (i - 1));
3842                                 }
3843                             }
3844                         }
3845 
3846                         // Remove linear block type if 2 or more block types are allowed
3847                         if (IsPow2(allowedBlockSet.value) == FALSE)
3848                         {
3849                             allowedBlockSet.linear = 0;
3850                         }
3851 
3852                         // Select the biggest allowed block type
3853                         minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3854 
3855                         if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3856                         {
3857                             minSizeBlk = AddrBlockLinear;
3858                         }
3859                     }
3860 
3861                     switch (minSizeBlk)
3862                     {
3863                         case AddrBlockLinear:
3864                             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3865                             break;
3866 
3867                         case AddrBlockMicro:
3868                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3869                             allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3870                             break;
3871 
3872                         case AddrBlockThin4KB:
3873                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3874                                                       Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3875                             break;
3876 
3877                         case AddrBlockThick4KB:
3878                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3879                             allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3880                             break;
3881 
3882                         case AddrBlockThin64KB:
3883                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3884                                                       Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3885                             break;
3886 
3887                         case AddrBlockThick64KB:
3888                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3889                             allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3890                             break;
3891 
3892                         default:
3893                             ADDR_ASSERT_ALWAYS();
3894                             allowedSwModeSet.value = 0;
3895                             break;
3896                     }
3897                 }
3898 
3899                 // Block type should be determined.
3900                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3901 
3902                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3903 
3904                 // Determine swizzle type if there are 2 or more swizzle type candidates
3905                 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3906                 {
3907                     if (ElemLib::IsBlockCompressed(pIn->format))
3908                     {
3909                         if (allowedSwSet.sw_D)
3910                         {
3911                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3912                         }
3913                         else
3914                         {
3915                             ADDR_ASSERT(allowedSwSet.sw_S);
3916                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3917                         }
3918                     }
3919                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3920                     {
3921                         if (allowedSwSet.sw_S)
3922                         {
3923                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3924                         }
3925                         else if (allowedSwSet.sw_D)
3926                         {
3927                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3928                         }
3929                         else
3930                         {
3931                             ADDR_ASSERT(allowedSwSet.sw_R);
3932                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3933                         }
3934                     }
3935                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3936                     {
3937                         if (pIn->flags.color && allowedSwSet.sw_D)
3938                         {
3939                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3940                         }
3941                         else if (allowedSwSet.sw_Z)
3942                         {
3943                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3944                         }
3945                         else
3946                         {
3947                             ADDR_ASSERT(allowedSwSet.sw_S);
3948                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3949                         }
3950                     }
3951                     else
3952                     {
3953                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3954                         {
3955                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3956                         }
3957                         else if (allowedSwSet.sw_D)
3958                         {
3959                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3960                         }
3961                         else if (allowedSwSet.sw_S)
3962                         {
3963                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3964                         }
3965                         else
3966                         {
3967                             ADDR_ASSERT(allowedSwSet.sw_Z);
3968                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3969                         }
3970                     }
3971 
3972                     // Swizzle type should be determined.
3973                     ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3974                 }
3975 
3976                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3977                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3978                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3979                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3980             }
3981 
3982             returnCode = ADDR_OK;
3983         }
3984         else
3985         {
3986             // Invalid combination...
3987             ADDR_ASSERT_ALWAYS();
3988         }
3989     }
3990     else
3991     {
3992         // Invalid combination...
3993         ADDR_ASSERT_ALWAYS();
3994     }
3995 
3996     return returnCode;
3997 }
3998 
3999 /**
4000 ************************************************************************************************************************
4001 *   Gfx9Lib::ComputeStereoInfo
4002 *
4003 *   @brief
4004 *       Compute height alignment and right eye pipeBankXor for stereo surface
4005 *
4006 *   @return
4007 *       Error code
4008 *
4009 ************************************************************************************************************************
4010 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const4011 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
4012     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4013     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
4014     UINT_32*                                pHeightAlign
4015     ) const
4016 {
4017     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4018 
4019     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4020 
4021     if (eqIndex < m_numEquations)
4022     {
4023         if (IsXor(pIn->swizzleMode))
4024         {
4025             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
4026             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
4027             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
4028             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
4029             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4030             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
4031 
4032             ADDR_ASSERT(maxYCoordBlock256 ==
4033                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4034 
4035             const UINT_32 maxYCoordInBaseEquation =
4036                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4037 
4038             ADDR_ASSERT(maxYCoordInBaseEquation ==
4039                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4040 
4041             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4042 
4043             ADDR_ASSERT(maxYCoordInPipeXor ==
4044                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4045 
4046             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4047                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4048 
4049             ADDR_ASSERT(maxYCoordInBankXor ==
4050                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4051 
4052             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4053 
4054             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4055             {
4056                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4057 
4058                 if (pOut->pStereoInfo != NULL)
4059                 {
4060                     pOut->pStereoInfo->rightSwizzle = 0;
4061 
4062                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4063                     {
4064                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4065                         {
4066                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4067                         }
4068 
4069                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4070                         {
4071                             pOut->pStereoInfo->rightSwizzle |=
4072                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4073                         }
4074 
4075                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4076                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4077                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4078                     }
4079                 }
4080             }
4081         }
4082     }
4083     else
4084     {
4085         ADDR_ASSERT_ALWAYS();
4086         returnCode = ADDR_ERROR;
4087     }
4088 
4089     return returnCode;
4090 }
4091 
4092 /**
4093 ************************************************************************************************************************
4094 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
4095 *
4096 *   @brief
4097 *       Internal function to calculate alignment for tiled surface
4098 *
4099 *   @return
4100 *       ADDR_E_RETURNCODE
4101 ************************************************************************************************************************
4102 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4103 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4104      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4105      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4106      ) const
4107 {
4108     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4109                                                                 &pOut->blockHeight,
4110                                                                 &pOut->blockSlices,
4111                                                                 pIn->bpp,
4112                                                                 pIn->numFrags,
4113                                                                 pIn->resourceType,
4114                                                                 pIn->swizzleMode);
4115 
4116     if (returnCode == ADDR_OK)
4117     {
4118         UINT_32 pitchAlignInElement = pOut->blockWidth;
4119 
4120         if ((IsTex2d(pIn->resourceType) == TRUE) &&
4121             (pIn->flags.display || pIn->flags.rotated) &&
4122             (pIn->numMipLevels <= 1) &&
4123             (pIn->numSamples <= 1) &&
4124             (pIn->numFrags <= 1))
4125         {
4126             // Display engine needs pitch align to be at least 32 pixels.
4127             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4128         }
4129 
4130         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4131 
4132         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4133         {
4134             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4135             {
4136                 returnCode = ADDR_INVALIDPARAMS;
4137             }
4138             else if (pIn->pitchInElement < pOut->pitch)
4139             {
4140                 returnCode = ADDR_INVALIDPARAMS;
4141             }
4142             else
4143             {
4144                 pOut->pitch = pIn->pitchInElement;
4145             }
4146         }
4147 
4148         UINT_32 heightAlign = 0;
4149 
4150         if (pIn->flags.qbStereo)
4151         {
4152             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4153         }
4154 
4155         if (returnCode == ADDR_OK)
4156         {
4157             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4158 
4159             if (heightAlign > 1)
4160             {
4161                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4162             }
4163 
4164             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4165 
4166             pOut->epitchIsHeight   = FALSE;
4167             pOut->mipChainInTail   = FALSE;
4168             pOut->firstMipIdInTail = pIn->numMipLevels;
4169 
4170             pOut->mipChainPitch    = pOut->pitch;
4171             pOut->mipChainHeight   = pOut->height;
4172             pOut->mipChainSlice    = pOut->numSlices;
4173 
4174             if (pIn->numMipLevels > 1)
4175             {
4176                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4177                                                          pIn->swizzleMode,
4178                                                          pIn->bpp,
4179                                                          pIn->width,
4180                                                          pIn->height,
4181                                                          pIn->numSlices,
4182                                                          pOut->blockWidth,
4183                                                          pOut->blockHeight,
4184                                                          pOut->blockSlices,
4185                                                          pIn->numMipLevels,
4186                                                          pOut->pMipInfo);
4187 
4188                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4189 
4190                 if (endingMipId == 0)
4191                 {
4192                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4193                                                            pIn->swizzleMode,
4194                                                            pOut->blockWidth,
4195                                                            pOut->blockHeight,
4196                                                            pOut->blockSlices);
4197 
4198                     pOut->epitchIsHeight = TRUE;
4199                     pOut->pitch          = tailMaxDim.w;
4200                     pOut->height         = tailMaxDim.h;
4201                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4202                                            tailMaxDim.d : pIn->numSlices;
4203                     pOut->mipChainInTail = TRUE;
4204                 }
4205                 else
4206                 {
4207                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4208                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4209 
4210                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4211                                                            pIn->swizzleMode,
4212                                                            mip0WidthInBlk,
4213                                                            mip0HeightInBlk,
4214                                                            pOut->numSlices / pOut->blockSlices);
4215                     if (majorMode == ADDR_MAJOR_Y)
4216                     {
4217                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4218 
4219                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4220                         {
4221                             mip1WidthInBlk++;
4222                         }
4223 
4224                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4225 
4226                         pOut->epitchIsHeight = FALSE;
4227                     }
4228                     else
4229                     {
4230                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4231 
4232                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4233                         {
4234                             mip1HeightInBlk++;
4235                         }
4236 
4237                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4238 
4239                         pOut->epitchIsHeight = TRUE;
4240                     }
4241                 }
4242 
4243                 if (pOut->pMipInfo != NULL)
4244                 {
4245                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4246 
4247                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4248                     {
4249                         Dim3d   mipStartPos          = {0};
4250                         UINT_32 mipTailOffsetInBytes = 0;
4251 
4252                         mipStartPos = GetMipStartPos(pIn->resourceType,
4253                                                      pIn->swizzleMode,
4254                                                      pOut->pitch,
4255                                                      pOut->height,
4256                                                      pOut->numSlices,
4257                                                      pOut->blockWidth,
4258                                                      pOut->blockHeight,
4259                                                      pOut->blockSlices,
4260                                                      i,
4261                                                      elementBytesLog2,
4262                                                      &mipTailOffsetInBytes);
4263 
4264                         UINT_32 pitchInBlock     =
4265                             pOut->mipChainPitch / pOut->blockWidth;
4266                         UINT_32 sliceInBlock     =
4267                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4268                         UINT_64 blockIndex       =
4269                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4270                         UINT_64 macroBlockOffset =
4271                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4272 
4273                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4274                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4275                     }
4276                 }
4277             }
4278             else if (pOut->pMipInfo != NULL)
4279             {
4280                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4281                 pOut->pMipInfo[0].height = pOut->height;
4282                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4283                 pOut->pMipInfo[0].offset = 0;
4284             }
4285 
4286             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4287                               (pIn->bpp >> 3) * pIn->numFrags;
4288             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4289             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4290 
4291             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4292                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4293                 (pIn->flags.texture == TRUE) &&
4294                 (pIn->flags.noMetadata == FALSE) &&
4295                 (pIn->flags.metaPipeUnaligned == FALSE))
4296             {
4297                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4298                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4299                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4300                 // them, which may cause invalid metadata to be fetched.
4301                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4302             }
4303 
4304             if (pIn->flags.prt)
4305             {
4306                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4307             }
4308         }
4309     }
4310 
4311     return returnCode;
4312 }
4313 
4314 /**
4315 ************************************************************************************************************************
4316 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4317 *
4318 *   @brief
4319 *       Internal function to calculate alignment for linear surface
4320 *
4321 *   @return
4322 *       ADDR_E_RETURNCODE
4323 ************************************************************************************************************************
4324 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4325 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4326      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4327      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4328      ) const
4329 {
4330     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4331     UINT_32           pitch        = 0;
4332     UINT_32           actualHeight = 0;
4333     UINT_32           elementBytes = pIn->bpp >> 3;
4334     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4335 
4336     if (IsTex1d(pIn->resourceType))
4337     {
4338         if (pIn->height > 1)
4339         {
4340             returnCode = ADDR_INVALIDPARAMS;
4341         }
4342         else
4343         {
4344             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4345 
4346             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4347             actualHeight = pIn->numMipLevels;
4348 
4349             if (pIn->flags.prt == FALSE)
4350             {
4351                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4352                                                         &pitch, &actualHeight);
4353             }
4354 
4355             if (returnCode == ADDR_OK)
4356             {
4357                 if (pOut->pMipInfo != NULL)
4358                 {
4359                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4360                     {
4361                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4362                         pOut->pMipInfo[i].pitch  = pitch;
4363                         pOut->pMipInfo[i].height = 1;
4364                         pOut->pMipInfo[i].depth  = 1;
4365                     }
4366                 }
4367             }
4368         }
4369     }
4370     else
4371     {
4372         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4373     }
4374 
4375     if ((pitch == 0) || (actualHeight == 0))
4376     {
4377         returnCode = ADDR_INVALIDPARAMS;
4378     }
4379 
4380     if (returnCode == ADDR_OK)
4381     {
4382         pOut->pitch          = pitch;
4383         pOut->height         = pIn->height;
4384         pOut->numSlices      = pIn->numSlices;
4385         pOut->mipChainPitch  = pitch;
4386         pOut->mipChainHeight = actualHeight;
4387         pOut->mipChainSlice  = pOut->numSlices;
4388         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4389         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4390         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4391         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4392         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4393         pOut->blockHeight    = 1;
4394         pOut->blockSlices    = 1;
4395     }
4396 
4397     // Post calculation validate
4398     ADDR_ASSERT(pOut->sliceSize > 0);
4399 
4400     return returnCode;
4401 }
4402 
4403 /**
4404 ************************************************************************************************************************
4405 *   Gfx9Lib::GetMipChainInfo
4406 *
4407 *   @brief
4408 *       Internal function to get out information about mip chain
4409 *
4410 *   @return
4411 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4412 ************************************************************************************************************************
4413 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4414 UINT_32 Gfx9Lib::GetMipChainInfo(
4415     AddrResourceType  resourceType,
4416     AddrSwizzleMode   swizzleMode,
4417     UINT_32           bpp,
4418     UINT_32           mip0Width,
4419     UINT_32           mip0Height,
4420     UINT_32           mip0Depth,
4421     UINT_32           blockWidth,
4422     UINT_32           blockHeight,
4423     UINT_32           blockDepth,
4424     UINT_32           numMipLevel,
4425     ADDR2_MIP_INFO*   pMipInfo) const
4426 {
4427     const Dim3d tailMaxDim =
4428         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4429 
4430     UINT_32 mipPitch         = mip0Width;
4431     UINT_32 mipHeight        = mip0Height;
4432     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4433     UINT_32 offset           = 0;
4434     UINT_32 firstMipIdInTail = numMipLevel;
4435     BOOL_32 inTail           = FALSE;
4436     BOOL_32 finalDim         = FALSE;
4437     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4438     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4439 
4440     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4441     {
4442         if (inTail)
4443         {
4444             if (finalDim == FALSE)
4445             {
4446                 UINT_32 mipSize;
4447 
4448                 if (is3dThick)
4449                 {
4450                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4451                 }
4452                 else
4453                 {
4454                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4455                 }
4456 
4457                 if (mipSize <= 256)
4458                 {
4459                     UINT_32 index = Log2(bpp >> 3);
4460 
4461                     if (is3dThick)
4462                     {
4463                         mipPitch  = Block256_3dZ[index].w;
4464                         mipHeight = Block256_3dZ[index].h;
4465                         mipDepth  = Block256_3dZ[index].d;
4466                     }
4467                     else
4468                     {
4469                         mipPitch  = Block256_2d[index].w;
4470                         mipHeight = Block256_2d[index].h;
4471                     }
4472 
4473                     finalDim = TRUE;
4474                 }
4475             }
4476         }
4477         else
4478         {
4479             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4480                                  mipPitch, mipHeight, mipDepth);
4481 
4482             if (inTail)
4483             {
4484                 firstMipIdInTail = mipId;
4485                 mipPitch         = tailMaxDim.w;
4486                 mipHeight        = tailMaxDim.h;
4487 
4488                 if (is3dThick)
4489                 {
4490                     mipDepth = tailMaxDim.d;
4491                 }
4492             }
4493             else
4494             {
4495                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4496                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4497 
4498                 if (is3dThick)
4499                 {
4500                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4501                 }
4502             }
4503         }
4504 
4505         if (pMipInfo != NULL)
4506         {
4507             pMipInfo[mipId].pitch  = mipPitch;
4508             pMipInfo[mipId].height = mipHeight;
4509             pMipInfo[mipId].depth  = mipDepth;
4510             pMipInfo[mipId].offset = offset;
4511         }
4512 
4513         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4514 
4515         if (finalDim)
4516         {
4517             if (is3dThin)
4518             {
4519                 mipDepth = Max(mipDepth >> 1, 1u);
4520             }
4521         }
4522         else
4523         {
4524             mipPitch  = Max(mipPitch >> 1, 1u);
4525             mipHeight = Max(mipHeight >> 1, 1u);
4526 
4527             if (is3dThick || is3dThin)
4528             {
4529                 mipDepth = Max(mipDepth >> 1, 1u);
4530             }
4531         }
4532     }
4533 
4534     return firstMipIdInTail;
4535 }
4536 
4537 /**
4538 ************************************************************************************************************************
4539 *   Gfx9Lib::GetMetaMiptailInfo
4540 *
4541 *   @brief
4542 *       Get mip tail coordinate information.
4543 *
4544 *   @return
4545 *       N/A
4546 ************************************************************************************************************************
4547 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4548 VOID Gfx9Lib::GetMetaMiptailInfo(
4549     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4550     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4551     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4552     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4553     ) const
4554 {
4555     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4556     UINT_32 mipWidth  = pMetaBlkDim->w;
4557     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4558     UINT_32 mipDepth  = pMetaBlkDim->d;
4559     UINT_32 minInc;
4560 
4561     if (isThick)
4562     {
4563         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4564     }
4565     else if (pMetaBlkDim->h >= 1024)
4566     {
4567         minInc = 256;
4568     }
4569     else if (pMetaBlkDim->h == 512)
4570     {
4571         minInc = 128;
4572     }
4573     else
4574     {
4575         minInc = 64;
4576     }
4577 
4578     UINT_32 blk32MipId = 0xFFFFFFFF;
4579 
4580     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4581     {
4582         pInfo[mip].inMiptail = TRUE;
4583         pInfo[mip].startX = mipCoord.w;
4584         pInfo[mip].startY = mipCoord.h;
4585         pInfo[mip].startZ = mipCoord.d;
4586         pInfo[mip].width = mipWidth;
4587         pInfo[mip].height = mipHeight;
4588         pInfo[mip].depth = mipDepth;
4589 
4590         if (mipWidth <= 32)
4591         {
4592             if (blk32MipId == 0xFFFFFFFF)
4593             {
4594                 blk32MipId = mip;
4595             }
4596 
4597             mipCoord.w = pInfo[blk32MipId].startX;
4598             mipCoord.h = pInfo[blk32MipId].startY;
4599             mipCoord.d = pInfo[blk32MipId].startZ;
4600 
4601             switch (mip - blk32MipId)
4602             {
4603                 case 0:
4604                     mipCoord.w += 32;       // 16x16
4605                     break;
4606                 case 1:
4607                     mipCoord.h += 32;       // 8x8
4608                     break;
4609                 case 2:
4610                     mipCoord.h += 32;       // 4x4
4611                     mipCoord.w += 16;
4612                     break;
4613                 case 3:
4614                     mipCoord.h += 32;       // 2x2
4615                     mipCoord.w += 32;
4616                     break;
4617                 case 4:
4618                     mipCoord.h += 32;       // 1x1
4619                     mipCoord.w += 48;
4620                     break;
4621                 // The following are for BC/ASTC formats
4622                 case 5:
4623                     mipCoord.h += 48;       // 1/2 x 1/2
4624                     break;
4625                 case 6:
4626                     mipCoord.h += 48;       // 1/4 x 1/4
4627                     mipCoord.w += 16;
4628                     break;
4629                 case 7:
4630                     mipCoord.h += 48;       // 1/8 x 1/8
4631                     mipCoord.w += 32;
4632                     break;
4633                 case 8:
4634                     mipCoord.h += 48;       // 1/16 x 1/16
4635                     mipCoord.w += 48;
4636                     break;
4637                 default:
4638                     ADDR_ASSERT_ALWAYS();
4639                     break;
4640             }
4641 
4642             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4643             mipHeight = mipWidth;
4644 
4645             if (isThick)
4646             {
4647                 mipDepth = mipWidth;
4648             }
4649         }
4650         else
4651         {
4652             if (mipWidth <= minInc)
4653             {
4654                 // if we're below the minimal increment...
4655                 if (isThick)
4656                 {
4657                     // For 3d, just go in z direction
4658                     mipCoord.d += mipDepth;
4659                 }
4660                 else
4661                 {
4662                     // For 2d, first go across, then down
4663                     if ((mipWidth * 2) == minInc)
4664                     {
4665                         // if we're 2 mips below, that's when we go back in x, and down in y
4666                         mipCoord.w -= minInc;
4667                         mipCoord.h += minInc;
4668                     }
4669                     else
4670                     {
4671                         // otherwise, just go across in x
4672                         mipCoord.w += minInc;
4673                     }
4674                 }
4675             }
4676             else
4677             {
4678                 // On even mip, go down, otherwise, go across
4679                 if (mip & 1)
4680                 {
4681                     mipCoord.w += mipWidth;
4682                 }
4683                 else
4684                 {
4685                     mipCoord.h += mipHeight;
4686                 }
4687             }
4688             // Divide the width by 2
4689             mipWidth >>= 1;
4690             // After the first mip in tail, the mip is always a square
4691             mipHeight = mipWidth;
4692             // ...or for 3d, a cube
4693             if (isThick)
4694             {
4695                 mipDepth = mipWidth;
4696             }
4697         }
4698     }
4699 }
4700 
4701 /**
4702 ************************************************************************************************************************
4703 *   Gfx9Lib::GetMipStartPos
4704 *
4705 *   @brief
4706 *       Internal function to get out information about mip logical start position
4707 *
4708 *   @return
4709 *       logical start position in macro block width/heith/depth of one mip level within one slice
4710 ************************************************************************************************************************
4711 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4712 Dim3d Gfx9Lib::GetMipStartPos(
4713     AddrResourceType  resourceType,
4714     AddrSwizzleMode   swizzleMode,
4715     UINT_32           width,
4716     UINT_32           height,
4717     UINT_32           depth,
4718     UINT_32           blockWidth,
4719     UINT_32           blockHeight,
4720     UINT_32           blockDepth,
4721     UINT_32           mipId,
4722     UINT_32           log2ElementBytes,
4723     UINT_32*          pMipTailBytesOffset) const
4724 {
4725     Dim3d       mipStartPos = {0};
4726     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4727 
4728     // Report mip in tail if Mip0 is already in mip tail
4729     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4730     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4731     UINT_32 mipIndexInTail = mipId;
4732 
4733     if (inMipTail == FALSE)
4734     {
4735         // Mip 0 dimension, unit in block
4736         UINT_32 mipWidthInBlk   = width  / blockWidth;
4737         UINT_32 mipHeightInBlk  = height / blockHeight;
4738         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4739         AddrMajorMode majorMode = GetMajorMode(resourceType,
4740                                                swizzleMode,
4741                                                mipWidthInBlk,
4742                                                mipHeightInBlk,
4743                                                mipDepthInBlk);
4744 
4745         UINT_32 endingMip = mipId + 1;
4746 
4747         for (UINT_32 i = 1; i <= mipId; i++)
4748         {
4749             if ((i == 1) || (i == 3))
4750             {
4751                 if (majorMode == ADDR_MAJOR_Y)
4752                 {
4753                     mipStartPos.w += mipWidthInBlk;
4754                 }
4755                 else
4756                 {
4757                     mipStartPos.h += mipHeightInBlk;
4758                 }
4759             }
4760             else
4761             {
4762                 if (majorMode == ADDR_MAJOR_X)
4763                 {
4764                    mipStartPos.w += mipWidthInBlk;
4765                 }
4766                 else if (majorMode == ADDR_MAJOR_Y)
4767                 {
4768                    mipStartPos.h += mipHeightInBlk;
4769                 }
4770                 else
4771                 {
4772                    mipStartPos.d += mipDepthInBlk;
4773                 }
4774             }
4775 
4776             BOOL_32 inTail = FALSE;
4777 
4778             if (IsThick(resourceType, swizzleMode))
4779             {
4780                 UINT_32 dim = log2BlkSize % 3;
4781 
4782                 if (dim == 0)
4783                 {
4784                     inTail =
4785                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4786                 }
4787                 else if (dim == 1)
4788                 {
4789                     inTail =
4790                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4791                 }
4792                 else
4793                 {
4794                     inTail =
4795                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4796                 }
4797             }
4798             else
4799             {
4800                 if (log2BlkSize & 1)
4801                 {
4802                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4803                 }
4804                 else
4805                 {
4806                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4807                 }
4808             }
4809 
4810             if (inTail)
4811             {
4812                 endingMip = i;
4813                 break;
4814             }
4815 
4816             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4817             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4818             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4819         }
4820 
4821         if (mipId >= endingMip)
4822         {
4823             inMipTail      = TRUE;
4824             mipIndexInTail = mipId - endingMip;
4825         }
4826     }
4827 
4828     if (inMipTail)
4829     {
4830         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4831         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4832         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4833     }
4834 
4835     return mipStartPos;
4836 }
4837 
4838 /**
4839 ************************************************************************************************************************
4840 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4841 *
4842 *   @brief
4843 *       Internal function to calculate address from coord for tiled swizzle surface
4844 *
4845 *   @return
4846 *       ADDR_E_RETURNCODE
4847 ************************************************************************************************************************
4848 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4849 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4850      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4851      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4852      ) const
4853 {
4854     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4855     localIn.swizzleMode  = pIn->swizzleMode;
4856     localIn.flags        = pIn->flags;
4857     localIn.resourceType = pIn->resourceType;
4858     localIn.bpp          = pIn->bpp;
4859     localIn.width        = Max(pIn->unalignedWidth, 1u);
4860     localIn.height       = Max(pIn->unalignedHeight, 1u);
4861     localIn.numSlices    = Max(pIn->numSlices, 1u);
4862     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4863     localIn.numSamples   = Max(pIn->numSamples, 1u);
4864     localIn.numFrags     = Max(pIn->numFrags, 1u);
4865     if (localIn.numMipLevels <= 1)
4866     {
4867         localIn.pitchInElement = pIn->pitchInElement;
4868     }
4869 
4870     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4871     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4872 
4873     BOOL_32 valid = (returnCode == ADDR_OK) &&
4874                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4875                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4876                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4877 
4878     if (valid)
4879     {
4880         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4881         Dim3d   mipStartPos        = {0};
4882         UINT_32 mipTailBytesOffset = 0;
4883 
4884         if (pIn->numMipLevels > 1)
4885         {
4886             // Mip-map chain cannot be MSAA surface
4887             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4888 
4889             mipStartPos = GetMipStartPos(pIn->resourceType,
4890                                          pIn->swizzleMode,
4891                                          localOut.pitch,
4892                                          localOut.height,
4893                                          localOut.numSlices,
4894                                          localOut.blockWidth,
4895                                          localOut.blockHeight,
4896                                          localOut.blockSlices,
4897                                          pIn->mipId,
4898                                          log2ElementBytes,
4899                                          &mipTailBytesOffset);
4900         }
4901 
4902         UINT_32 interleaveOffset = 0;
4903         UINT_32 pipeBits = 0;
4904         UINT_32 pipeXor = 0;
4905         UINT_32 bankBits = 0;
4906         UINT_32 bankXor = 0;
4907 
4908         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4909         {
4910             UINT_32 blockOffset = 0;
4911             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4912 
4913             if (IsZOrderSwizzle(pIn->swizzleMode))
4914             {
4915                 // Morton generation
4916                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4917                 {
4918                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4919                     UINT_32 mortBits = totalLowBits / 2;
4920                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4921                     // Are 9 bits enough?
4922                     UINT_32 highBitsValue =
4923                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4924                     blockOffset = lowBitsValue | highBitsValue;
4925                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4926                 }
4927                 else
4928                 {
4929                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4930                 }
4931 
4932                 // Fill LSBs with sample bits
4933                 if (pIn->numSamples > 1)
4934                 {
4935                     blockOffset *= pIn->numSamples;
4936                     blockOffset |= pIn->sample;
4937                 }
4938 
4939                 // Shift according to BytesPP
4940                 blockOffset <<= log2ElementBytes;
4941             }
4942             else
4943             {
4944                 // Micro block offset
4945                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4946                 blockOffset = microBlockOffset;
4947 
4948                 // Micro block dimension
4949                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4950                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4951                 // Morton generation, does 12 bit enough?
4952                 blockOffset |=
4953                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4954 
4955                 // Sample bits start location
4956                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4957                 // Join sample bits information to the highest Macro block bits
4958                 if (IsNonPrtXor(pIn->swizzleMode))
4959                 {
4960                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4961                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4962                 }
4963                 else
4964                 {
4965                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4966                     // after this op, the blockOffset only contains log2 Macro block size bits
4967                     blockOffset %= (1 << sampleStart);
4968                     blockOffset |= (pIn->sample << sampleStart);
4969                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4970                 }
4971             }
4972 
4973             if (IsXor(pIn->swizzleMode))
4974             {
4975                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4976                 if (IsPrt(pIn->swizzleMode))
4977                 {
4978                     blockOffset &= ((1 << log2BlkSize) - 1);
4979                 }
4980 
4981                 // Preserve offset inside pipe interleave
4982                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4983                 blockOffset >>= m_pipeInterleaveLog2;
4984 
4985                 // Pipe/Se xor bits
4986                 pipeBits = GetPipeXorBits(log2BlkSize);
4987                 // Pipe xor
4988                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4989                 blockOffset >>= pipeBits;
4990 
4991                 // Bank xor bits
4992                 bankBits = GetBankXorBits(log2BlkSize);
4993                 // Bank Xor
4994                 bankXor = FoldXor2d(blockOffset, bankBits);
4995                 blockOffset >>= bankBits;
4996 
4997                 // Put all the part back together
4998                 blockOffset <<= bankBits;
4999                 blockOffset |= bankXor;
5000                 blockOffset <<= pipeBits;
5001                 blockOffset |= pipeXor;
5002                 blockOffset <<= m_pipeInterleaveLog2;
5003                 blockOffset |= interleaveOffset;
5004             }
5005 
5006             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5007             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5008 
5009             blockOffset |= mipTailBytesOffset;
5010 
5011             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
5012             {
5013                 // Apply slice xor if not MSAA/PRT
5014                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5015                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5016                                 (m_pipeInterleaveLog2 + pipeBits));
5017             }
5018 
5019             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5020                                                   bankBits, pipeBits, &blockOffset);
5021 
5022             blockOffset %= (1 << log2BlkSize);
5023 
5024             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5025             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5026             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5027             UINT_64 macroBlockIndex =
5028                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5029                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5030                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5031 
5032             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5033         }
5034         else
5035         {
5036             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5037 
5038             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5039 
5040             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5041                                               (pIn->y / microBlockDim.h),
5042                                               (pIn->slice / microBlockDim.d),
5043                                               8);
5044 
5045             blockOffset <<= 10;
5046             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5047 
5048             if (IsXor(pIn->swizzleMode))
5049             {
5050                 // Mask off bits above Macro block bits to keep page synonyms working for prt
5051                 if (IsPrt(pIn->swizzleMode))
5052                 {
5053                     blockOffset &= ((1 << log2BlkSize) - 1);
5054                 }
5055 
5056                 // Preserve offset inside pipe interleave
5057                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5058                 blockOffset >>= m_pipeInterleaveLog2;
5059 
5060                 // Pipe/Se xor bits
5061                 pipeBits = GetPipeXorBits(log2BlkSize);
5062                 // Pipe xor
5063                 pipeXor = FoldXor3d(blockOffset, pipeBits);
5064                 blockOffset >>= pipeBits;
5065 
5066                 // Bank xor bits
5067                 bankBits = GetBankXorBits(log2BlkSize);
5068                 // Bank Xor
5069                 bankXor = FoldXor3d(blockOffset, bankBits);
5070                 blockOffset >>= bankBits;
5071 
5072                 // Put all the part back together
5073                 blockOffset <<= bankBits;
5074                 blockOffset |= bankXor;
5075                 blockOffset <<= pipeBits;
5076                 blockOffset |= pipeXor;
5077                 blockOffset <<= m_pipeInterleaveLog2;
5078                 blockOffset |= interleaveOffset;
5079             }
5080 
5081             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5082             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5083             blockOffset |= mipTailBytesOffset;
5084 
5085             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5086                                                   bankBits, pipeBits, &blockOffset);
5087 
5088             blockOffset %= (1 << log2BlkSize);
5089 
5090             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
5091             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5092             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5093 
5094             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5095             UINT_32 sliceSizeInBlock =
5096                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5097             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5098 
5099             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5100         }
5101     }
5102     else
5103     {
5104         returnCode = ADDR_INVALIDPARAMS;
5105     }
5106 
5107     return returnCode;
5108 }
5109 
5110 /**
5111 ************************************************************************************************************************
5112 *   Gfx9Lib::ComputeSurfaceInfoLinear
5113 *
5114 *   @brief
5115 *       Internal function to calculate padding for linear swizzle 2D/3D surface
5116 *
5117 *   @return
5118 *       N/A
5119 ************************************************************************************************************************
5120 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5121 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5122     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
5123     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
5124     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
5125     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
5126     ) const
5127 {
5128     ADDR_E_RETURNCODE returnCode = ADDR_OK;
5129 
5130     UINT_32 elementBytes        = pIn->bpp >> 3;
5131     UINT_32 pitchAlignInElement = 0;
5132 
5133     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5134     {
5135         ADDR_ASSERT(pIn->numMipLevels <= 1);
5136         ADDR_ASSERT(pIn->numSlices <= 1);
5137         pitchAlignInElement = 1;
5138     }
5139     else
5140     {
5141         pitchAlignInElement = (256 / elementBytes);
5142     }
5143 
5144     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
5145     UINT_32 slice0PaddedHeight = pIn->height;
5146 
5147     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5148                                             &mipChainWidth, &slice0PaddedHeight);
5149 
5150     if (returnCode == ADDR_OK)
5151     {
5152         UINT_32 mipChainHeight = 0;
5153         UINT_32 mipHeight      = pIn->height;
5154         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5155 
5156         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5157         {
5158             if (pMipInfo != NULL)
5159             {
5160                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5161                 pMipInfo[i].pitch  = mipChainWidth;
5162                 pMipInfo[i].height = mipHeight;
5163                 pMipInfo[i].depth  = mipDepth;
5164             }
5165 
5166             mipChainHeight += mipHeight;
5167             mipHeight = RoundHalf(mipHeight);
5168             mipHeight = Max(mipHeight, 1u);
5169         }
5170 
5171         *pMipmap0PaddedWidth = mipChainWidth;
5172         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5173     }
5174 
5175     return returnCode;
5176 }
5177 
5178 /**
5179 ************************************************************************************************************************
5180 *   Gfx9Lib::ComputeThinBlockDimension
5181 *
5182 *   @brief
5183 *       Internal function to get thin block width/height/depth in element from surface input params.
5184 *
5185 *   @return
5186 *       N/A
5187 ************************************************************************************************************************
5188 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5189 VOID Gfx9Lib::ComputeThinBlockDimension(
5190     UINT_32*         pWidth,
5191     UINT_32*         pHeight,
5192     UINT_32*         pDepth,
5193     UINT_32          bpp,
5194     UINT_32          numSamples,
5195     AddrResourceType resourceType,
5196     AddrSwizzleMode  swizzleMode) const
5197 {
5198     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5199 
5200     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5201     const UINT_32 eleBytes                 = bpp >> 3;
5202     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5203     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5204     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5205     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5206 
5207     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5208 
5209     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5210     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5211     *pDepth  = 1;
5212 
5213     if (numSamples > 1)
5214     {
5215         const UINT_32 log2sample = Log2(numSamples);
5216         const UINT_32 q          = log2sample >> 1;
5217         const UINT_32 r          = log2sample & 1;
5218 
5219         if (log2BlkSize & 1)
5220         {
5221             *pWidth  >>= q;
5222             *pHeight >>= (q + r);
5223         }
5224         else
5225         {
5226             *pWidth  >>= (q + r);
5227             *pHeight >>= q;
5228         }
5229     }
5230 }
5231 
5232 } // V2
5233 } // Addr
5234