1 /*
2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ****************************************************************************************************
29 * @file  addrelemlib.cpp
30 * @brief Contains the class implementation for element/pixel related functions.
31 ****************************************************************************************************
32 */
33 
34 #include "addrelemlib.h"
35 #include "addrlib.h"
36 
37 namespace Addr
38 {
39 
40 /**
41 ****************************************************************************************************
42 *   ElemLib::ElemLib
43 *
44 *   @brief
45 *       constructor
46 *
47 *   @return
48 *       N/A
49 ****************************************************************************************************
50 */
ElemLib(Lib * pAddrLib)51 ElemLib::ElemLib(
52     Lib* pAddrLib)  ///< [in] Parent addrlib instance pointer
53     :
54     Object(pAddrLib->GetClient()),
55     m_pAddrLib(pAddrLib)
56 {
57     switch (m_pAddrLib->GetChipFamily())
58     {
59         case ADDR_CHIP_FAMILY_R6XX:
60             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
61             m_fp16ExportNorm = 0;
62             break;
63         case ADDR_CHIP_FAMILY_R7XX:
64             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
65             m_fp16ExportNorm = 1;
66             break;
67         case ADDR_CHIP_FAMILY_R8XX:
68         case ADDR_CHIP_FAMILY_NI: // Same as 8xx
69             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
70             m_fp16ExportNorm = 1;
71             break;
72         default:
73             m_fp16ExportNorm = 1;
74             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
75             break;
76     }
77 
78     m_configFlags.value = 0;
79 }
80 
81 /**
82 ****************************************************************************************************
83 *   ElemLib::~ElemLib
84 *
85 *   @brief
86 *       destructor
87 *
88 *   @return
89 *       N/A
90 ****************************************************************************************************
91 */
~ElemLib()92 ElemLib::~ElemLib()
93 {
94 }
95 
96 /**
97 ****************************************************************************************************
98 *   ElemLib::Create
99 *
100 *   @brief
*       Creates and initializes an ElemLib object.
102 *
103 *   @return
*       Returns a pointer to the new ElemLib if successful, NULL otherwise.
105 ****************************************************************************************************
106 */
Create(const Lib * pAddrLib)107 ElemLib* ElemLib::Create(
108     const Lib* pAddrLib)   ///< [in] Pointer of parent AddrLib instance
109 {
110     ElemLib* pElemLib = NULL;
111 
112     if (pAddrLib)
113     {
114         VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient());
115         if (pObj)
116         {
117             pElemLib = new(pObj) ElemLib(const_cast<Lib* const>(pAddrLib));
118         }
119     }
120 
121     return pElemLib;
122 }
123 
124 /**************************************************************************************************
125 *   ElemLib::Flt32sToInt32s
126 *
127 *   @brief
128 *       Convert a ADDR_FLT_32 value to Int32 value
129 *
130 *   @return
131 *       N/A
132 ****************************************************************************************************
133 */
Flt32sToInt32s(ADDR_FLT_32 value,UINT_32 bits,NumberType numberType,UINT_32 * pResult)134 VOID ElemLib::Flt32sToInt32s(
135     ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
136     UINT_32         bits,       ///< [in] nubmer of bits in value
137     NumberType      numberType, ///< [in] the type of number
138     UINT_32*        pResult)    ///< [out] Int32 value
139 {
140     UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
141     UINT_32 uscale;
142     UINT_32 sign;
143 
144     //convert each component to an INT_32
145     switch ( numberType )
146     {
147         case ADDR_NO_NUMBER:    //fall through
148         case ADDR_ZERO:         //fall through
149         case ADDR_ONE:          //fall through
150         case ADDR_EPSILON:      //fall through
151             return;        // these are zero-bit components, so don't set result
152 
153         case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
154             uscale = (1<<bits) - 1;
155             if (bits == 32)               // special case unsigned 32-bit int
156             {
157                 *pResult = value.i;
158             }
159             else
160             {
161                 if ((value.i < 0) || (value.u > uscale))
162                 {
163                     *pResult = uscale;
164                 }
165                 else
166                 {
167                     *pResult = value.i;
168                 }
169                 return;
170             }
171 
172         // The algorithm used in the DB and TX differs at one value for 24-bit unorms
173         case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
174             if ((bits==24) && (value.i == 0x33000000))
175             {
176                 *pResult = 1;
177                 return;
178             }              // Else treat like ADDR_UNORM_R6XX
179 
180         case ADDR_UNORM_R6XX:            // unsigned repeating fraction
181             if (value.f <= 0)
182             {
183                 *pResult = 0;            // first clamp to [0..1]
184             }
185             else
186             {
187                 if (value.f >= 1)
188                 {
189                      *pResult = (1<<bits) - 1;
190                 }
191                 else
192                 {
193                     if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
194                     {
195                         *pResult = 0;                        // NaN, so force to 0
196                     }
197 
198                     #if 0 // floating point version for documentation
199                     else
200                     {
201                         FLOAT f = value.f * ((1<<bits) - 1);
202                         *pResult = static_cast<INT_32>(f + (round/256.0f));
203                     }
204                     #endif
205                     else
206                     {
207                         ADDR_FLT_32 scaled;
208                         ADDR_FLT_32 shifted;
209                         UINT_64 truncated, rounded;
210                         UINT_32 altShift;
211                         UINT_32 mask = (1 << bits) - 1;
212                         UINT_32 half = 1 << (bits - 1);
213                         UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
214                         UINT_64 temp = mant24 - (mant24>>bits) -
215                             static_cast<INT_32>((mant24 & mask) > half);
216                         UINT_32 exp8 = value.i >> 23;
217                         UINT_32 shift = 126 - exp8 + 24 - bits;
218                         UINT_64 final;
219 
220                         if (shift >= 32) // This is zero, even with maximum dither add
221                         {
222                             final = 0;
223                         }
224                         else
225                         {
226                             final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8);
227                         }
228                         //ADDR_EXIT( *pResult == final,
229                         //    ("Float %x converted to %d-bit Unorm %x != bitwise %x",
230                         //     value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
231                         if (final > mask)
232                         {
233                             final = mask;
234                         }
235 
236                         scaled.f  = value.f * ((1<<bits) - 1);
237                         shifted.f = (scaled.f * 256);
238                         truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
239                         altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
240                         truncated = (altShift > 60) ? 0 : truncated >> altShift;
241                         rounded   = static_cast<INT_32>((round + truncated) >> 8);
242                         //if (rounded > ((1<<bits) - 1))
243                         //    rounded = ((1<<bits) - 1);
244                         *pResult = static_cast<INT_32>(rounded); //(INT_32)final;
245                     }
246                 }
247             }
248 
249             return;
250 
251         case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
252             *pResult = value.i;
253             return;
254 
255         // @@ FIX ROUNDING in this code, fix the denorm case
256         case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
257             sign = (value.i >> 31) & 1;
258             if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
259             {
260                 if ((value.i&0x007FFFFF) != 0)             // then if NaN
261                 {
262                     *pResult = 0;                       // return 0
263                 }
264                 else
265                 {
266                     *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
267                 }
268                 return;
269             }
270             if (value.f <= 0)
271             {
272                 *pResult = 0;
273             }
274             else
275             {
276                 if (value.f>=1)
277                 {
278                     *pResult = 0xF << (bits-4);
279                 }
280                 else
281                 {
282                     if ((value.i>>23) > 112 )
283                     {
284                         // 24-bit float: normalized
285                         // value.i += 1 << (22-bits+4);
286                         // round the IEEE mantissa to mantissa size
287                         // @@ NOTE: add code to support rounding
288                         value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
289                         *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
290                     }
291                     else
292                     {
293                         // 24-bit float: denormalized
294                         value.f = value.f / (1<<28) / (1<<28);
295                         value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
296                         // value.i += 1 << (22-bits+4);
297                         // round the IEEE mantissa to mantissa size
298                         // @@ NOTE: add code to support rounding
299                         *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
300                     }
301                 }
302             }
303 
304             return;
305 
306         default:                    // invalid number mode
307             //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
308             break;
309 
310     }
311 }
312 
313 /**
314 ****************************************************************************************************
315 *   ElemLib::Int32sToPixel
316 *
317 *   @brief
318 *       Pack 32-bit integer values into an uncompressed pixel,
319 *       in the proper order
320 *
321 *   @return
322 *       N/A
323 *
324 *   @note
*       This entry point packs numComps 32-bit integer values into
*       an uncompressed pixel. The pixel values are specified in
*       standard order, e.g. depth/stencil. This routine must not be
*       called on a compressed pixel (it does not currently assert this).
329 ****************************************************************************************************
330 */
Int32sToPixel(UINT_32 numComps,UINT_32 * pComps,UINT_32 * pCompBits,UINT_32 * pCompStart,ComponentFlags properties,UINT_32 resultBits,UINT_8 * pPixel)331 VOID ElemLib::Int32sToPixel(
332     UINT_32              numComps,      ///< [in] number of components
333     UINT_32*             pComps,        ///< [in] compnents
334     UINT_32*             pCompBits,     ///< [in] total bits in each component
335     UINT_32*             pCompStart,    ///< [in] the first bit position of each component
336     ComponentFlags       properties,    ///< [in] properties about byteAligned, exportNorm
337     UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
338     UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
339 {
340     UINT_32 i;
341     UINT_32 j;
342     UINT_32 start;
343     UINT_32 size;
344     UINT_32 byte;
345     UINT_32 value = 0;
346     UINT_32 compMask;
347     UINT_32 elemMask=0;
348     UINT_32 elementXor = 0;  // address xor when reading bytes from elements
349 
350 
351     // @@ NOTE: assert if called on a compressed format!
352 
353     if (properties.byteAligned)    // Components are all byte-sized
354     {
355         for (i = 0; i < numComps; i++)        // Then for each component
356         {
357             // Copy the bytes of the component into the element
358             start = pCompStart[i] / 8;
359             size  = pCompBits[i]  / 8;
360             for (j = 0; j < size; j++)
361             {
362                 pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
363             }
364         }
365     }
366     else                        // Element is 32-bits or less, components are bit fields
367     {
368         // First, extract each component in turn and combine it into a 32-bit value
369         for (i = 0; i < numComps; i++)
370         {
371             compMask = (1 << pCompBits[i]) - 1;
372             elemMask |= compMask << pCompStart[i];
373             value |= (pComps[i] & compMask) << pCompStart[i];
374         }
375 
376         // Mext, copy the masked value into the element
377         size = (resultBits + 7) / 8;
378         for (i = 0; i < size; i++)
379         {
380             byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
381             pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
382         }
383     }
384 }
385 
386 /**
387 ****************************************************************************************************
*   ElemLib::Flt32ToDepthPixel
389 *
390 *   @brief
391 *       Convert a FLT_32 value to a depth/stencil pixel value
392 *
393 *   @return
394 *       N/A
395 ****************************************************************************************************
396 */
Flt32ToDepthPixel(AddrDepthFormat format,const ADDR_FLT_32 comps[2],UINT_8 * pPixel) const397 VOID ElemLib::Flt32ToDepthPixel(
398     AddrDepthFormat     format,     ///< [in] Depth format
399     const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
400     UINT_8*             pPixel      ///< [out] depth pixel value
401     ) const
402 {
403     UINT_32 i;
404     UINT_32 values[2];
405     ComponentFlags properties;  // byteAligned, exportNorm
406     UINT_32 resultBits = 0;     // result bits: total bits per pixel after decompression
407 
408     PixelFormatInfo fmt;
409 
410     // get type for each component
411     PixGetDepthCompInfo(format, &fmt);
412 
413     //initialize properties
414     properties.byteAligned = TRUE;
415     properties.exportNorm  = TRUE;
416     properties.floatComp   = FALSE;
417 
418     //set properties and result bits
419     for (i = 0; i < 2; i++)
420     {
421         if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7))
422         {
423             properties.byteAligned = FALSE;
424         }
425 
426         if (resultBits < fmt.compStart[i] + fmt.compBit[i])
427         {
428             resultBits = fmt.compStart[i] + fmt.compBit[i];
429         }
430 
431         // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
432         if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED)
433         {
434             properties.exportNorm = FALSE;
435         }
436 
437         // Mark if there are any floating point components
438         if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) )
439         {
440             properties.floatComp = TRUE;
441         }
442     }
443 
444     // Convert the two input floats to integer values
445     for (i = 0; i < 2; i++)
446     {
447         Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]);
448     }
449 
450     // Then pack the two integer components, in the proper order
451     Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel );
452 
453 }
454 
455 /**
456 ****************************************************************************************************
*   ElemLib::Flt32ToColorPixel
458 *
459 *   @brief
460 *       Convert a FLT_32 value to a red/green/blue/alpha pixel value
461 *
462 *   @return
463 *       N/A
464 ****************************************************************************************************
465 */
Flt32ToColorPixel(AddrColorFormat format,AddrSurfaceNumber surfNum,AddrSurfaceSwap surfSwap,const ADDR_FLT_32 comps[4],UINT_8 * pPixel) const466 VOID ElemLib::Flt32ToColorPixel(
467     AddrColorFormat     format,     ///< [in] Color format
468     AddrSurfaceNumber   surfNum,    ///< [in] Surface number
469     AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
470     const ADDR_FLT_32   comps[4],   ///< [in] four components of color
471     UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
472     ) const
473 {
474     PixelFormatInfo pixelInfo;
475 
476     UINT_32 i;
477     UINT_32 values[4];
478     ComponentFlags properties;    // byteAligned, exportNorm
479     UINT_32 resultBits = 0;       // result bits: total bits per pixel after decompression
480 
481     memset(&pixelInfo, 0, sizeof(PixelFormatInfo));
482 
483     PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
484 
485     //initialize properties
486     properties.byteAligned = TRUE;
487     properties.exportNorm  = TRUE;
488     properties.floatComp   = FALSE;
489 
490     //set properties and result bits
491     for (i = 0; i < 4; i++)
492     {
493         if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
494         {
495             properties.byteAligned = FALSE;
496         }
497 
498         if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
499         {
500             resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
501         }
502 
503         if (m_fp16ExportNorm)
504         {
505             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
506             // or if it's not FP and <=16 bits
507             if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
508                 && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
509             {
510                 properties.exportNorm = FALSE;
511             }
512         }
513         else
514         {
515             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
516             if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
517             {
518                 properties.exportNorm = FALSE;
519             }
520         }
521 
522         // Mark if there are any floating point components
523         if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
524              (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
525         {
526             properties.floatComp = TRUE;
527         }
528     }
529 
530     // Convert the four input floats to integer values
531     for (i = 0; i < 4; i++)
532     {
533         Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
534     }
535 
536     // Then pack the four integer components, in the proper order
537     Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
538                   properties, resultBits, pPixel);
539 }
540 
541 /**
542 ****************************************************************************************************
543 *   ElemLib::GetCompType
544 *
545 *   @brief
546 *       Fill per component info
547 *
548 *   @return
549 *       N/A
550 *
551 ****************************************************************************************************
552 */
GetCompType(AddrColorFormat format,AddrSurfaceNumber numType,PixelFormatInfo * pInfo)553 VOID ElemLib::GetCompType(
554     AddrColorFormat   format,     ///< [in] surface format
555     AddrSurfaceNumber numType,  ///< [in] number type
556     PixelFormatInfo*  pInfo)       ///< [in][out] per component info out
557 {
558     BOOL_32 handled = FALSE;
559 
560     // Floating point formats override the number format
561     switch (format)
562     {
563         case ADDR_COLOR_16_FLOAT:            // fall through for all pure floating point format
564         case ADDR_COLOR_16_16_FLOAT:
565         case ADDR_COLOR_16_16_16_16_FLOAT:
566         case ADDR_COLOR_32_FLOAT:
567         case ADDR_COLOR_32_32_FLOAT:
568         case ADDR_COLOR_32_32_32_32_FLOAT:
569         case ADDR_COLOR_10_11_11_FLOAT:
570         case ADDR_COLOR_11_11_10_FLOAT:
571             numType = ADDR_NUMBER_FLOAT;
572             break;
573             // Special handling for the depth formats
574         case ADDR_COLOR_8_24:                // fall through for these 2 similar format
575         case ADDR_COLOR_24_8:
576             for (UINT_32 c = 0; c < 4; c++)
577             {
578                 if (pInfo->compBit[c] == 8)
579                 {
580                     pInfo->numType[c] = ADDR_UINT_BITS;
581                 }
582                 else if (pInfo->compBit[c]  == 24)
583                 {
584                     pInfo->numType[c] = ADDR_UNORM_R6XX;
585                 }
586                 else
587                 {
588                     pInfo->numType[c] = ADDR_NO_NUMBER;
589                 }
590             }
591             handled = TRUE;
592             break;
593         case ADDR_COLOR_8_24_FLOAT:          // fall through for these 3 similar format
594         case ADDR_COLOR_24_8_FLOAT:
595         case ADDR_COLOR_X24_8_32_FLOAT:
596             for (UINT_32 c = 0; c < 4; c++)
597             {
598                 if (pInfo->compBit[c] == 8)
599                 {
600                     pInfo->numType[c] = ADDR_UINT_BITS;
601                 }
602                 else if (pInfo->compBit[c] == 24)
603                 {
604                     pInfo->numType[c] = ADDR_U4FLOATC;
605                 }
606                 else if (pInfo->compBit[c] == 32)
607                 {
608                     pInfo->numType[c] = ADDR_S8FLOAT32;
609                 }
610                 else
611                 {
612                     pInfo->numType[c] = ADDR_NO_NUMBER;
613                 }
614             }
615             handled = TRUE;
616             break;
617         default:
618             break;
619     }
620 
621     if (!handled)
622     {
623         for (UINT_32 c = 0; c < 4; c++)
624         {
625             // Assign a number type for each component
626             AddrSurfaceNumber cnum;
627 
628             // First handle default component values
629             if (pInfo->compBit[c] == 0)
630             {
631                 if (c < 3)
632                 {
633                     pInfo->numType[c] = ADDR_ZERO;      // Default is zero for RGB
634                 }
635                 else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
636                 {
637                     pInfo->numType[c] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
638                 }
639                 else
640                 {
641                     pInfo->numType[c] = ADDR_ONE;       // Alpha normal default is float 1.0
642                 }
643                 continue;
644             }
645             // Now handle small components
646             else if (pInfo->compBit[c] == 1)
647             {
648                 if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
649                 {
650                     cnum = ADDR_NUMBER_UINT;
651                 }
652                 else
653                 {
654                     cnum = ADDR_NUMBER_UNORM;
655                 }
656             }
657             else
658             {
659                 cnum = numType;
660             }
661 
662             // If no default, set the number type fom num, compbits, and architecture
663             switch (cnum)
664             {
665                 case ADDR_NUMBER_SRGB:
666                     pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX;
667                     break;
668                 case ADDR_NUMBER_UNORM:
669                     pInfo->numType[c] = ADDR_UNORM_R6XX;
670                     break;
671                 case ADDR_NUMBER_SNORM:
672                     pInfo->numType[c] = ADDR_SNORM_R6XX;
673                     break;
674                 case ADDR_NUMBER_USCALED:
675                     pInfo->numType[c] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
676                     break;
677                 case ADDR_NUMBER_SSCALED:
678                     pInfo->numType[c] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
679                     break;
680                 case ADDR_NUMBER_FLOAT:
681                     if (pInfo->compBit[c] == 32)
682                     {
683                         pInfo->numType[c] = ADDR_S8FLOAT32;
684                     }
685                     else if (pInfo->compBit[c] == 16)
686                     {
687                         pInfo->numType[c] = ADDR_S5FLOAT;
688                     }
689                     else if (pInfo->compBit[c] >= 10)
690                     {
691                         pInfo->numType[c] = ADDR_U5FLOAT;
692                     }
693                     else
694                     {
695                         ADDR_ASSERT_ALWAYS();
696                     }
697                     break;
698                 case ADDR_NUMBER_SINT:
699                     pInfo->numType[c] = ADDR_SINT_BITS;
700                     break;
701                 case ADDR_NUMBER_UINT:
702                     pInfo->numType[c] = ADDR_UINT_BITS;
703                     break;
704 
705                 default:
706                     ADDR_ASSERT(!"Invalid number type");
707                     pInfo->numType[c] = ADDR_NO_NUMBER;
708                     break;
709              }
710         }
711     }
712 }
713 
714 /**
715 ****************************************************************************************************
716 *   ElemLib::GetCompSwap
717 *
718 *   @brief
719 *       Get components swapped for color surface
720 *
721 *   @return
722 *       N/A
723 *
724 ****************************************************************************************************
725 */
GetCompSwap(AddrSurfaceSwap swap,PixelFormatInfo * pInfo)726 VOID ElemLib::GetCompSwap(
727     AddrSurfaceSwap  swap,   ///< [in] swap mode
728     PixelFormatInfo* pInfo)  ///< [in,out] output per component info
729 {
730     switch (pInfo->comps)
731     {
732         case 4:
733             switch (swap)
734             {
735                 case ADDR_SWAP_ALT:
736                     SwapComps( 0, 2, pInfo );
737                     break;    // BGRA
738                 case ADDR_SWAP_STD_REV:
739                     SwapComps( 0, 3, pInfo );
740                     SwapComps( 1, 2, pInfo );
741                     break;    // ABGR
742                 case ADDR_SWAP_ALT_REV:
743                     SwapComps( 0, 3, pInfo );
744                     SwapComps( 0, 2, pInfo );
745                     SwapComps( 0, 1, pInfo );
746                     break;    // ARGB
747                 default:
748                     break;
749             }
750             break;
751         case 3:
752             switch (swap)
753             {
754                 case ADDR_SWAP_ALT_REV:
755                     SwapComps( 0, 3, pInfo );
756                     SwapComps( 0, 2, pInfo );
757                     break;    // AGR
758                 case ADDR_SWAP_STD_REV:
759                     SwapComps( 0, 2, pInfo );
760                     break;    // BGR
761                 case ADDR_SWAP_ALT:
762                     SwapComps( 2, 3, pInfo );
763                     break;    // RGA
764                 default:
765                     break;    // RGB
766             }
767             break;
768         case 2:
769             switch (swap)
770             {
771                 case ADDR_SWAP_ALT_REV:
772                     SwapComps( 0, 1, pInfo );
773                     SwapComps( 1, 3, pInfo );
774                     break;    // AR
775                 case ADDR_SWAP_STD_REV:
776                     SwapComps( 0, 1, pInfo );
777                     break;    // GR
778                 case ADDR_SWAP_ALT:
779                     SwapComps( 1, 3, pInfo );
780                     break;    // RA
781                 default:
782                     break;    // RG
783             }
784             break;
785         case 1:
786             switch (swap)
787             {
788                 case ADDR_SWAP_ALT_REV:
789                     SwapComps( 0, 3, pInfo );
790                     break;    // A
791                 case ADDR_SWAP_STD_REV:
792                     SwapComps( 0, 2, pInfo );
793                     break;    // B
794                 case ADDR_SWAP_ALT:
795                     SwapComps( 0, 1, pInfo );
796                     break;    // G
797                 default:
798                     break;    // R
799             }
800             break;
801     }
802 }
803 
804 /**
805 ****************************************************************************************************
*   ElemLib::SwapComps
*
*   @brief
*       Exchange the start bit and bit count of two components
810 *
811 *   @return
812 *       N/A
813 *
814 ****************************************************************************************************
815 */
SwapComps(UINT_32 c0,UINT_32 c1,PixelFormatInfo * pInfo)816 VOID ElemLib::SwapComps(
817     UINT_32          c0,     ///< [in] component index 0
818     UINT_32          c1,     ///< [in] component index 1
819     PixelFormatInfo* pInfo)  ///< [in,out] output per component info
820 {
821     UINT_32 start;
822     UINT_32 bits;
823 
824     start = pInfo->compStart[c0];
825     pInfo->compStart[c0] = pInfo->compStart[c1];
826     pInfo->compStart[c1] = start;
827 
828     bits  = pInfo->compBit[c0];
829     pInfo->compBit[c0] = pInfo->compBit[c1];
830     pInfo->compBit[c1] = bits;
831 }
832 
833 /**
834 ****************************************************************************************************
835 *   ElemLib::PixGetColorCompInfo
836 *
837 *   @brief
838 *       Get per component info for color surface
839 *
840 *   @return
841 *       N/A
842 *
843 ****************************************************************************************************
844 */
PixGetColorCompInfo(AddrColorFormat format,AddrSurfaceNumber number,AddrSurfaceSwap swap,PixelFormatInfo * pInfo) const845 VOID ElemLib::PixGetColorCompInfo(
846     AddrColorFormat   format, ///< [in] surface format, read from register
847     AddrSurfaceNumber number, ///< [in] pixel number type
848     AddrSurfaceSwap   swap,   ///< [in] component swap mode
849     PixelFormatInfo*  pInfo   ///< [out] output per component info
850     ) const
851 {
852     // 1. Get componet bits
853     switch (format)
854     {
855         case ADDR_COLOR_8:
856             GetCompBits(8, 0, 0, 0, pInfo);
857             break;
858         case ADDR_COLOR_1_5_5_5:
859             GetCompBits(5, 5, 5, 1, pInfo);
860             break;
861         case ADDR_COLOR_5_6_5:
862             GetCompBits(8, 6, 5, 0, pInfo);
863             break;
864         case ADDR_COLOR_6_5_5:
865             GetCompBits(5, 5, 6, 0, pInfo);
866             break;
867         case ADDR_COLOR_8_8:
868             GetCompBits(8, 8, 0, 0, pInfo);
869             break;
870         case ADDR_COLOR_4_4_4_4:
871             GetCompBits(4, 4, 4, 4, pInfo);
872             break;
873         case ADDR_COLOR_16:
874             GetCompBits(16, 0, 0, 0, pInfo);
875             break;
876         case ADDR_COLOR_8_8_8_8:
877             GetCompBits(8, 8, 8, 8, pInfo);
878             break;
879         case ADDR_COLOR_2_10_10_10:
880             GetCompBits(10, 10, 10, 2, pInfo);
881             break;
882         case ADDR_COLOR_10_11_11:
883             GetCompBits(11, 11, 10, 0, pInfo);
884             break;
885         case ADDR_COLOR_11_11_10:
886             GetCompBits(10, 11, 11, 0, pInfo);
887             break;
888         case ADDR_COLOR_16_16:
889             GetCompBits(16, 16, 0, 0, pInfo);
890             break;
891         case ADDR_COLOR_16_16_16_16:
892             GetCompBits(16, 16, 16, 16, pInfo);
893             break;
894         case ADDR_COLOR_16_FLOAT:
895             GetCompBits(16, 0, 0, 0, pInfo);
896             break;
897         case ADDR_COLOR_16_16_FLOAT:
898             GetCompBits(16, 16, 0, 0, pInfo);
899             break;
900         case ADDR_COLOR_32_FLOAT:
901             GetCompBits(32, 0, 0, 0, pInfo);
902             break;
903         case ADDR_COLOR_32_32_FLOAT:
904             GetCompBits(32, 32, 0, 0, pInfo);
905             break;
906         case ADDR_COLOR_16_16_16_16_FLOAT:
907             GetCompBits(16, 16, 16, 16, pInfo);
908             break;
909         case ADDR_COLOR_32_32_32_32_FLOAT:
910             GetCompBits(32, 32, 32, 32, pInfo);
911             break;
912 
913         case ADDR_COLOR_32:
914             GetCompBits(32, 0, 0, 0, pInfo);
915             break;
916         case ADDR_COLOR_32_32:
917             GetCompBits(32, 32, 0, 0, pInfo);
918             break;
919         case ADDR_COLOR_32_32_32_32:
920             GetCompBits(32, 32, 32, 32, pInfo);
921             break;
922         case ADDR_COLOR_10_10_10_2:
923             GetCompBits(2, 10, 10, 10, pInfo);
924             break;
925         case ADDR_COLOR_10_11_11_FLOAT:
926             GetCompBits(11, 11, 10, 0, pInfo);
927             break;
928         case ADDR_COLOR_11_11_10_FLOAT:
929             GetCompBits(10, 11, 11, 0, pInfo);
930             break;
931         case ADDR_COLOR_5_5_5_1:
932             GetCompBits(1, 5, 5, 5, pInfo);
933             break;
934         case ADDR_COLOR_3_3_2:
935             GetCompBits(2, 3, 3, 0, pInfo);
936             break;
937         case ADDR_COLOR_4_4:
938             GetCompBits(4, 4, 0, 0, pInfo);
939             break;
940         case ADDR_COLOR_8_24:
941         case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
942             GetCompBits(24, 8, 0, 0, pInfo);
943             break;
944         case ADDR_COLOR_24_8:
945         case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
946             GetCompBits(8, 24, 0, 0, pInfo);
947             break;
948         case ADDR_COLOR_X24_8_32_FLOAT:
949             GetCompBits(32, 8, 0, 0, pInfo);
950             break;
951 
952         case ADDR_COLOR_INVALID:
953             GetCompBits(0, 0, 0, 0, pInfo);
954             break;
955         default:
956             ADDR_ASSERT(0);
957             GetCompBits(0, 0, 0, 0, pInfo);
958             break;
959     }
960 
961     // 2. Get component number type
962 
963     GetCompType(format, number, pInfo);
964 
965     // 3. Swap components if needed
966 
967     GetCompSwap(swap, pInfo);
968 }
969 
970 /**
971 ****************************************************************************************************
972 *   ElemLib::PixGetDepthCompInfo
973 *
974 *   @brief
975 *       Get per component info for depth surface
976 *
977 *   @return
978 *       N/A
979 *
980 ****************************************************************************************************
981 */
PixGetDepthCompInfo(AddrDepthFormat format,PixelFormatInfo * pInfo) const982 VOID ElemLib::PixGetDepthCompInfo(
983     AddrDepthFormat  format,     ///< [in] surface format, read from register
984     PixelFormatInfo* pInfo       ///< [out] output per component bits and type
985     ) const
986 {
987     if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
988     {
989         if (format == ADDR_DEPTH_8_24_FLOAT)
990         {
991             format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
992         }
993 
994         if (format == ADDR_DEPTH_X8_24_FLOAT)
995         {
996             format = ADDR_DEPTH_32_FLOAT;
997         }
998     }
999 
1000     switch (format)
1001     {
1002         case ADDR_DEPTH_16:
1003             GetCompBits(16, 0, 0, 0, pInfo);
1004             break;
1005         case ADDR_DEPTH_8_24:
1006         case ADDR_DEPTH_8_24_FLOAT:      // similar format, fall through
1007             GetCompBits(24, 8, 0, 0, pInfo);
1008             break;
1009         case ADDR_DEPTH_X8_24:
1010         case ADDR_DEPTH_X8_24_FLOAT:     // similar format, fall through
1011             GetCompBits(24, 0, 0, 0, pInfo);
1012             break;
1013         case ADDR_DEPTH_32_FLOAT:
1014             GetCompBits(32, 0, 0, 0, pInfo);
1015             break;
1016         case ADDR_DEPTH_X24_8_32_FLOAT:
1017             GetCompBits(32, 8, 0, 0, pInfo);
1018             break;
1019         case ADDR_DEPTH_INVALID:
1020             GetCompBits(0, 0, 0, 0, pInfo);
1021             break;
1022         default:
1023             ADDR_ASSERT(0);
1024             GetCompBits(0, 0, 0, 0, pInfo);
1025             break;
1026     }
1027 
1028     switch (format)
1029     {
1030         case ADDR_DEPTH_16:
1031             pInfo->numType [0] = ADDR_UNORM_R6XX;
1032             pInfo->numType [1] = ADDR_ZERO;
1033             break;
1034         case ADDR_DEPTH_8_24:
1035             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1036             pInfo->numType [1] = ADDR_UINT_BITS;
1037             break;
1038         case ADDR_DEPTH_8_24_FLOAT:
1039             pInfo->numType [0] = ADDR_U4FLOATC;
1040             pInfo->numType [1] = ADDR_UINT_BITS;
1041             break;
1042         case ADDR_DEPTH_X8_24:
1043             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1044             pInfo->numType [1] = ADDR_ZERO;
1045             break;
1046         case ADDR_DEPTH_X8_24_FLOAT:
1047             pInfo->numType [0] = ADDR_U4FLOATC;
1048             pInfo->numType [1] = ADDR_ZERO;
1049             break;
1050         case ADDR_DEPTH_32_FLOAT:
1051             pInfo->numType [0] = ADDR_S8FLOAT32;
1052             pInfo->numType [1] = ADDR_ZERO;
1053             break;
1054         case ADDR_DEPTH_X24_8_32_FLOAT:
1055             pInfo->numType [0] = ADDR_S8FLOAT32;
1056             pInfo->numType [1] = ADDR_UINT_BITS;
1057             break;
1058         default:
1059             pInfo->numType [0] = ADDR_NO_NUMBER;
1060             pInfo->numType [1] = ADDR_NO_NUMBER;
1061             break;
1062     }
1063 
1064     pInfo->numType [2] = ADDR_NO_NUMBER;
1065     pInfo->numType [3] = ADDR_NO_NUMBER;
1066 }
1067 
1068 /**
1069 ****************************************************************************************************
1070 *   ElemLib::PixGetExportNorm
1071 *
1072 *   @brief
1073 *       Check if fp16 export norm can be enabled.
1074 *
1075 *   @return
1076 *       TRUE if this can be enabled.
1077 *
1078 ****************************************************************************************************
1079 */
PixGetExportNorm(AddrColorFormat colorFmt,AddrSurfaceNumber numberFmt,AddrSurfaceSwap swap) const1080 BOOL_32 ElemLib::PixGetExportNorm(
1081     AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
1082     AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
1083     AddrSurfaceSwap     swap            ///< [in] components swap type
1084     ) const
1085 {
1086     BOOL_32 enabled = TRUE;
1087 
1088     PixelFormatInfo formatInfo;
1089 
1090     PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo);
1091 
1092     for (UINT_32 c = 0; c < 4; c++)
1093     {
1094         if (m_fp16ExportNorm)
1095         {
1096             if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) &&
1097                 (formatInfo.numType[c] != ADDR_U4FLOATC)    &&
1098                 (formatInfo.numType[c] != ADDR_S5FLOAT)     &&
1099                 (formatInfo.numType[c] != ADDR_S5FLOATM)    &&
1100                 (formatInfo.numType[c] != ADDR_U5FLOAT)     &&
1101                 (formatInfo.numType[c] != ADDR_U3FLOATM))
1102             {
1103                 enabled = FALSE;
1104                 break;
1105             }
1106         }
1107         else
1108         {
1109             if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED))
1110             {
1111                 enabled = FALSE;
1112                 break;
1113             }
1114         }
1115     }
1116 
1117     return enabled;
1118 }
1119 
1120 /**
1121 ****************************************************************************************************
1122 *   ElemLib::AdjustSurfaceInfo
1123 *
1124 *   @brief
1125 *       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
1126 *
1127 *   @return
1128 *       N/A
1129 ****************************************************************************************************
1130 */
AdjustSurfaceInfo(ElemMode elemMode,UINT_32 expandX,UINT_32 expandY,UINT_32 * pBpp,UINT_32 * pBasePitch,UINT_32 * pWidth,UINT_32 * pHeight)1131 VOID ElemLib::AdjustSurfaceInfo(
1132     ElemMode        elemMode,       ///< [in] element mode
1133     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1134     UINT_32         expandY,        ///< [in] decompression expansion factor in Y
1135     UINT_32*        pBpp,           ///< [in,out] bpp
1136     UINT_32*        pBasePitch,     ///< [in,out] base pitch
1137     UINT_32*        pWidth,         ///< [in,out] width
1138     UINT_32*        pHeight)        ///< [in,out] height
1139 {
1140     UINT_32 packedBits;
1141     UINT_32 basePitch;
1142     UINT_32 width;
1143     UINT_32 height;
1144     UINT_32 bpp;
1145     BOOL_32 bBCnFormat = FALSE;
1146 
1147     ADDR_ASSERT(pBpp != NULL);
1148     ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
1149 
1150     if (pBpp)
1151     {
1152         bpp = *pBpp;
1153 
1154         switch (elemMode)
1155         {
1156             case ADDR_EXPANDED:
1157                 packedBits = bpp / expandX / expandY;
1158                 break;
1159             case ADDR_PACKED_STD: // Different bit order
1160             case ADDR_PACKED_REV:
1161                 packedBits = bpp * expandX * expandY;
1162                 break;
1163             case ADDR_PACKED_GBGR:
1164             case ADDR_PACKED_BGRG:
1165                 packedBits = bpp; // 32-bit packed ==> 2 32-bit result
1166                 break;
1167             case ADDR_PACKED_BC1: // Fall through
1168             case ADDR_PACKED_BC4:
1169                 packedBits = 64;
1170                 bBCnFormat = TRUE;
1171                 break;
1172             case ADDR_PACKED_BC2: // Fall through
1173             case ADDR_PACKED_BC3: // Fall through
1174             case ADDR_PACKED_BC5: // Fall through
1175                 bBCnFormat = TRUE;
1176                 // fall through
1177             case ADDR_PACKED_ASTC:
1178             case ADDR_PACKED_ETC2_128BPP:
1179                 packedBits = 128;
1180                 break;
1181             case ADDR_PACKED_ETC2_64BPP:
1182                 packedBits = 64;
1183                 break;
1184             case ADDR_ROUND_BY_HALF:  // Fall through
1185             case ADDR_ROUND_TRUNCATE: // Fall through
1186             case ADDR_ROUND_DITHER:   // Fall through
1187             case ADDR_UNCOMPRESSED:
1188                 packedBits = bpp;
1189                 break;
1190             default:
1191                 packedBits = bpp;
1192                 ADDR_ASSERT_ALWAYS();
1193                 break;
1194         }
1195 
1196         *pBpp = packedBits;
1197     }
1198 
1199     if (pWidth && pHeight && pBasePitch)
1200     {
1201         basePitch = *pBasePitch;
1202         width     = *pWidth;
1203         height    = *pHeight;
1204 
1205         if ((expandX > 1) || (expandY > 1))
1206         {
1207             if (elemMode == ADDR_EXPANDED)
1208             {
1209                 basePitch *= expandX;
1210                 width     *= expandX;
1211                 height    *= expandY;
1212             }
1213             else
1214             {
1215                 // Evergreen family workaround
1216                 if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX))
1217                 {
1218                     // For BCn we now pad it to POW2 at the beginning so it is safe to
1219                     // divide by 4 directly
1220                     basePitch = basePitch / expandX;
1221                     width     = width  / expandX;
1222                     height    = height / expandY;
1223 #if DEBUG
1224                     width     = (width == 0) ? 1 : width;
1225                     height    = (height == 0) ? 1 : height;
1226 
1227                     if ((*pWidth > PowTwoAlign(width, 8) * expandX) ||
1228                         (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment
1229                     {
1230                         // if this assertion is hit we may have issues if app samples
1231                         // rightmost/bottommost pixels
1232                         ADDR_ASSERT_ALWAYS();
1233                     }
1234 #endif
1235                 }
1236                 else // Not BCn format we still keep old way (FMT_1? No real test yet)
1237                 {
1238                     basePitch = (basePitch + expandX - 1) / expandX;
1239                     width     = (width + expandX - 1) / expandX;
1240                     height    = (height + expandY - 1) / expandY;
1241                 }
1242             }
1243 
1244             *pBasePitch = basePitch; // 0 is legal value for base pitch.
1245             *pWidth     = (width == 0) ? 1 : width;
1246             *pHeight    = (height == 0) ? 1 : height;
1247         } //if (pWidth && pHeight && pBasePitch)
1248     }
1249 }
1250 
1251 /**
1252 ****************************************************************************************************
1253 *   ElemLib::RestoreSurfaceInfo
1254 *
1255 *   @brief
1256 *       Reverse operation of AdjustSurfaceInfo
1257 *
1258 *   @return
1259 *       N/A
1260 ****************************************************************************************************
1261 */
RestoreSurfaceInfo(ElemMode elemMode,UINT_32 expandX,UINT_32 expandY,UINT_32 * pBpp,UINT_32 * pWidth,UINT_32 * pHeight)1262 VOID ElemLib::RestoreSurfaceInfo(
1263     ElemMode        elemMode,       ///< [in] element mode
1264     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1265     UINT_32         expandY,        ///< [out] decompression expansion factor in Y
1266     UINT_32*        pBpp,           ///< [in,out] bpp
1267     UINT_32*        pWidth,         ///< [in,out] width
1268     UINT_32*        pHeight)        ///< [in,out] height
1269 {
1270     UINT_32 originalBits;
1271     UINT_32 width;
1272     UINT_32 height;
1273     UINT_32 bpp;
1274 
1275     BOOL_32 bBCnFormat = FALSE;
1276 
1277     ADDR_ASSERT(pBpp != NULL);
1278     ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
1279 
1280     if (pBpp)
1281     {
1282         bpp = *pBpp;
1283 
1284         switch (elemMode)
1285         {
1286         case ADDR_EXPANDED:
1287             originalBits = bpp * expandX * expandY;
1288             break;
1289         case ADDR_PACKED_STD: // Different bit order
1290         case ADDR_PACKED_REV:
1291             originalBits = bpp / expandX / expandY;
1292             break;
1293         case ADDR_PACKED_GBGR:
1294         case ADDR_PACKED_BGRG:
1295             originalBits = bpp; // 32-bit packed ==> 2 32-bit result
1296             break;
1297         case ADDR_PACKED_BC1: // Fall through
1298         case ADDR_PACKED_BC4:
1299             originalBits = 64;
1300             bBCnFormat = TRUE;
1301             break;
1302         case ADDR_PACKED_BC2: // Fall through
1303         case ADDR_PACKED_BC3: // Fall through
1304         case ADDR_PACKED_BC5:
1305             bBCnFormat = TRUE;
1306             // fall through
1307         case ADDR_PACKED_ASTC:
1308         case ADDR_PACKED_ETC2_128BPP:
1309             originalBits = 128;
1310             break;
1311         case ADDR_PACKED_ETC2_64BPP:
1312             originalBits = 64;
1313             break;
1314         case ADDR_ROUND_BY_HALF:  // Fall through
1315         case ADDR_ROUND_TRUNCATE: // Fall through
1316         case ADDR_ROUND_DITHER:   // Fall through
1317         case ADDR_UNCOMPRESSED:
1318             originalBits = bpp;
1319             break;
1320         default:
1321             originalBits = bpp;
1322             ADDR_ASSERT_ALWAYS();
1323             break;
1324         }
1325 
1326         *pBpp = originalBits;
1327     }
1328 
1329     if (pWidth && pHeight)
1330     {
1331         width    = *pWidth;
1332         height   = *pHeight;
1333 
1334         if ((expandX > 1) || (expandY > 1))
1335         {
1336             if (elemMode == ADDR_EXPANDED)
1337             {
1338                 width /= expandX;
1339                 height /= expandY;
1340             }
1341             else
1342             {
1343                 width *= expandX;
1344                 height *= expandY;
1345             }
1346         }
1347 
1348         *pWidth  = (width == 0) ? 1 : width;
1349         *pHeight = (height == 0) ? 1 : height;
1350     }
1351 }
1352 
1353 /**
1354 ****************************************************************************************************
1355 *   ElemLib::GetBitsPerPixel
1356 *
1357 *   @brief
1358 *       Compute the total bits per element according to a format
1359 *       code. For compressed formats, this is not the same as
1360 *       the number of bits per decompressed element.
1361 *
1362 *   @return
1363 *       Bits per pixel
1364 ****************************************************************************************************
1365 */
GetBitsPerPixel(AddrFormat format,ElemMode * pElemMode,UINT_32 * pExpandX,UINT_32 * pExpandY,UINT_32 * pUnusedBits)1366 UINT_32 ElemLib::GetBitsPerPixel(
1367     AddrFormat          format,         ///< [in] surface format code
1368     ElemMode*           pElemMode,      ///< [out] element mode
1369     UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
1370     UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
1371     UINT_32*            pUnusedBits)    ///< [out] bits unused
1372 {
1373     UINT_32 bpp;
1374     UINT_32 expandX = 1;
1375     UINT_32 expandY = 1;
1376     UINT_32 bitUnused = 0;
1377     ElemMode elemMode = ADDR_UNCOMPRESSED; // default value
1378 
1379     switch (format)
1380     {
1381         case ADDR_FMT_8:
1382             bpp = 8;
1383             break;
1384         case ADDR_FMT_1_5_5_5:
1385         case ADDR_FMT_5_6_5:
1386         case ADDR_FMT_6_5_5:
1387         case ADDR_FMT_8_8:
1388         case ADDR_FMT_4_4_4_4:
1389         case ADDR_FMT_16:
1390             bpp = 16;
1391             break;
1392         case ADDR_FMT_GB_GR:
1393             elemMode = ADDR_PACKED_GBGR;
1394             bpp      = m_configFlags.use32bppFor422Fmt ? 32 : 16;
1395             expandX  = m_configFlags.use32bppFor422Fmt ? 2 : 1;
1396             break;
1397         case ADDR_FMT_BG_RG:
1398             elemMode = ADDR_PACKED_BGRG;
1399             bpp      = m_configFlags.use32bppFor422Fmt ? 32 : 16;
1400             expandX  = m_configFlags.use32bppFor422Fmt ? 2 : 1;
1401             break;
1402         case ADDR_FMT_8_8_8_8:
1403         case ADDR_FMT_2_10_10_10:
1404         case ADDR_FMT_10_11_11:
1405         case ADDR_FMT_11_11_10:
1406         case ADDR_FMT_16_16:
1407         case ADDR_FMT_32:
1408         case ADDR_FMT_24_8:
1409             bpp = 32;
1410             break;
1411         case ADDR_FMT_16_16_16_16:
1412         case ADDR_FMT_32_32:
1413         case ADDR_FMT_CTX1:
1414             bpp = 64;
1415             break;
1416         case ADDR_FMT_32_32_32_32:
1417             bpp = 128;
1418             break;
1419         case ADDR_FMT_INVALID:
1420             bpp = 0;
1421             break;
1422         case ADDR_FMT_1_REVERSED:
1423             elemMode = ADDR_PACKED_REV;
1424             expandX = 8;
1425             bpp = 1;
1426             break;
1427         case ADDR_FMT_1:
1428             elemMode = ADDR_PACKED_STD;
1429             expandX = 8;
1430             bpp = 1;
1431             break;
1432         case ADDR_FMT_4_4:
1433         case ADDR_FMT_3_3_2:
1434             bpp = 8;
1435             break;
1436         case ADDR_FMT_5_5_5_1:
1437             bpp = 16;
1438             break;
1439         case ADDR_FMT_32_AS_8:
1440         case ADDR_FMT_32_AS_8_8:
1441         case ADDR_FMT_8_24:
1442         case ADDR_FMT_10_10_10_2:
1443         case ADDR_FMT_5_9_9_9_SHAREDEXP:
1444             bpp = 32;
1445             break;
1446         case ADDR_FMT_X24_8_32_FLOAT:
1447             bpp = 64;
1448             bitUnused = 24;
1449             break;
1450         case ADDR_FMT_8_8_8:
1451             elemMode = ADDR_EXPANDED;
1452             bpp = 24;//@@ 8;      // read 3 elements per pixel
1453             expandX = 3;
1454             break;
1455         case ADDR_FMT_16_16_16:
1456             elemMode = ADDR_EXPANDED;
1457             bpp = 48;//@@ 16;      // read 3 elements per pixel
1458             expandX = 3;
1459             break;
1460         case ADDR_FMT_32_32_32:
1461             elemMode = ADDR_EXPANDED;
1462             expandX = 3;
1463             bpp = 96;//@@ 32;      // read 3 elements per pixel
1464             break;
1465         case ADDR_FMT_BC1:
1466             elemMode = ADDR_PACKED_BC1;
1467             expandX = 4;
1468             expandY = 4;
1469             bpp = 64;
1470             break;
1471         case ADDR_FMT_BC4:
1472             elemMode = ADDR_PACKED_BC4;
1473             expandX = 4;
1474             expandY = 4;
1475             bpp = 64;
1476             break;
1477         case ADDR_FMT_BC2:
1478             elemMode = ADDR_PACKED_BC2;
1479             expandX = 4;
1480             expandY = 4;
1481             bpp = 128;
1482             break;
1483         case ADDR_FMT_BC3:
1484             elemMode = ADDR_PACKED_BC3;
1485             expandX = 4;
1486             expandY = 4;
1487             bpp = 128;
1488             break;
1489         case ADDR_FMT_BC5:
1490         case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
1491         case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
1492             elemMode = ADDR_PACKED_BC5;
1493             expandX = 4;
1494             expandY = 4;
1495             bpp = 128;
1496             break;
1497 
1498         case ADDR_FMT_ETC2_64BPP:
1499             elemMode = ADDR_PACKED_ETC2_64BPP;
1500             expandX  = 4;
1501             expandY  = 4;
1502             bpp      = 64;
1503             break;
1504 
1505         case ADDR_FMT_ETC2_128BPP:
1506             elemMode = ADDR_PACKED_ETC2_128BPP;
1507             expandX  = 4;
1508             expandY  = 4;
1509             bpp      = 128;
1510             break;
1511 
1512         case ADDR_FMT_ASTC_4x4:
1513             elemMode = ADDR_PACKED_ASTC;
1514             expandX  = 4;
1515             expandY  = 4;
1516             bpp      = 128;
1517             break;
1518 
1519         case ADDR_FMT_ASTC_5x4:
1520             elemMode = ADDR_PACKED_ASTC;
1521             expandX  = 5;
1522             expandY  = 4;
1523             bpp      = 128;
1524             break;
1525 
1526         case ADDR_FMT_ASTC_5x5:
1527             elemMode = ADDR_PACKED_ASTC;
1528             expandX  = 5;
1529             expandY  = 5;
1530             bpp      = 128;
1531             break;
1532 
1533         case ADDR_FMT_ASTC_6x5:
1534             elemMode = ADDR_PACKED_ASTC;
1535             expandX  = 6;
1536             expandY  = 5;
1537             bpp      = 128;
1538             break;
1539 
1540         case ADDR_FMT_ASTC_6x6:
1541             elemMode = ADDR_PACKED_ASTC;
1542             expandX  = 6;
1543             expandY  = 6;
1544             bpp      = 128;
1545             break;
1546 
1547         case ADDR_FMT_ASTC_8x5:
1548             elemMode = ADDR_PACKED_ASTC;
1549             expandX  = 8;
1550             expandY  = 5;
1551             bpp      = 128;
1552             break;
1553 
1554         case ADDR_FMT_ASTC_8x6:
1555             elemMode = ADDR_PACKED_ASTC;
1556             expandX  = 8;
1557             expandY  = 6;
1558             bpp      = 128;
1559             break;
1560 
1561         case ADDR_FMT_ASTC_8x8:
1562             elemMode = ADDR_PACKED_ASTC;
1563             expandX  = 8;
1564             expandY  = 8;
1565             bpp      = 128;
1566             break;
1567 
1568         case ADDR_FMT_ASTC_10x5:
1569             elemMode = ADDR_PACKED_ASTC;
1570             expandX  = 10;
1571             expandY  = 5;
1572             bpp      = 128;
1573             break;
1574 
1575         case ADDR_FMT_ASTC_10x6:
1576             elemMode = ADDR_PACKED_ASTC;
1577             expandX  = 10;
1578             expandY  = 6;
1579             bpp      = 128;
1580             break;
1581 
1582         case ADDR_FMT_ASTC_10x8:
1583             elemMode = ADDR_PACKED_ASTC;
1584             expandX  = 10;
1585             expandY  = 8;
1586             bpp      = 128;
1587             break;
1588 
1589         case ADDR_FMT_ASTC_10x10:
1590             elemMode = ADDR_PACKED_ASTC;
1591             expandX  = 10;
1592             expandY  = 10;
1593             bpp      = 128;
1594             break;
1595 
1596         case ADDR_FMT_ASTC_12x10:
1597             elemMode = ADDR_PACKED_ASTC;
1598             expandX  = 12;
1599             expandY  = 10;
1600             bpp      = 128;
1601             break;
1602 
1603         case ADDR_FMT_ASTC_12x12:
1604             elemMode = ADDR_PACKED_ASTC;
1605             expandX  = 12;
1606             expandY  = 12;
1607             bpp      = 128;
1608             break;
1609 
1610         default:
1611             bpp = 0;
1612             ADDR_ASSERT_ALWAYS();
1613             break;
1614             // @@ or should this be an error?
1615     }
1616 
1617     SafeAssign(pExpandX, expandX);
1618     SafeAssign(pExpandY, expandY);
1619     SafeAssign(pUnusedBits, bitUnused);
1620     SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode);
1621 
1622     return bpp;
1623 }
1624 
1625 /**
1626 ****************************************************************************************************
1627 *   ElemLib::GetCompBits
1628 *
1629 *   @brief
1630 *       Set each component's bit size and bit start. And set element mode and number type
1631 *
1632 *   @return
1633 *       N/A
1634 ****************************************************************************************************
1635 */
GetCompBits(UINT_32 c0,UINT_32 c1,UINT_32 c2,UINT_32 c3,PixelFormatInfo * pInfo,ElemMode elemMode)1636 VOID ElemLib::GetCompBits(
1637     UINT_32          c0,        ///< [in] bits of component 0
1638     UINT_32          c1,        ///< [in] bits of component 1
1639     UINT_32          c2,        ///< [in] bits of component 2
1640     UINT_32          c3,        ///< [in] bits of component 3
1641     PixelFormatInfo* pInfo,     ///< [out] per component info out
1642     ElemMode         elemMode)  ///< [in] element mode
1643 {
1644     pInfo->comps = 0;
1645 
1646     pInfo->compBit[0] = c0;
1647     pInfo->compBit[1] = c1;
1648     pInfo->compBit[2] = c2;
1649     pInfo->compBit[3] = c3;
1650 
1651     pInfo->compStart[0] = 0;
1652     pInfo->compStart[1] = c0;
1653     pInfo->compStart[2] = c0+c1;
1654     pInfo->compStart[3] = c0+c1+c2;
1655 
1656     pInfo->elemMode = elemMode;
1657     // still needed since component swap may depend on number of components
1658     for (INT i=0; i<4; i++)
1659     {
1660         if (pInfo->compBit[i] == 0)
1661         {
1662             pInfo->compStart[i]  = 0;       // all null components start at bit 0
1663             pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type
1664         }
1665         else
1666         {
1667             pInfo->comps++;
1668         }
1669     }
1670 }
1671 
1672 /**
1673 ****************************************************************************************************
1674 *   ElemLib::GetCompBits
1675 *
1676 *   @brief
1677 *       Set the clear color (or clear depth/stencil) for a surface
1678 *
1679 *   @note
1680 *       If clearColor is zero, a default clear value is used in place of comps[4].
1681 *       If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
1682 *
1683 *   @return
1684 *       N/A
1685 ****************************************************************************************************
1686 */
SetClearComps(ADDR_FLT_32 comps[4],BOOL_32 clearColor,BOOL_32 float32)1687 VOID ElemLib::SetClearComps(
1688     ADDR_FLT_32 comps[4],   ///< [in,out] components
1689     BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
1690     BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
1691 {
1692     INT_32 i;
1693 
1694     // Use default clearvalues if clearColor is disabled
1695     if (clearColor == FALSE)
1696     {
1697         for (i=0; i<3; i++)
1698         {
1699             comps[i].f = 0.0;
1700         }
1701         comps[3].f = 1.0;
1702     }
1703 
1704     // Otherwise use the (modified) clear value
1705     else
1706     {
1707         for (i=0; i<4; i++)
1708         {   // If full precision, use clear value unchanged
1709             if (float32)
1710             {
1711                 // Do nothing
1712                 //comps[i] = comps[i];
1713             }
1714             // Else if it is a NaN, use the standard NaN value
1715             else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000)
1716             {
1717                 comps[i].u = 0xFFC00000;
1718             }
1719             // Else reduce the mantissa precision
1720             else
1721             {
1722                 comps[i].u = comps[i].u & 0xFFFFF000;
1723             }
1724         }
1725     }
1726 }
1727 
1728 /**
1729 ****************************************************************************************************
1730 *   ElemLib::IsBlockCompressed
1731 *
1732 *   @brief
1733 *       TRUE if this is block compressed format
1734 *
1735 *   @note
1736 *
1737 *   @return
1738 *       BOOL_32
1739 ****************************************************************************************************
1740 */
IsBlockCompressed(AddrFormat format)1741 BOOL_32 ElemLib::IsBlockCompressed(
1742     AddrFormat format)  ///< [in] Format
1743 {
1744     return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) ||
1745             ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP)));
1746 }
1747 
1748 
1749 /**
1750 ****************************************************************************************************
1751 *   ElemLib::IsCompressed
1752 *
1753 *   @brief
1754 *       TRUE if this is block compressed format or 1 bit format
1755 *
1756 *   @note
1757 *
1758 *   @return
1759 *       BOOL_32
1760 ****************************************************************************************************
1761 */
IsCompressed(AddrFormat format)1762 BOOL_32 ElemLib::IsCompressed(
1763     AddrFormat format)  ///< [in] Format
1764 {
1765     return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7;
1766 }
1767 
1768 /**
1769 ****************************************************************************************************
1770 *   ElemLib::IsExpand3x
1771 *
1772 *   @brief
1773 *       TRUE if this is 3x expand format
1774 *
1775 *   @note
1776 *
1777 *   @return
1778 *       BOOL_32
1779 ****************************************************************************************************
1780 */
IsExpand3x(AddrFormat format)1781 BOOL_32 ElemLib::IsExpand3x(
1782     AddrFormat format)  ///< [in] Format
1783 {
1784     BOOL_32 is3x = FALSE;
1785 
1786     switch (format)
1787     {
1788         case ADDR_FMT_8_8_8:
1789         case ADDR_FMT_16_16_16:
1790         case ADDR_FMT_32_32_32:
1791             is3x = TRUE;
1792             break;
1793         default:
1794             break;
1795     }
1796 
1797     return is3x;
1798 }
1799 
1800 /**
1801 ****************************************************************************************************
1802 *   ElemLib::IsMacroPixelPacked
1803 *
1804 *   @brief
1805 *       TRUE if this is a macro-pixel-packed format.
1806 *
1807 *   @note
1808 *
1809 *   @return
1810 *       BOOL_32
1811 ****************************************************************************************************
1812 */
IsMacroPixelPacked(AddrFormat format)1813 BOOL_32 ElemLib::IsMacroPixelPacked(
1814     AddrFormat format)  ///< [in] Format
1815 {
1816     BOOL_32 isMacroPixelPacked = FALSE;
1817 
1818     switch (format)
1819     {
1820         case ADDR_FMT_BG_RG:
1821         case ADDR_FMT_GB_GR:
1822             isMacroPixelPacked = TRUE;
1823             break;
1824         default:
1825             break;
1826     }
1827 
1828     return isMacroPixelPacked;
1829 }
1830 
1831 }
1832