1 //-------------------------------------------------------------------------------------
2 // BC.h
3 //
4 // Block-compression (BC) functionality
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15 
16 #pragma once
17 
18 #include <assert.h>
19 
20 #ifdef USE_XNAMATH
21 #include <xnamath.h>
22 #else
23 #include <directxmath.h>
24 #include <directxpackedvector.h>
25 #endif
26 
27 #include <float.h>
28 
29 namespace DirectX
30 {
31 
#ifndef USE_XNAMATH
// With DirectXMath the packed types live in the PackedVector namespace; alias them into
// this namespace so the code below can refer to them unqualified.
// NOTE(review): presumably xnamath.h already exposes these names unqualified - confirm.
typedef PackedVector::HALF HALF;
typedef PackedVector::XMHALF4 XMHALF4;
typedef PackedVector::XMU565 XMU565;
#endif
37 
38 //-------------------------------------------------------------------------------------
39 // Constants
40 //-------------------------------------------------------------------------------------
41 
// Masks and limits applied to the raw 16-bit pattern of a half-precision float
// (see INTColor::F16ToINT and INTColor::INT2F16).
const uint16_t F16S_MASK    = 0x8000;   // f16 sign mask
const uint16_t F16EM_MASK   = 0x7fff;   // f16 exp & mantissa mask
const uint16_t F16MAX       = 0x7bff;   // MAXFLT bit pattern for XMHALF
45 
// Sign-extends the low 'nb' bits of the integer 'x': when bit (nb-1) is set, every bit from
// 'nb' upward is set as well; otherwise 'x' is returned unchanged.
// The fill mask is built as ~((1 << nb) - 1) instead of (~0) << nb, because left-shifting a
// negative value is undefined behavior before C++20.
// Note: 'x' and 'nb' are each evaluated more than once; 'nb' must be in the range [1, 30].
#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?(~((1<<(nb))-1)):0)|(x))
47 
// Because these are used in SAL annotations, they need to remain macros rather than const values

// Number of texels in one compressed block (extent of the per-block pixel and index arrays).
#define NUM_PIXELS_PER_BLOCK 16
// Array extents used for BC6H endpoint regions and palettes.
#define BC6H_MAX_REGIONS 2
#define BC6H_MAX_INDICES 16
// Array extents used for BC7 endpoint regions and palettes.
#define BC7_MAX_REGIONS 3
#define BC7_MAX_INDICES 16

const size_t BC6H_NUM_CHANNELS = 3;
const size_t BC6H_MAX_SHAPES = 32;

const size_t BC7_NUM_CHANNELS = 4;
const size_t BC7_MAX_SHAPES = 64;

// Fixed-point interpolation constants shared by BC6H/BC7: a weight w in [0, BC67_WEIGHT_MAX]
// blends two endpoints as (c0 * (MAX - w) + c1 * w + ROUND) >> SHIFT
// (see LDRColorA::InterpolateRGB / InterpolateA).
const int32_t BC67_WEIGHT_MAX = 64;
const uint32_t BC67_WEIGHT_SHIFT = 6;
const int32_t BC67_WEIGHT_ROUND = 32;

// Interpolation weight tables for 2-, 3- and 4-bit indices (4, 8 and 16 entries respectively).
// Declared here; the definitions live in another translation unit.
extern const int g_aWeights2[4];
extern const int g_aWeights3[8];
extern const int g_aWeights4[16];
68 
// Bit flags controlling the block compressors. Values may be OR-ed together.
enum BC_FLAGS
{
    BC_FLAGS_NONE       = 0x0,
    BC_FLAGS_DITHER_RGB = 0x10000,  // Enables dithering for RGB colors for BC1-3
    BC_FLAGS_DITHER_A   = 0x20000,  // Enables dithering for Alpha channel for BC1-3
    BC_FLAGS_UNIFORM    = 0x40000,  // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
    BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back
};
77 
78 //-------------------------------------------------------------------------------------
79 // Structures
80 //-------------------------------------------------------------------------------------
81 class HDRColorA;
82 
83 class LDRColorA
84 {
85 public:
86     uint8_t r, g, b, a;
87 
LDRColorA()88     LDRColorA() DIRECTX_CTOR_DEFAULT
89     LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {}
90 
91     const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const
92     {
93         switch(uElement)
94         {
95         case 0: return r;
96         case 1: return g;
97         case 2: return b;
98         case 3: return a;
99         default: assert(false); return r;
100         }
101     }
102 
103     uint8_t& operator [] (_In_range_(0,3) size_t uElement)
104     {
105         switch(uElement)
106         {
107         case 0: return r;
108         case 1: return g;
109         case 2: return b;
110         case 3: return a;
111         default: assert(false); return r;
112         }
113     }
114 
115     LDRColorA operator = (_In_ const HDRColorA& c);
116 
117     static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ _In_range_(2, 4) size_t wcprec, _Out_ LDRColorA& out)
118     {
119         const int* aWeights = nullptr;
120         switch(wcprec)
121         {
122         case 2: aWeights = g_aWeights2; assert( wc < 4 ); _Analysis_assume_( wc < 4 ); break;
123         case 3: aWeights = g_aWeights3; assert( wc < 8 ); _Analysis_assume_( wc < 8 ); break;
124         case 4: aWeights = g_aWeights4; assert( wc < 16 ); _Analysis_assume_( wc < 16 ); break;
125         default: assert(false); out.r = out.g = out.b = 0; return;
126         }
127         out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
128         out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
129         out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
130     }
131 
132     static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_range_(2, 4) _In_ size_t waprec, _Out_ LDRColorA& out)
133     {
134         const int* aWeights = nullptr;
135         switch(waprec)
136         {
137         case 2: aWeights = g_aWeights2; assert( wa < 4 ); _Analysis_assume_( wa < 4 ); break;
138         case 3: aWeights = g_aWeights3; assert( wa < 8 ); _Analysis_assume_( wa < 8 ); break;
139         case 4: aWeights = g_aWeights4; assert( wa < 16 ); _Analysis_assume_( wa < 16 ); break;
140         default: assert(false); out.a = 0; return;
141         }
142         out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
143     }
144 
145     static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ _In_range_(2, 4) size_t wcprec, _In_ _In_range_(2, 4) size_t waprec, _Out_ LDRColorA& out)
146     {
147         InterpolateRGB(c0, c1, wc, wcprec, out);
148         InterpolateA(c0, c1, wa, waprec, out);
149     }
150 };
151 
152 static_assert( sizeof(LDRColorA) == 4, "Unexpected packing");
153 
154 class HDRColorA
155 {
156 public:
157     float r, g, b, a;
158 
159 public:
HDRColorA()160     HDRColorA() DIRECTX_CTOR_DEFAULT
161     HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {}
HDRColorA(const HDRColorA & c)162     HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {}
HDRColorA(const LDRColorA & c)163     HDRColorA(const LDRColorA& c)
164     {
165         r = float(c.r) * (1.0f/255.0f);
166         g = float(c.g) * (1.0f/255.0f);
167         b = float(c.b) * (1.0f/255.0f);
168         a = float(c.a) * (1.0f/255.0f);
169     }
170 
171     // binary operators
172     HDRColorA operator + ( _In_ const HDRColorA& c ) const
173     {
174         return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
175     }
176 
177     HDRColorA operator - ( _In_ const HDRColorA& c ) const
178     {
179         return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
180     }
181 
182     HDRColorA operator * ( _In_ float f ) const
183     {
184         return HDRColorA(r * f, g * f, b * f, a * f);
185     }
186 
187     HDRColorA operator / ( _In_ float f ) const
188     {
189         float fInv = 1.0f / f;
190         return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
191     }
192 
193     float operator * ( _In_ const HDRColorA& c ) const
194     {
195         return r * c.r + g * c.g + b * c.b + a * c.a;
196     }
197 
198     // assignment operators
199     HDRColorA& operator += ( _In_ const HDRColorA& c )
200     {
201         r += c.r;
202         g += c.g;
203         b += c.b;
204         a += c.a;
205         return *this;
206     }
207 
208     HDRColorA& operator -= ( _In_ const HDRColorA& c )
209     {
210         r -= c.r;
211         g -= c.g;
212         b -= c.b;
213         a -= c.a;
214         return *this;
215     }
216 
217     HDRColorA& operator *= ( _In_ float f )
218     {
219         r *= f;
220         g *= f;
221         b *= f;
222         a *= f;
223         return *this;
224     }
225 
226     HDRColorA& operator /= ( _In_ float f )
227     {
228         float fInv = 1.0f / f;
229         r *= fInv;
230         g *= fInv;
231         b *= fInv;
232         a *= fInv;
233         return *this;
234     }
235 
236     HDRColorA& operator = (_In_ const LDRColorA& c)
237     {
238         r = (float) c.r;
239         g = (float) c.g;
240         b = (float) c.b;
241         a = (float) c.a;
242         return *this;
243     }
244 
Clamp(_In_ float fMin,_In_ float fMax)245     HDRColorA& Clamp(_In_ float fMin, _In_ float fMax)
246     {
247         r = std::min<float>(fMax, std::max<float>(fMin, r));
248         g = std::min<float>(fMax, std::max<float>(fMin, g));
249         b = std::min<float>(fMax, std::max<float>(fMin, b));
250         a = std::min<float>(fMax, std::max<float>(fMin, a));
251         return *this;
252     }
253 
ToLDRColorA()254     LDRColorA ToLDRColorA() const
255     {
256         return LDRColorA((uint8_t) (r + 0.01f), (uint8_t) (g + 0.01f), (uint8_t) (b + 0.01f), (uint8_t) (a + 0.01f));
257     }
258 };
259 
260 inline LDRColorA LDRColorA::operator = (_In_ const HDRColorA& c)
261 {
262     LDRColorA ret;
263     HDRColorA tmp(c);
264     tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
265     ret.r = uint8_t(tmp.r + 0.001f);
266     ret.g = uint8_t(tmp.g + 0.001f);
267     ret.b = uint8_t(tmp.b + 0.001f);
268     ret.a = uint8_t(tmp.a + 0.001f);
269     return ret;
270 }
271 
// A pair of 8-bit-per-channel endpoint colors (A and B).
struct LDREndPntPair
{
    LDRColorA A;
    LDRColorA B;
};
277 
// A pair of floating-point endpoint colors (A and B).
struct HDREndPntPair
{
    HDRColorA A;
    HDRColorA B;
};
283 
HDRColorALerp(_Out_ HDRColorA * pOut,_In_ const HDRColorA * pC1,_In_ const HDRColorA * pC2,_In_ float s)284 inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s)
285 {
286     pOut->r = pC1->r + s * (pC2->r - pC1->r);
287     pOut->g = pC1->g + s * (pC2->g - pC1->g);
288     pOut->b = pC1->b + s * (pC2->b - pC1->b);
289     pOut->a = pC1->a + s * (pC2->a - pC1->a);
290     return pOut;
291 }
292 
293 #pragma pack(push,1)
// BC1/DXT1 compression (4 bits per texel)
// Byte-packed layout of one block: 8 bytes in total.
struct D3DX_BC1
{
    uint16_t    rgb[2]; // 565 colors
    uint32_t    bitmap; // 2bpp rgb bitmap
};
300 
// BC2/DXT2/3 compression (8 bits per texel)
// Byte-packed layout of one block: 8 bytes of explicit alpha followed by a BC1 color block.
struct D3DX_BC2
{
    uint32_t    bitmap[2];  // 4bpp alpha bitmap
    D3DX_BC1    bc1;        // BC1 rgb data
};
307 
// BC3/DXT4/5 compression (8 bits per texel)
// Byte-packed layout of one block: 2 alpha endpoints, 6 bytes of alpha indices, then a BC1 color block.
struct D3DX_BC3
{
    uint8_t     alpha[2];   // alpha values
    uint8_t     bitmap[6];  // 3bpp alpha bitmap
    D3DX_BC1    bc1;        // BC1 rgb data
};
315 #pragma pack(pop)
316 
317 class INTColor
318 {
319 public:
320     int r, g, b;
321     int pad;
322 
323 public:
INTColor()324     INTColor() DIRECTX_CTOR_DEFAULT
325     INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;}
INTColor(const INTColor & c)326     INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;}
327 
328     INTColor operator - ( _In_ const INTColor& c ) const
329     {
330         return INTColor(r - c.r, g - c.g, b - c.b);
331     }
332 
333     INTColor& operator += ( _In_ const INTColor& c )
334     {
335         r += c.r;
336         g += c.g;
337         b += c.b;
338         return *this;
339     }
340 
341     INTColor& operator -= ( _In_ const INTColor& c )
342     {
343         r -= c.r;
344         g -= c.g;
345         b -= c.b;
346         return *this;
347     }
348 
349     INTColor& operator &= ( _In_ const INTColor& c )
350     {
351         r &= c.r;
352         g &= c.g;
353         b &= c.b;
354         return *this;
355     }
356 
357     int& operator [] ( _In_ uint8_t i )
358     {
359         assert(i < sizeof(INTColor) / sizeof(int));
360         _Analysis_assume_(i < sizeof(INTColor) / sizeof(int));
361         return ((int*) this)[i];
362     }
363 
Set(_In_ const HDRColorA & c,_In_ bool bSigned)364     void Set(_In_ const HDRColorA& c, _In_ bool bSigned)
365     {
366         XMHALF4 aF16;
367 
368         XMVECTOR v = XMLoadFloat4( (const XMFLOAT4*)& c );
369         XMStoreHalf4( &aF16, v );
370 
371         r = F16ToINT(aF16.x, bSigned);
372         g = F16ToINT(aF16.y, bSigned);
373         b = F16ToINT(aF16.z, bSigned);
374     }
375 
Clamp(_In_ int iMin,_In_ int iMax)376     INTColor& Clamp(_In_ int iMin, _In_ int iMax)
377     {
378         r = std::min<int>(iMax, std::max<int>(iMin, r));
379         g = std::min<int>(iMax, std::max<int>(iMin, g));
380         b = std::min<int>(iMax, std::max<int>(iMin, b));
381         return *this;
382     }
383 
SignExtend(_In_ const LDRColorA & Prec)384     INTColor& SignExtend(_In_ const LDRColorA& Prec)
385     {
386         r = SIGN_EXTEND(r, Prec.r);
387         g = SIGN_EXTEND(g, Prec.g);
388         b = SIGN_EXTEND(b, Prec.b);
389         return *this;
390     }
391 
392     void ToF16(_Out_writes_(3) HALF aF16[3], _In_ bool bSigned) const
393     {
394         aF16[0] = INT2F16(r, bSigned);
395         aF16[1] = INT2F16(g, bSigned);
396         aF16[2] = INT2F16(b, bSigned);
397     }
398 
399 private:
F16ToINT(_In_ const HALF & f,_In_ bool bSigned)400     static int F16ToINT(_In_ const HALF& f, _In_ bool bSigned)
401     {
402         uint16_t input = *((const uint16_t*) &f);
403         int out, s;
404         if(bSigned)
405         {
406             s = input & F16S_MASK;
407             input &= F16EM_MASK;
408             if(input > F16MAX) out = F16MAX;
409             else out = input;
410             out = s ? -out : out;
411         }
412         else
413         {
414             if(input & F16S_MASK) out = 0;
415             else out = input;
416         }
417         return out;
418     }
419 
INT2F16(_In_ int input,_In_ bool bSigned)420     static HALF INT2F16(_In_ int input, _In_ bool bSigned)
421     {
422         HALF h;
423         uint16_t out;
424         if(bSigned)
425         {
426             int s = 0;
427             if(input < 0)
428             {
429                 s = F16S_MASK;
430                 input = -input;
431             }
432             out = uint16_t(s | input);
433         }
434         else
435         {
436             assert(input >= 0 && input <= F16MAX);
437             out = (uint16_t) input;
438         }
439 
440         *((uint16_t*) &h) = out;
441         return h;
442     }
443 };
444 
445 static_assert( sizeof(INTColor) == 16, "Unexpected packing");
446 
// A pair of integer endpoint colors (A and B).
struct INTEndPntPair
{
    INTColor A;
    INTColor B;
};
452 
453 template< size_t SizeInBytes >
454 class CBits
455 {
456 public:
GetBit(_Inout_ size_t & uStartBit)457     uint8_t GetBit(_Inout_ size_t& uStartBit) const
458     {
459         assert(uStartBit < 128);
460         _Analysis_assume_(uStartBit < 128);
461         size_t uIndex = uStartBit >> 3;
462         uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01;
463         uStartBit++;
464         return ret;
465     }
466 
GetBits(_Inout_ size_t & uStartBit,_In_ size_t uNumBits)467     uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const
468     {
469         if(uNumBits == 0) return 0;
470         assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
471         _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
472         uint8_t ret;
473         size_t uIndex = uStartBit >> 3;
474         size_t uBase = uStartBit - (uIndex << 3);
475         if(uBase + uNumBits > 8)
476         {
477             size_t uFirstIndexBits = 8 - uBase;
478             size_t uNextIndexBits = uNumBits - uFirstIndexBits;
479             ret = (m_uBits[uIndex] >> uBase) | ((m_uBits[uIndex+1] & ((1 << uNextIndexBits) - 1)) << uFirstIndexBits);
480         }
481         else
482         {
483             ret = (m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1);
484         }
485         assert(ret < (1 << uNumBits));
486         uStartBit += uNumBits;
487         return ret;
488     }
489 
SetBit(_Inout_ size_t & uStartBit,_In_ uint8_t uValue)490     void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue)
491     {
492         assert(uStartBit < 128 && uValue < 2);
493         _Analysis_assume_(uStartBit < 128 && uValue < 2);
494         size_t uIndex = uStartBit >> 3;
495         size_t uBase = uStartBit - (uIndex << 3);
496         m_uBits[uIndex] &= ~(1 << uBase);
497         m_uBits[uIndex] |= uValue << uBase;
498         uStartBit++;
499     }
500 
SetBits(_Inout_ size_t & uStartBit,_In_ size_t uNumBits,_In_ uint8_t uValue)501     void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue)
502     {
503         if(uNumBits == 0)
504             return;
505         assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
506         _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
507         assert(uValue < (1 << uNumBits));
508         size_t uIndex = uStartBit >> 3;
509         size_t uBase = uStartBit - (uIndex << 3);
510         if(uBase + uNumBits > 8)
511         {
512             size_t uFirstIndexBits = 8 - uBase;
513             size_t uNextIndexBits = uNumBits - uFirstIndexBits;
514             m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
515             m_uBits[uIndex] |= uValue << uBase;
516             m_uBits[uIndex+1] &= ~((1 << uNextIndexBits) - 1);
517             m_uBits[uIndex+1] |= uValue >> uFirstIndexBits;
518         }
519         else
520         {
521             m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
522             m_uBits[uIndex] |= uValue << uBase;
523         }
524         uStartBit += uNumBits;
525     }
526 
527 private:
528     uint8_t m_uBits[ SizeInBytes ];
529 };
530 
// BC6H compression (16 bytes per 16-texel block, i.e. 8 bits per texel)
532 class D3DX_BC6H : private CBits< 16 >
533 {
534 public:
535     void Decode(_In_ bool bSigned, _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
536     void Encode(_In_ bool bSigned, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
537 
538 private:
539 #pragma warning(push)
540 #pragma warning(disable : 4480)
541     enum EField : uint8_t
542     {
543         NA, // N/A
544         M,  // Mode
545         D,  // Shape
546         RW,
547         RX,
548         RY,
549         RZ,
550         GW,
551         GX,
552         GY,
553         GZ,
554         BW,
555         BX,
556         BY,
557         BZ,
558     };
559 #pragma warning(pop)
560 
561     struct ModeDescriptor
562     {
563         EField m_eField;
564         uint8_t   m_uBit;
565     };
566 
567     struct ModeInfo
568     {
569         uint8_t uMode;
570         uint8_t uPartitions;
571         bool bTransformed;
572         uint8_t uIndexPrec;
573         LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
574     };
575 
576 #pragma warning(push)
577 #pragma warning(disable : 4512)
578     struct EncodeParams
579     {
580         float fBestErr;
581         const bool bSigned;
582         uint8_t uMode;
583         uint8_t uShape;
584         const HDRColorA* const aHDRPixels;
585         INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
586         INTColor aIPixels[NUM_PIXELS_PER_BLOCK];
587 
EncodeParamsEncodeParams588         EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) :
589             aHDRPixels(aOriginal), fBestErr(FLT_MAX), bSigned(bSignedFormat)
590         {
591             for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
592             {
593                 aIPixels[i].Set(aOriginal[i], bSigned);
594             }
595         }
596     };
597 #pragma warning(pop)
598 
599     static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned);
600     static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned);
601     static int FinishUnquantize(_In_ int comp, _In_ bool bSigned);
602 
603     static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]);
604 
605     void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts,
606                                   _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]) const;
607     float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const;
608     float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch,
609                      _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const;
610     void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr,
611                      _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const;
612     void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const float aOrgErr[],
613                            _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[],
614                            _Out_writes_all_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const;
615     static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[],
616                             _In_reads_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]);
617     void AssignIndices(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
618                         _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[],
619                         _Out_writes_(BC6H_MAX_REGIONS) float aTotErr[]) const;
620     void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_writes_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const;
621     void EmitBlock(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
622                    _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]);
623     void Refine(_Inout_ EncodeParams* pEP);
624 
625     static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]);
626     float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_reads_(np) const size_t* auIndex) const;
627     float RoughMSE(_Inout_ EncodeParams* pEP) const;
628 
629 private:
630     const static ModeDescriptor ms_aDesc[][82];
631     const static ModeInfo ms_aInfo[];
632     const static int ms_aModeToInfo[];
633 };
634 
// BC7 compression (16 bytes per 16-texel block, i.e. 8 bits per texel)
636 class D3DX_BC7 : private CBits< 16 >
637 {
638 public:
639     void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
640     void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
641 
642 private:
643     struct ModeInfo
644     {
645         uint8_t uPartitions;
646         uint8_t uPartitionBits;
647         uint8_t uPBits;
648         uint8_t uRotationBits;
649         uint8_t uIndexModeBits;
650         uint8_t uIndexPrec;
651         uint8_t uIndexPrec2;
652         LDRColorA RGBAPrec;
653         LDRColorA RGBAPrecWithP;
654     };
655 
656 #pragma warning(push)
657 #pragma warning(disable : 4512)
658     struct EncodeParams
659     {
660         uint8_t uMode;
661         LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS];
662         LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK];
663         const HDRColorA* const aHDRPixels;
664 
EncodeParamsEncodeParams665         EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {}
666     };
667 #pragma warning(pop)
668 
Quantize(_In_ uint8_t comp,_In_ uint8_t uPrec)669     static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec)
670     {
671         assert(0 < uPrec && uPrec <= 8);
672         uint8_t rnd = (uint8_t) std::min<uint16_t>(255, uint16_t(comp) + (1 << (7 - uPrec)));
673         return rnd >> (8 - uPrec);
674     }
675 
Quantize(_In_ const LDRColorA & c,_In_ const LDRColorA & RGBAPrec)676     static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
677     {
678         LDRColorA q;
679         q.r = Quantize(c.r, RGBAPrec.r);
680         q.g = Quantize(c.g, RGBAPrec.g);
681         q.b = Quantize(c.b, RGBAPrec.b);
682         if(RGBAPrec.a)
683             q.a = Quantize(c.a, RGBAPrec.a);
684         else
685             q.a = 255;
686         return q;
687     }
688 
Unquantize(_In_ uint8_t comp,_In_ size_t uPrec)689     static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec)
690     {
691         assert(0 < uPrec && uPrec <= 8);
692         comp = comp << (8 - uPrec);
693         return comp | (comp >> uPrec);
694     }
695 
Unquantize(_In_ const LDRColorA & c,_In_ const LDRColorA & RGBAPrec)696     static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
697     {
698         LDRColorA q;
699         q.r = Unquantize(c.r, RGBAPrec.r);
700         q.g = Unquantize(c.g, RGBAPrec.g);
701         q.b = Unquantize(c.b, RGBAPrec.b);
702         q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255;
703         return q;
704     }
705 
706     void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts,
707                                   _Out_writes_(BC7_MAX_INDICES) LDRColorA aPalette[]) const;
708     float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
709                      _In_ size_t ch, _In_ const LDREndPntPair &old_endpts,
710                      _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const;
711     void Exhaustive(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
712                     _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const;
713     void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
714                      _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const;
715     void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
716                            _In_reads_(BC7_MAX_REGIONS) const float orig_err[],
717                            _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[],
718                            _Out_writes_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const;
719     void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
720                        _In_reads_(BC7_MAX_REGIONS) LDREndPntPair endpts[],
721                        _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[],
722                        _Out_writes_(BC7_MAX_REGIONS) float afTotErr[]) const;
723     void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode,
724                    _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[],
725                    _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[],
726                    _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]);
727     float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode);
728 
729     float MapColors(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
730                     _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const;
731     static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode);
732 
733 private:
734     const static ModeInfo ms_aInfo[];
735 };
736 
737 //-------------------------------------------------------------------------------------
738 #pragma warning(push)
739 #pragma warning(disable : 4127)
OptimizeAlpha(float * pX,float * pY,const float * pPoints,size_t cSteps)740 template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps)
741 {
742     static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f };
743     static const float pD6[] = { 0.0f/5.0f, 1.0f/5.0f, 2.0f/5.0f, 3.0f/5.0f, 4.0f/5.0f, 5.0f/5.0f };
744     static const float pC8[] = { 7.0f/7.0f, 6.0f/7.0f, 5.0f/7.0f, 4.0f/7.0f, 3.0f/7.0f, 2.0f/7.0f, 1.0f/7.0f, 0.0f/7.0f };
745     static const float pD8[] = { 0.0f/7.0f, 1.0f/7.0f, 2.0f/7.0f, 3.0f/7.0f, 4.0f/7.0f, 5.0f/7.0f, 6.0f/7.0f, 7.0f/7.0f };
746 
747     const float *pC = (6 == cSteps) ? pC6 : pC8;
748     const float *pD = (6 == cSteps) ? pD6 : pD8;
749 
750     float MAX_VALUE = 1.0f;
751     float MIN_VALUE;
752     if (bRange)
753     {
754         MIN_VALUE = -1.0f;
755     }
756     else
757     {
758         MIN_VALUE = 0.0f;
759     }
760 
761     // Find Min and Max points, as starting point
762     float fX = MAX_VALUE;
763     float fY = MIN_VALUE;
764 
765     if(8 == cSteps)
766     {
767         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
768         {
769             if(pPoints[iPoint] < fX)
770                 fX = pPoints[iPoint];
771 
772             if(pPoints[iPoint] > fY)
773                 fY = pPoints[iPoint];
774         }
775     }
776     else
777     {
778         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
779         {
780             if(pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
781                 fX = pPoints[iPoint];
782 
783             if(pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
784                 fY = pPoints[iPoint];
785         }
786 
787         if (fX == fY)
788         {
789             fY = MAX_VALUE;
790         }
791     }
792 
793     // Use Newton's Method to find local minima of sum-of-squares error.
794     float fSteps = (float) (cSteps - 1);
795 
796     for(size_t iIteration = 0; iIteration < 8; iIteration++)
797     {
798         float fScale;
799 
800         if((fY - fX) < (1.0f / 256.0f))
801             break;
802 
803         fScale = fSteps / (fY - fX);
804 
805         // Calculate new steps
806         float pSteps[8];
807 
808         for(size_t iStep = 0; iStep < cSteps; iStep++)
809             pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;
810 
811         if(6 == cSteps)
812         {
813             pSteps[6] = MIN_VALUE;
814             pSteps[7] = MAX_VALUE;
815         }
816 
817         // Evaluate function, and derivatives
818         float dX  = 0.0f;
819         float dY  = 0.0f;
820         float d2X = 0.0f;
821         float d2Y = 0.0f;
822 
823         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
824         {
825             float fDot = (pPoints[iPoint] - fX) * fScale;
826 
827             size_t iStep;
828 
829             if(fDot <= 0.0f)
830                 iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0;
831             else if(fDot >= fSteps)
832                 iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1);
833             else
834                 iStep = static_cast<int32_t>(fDot + 0.5f);
835 
836 
837             if(iStep < cSteps)
838             {
839                 // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
840                 // this fix improves RMS of the alpha component
841                 float fDiff = pSteps[iStep] - pPoints[iPoint];
842 
843                 dX  += pC[iStep] * fDiff;
844                 d2X += pC[iStep] * pC[iStep];
845 
846                 dY  += pD[iStep] * fDiff;
847                 d2Y += pD[iStep] * pD[iStep];
848             }
849         }
850 
851         // Move endpoints
852         if(d2X > 0.0f)
853             fX -= dX / d2X;
854 
855         if(d2Y > 0.0f)
856             fY -= dY / d2Y;
857 
858         if(fX > fY)
859         {
860             float f = fX; fX = fY; fY = f;
861         }
862 
863         if((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
864             break;
865     }
866 
867     *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
868     *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
869 }
870 #pragma warning(pop)
871 
872 
873 //-------------------------------------------------------------------------------------
874 // Functions
875 //-------------------------------------------------------------------------------------
876 
// Function-pointer signatures shared by the block decoders and by most block encoders.
typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags);

// Decoders: per the SAL annotations, each reads one compressed block from pBC
// (8 bytes for BC1/BC4, 16 bytes for the others) and writes NUM_PIXELS_PER_BLOCK colors to pColor.
void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);

// Encoders: per the SAL annotations, each reads NUM_PIXELS_PER_BLOCK colors from pColor and
// writes one compressed block (8 or 16 bytes) to pBC.
void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags);
    // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above

void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
903 
904 }; // namespace
905