1 //-------------------------------------------------------------------------------------
2 // BC.h
3 //
4 // Block-compression (BC) functionality
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15
16 #pragma once
17
18 #include <assert.h>
19
20 #ifdef USE_XNAMATH
21 #include <xnamath.h>
22 #else
23 #include <directxmath.h>
24 #include <directxpackedvector.h>
25 #endif
26
27 #include <float.h>
28
29 namespace DirectX
30 {
31
32 #ifndef USE_XNAMATH
33 typedef PackedVector::HALF HALF;
34 typedef PackedVector::XMHALF4 XMHALF4;
35 typedef PackedVector::XMU565 XMU565;
36 #endif
37
38 //-------------------------------------------------------------------------------------
39 // Constants
40 //-------------------------------------------------------------------------------------
41
// Bit-level constants for 16-bit half-precision values handled as raw uint16_t patterns.
// They are used by INTColor::F16ToINT / INTColor::INT2F16 to split off the sign bit and to
// clamp the remaining magnitude bits.
const uint16_t F16S_MASK = 0x8000; // f16 sign mask (top bit)
const uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask (all bits except the sign)
const uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF; larger magnitudes are clamped to this
45
// Sign-extends the low 'nb' bits of the integer 'x' to a full-width int (two's complement):
// when bit (nb-1) of x is set, every bit from position nb upward is set too; otherwise the
// value of x is returned unchanged.
// Requires 0 < nb < 31. 'x' is evaluated twice, so it must not have side effects.
// The fill mask is spelled -(1 << nb) instead of (~0) << nb because left-shifting a negative
// value is undefined behavior before C++20; both produce the same bit pattern.
#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?(-(1<<(nb))):0)|(x))
47
// Because these are used in SAL annotations, they need to remain macros rather than const values
// NUM_PIXELS_PER_BLOCK: number of pixels handled per compressed block (sizes the pixel arrays below).
#define NUM_PIXELS_PER_BLOCK 16
// Upper bounds used to size the per-region endpoint arrays and the per-block palettes.
#define BC6H_MAX_REGIONS 2
#define BC6H_MAX_INDICES 16
#define BC7_MAX_REGIONS 3
#define BC7_MAX_INDICES 16

// BC6H: channel count and number of candidate shapes (sizes EncodeParams::aUnqEndPts).
const size_t BC6H_NUM_CHANNELS = 3;
const size_t BC6H_MAX_SHAPES = 32;

// BC7: channel count and number of candidate shapes (sizes EncodeParams::aEndPts).
const size_t BC7_NUM_CHANNELS = 4;
const size_t BC7_MAX_SHAPES = 64;

// Fixed-point interpolation parameters shared by BC6H/BC7. LDRColorA::Interpolate* computes
//   (c0 * (BC67_WEIGHT_MAX - w) + c1 * w + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT
// where w comes from one of the weight tables below.
const int32_t BC67_WEIGHT_MAX = 64;
const uint32_t BC67_WEIGHT_SHIFT = 6;
const int32_t BC67_WEIGHT_ROUND = 32;

// Interpolation weight tables for 2-, 3- and 4-bit indices (4, 8 and 16 entries); defined elsewhere.
extern const int g_aWeights2[4];
extern const int g_aWeights3[8];
extern const int g_aWeights4[16];
68
// Option bits accepted through the 'flags' parameter of the D3DXEncodeBC* functions.
// The values are distinct single bits, so they can be combined with bitwise OR.
enum BC_FLAGS
{
    BC_FLAGS_NONE = 0x0,
    BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
    BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
    BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
    BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back
};
77
78 //-------------------------------------------------------------------------------------
79 // Structures
80 //-------------------------------------------------------------------------------------
81 class HDRColorA;
82
83 class LDRColorA
84 {
85 public:
86 uint8_t r, g, b, a;
87
LDRColorA()88 LDRColorA() DIRECTX_CTOR_DEFAULT
89 LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {}
90
91 const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const
92 {
93 switch(uElement)
94 {
95 case 0: return r;
96 case 1: return g;
97 case 2: return b;
98 case 3: return a;
99 default: assert(false); return r;
100 }
101 }
102
103 uint8_t& operator [] (_In_range_(0,3) size_t uElement)
104 {
105 switch(uElement)
106 {
107 case 0: return r;
108 case 1: return g;
109 case 2: return b;
110 case 3: return a;
111 default: assert(false); return r;
112 }
113 }
114
115 LDRColorA operator = (_In_ const HDRColorA& c);
116
117 static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ _In_range_(2, 4) size_t wcprec, _Out_ LDRColorA& out)
118 {
119 const int* aWeights = nullptr;
120 switch(wcprec)
121 {
122 case 2: aWeights = g_aWeights2; assert( wc < 4 ); _Analysis_assume_( wc < 4 ); break;
123 case 3: aWeights = g_aWeights3; assert( wc < 8 ); _Analysis_assume_( wc < 8 ); break;
124 case 4: aWeights = g_aWeights4; assert( wc < 16 ); _Analysis_assume_( wc < 16 ); break;
125 default: assert(false); out.r = out.g = out.b = 0; return;
126 }
127 out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
128 out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
129 out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
130 }
131
132 static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_range_(2, 4) _In_ size_t waprec, _Out_ LDRColorA& out)
133 {
134 const int* aWeights = nullptr;
135 switch(waprec)
136 {
137 case 2: aWeights = g_aWeights2; assert( wa < 4 ); _Analysis_assume_( wa < 4 ); break;
138 case 3: aWeights = g_aWeights3; assert( wa < 8 ); _Analysis_assume_( wa < 8 ); break;
139 case 4: aWeights = g_aWeights4; assert( wa < 16 ); _Analysis_assume_( wa < 16 ); break;
140 default: assert(false); out.a = 0; return;
141 }
142 out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
143 }
144
145 static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ _In_range_(2, 4) size_t wcprec, _In_ _In_range_(2, 4) size_t waprec, _Out_ LDRColorA& out)
146 {
147 InterpolateRGB(c0, c1, wc, wcprec, out);
148 InterpolateA(c0, c1, wa, waprec, out);
149 }
150 };
151
152 static_assert( sizeof(LDRColorA) == 4, "Unexpected packing");
153
154 class HDRColorA
155 {
156 public:
157 float r, g, b, a;
158
159 public:
HDRColorA()160 HDRColorA() DIRECTX_CTOR_DEFAULT
161 HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {}
HDRColorA(const HDRColorA & c)162 HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {}
HDRColorA(const LDRColorA & c)163 HDRColorA(const LDRColorA& c)
164 {
165 r = float(c.r) * (1.0f/255.0f);
166 g = float(c.g) * (1.0f/255.0f);
167 b = float(c.b) * (1.0f/255.0f);
168 a = float(c.a) * (1.0f/255.0f);
169 }
170
171 // binary operators
172 HDRColorA operator + ( _In_ const HDRColorA& c ) const
173 {
174 return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
175 }
176
177 HDRColorA operator - ( _In_ const HDRColorA& c ) const
178 {
179 return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
180 }
181
182 HDRColorA operator * ( _In_ float f ) const
183 {
184 return HDRColorA(r * f, g * f, b * f, a * f);
185 }
186
187 HDRColorA operator / ( _In_ float f ) const
188 {
189 float fInv = 1.0f / f;
190 return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
191 }
192
193 float operator * ( _In_ const HDRColorA& c ) const
194 {
195 return r * c.r + g * c.g + b * c.b + a * c.a;
196 }
197
198 // assignment operators
199 HDRColorA& operator += ( _In_ const HDRColorA& c )
200 {
201 r += c.r;
202 g += c.g;
203 b += c.b;
204 a += c.a;
205 return *this;
206 }
207
208 HDRColorA& operator -= ( _In_ const HDRColorA& c )
209 {
210 r -= c.r;
211 g -= c.g;
212 b -= c.b;
213 a -= c.a;
214 return *this;
215 }
216
217 HDRColorA& operator *= ( _In_ float f )
218 {
219 r *= f;
220 g *= f;
221 b *= f;
222 a *= f;
223 return *this;
224 }
225
226 HDRColorA& operator /= ( _In_ float f )
227 {
228 float fInv = 1.0f / f;
229 r *= fInv;
230 g *= fInv;
231 b *= fInv;
232 a *= fInv;
233 return *this;
234 }
235
236 HDRColorA& operator = (_In_ const LDRColorA& c)
237 {
238 r = (float) c.r;
239 g = (float) c.g;
240 b = (float) c.b;
241 a = (float) c.a;
242 return *this;
243 }
244
Clamp(_In_ float fMin,_In_ float fMax)245 HDRColorA& Clamp(_In_ float fMin, _In_ float fMax)
246 {
247 r = std::min<float>(fMax, std::max<float>(fMin, r));
248 g = std::min<float>(fMax, std::max<float>(fMin, g));
249 b = std::min<float>(fMax, std::max<float>(fMin, b));
250 a = std::min<float>(fMax, std::max<float>(fMin, a));
251 return *this;
252 }
253
ToLDRColorA()254 LDRColorA ToLDRColorA() const
255 {
256 return LDRColorA((uint8_t) (r + 0.01f), (uint8_t) (g + 0.01f), (uint8_t) (b + 0.01f), (uint8_t) (a + 0.01f));
257 }
258 };
259
260 inline LDRColorA LDRColorA::operator = (_In_ const HDRColorA& c)
261 {
262 LDRColorA ret;
263 HDRColorA tmp(c);
264 tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
265 ret.r = uint8_t(tmp.r + 0.001f);
266 ret.g = uint8_t(tmp.g + 0.001f);
267 ret.b = uint8_t(tmp.b + 0.001f);
268 ret.a = uint8_t(tmp.a + 0.001f);
269 return ret;
270 }
271
// A pair of 8-bit endpoint colors (A and B) between which palette entries are interpolated.
struct LDREndPntPair
{
    LDRColorA A;
    LDRColorA B;
};
277
// A pair of floating-point endpoint colors (A and B).
struct HDREndPntPair
{
    HDRColorA A;
    HDRColorA B;
};
283
HDRColorALerp(_Out_ HDRColorA * pOut,_In_ const HDRColorA * pC1,_In_ const HDRColorA * pC2,_In_ float s)284 inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s)
285 {
286 pOut->r = pC1->r + s * (pC2->r - pC1->r);
287 pOut->g = pC1->g + s * (pC2->g - pC1->g);
288 pOut->b = pC1->b + s * (pC2->b - pC1->b);
289 pOut->a = pC1->a + s * (pC2->a - pC1->a);
290 return pOut;
291 }
292
// On-disk block layouts. They are packed to 1-byte alignment so that each struct maps
// directly onto the raw block bytes handed to the D3DXDecodeBC*/D3DXEncodeBC* functions.
#pragma pack(push,1)
// BC1/DXT1 compression (4 bits per texel)
struct D3DX_BC1
{
    uint16_t rgb[2]; // 565 colors
    uint32_t bitmap; // 2bpp rgb bitmap
};

// BC2/DXT2/3 compression (8 bits per texel)
struct D3DX_BC2
{
    uint32_t bitmap[2]; // 4bpp alpha bitmap
    D3DX_BC1 bc1; // BC1 rgb data
};

// BC3/DXT4/5 compression (8 bits per texel)
struct D3DX_BC3
{
    uint8_t alpha[2]; // alpha values
    uint8_t bitmap[6]; // 3bpp alpha bitmap
    D3DX_BC1 bc1; // BC1 rgb data
};
#pragma pack(pop)

// Guard the block sizes the same way the other layout-sensitive types in this header are
// guarded: BC1 blocks are 8 bytes, BC2/BC3 blocks are 16 bytes.
static_assert( sizeof(D3DX_BC1) == 8, "Unexpected packing");
static_assert( sizeof(D3DX_BC2) == 16, "Unexpected packing");
static_assert( sizeof(D3DX_BC3) == 16, "Unexpected packing");
316
317 class INTColor
318 {
319 public:
320 int r, g, b;
321 int pad;
322
323 public:
INTColor()324 INTColor() DIRECTX_CTOR_DEFAULT
325 INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;}
INTColor(const INTColor & c)326 INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;}
327
328 INTColor operator - ( _In_ const INTColor& c ) const
329 {
330 return INTColor(r - c.r, g - c.g, b - c.b);
331 }
332
333 INTColor& operator += ( _In_ const INTColor& c )
334 {
335 r += c.r;
336 g += c.g;
337 b += c.b;
338 return *this;
339 }
340
341 INTColor& operator -= ( _In_ const INTColor& c )
342 {
343 r -= c.r;
344 g -= c.g;
345 b -= c.b;
346 return *this;
347 }
348
349 INTColor& operator &= ( _In_ const INTColor& c )
350 {
351 r &= c.r;
352 g &= c.g;
353 b &= c.b;
354 return *this;
355 }
356
357 int& operator [] ( _In_ uint8_t i )
358 {
359 assert(i < sizeof(INTColor) / sizeof(int));
360 _Analysis_assume_(i < sizeof(INTColor) / sizeof(int));
361 return ((int*) this)[i];
362 }
363
Set(_In_ const HDRColorA & c,_In_ bool bSigned)364 void Set(_In_ const HDRColorA& c, _In_ bool bSigned)
365 {
366 XMHALF4 aF16;
367
368 XMVECTOR v = XMLoadFloat4( (const XMFLOAT4*)& c );
369 XMStoreHalf4( &aF16, v );
370
371 r = F16ToINT(aF16.x, bSigned);
372 g = F16ToINT(aF16.y, bSigned);
373 b = F16ToINT(aF16.z, bSigned);
374 }
375
Clamp(_In_ int iMin,_In_ int iMax)376 INTColor& Clamp(_In_ int iMin, _In_ int iMax)
377 {
378 r = std::min<int>(iMax, std::max<int>(iMin, r));
379 g = std::min<int>(iMax, std::max<int>(iMin, g));
380 b = std::min<int>(iMax, std::max<int>(iMin, b));
381 return *this;
382 }
383
SignExtend(_In_ const LDRColorA & Prec)384 INTColor& SignExtend(_In_ const LDRColorA& Prec)
385 {
386 r = SIGN_EXTEND(r, Prec.r);
387 g = SIGN_EXTEND(g, Prec.g);
388 b = SIGN_EXTEND(b, Prec.b);
389 return *this;
390 }
391
392 void ToF16(_Out_writes_(3) HALF aF16[3], _In_ bool bSigned) const
393 {
394 aF16[0] = INT2F16(r, bSigned);
395 aF16[1] = INT2F16(g, bSigned);
396 aF16[2] = INT2F16(b, bSigned);
397 }
398
399 private:
F16ToINT(_In_ const HALF & f,_In_ bool bSigned)400 static int F16ToINT(_In_ const HALF& f, _In_ bool bSigned)
401 {
402 uint16_t input = *((const uint16_t*) &f);
403 int out, s;
404 if(bSigned)
405 {
406 s = input & F16S_MASK;
407 input &= F16EM_MASK;
408 if(input > F16MAX) out = F16MAX;
409 else out = input;
410 out = s ? -out : out;
411 }
412 else
413 {
414 if(input & F16S_MASK) out = 0;
415 else out = input;
416 }
417 return out;
418 }
419
INT2F16(_In_ int input,_In_ bool bSigned)420 static HALF INT2F16(_In_ int input, _In_ bool bSigned)
421 {
422 HALF h;
423 uint16_t out;
424 if(bSigned)
425 {
426 int s = 0;
427 if(input < 0)
428 {
429 s = F16S_MASK;
430 input = -input;
431 }
432 out = uint16_t(s | input);
433 }
434 else
435 {
436 assert(input >= 0 && input <= F16MAX);
437 out = (uint16_t) input;
438 }
439
440 *((uint16_t*) &h) = out;
441 return h;
442 }
443 };
444
445 static_assert( sizeof(INTColor) == 16, "Unexpected packing");
446
// A pair of integer endpoint colors (A and B), as used by the BC6H code.
struct INTEndPntPair
{
    INTColor A;
    INTColor B;
};
452
453 template< size_t SizeInBytes >
454 class CBits
455 {
456 public:
GetBit(_Inout_ size_t & uStartBit)457 uint8_t GetBit(_Inout_ size_t& uStartBit) const
458 {
459 assert(uStartBit < 128);
460 _Analysis_assume_(uStartBit < 128);
461 size_t uIndex = uStartBit >> 3;
462 uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01;
463 uStartBit++;
464 return ret;
465 }
466
GetBits(_Inout_ size_t & uStartBit,_In_ size_t uNumBits)467 uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const
468 {
469 if(uNumBits == 0) return 0;
470 assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
471 _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
472 uint8_t ret;
473 size_t uIndex = uStartBit >> 3;
474 size_t uBase = uStartBit - (uIndex << 3);
475 if(uBase + uNumBits > 8)
476 {
477 size_t uFirstIndexBits = 8 - uBase;
478 size_t uNextIndexBits = uNumBits - uFirstIndexBits;
479 ret = (m_uBits[uIndex] >> uBase) | ((m_uBits[uIndex+1] & ((1 << uNextIndexBits) - 1)) << uFirstIndexBits);
480 }
481 else
482 {
483 ret = (m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1);
484 }
485 assert(ret < (1 << uNumBits));
486 uStartBit += uNumBits;
487 return ret;
488 }
489
SetBit(_Inout_ size_t & uStartBit,_In_ uint8_t uValue)490 void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue)
491 {
492 assert(uStartBit < 128 && uValue < 2);
493 _Analysis_assume_(uStartBit < 128 && uValue < 2);
494 size_t uIndex = uStartBit >> 3;
495 size_t uBase = uStartBit - (uIndex << 3);
496 m_uBits[uIndex] &= ~(1 << uBase);
497 m_uBits[uIndex] |= uValue << uBase;
498 uStartBit++;
499 }
500
SetBits(_Inout_ size_t & uStartBit,_In_ size_t uNumBits,_In_ uint8_t uValue)501 void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue)
502 {
503 if(uNumBits == 0)
504 return;
505 assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
506 _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
507 assert(uValue < (1 << uNumBits));
508 size_t uIndex = uStartBit >> 3;
509 size_t uBase = uStartBit - (uIndex << 3);
510 if(uBase + uNumBits > 8)
511 {
512 size_t uFirstIndexBits = 8 - uBase;
513 size_t uNextIndexBits = uNumBits - uFirstIndexBits;
514 m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
515 m_uBits[uIndex] |= uValue << uBase;
516 m_uBits[uIndex+1] &= ~((1 << uNextIndexBits) - 1);
517 m_uBits[uIndex+1] |= uValue >> uFirstIndexBits;
518 }
519 else
520 {
521 m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
522 m_uBits[uIndex] |= uValue << uBase;
523 }
524 uStartBit += uNumBits;
525 }
526
527 private:
528 uint8_t m_uBits[ SizeInBytes ];
529 };
530
// BC6H compression (8 bits per texel: one 16-byte block holds NUM_PIXELS_PER_BLOCK texels)
// The block bytes are the privately inherited CBits<16> buffer. Apart from the EncodeParams
// constructor, the member functions are only declared here; they are defined elsewhere.
class D3DX_BC6H : private CBits< 16 >
{
public:
    // Decodes this block into NUM_PIXELS_PER_BLOCK colors written to pOut.
    // bSigned presumably selects the signed vs. unsigned half-float variant - confirm against the definition.
    void Decode(_In_ bool bSigned, _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
    // Encodes NUM_PIXELS_PER_BLOCK colors read from pIn into this block.
    void Encode(_In_ bool bSigned, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);

private:
    // C4480: nonstandard extension (explicit underlying type for an enum) on older compilers.
#pragma warning(push)
#pragma warning(disable : 4480)
    // Field identifiers used by ModeDescriptor entries.
    enum EField : uint8_t
    {
        NA, // N/A
        M, // Mode
        D, // Shape
        RW,
        RX,
        RY,
        RZ,
        GW,
        GX,
        GY,
        GZ,
        BW,
        BX,
        BY,
        BZ,
    };
#pragma warning(pop)

    // One entry of a mode's bit layout table (ms_aDesc): a field and a bit number.
    struct ModeDescriptor
    {
        EField m_eField;
        uint8_t m_uBit;
    };

    // Per-mode parameters (ms_aInfo).
    struct ModeInfo
    {
        uint8_t uMode;
        uint8_t uPartitions;
        bool bTransformed;
        uint8_t uIndexPrec;
        LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
    };

    // C4512: assignment operator could not be generated (the struct has const members).
#pragma warning(push)
#pragma warning(disable : 4512)
    // Working state for encoding one block.
    struct EncodeParams
    {
        float fBestErr;
        const bool bSigned;
        uint8_t uMode;
        uint8_t uShape;
        const HDRColorA* const aHDRPixels;
        INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
        INTColor aIPixels[NUM_PIXELS_PER_BLOCK];

        // Keeps a pointer to the source pixels, starts with the worst possible error and
        // converts every source pixel to its integer form. uMode, uShape and aUnqEndPts are
        // left uninitialized here. The initializer list is not in declaration order; members
        // are nevertheless initialized in declaration order, so bSigned is valid in the body.
        EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) :
            aHDRPixels(aOriginal), fBestErr(FLT_MAX), bSigned(bSignedFormat)
        {
            for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
            {
                aIPixels[i].Set(aOriginal[i], bSigned);
            }
        }
    };
#pragma warning(pop)

    // Scalar quantization helpers (defined elsewhere).
    static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned);
    static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned);
    static int FinishUnquantize(_In_ int comp, _In_ bool bSigned);

    static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]);

    // Encoder internals (defined elsewhere). Array extents are given by the SAL annotations.
    void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts,
                                  _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]) const;
    float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const;
    float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch,
                     _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const;
    void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr,
                     _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const;
    void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const float aOrgErr[],
                           _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[],
                           _Out_writes_all_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const;
    static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[],
                            _In_reads_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]);
    void AssignIndices(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
                       _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[],
                       _Out_writes_(BC6H_MAX_REGIONS) float aTotErr[]) const;
    void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_writes_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const;
    void EmitBlock(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
                   _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]);
    void Refine(_Inout_ EncodeParams* pEP);

    static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]);
    float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_reads_(np) const size_t* auIndex) const;
    float RoughMSE(_Inout_ EncodeParams* pEP) const;

private:
    // Static lookup tables, defined elsewhere.
    const static ModeDescriptor ms_aDesc[][82];
    const static ModeInfo ms_aInfo[];
    const static int ms_aModeToInfo[];
};
634
635 // BC67 compression (16b bits per texel)
636 class D3DX_BC7 : private CBits< 16 >
637 {
638 public:
639 void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
640 void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
641
642 private:
643 struct ModeInfo
644 {
645 uint8_t uPartitions;
646 uint8_t uPartitionBits;
647 uint8_t uPBits;
648 uint8_t uRotationBits;
649 uint8_t uIndexModeBits;
650 uint8_t uIndexPrec;
651 uint8_t uIndexPrec2;
652 LDRColorA RGBAPrec;
653 LDRColorA RGBAPrecWithP;
654 };
655
656 #pragma warning(push)
657 #pragma warning(disable : 4512)
658 struct EncodeParams
659 {
660 uint8_t uMode;
661 LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS];
662 LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK];
663 const HDRColorA* const aHDRPixels;
664
EncodeParamsEncodeParams665 EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {}
666 };
667 #pragma warning(pop)
668
Quantize(_In_ uint8_t comp,_In_ uint8_t uPrec)669 static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec)
670 {
671 assert(0 < uPrec && uPrec <= 8);
672 uint8_t rnd = (uint8_t) std::min<uint16_t>(255, uint16_t(comp) + (1 << (7 - uPrec)));
673 return rnd >> (8 - uPrec);
674 }
675
Quantize(_In_ const LDRColorA & c,_In_ const LDRColorA & RGBAPrec)676 static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
677 {
678 LDRColorA q;
679 q.r = Quantize(c.r, RGBAPrec.r);
680 q.g = Quantize(c.g, RGBAPrec.g);
681 q.b = Quantize(c.b, RGBAPrec.b);
682 if(RGBAPrec.a)
683 q.a = Quantize(c.a, RGBAPrec.a);
684 else
685 q.a = 255;
686 return q;
687 }
688
Unquantize(_In_ uint8_t comp,_In_ size_t uPrec)689 static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec)
690 {
691 assert(0 < uPrec && uPrec <= 8);
692 comp = comp << (8 - uPrec);
693 return comp | (comp >> uPrec);
694 }
695
Unquantize(_In_ const LDRColorA & c,_In_ const LDRColorA & RGBAPrec)696 static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
697 {
698 LDRColorA q;
699 q.r = Unquantize(c.r, RGBAPrec.r);
700 q.g = Unquantize(c.g, RGBAPrec.g);
701 q.b = Unquantize(c.b, RGBAPrec.b);
702 q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255;
703 return q;
704 }
705
706 void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts,
707 _Out_writes_(BC7_MAX_INDICES) LDRColorA aPalette[]) const;
708 float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
709 _In_ size_t ch, _In_ const LDREndPntPair &old_endpts,
710 _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const;
711 void Exhaustive(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
712 _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const;
713 void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
714 _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const;
715 void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
716 _In_reads_(BC7_MAX_REGIONS) const float orig_err[],
717 _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[],
718 _Out_writes_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const;
719 void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
720 _In_reads_(BC7_MAX_REGIONS) LDREndPntPair endpts[],
721 _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[],
722 _Out_writes_(BC7_MAX_REGIONS) float afTotErr[]) const;
723 void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode,
724 _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[],
725 _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[],
726 _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]);
727 float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode);
728
729 float MapColors(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
730 _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const;
731 static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode);
732
733 private:
734 const static ModeInfo ms_aInfo[];
735 };
736
737 //-------------------------------------------------------------------------------------
738 #pragma warning(push)
739 #pragma warning(disable : 4127)
OptimizeAlpha(float * pX,float * pY,const float * pPoints,size_t cSteps)740 template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps)
741 {
742 static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f };
743 static const float pD6[] = { 0.0f/5.0f, 1.0f/5.0f, 2.0f/5.0f, 3.0f/5.0f, 4.0f/5.0f, 5.0f/5.0f };
744 static const float pC8[] = { 7.0f/7.0f, 6.0f/7.0f, 5.0f/7.0f, 4.0f/7.0f, 3.0f/7.0f, 2.0f/7.0f, 1.0f/7.0f, 0.0f/7.0f };
745 static const float pD8[] = { 0.0f/7.0f, 1.0f/7.0f, 2.0f/7.0f, 3.0f/7.0f, 4.0f/7.0f, 5.0f/7.0f, 6.0f/7.0f, 7.0f/7.0f };
746
747 const float *pC = (6 == cSteps) ? pC6 : pC8;
748 const float *pD = (6 == cSteps) ? pD6 : pD8;
749
750 float MAX_VALUE = 1.0f;
751 float MIN_VALUE;
752 if (bRange)
753 {
754 MIN_VALUE = -1.0f;
755 }
756 else
757 {
758 MIN_VALUE = 0.0f;
759 }
760
761 // Find Min and Max points, as starting point
762 float fX = MAX_VALUE;
763 float fY = MIN_VALUE;
764
765 if(8 == cSteps)
766 {
767 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
768 {
769 if(pPoints[iPoint] < fX)
770 fX = pPoints[iPoint];
771
772 if(pPoints[iPoint] > fY)
773 fY = pPoints[iPoint];
774 }
775 }
776 else
777 {
778 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
779 {
780 if(pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
781 fX = pPoints[iPoint];
782
783 if(pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
784 fY = pPoints[iPoint];
785 }
786
787 if (fX == fY)
788 {
789 fY = MAX_VALUE;
790 }
791 }
792
793 // Use Newton's Method to find local minima of sum-of-squares error.
794 float fSteps = (float) (cSteps - 1);
795
796 for(size_t iIteration = 0; iIteration < 8; iIteration++)
797 {
798 float fScale;
799
800 if((fY - fX) < (1.0f / 256.0f))
801 break;
802
803 fScale = fSteps / (fY - fX);
804
805 // Calculate new steps
806 float pSteps[8];
807
808 for(size_t iStep = 0; iStep < cSteps; iStep++)
809 pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;
810
811 if(6 == cSteps)
812 {
813 pSteps[6] = MIN_VALUE;
814 pSteps[7] = MAX_VALUE;
815 }
816
817 // Evaluate function, and derivatives
818 float dX = 0.0f;
819 float dY = 0.0f;
820 float d2X = 0.0f;
821 float d2Y = 0.0f;
822
823 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
824 {
825 float fDot = (pPoints[iPoint] - fX) * fScale;
826
827 size_t iStep;
828
829 if(fDot <= 0.0f)
830 iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0;
831 else if(fDot >= fSteps)
832 iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1);
833 else
834 iStep = static_cast<int32_t>(fDot + 0.5f);
835
836
837 if(iStep < cSteps)
838 {
839 // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
840 // this fix improves RMS of the alpha component
841 float fDiff = pSteps[iStep] - pPoints[iPoint];
842
843 dX += pC[iStep] * fDiff;
844 d2X += pC[iStep] * pC[iStep];
845
846 dY += pD[iStep] * fDiff;
847 d2Y += pD[iStep] * pD[iStep];
848 }
849 }
850
851 // Move endpoints
852 if(d2X > 0.0f)
853 fX -= dX / d2X;
854
855 if(d2Y > 0.0f)
856 fY -= dY / d2Y;
857
858 if(fX > fY)
859 {
860 float f = fX; fX = fY; fY = f;
861 }
862
863 if((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
864 break;
865 }
866
867 *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
868 *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
869 }
870 #pragma warning(pop)
871
872
873 //-------------------------------------------------------------------------------------
874 // Functions
875 //-------------------------------------------------------------------------------------
876
// Function-pointer types matching the block decoders / encoders declared below.
// D3DXEncodeBC1 takes an extra alphaRef parameter and therefore does not fit BC_ENCODE.
typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags);
879
// Block decoders (defined elsewhere). Each reads one compressed block from pBC - 8 bytes for
// BC1/BC4, 16 bytes for the other formats - and writes NUM_PIXELS_PER_BLOCK colors to pColor.
// The U / S suffixes presumably denote the unsigned / signed format variants.
void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC);
void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC);
890
// Block encoders (defined elsewhere). Each reads NUM_PIXELS_PER_BLOCK colors from pColor and
// writes one compressed block to pBC - 8 bytes for BC1/BC4, 16 bytes for the other formats.
// 'flags' presumably takes BC_FLAGS values - confirm against the definitions.
void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags);
// BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above

void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
903
904 }; // namespace
905