1 //-------------------------------------------------------------------------------------
2 // BC4BC5.cpp
3 //
4 // Block-compression (BC) functionality for BC4 and BC5 (DirectX 10 texture compression)
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15
#include "DirectXTexP.h"

#include <cmath>

#include "BC.h"
19
20 namespace DirectX
21 {
22
23 //------------------------------------------------------------------------------------
24 // Constants
25 //------------------------------------------------------------------------------------
26
27 // Because these are used in SAL annotations, they need to remain macros rather than const values
28 #define BLOCK_LEN 4
// Edge length of a block, in texels.
30
31 #define BLOCK_SIZE (BLOCK_LEN * BLOCK_LEN)
32 // total texels in a 4x4 block.
33
34 //------------------------------------------------------------------------------------
35 // Structures
36 //-------------------------------------------------------------------------------------
37
38 #pragma warning(push)
39 #pragma warning(disable : 4201)
40
41 // BC4U/BC5U
// One 8-byte unsigned BC4 block: two 8-bit endpoints followed by sixteen
// 3-bit palette indices packed into the remaining 48 bits.
// The accessors address the indices through the 64-bit 'data' view, shifting
// past the two endpoint bytes; this assumes a little-endian target.
struct BC4_UNORM
{
    // Returns the decoded value of the texel at position uOffset.
    // uOffset must be in 0..15; it is not range-checked.
    float R(size_t uOffset) const
    {
        return DecodeFromIndex(GetIndex(uOffset));
    }

    // Maps a palette index (0..7) to its decoded value.
    // Indices 0 and 1 are the endpoints themselves. The remaining entries
    // depend on the endpoint ordering:
    //   red_0 >  red_1 : indices 2..7 are six interpolated values
    //   red_0 <= red_1 : indices 2..5 are four interpolated values,
    //                    index 6 is 0.0 and index 7 is 1.0
    float DecodeFromIndex(size_t uIndex) const
    {
        const float fred_0 = red_0 / 255.0f;
        const float fred_1 = red_1 / 255.0f;

        if (uIndex == 0)
            return fred_0;
        if (uIndex == 1)
            return fred_1;

        if (red_0 > red_1)
        {
            // uStep runs 1..6 for indices 2..7.
            const size_t uStep = uIndex - 1;
            return (fred_0 * (7 - uStep) + fred_1 * uStep) / 7.0f;
        }

        if (uIndex == 6)
            return 0.0f;
        if (uIndex == 7)
            return 1.0f;

        // uStep runs 1..4 for indices 2..5.
        const size_t uStep = uIndex - 1;
        return (fred_0 * (5 - uStep) + fred_1 * uStep) / 5.0f;
    }

    // Reads the 3-bit palette index stored for texel uOffset (0..15).
    size_t GetIndex(size_t uOffset) const
    {
        return static_cast<size_t>((data >> (3 * uOffset + 16)) & 0x07);
    }

    // Stores a palette index for texel uOffset (0..15).
    // Only the low three bits of uIndex are written, so an out-of-range value
    // can no longer spill into the neighbouring texels' indices.
    void SetIndex(size_t uOffset, size_t uIndex)
    {
        const size_t uShift = 3 * uOffset + 16;
        data &= ~(static_cast<uint64_t>(0x07) << uShift);
        data |= (static_cast<uint64_t>(uIndex & 0x07) << uShift);
    }

    union
    {
        struct
        {
            uint8_t red_0;       // first endpoint
            uint8_t red_1;       // second endpoint
            uint8_t indices[6];  // 16 x 3-bit palette indices
        };
        uint64_t data;           // whole block viewed as one 64-bit word
    };
};
96
97 // BC4S/BC5S
// One 8-byte signed BC4 block: two signed 8-bit endpoints followed by sixteen
// 3-bit palette indices packed into the remaining 48 bits.
// The accessors address the indices through the 64-bit 'data' view, shifting
// past the two endpoint bytes; this assumes a little-endian target.
struct BC4_SNORM
{
    // Returns the decoded value of the texel at position uOffset.
    // uOffset must be in 0..15; it is not range-checked.
    float R(size_t uOffset) const
    {
        return DecodeFromIndex(GetIndex(uOffset));
    }

    // Maps a palette index (0..7) to its decoded value.
    // Indices 0 and 1 are the endpoints themselves. The remaining entries
    // depend on the ordering of the raw endpoints:
    //   red_0 >  red_1 : indices 2..7 are six interpolated values
    //   red_0 <= red_1 : indices 2..5 are four interpolated values,
    //                    index 6 is -1.0 and index 7 is 1.0
    float DecodeFromIndex(size_t uIndex) const
    {
        // -128 is treated as -127 so that both decode to exactly -1.0.
        const int8_t sred_0 = (red_0 == -128) ? -127 : red_0;
        const int8_t sred_1 = (red_1 == -128) ? -127 : red_1;

        const float fred_0 = sred_0 / 127.0f;
        const float fred_1 = sred_1 / 127.0f;

        if (uIndex == 0)
            return fred_0;
        if (uIndex == 1)
            return fred_1;

        // The mode test deliberately uses the raw (unclamped) endpoints.
        if (red_0 > red_1)
        {
            // uStep runs 1..6 for indices 2..7.
            const size_t uStep = uIndex - 1;
            return (fred_0 * (7 - uStep) + fred_1 * uStep) / 7.0f;
        }

        if (uIndex == 6)
            return -1.0f;
        if (uIndex == 7)
            return 1.0f;

        // uStep runs 1..4 for indices 2..5.
        const size_t uStep = uIndex - 1;
        return (fred_0 * (5 - uStep) + fred_1 * uStep) / 5.0f;
    }

    // Reads the 3-bit palette index stored for texel uOffset (0..15).
    size_t GetIndex(size_t uOffset) const
    {
        return static_cast<size_t>((data >> (3 * uOffset + 16)) & 0x07);
    }

    // Stores a palette index for texel uOffset (0..15).
    // Only the low three bits of uIndex are written, so an out-of-range value
    // can no longer spill into the neighbouring texels' indices.
    void SetIndex(size_t uOffset, size_t uIndex)
    {
        const size_t uShift = 3 * uOffset + 16;
        data &= ~(static_cast<uint64_t>(0x07) << uShift);
        data |= (static_cast<uint64_t>(uIndex & 0x07) << uShift);
    }

    union
    {
        struct
        {
            int8_t red_0;        // first endpoint
            int8_t red_1;        // second endpoint
            uint8_t indices[6];  // 16 x 3-bit palette indices
        };
        uint64_t data;           // whole block viewed as one 64-bit word
    };
};
155
156 #pragma warning(pop)
157
158 //-------------------------------------------------------------------------------------
159 // Convert a floating point value to an 8-bit SNORM
160 //-------------------------------------------------------------------------------------
FloatToSNorm(_In_ float fVal,_Out_ int8_t * piSNorm)161 static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm )
162 {
163 const uint32_t dwMostNeg = ( 1 << ( 8 * sizeof( int8_t ) - 1 ) );
164
165 if( _isnan( fVal ) )
166 fVal = 0;
167 else
168 if( fVal > 1 )
169 fVal = 1; // Clamp to 1
170 else
171 if( fVal < -1 )
172 fVal = -1; // Clamp to -1
173
174 fVal = fVal * (int8_t) ( dwMostNeg - 1 );
175
176 if( fVal >= 0 )
177 fVal += .5f;
178 else
179 fVal -= .5f;
180
181 *piSNorm = (int8_t) (fVal);
182 }
183
184
185 //------------------------------------------------------------------------------
FindEndPointsBC4U(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_Out_ uint8_t & endpointU_0,_Out_ uint8_t & endpointU_1)186 static void FindEndPointsBC4U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1)
187 {
188 // The boundary of codec for signed/unsigned format
189 float MIN_NORM;
190 float MAX_NORM = 1.0f;
191 int8_t iStart, iEnd;
192 size_t i;
193
194 MIN_NORM = 0.0f;
195
196 // Find max/min of input texels
197 float fBlockMax = theTexelsU[0];
198 float fBlockMin = theTexelsU[0];
199 for (i = 0; i < BLOCK_SIZE; ++i)
200 {
201 if (theTexelsU[i]<fBlockMin)
202 {
203 fBlockMin = theTexelsU[i];
204 }
205 else if (theTexelsU[i]>fBlockMax)
206 {
207 fBlockMax = theTexelsU[i];
208 }
209 }
210
211 // If there are boundary values in input texels, Should use 4 block-codec to guarantee
212 // the exact code of the boundary values.
213 bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
214
215 // Using Optimize
216 float fStart, fEnd;
217
218 if (!bUsing4BlockCodec)
219 {
220 OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 8);
221
222 iStart = (uint8_t) (fStart * 255.0f);
223 iEnd = (uint8_t) (fEnd * 255.0f);
224
225 endpointU_0 = iEnd;
226 endpointU_1 = iStart;
227 }
228 else
229 {
230 OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 6);
231
232 iStart = (uint8_t) (fStart * 255.0f);
233 iEnd = (uint8_t) (fEnd * 255.0f);
234
235 endpointU_1 = iEnd;
236 endpointU_0 = iStart;
237 }
238 }
239
FindEndPointsBC4S(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_Out_ int8_t & endpointU_0,_Out_ int8_t & endpointU_1)240 static void FindEndPointsBC4S(_In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1)
241 {
242 // The boundary of codec for signed/unsigned format
243 float MIN_NORM;
244 float MAX_NORM = 1.0f;
245 int8_t iStart, iEnd;
246 size_t i;
247
248 MIN_NORM = -1.0f;
249
250 // Find max/min of input texels
251 float fBlockMax = theTexelsU[0];
252 float fBlockMin = theTexelsU[0];
253 for (i = 0; i < BLOCK_SIZE; ++i)
254 {
255 if (theTexelsU[i]<fBlockMin)
256 {
257 fBlockMin = theTexelsU[i];
258 }
259 else if (theTexelsU[i]>fBlockMax)
260 {
261 fBlockMax = theTexelsU[i];
262 }
263 }
264
265 // If there are boundary values in input texels, Should use 4 block-codec to guarantee
266 // the exact code of the boundary values.
267 bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
268
269 // Using Optimize
270 float fStart, fEnd;
271
272 if (!bUsing4BlockCodec)
273 {
274 OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 8);
275
276 FloatToSNorm(fStart, &iStart);
277 FloatToSNorm(fEnd, &iEnd);
278
279 endpointU_0 = iEnd;
280 endpointU_1 = iStart;
281 }
282 else
283 {
284 OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 6);
285
286 FloatToSNorm(fStart, &iStart);
287 FloatToSNorm(fEnd, &iEnd);
288
289 endpointU_1 = iEnd;
290 endpointU_0 = iStart;
291 }
292 }
293
294
295 //------------------------------------------------------------------------------
FindEndPointsBC5U(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_In_reads_ (BLOCK_SIZE)const float theTexelsV[],_Out_ uint8_t & endpointU_0,_Out_ uint8_t & endpointU_1,_Out_ uint8_t & endpointV_0,_Out_ uint8_t & endpointV_1)296 static inline void FindEndPointsBC5U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
297 _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1)
298 {
299 //Encoding the U and V channel by BC4 codec separately.
300 FindEndPointsBC4U( theTexelsU, endpointU_0, endpointU_1);
301 FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1);
302 }
303
FindEndPointsBC5S(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_In_reads_ (BLOCK_SIZE)const float theTexelsV[],_Out_ int8_t & endpointU_0,_Out_ int8_t & endpointU_1,_Out_ int8_t & endpointV_0,_Out_ int8_t & endpointV_1)304 static inline void FindEndPointsBC5S( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
305 _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1)
306 {
307 //Encoding the U and V channel by BC4 codec separately.
308 FindEndPointsBC4S( theTexelsU, endpointU_0, endpointU_1);
309 FindEndPointsBC4S( theTexelsV, endpointV_0, endpointV_1);
310 }
311
312
313 //------------------------------------------------------------------------------
FindClosestUNORM(_Inout_ BC4_UNORM * pBC,_In_reads_ (NUM_PIXELS_PER_BLOCK)const float theTexelsU[])314 static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
315 {
316 float rGradient[8];
317 int i;
318 for (i = 0; i < 8; ++i)
319 {
320 rGradient[i] = pBC->DecodeFromIndex(i);
321 }
322 for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
323 {
324 size_t uBestIndex = 0;
325 float fBestDelta = 100000;
326 for (size_t uIndex = 0; uIndex < 8; uIndex++)
327 {
328 float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
329 if (fCurrentDelta < fBestDelta)
330 {
331 uBestIndex = uIndex;
332 fBestDelta = fCurrentDelta;
333 }
334 }
335 pBC->SetIndex(i, uBestIndex);
336 }
337 }
338
FindClosestSNORM(_Inout_ BC4_SNORM * pBC,_In_reads_ (NUM_PIXELS_PER_BLOCK)const float theTexelsU[])339 static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
340 {
341 float rGradient[8];
342 int i;
343 for (i = 0; i < 8; ++i)
344 {
345 rGradient[i] = pBC->DecodeFromIndex(i);
346 }
347 for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
348 {
349 size_t uBestIndex = 0;
350 float fBestDelta = 100000;
351 for (size_t uIndex = 0; uIndex < 8; uIndex++)
352 {
353 float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
354 if (fCurrentDelta < fBestDelta)
355 {
356 uBestIndex = uIndex;
357 fBestDelta = fCurrentDelta;
358 }
359 }
360 pBC->SetIndex(i, uBestIndex);
361 }
362 }
363
364
365 //=====================================================================================
366 // Entry points
367 //=====================================================================================
368
369 //-------------------------------------------------------------------------------------
370 // BC4 Compression
371 //-------------------------------------------------------------------------------------
372 _Use_decl_annotations_
D3DXDecodeBC4U(XMVECTOR * pColor,const uint8_t * pBC)373 void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC )
374 {
375 assert( pColor && pBC );
376 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
377
378 auto pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC);
379
380 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
381 {
382 #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
383 pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
384 }
385 }
386
387 _Use_decl_annotations_
D3DXDecodeBC4S(XMVECTOR * pColor,const uint8_t * pBC)388 void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC)
389 {
390 assert( pColor && pBC );
391 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
392
393 auto pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC);
394
395 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
396 {
397 #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
398 pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
399 }
400 }
401
402 _Use_decl_annotations_
D3DXEncodeBC4U(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)403 void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
404 {
405 UNREFERENCED_PARAMETER( flags );
406
407 assert( pBC && pColor );
408 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
409
410 memset(pBC, 0, sizeof(BC4_UNORM));
411 auto pBC4 = reinterpret_cast<BC4_UNORM*>(pBC);
412 float theTexelsU[NUM_PIXELS_PER_BLOCK];
413
414 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
415 {
416 theTexelsU[i] = XMVectorGetX( pColor[i] );
417 }
418
419 FindEndPointsBC4U(theTexelsU, pBC4->red_0, pBC4->red_1);
420 FindClosestUNORM(pBC4, theTexelsU);
421 }
422
423 _Use_decl_annotations_
D3DXEncodeBC4S(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)424 void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
425 {
426 UNREFERENCED_PARAMETER( flags );
427
428 assert( pBC && pColor );
429 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
430
431 memset(pBC, 0, sizeof(BC4_UNORM));
432 auto pBC4 = reinterpret_cast<BC4_SNORM*>(pBC);
433 float theTexelsU[NUM_PIXELS_PER_BLOCK];
434
435 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
436 {
437 theTexelsU[i] = XMVectorGetX( pColor[i] );
438 }
439
440 FindEndPointsBC4S(theTexelsU, pBC4->red_0, pBC4->red_1);
441 FindClosestSNORM(pBC4, theTexelsU);
442 }
443
444
445 //-------------------------------------------------------------------------------------
446 // BC5 Compression
447 //-------------------------------------------------------------------------------------
448 _Use_decl_annotations_
D3DXDecodeBC5U(XMVECTOR * pColor,const uint8_t * pBC)449 void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC)
450 {
451 assert( pColor && pBC );
452 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
453
454 auto pBCR = reinterpret_cast<const BC4_UNORM*>(pBC);
455 auto pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
456
457 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
458 {
459 #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
460 pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
461 }
462 }
463
464 _Use_decl_annotations_
D3DXDecodeBC5S(XMVECTOR * pColor,const uint8_t * pBC)465 void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC)
466 {
467 assert( pColor && pBC );
468 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
469
470 auto pBCR = reinterpret_cast<const BC4_SNORM*>(pBC);
471 auto pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
472
473 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
474 {
475 #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
476 pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
477 }
478 }
479
480 _Use_decl_annotations_
D3DXEncodeBC5U(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)481 void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
482 {
483 UNREFERENCED_PARAMETER( flags );
484
485 assert( pBC && pColor );
486 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
487
488 memset(pBC, 0, sizeof(BC4_UNORM)*2);
489 auto pBCR = reinterpret_cast<BC4_UNORM*>(pBC);
490 auto pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
491 float theTexelsU[NUM_PIXELS_PER_BLOCK];
492 float theTexelsV[NUM_PIXELS_PER_BLOCK];
493
494 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
495 {
496 XMFLOAT4A clr;
497 XMStoreFloat4A( &clr, pColor[i] );
498 theTexelsU[i] = clr.x;
499 theTexelsV[i] = clr.y;
500 }
501
502 FindEndPointsBC5U(
503 theTexelsU,
504 theTexelsV,
505 pBCR->red_0,
506 pBCR->red_1,
507 pBCG->red_0,
508 pBCG->red_1);
509
510 FindClosestUNORM(pBCR, theTexelsU);
511 FindClosestUNORM(pBCG, theTexelsV);
512 }
513
514 _Use_decl_annotations_
D3DXEncodeBC5S(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)515 void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
516 {
517 UNREFERENCED_PARAMETER( flags );
518
519 assert( pBC && pColor );
520 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
521
522 memset(pBC, 0, sizeof(BC4_UNORM)*2);
523 auto pBCR = reinterpret_cast<BC4_SNORM*>(pBC);
524 auto pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
525 float theTexelsU[NUM_PIXELS_PER_BLOCK];
526 float theTexelsV[NUM_PIXELS_PER_BLOCK];
527
528 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
529 {
530 XMFLOAT4A clr;
531 XMStoreFloat4A( &clr, pColor[i] );
532 theTexelsU[i] = clr.x;
533 theTexelsV[i] = clr.y;
534 }
535
536 FindEndPointsBC5S(
537 theTexelsU,
538 theTexelsV,
539 pBCR->red_0,
540 pBCR->red_1,
541 pBCG->red_0,
542 pBCG->red_1);
543
544 FindClosestSNORM(pBCR, theTexelsU);
545 FindClosestSNORM(pBCG, theTexelsV);
546 }
547
548 } // namespace
549