1 //-------------------------------------------------------------------------------------
2 // BC4BC5.cpp
3 //
4 // Block-compression (BC) functionality for BC4 and BC5 (DirectX 10 texture compression)
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15 
16 #include "DirectXTexP.h"
17 
18 #include "BC.h"
19 
20 namespace DirectX
21 {
22 
23 //------------------------------------------------------------------------------------
24 // Constants
25 //------------------------------------------------------------------------------------
26 
27 // Because these are used in SAL annotations, they need to remain macros rather than const values
28 #define BLOCK_LEN 4
    // side length of one block, in texels
30 
31 #define BLOCK_SIZE (BLOCK_LEN * BLOCK_LEN)
32     // total texels in a 4x4 block.
33 
34 //------------------------------------------------------------------------------------
35 // Structures
36 //-------------------------------------------------------------------------------------
37 
38 #pragma warning(push)
39 #pragma warning(disable : 4201)
40 
41 // BC4U/BC5U
// One 8-byte unsigned single-channel block: two 8-bit endpoints (red_0, red_1)
// followed by sixteen 3-bit palette indices. The indices are addressed through
// the 64-bit 'data' view starting at bit 16, three bits per texel.
// NOTE(review): the bit addressing assumes red_0 occupies the low byte of 'data'
// (little-endian layout) -- confirm for any big-endian target.
struct BC4_UNORM
{
    // Returns the decoded value of the texel at uOffset (expected range 0..15).
    float R(size_t uOffset) const
    {
        return DecodeFromIndex(GetIndex(uOffset));
    }

    // Converts a palette index (0..7) into its value.
    //   index 0 -> red_0, index 1 -> red_1
    //   red_0 >  red_1 : indices 2..7 are six values interpolated in sevenths
    //   red_0 <= red_1 : indices 2..5 are four values interpolated in fifths,
    //                    index 6 is 0.0 and index 7 is 1.0
    float DecodeFromIndex(size_t uIndex) const
    {
        if (uIndex == 0)
            return red_0 / 255.0f;
        if (uIndex == 1)
            return red_1 / 255.0f;

        float fred_0 = red_0 / 255.0f;
        float fred_1 = red_1 / 255.0f;
        if (red_0 > red_1)
        {
            uIndex -= 1;
            return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
        }
        else
        {
            if (uIndex == 6)
                return 0.0f;
            if (uIndex == 7)
                return 1.0f;
            uIndex -= 1;
            return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
        }
    }

    // Reads the 3-bit palette index stored for the texel at uOffset (0..15).
    size_t GetIndex(size_t uOffset) const
    {
        return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
    }

    // Stores a palette index for the texel at uOffset (0..15).
    // Only the low three bits of uIndex are written, so an out-of-range index
    // can no longer spill into the bits that belong to neighbouring texels.
    void SetIndex(size_t uOffset, size_t uIndex)
    {
        const size_t uShift = 3*uOffset + 16;
        data &= ~((uint64_t) 0x07 << uShift);
        data |= ((uint64_t) (uIndex & 0x07) << uShift);
    }

    union
    {
        struct
        {
            uint8_t red_0;      // first endpoint
            uint8_t red_1;      // second endpoint
            uint8_t indices[6]; // 16 x 3-bit palette indices
        };
        uint64_t data;          // whole block viewed as one 64-bit value
    };
};
96 
97 // BC4S/BC5S
// One 8-byte signed single-channel block: two signed 8-bit endpoints
// (red_0, red_1) followed by sixteen 3-bit palette indices. The indices are
// addressed through the 64-bit 'data' view starting at bit 16, three bits per texel.
// NOTE(review): the bit addressing assumes red_0 occupies the low byte of 'data'
// (little-endian layout) -- confirm for any big-endian target.
struct BC4_SNORM
{
    // Returns the decoded value of the texel at uOffset (expected range 0..15).
    float R(size_t uOffset) const
    {
        return DecodeFromIndex(GetIndex(uOffset));
    }

    // Converts a palette index (0..7) into its value.
    //   index 0 -> red_0, index 1 -> red_1 (an endpoint of -128 is treated as -127)
    //   red_0 >  red_1 : indices 2..7 are six values interpolated in sevenths
    //   red_0 <= red_1 : indices 2..5 are four values interpolated in fifths,
    //                    index 6 is -1.0 and index 7 is 1.0
    // The mode test compares the raw endpoints, not the clamped ones.
    float DecodeFromIndex(size_t uIndex) const
    {
        int8_t sred_0 = (red_0 == -128)? -127 : red_0;
        int8_t sred_1 = (red_1 == -128)? -127 : red_1;

        if (uIndex == 0)
            return sred_0 / 127.0f;
        if (uIndex == 1)
            return sred_1 / 127.0f;

        float fred_0 = sred_0 / 127.0f;
        float fred_1 = sred_1 / 127.0f;
        if (red_0 > red_1)
        {
            uIndex -= 1;
            return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
        }
        else
        {
            if (uIndex == 6)
                return -1.0f;
            if (uIndex == 7)
                return 1.0f;
            uIndex -= 1;
            return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
        }
    }

    // Reads the 3-bit palette index stored for the texel at uOffset (0..15).
    size_t GetIndex(size_t uOffset) const
    {
        return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
    }

    // Stores a palette index for the texel at uOffset (0..15).
    // Only the low three bits of uIndex are written, so an out-of-range index
    // can no longer spill into the bits that belong to neighbouring texels.
    void SetIndex(size_t uOffset, size_t uIndex)
    {
        const size_t uShift = 3*uOffset + 16;
        data &= ~((uint64_t) 0x07 << uShift);
        data |= ((uint64_t) (uIndex & 0x07) << uShift);
    }

    union
    {
        struct
        {
            int8_t red_0;       // first endpoint
            int8_t red_1;       // second endpoint
            uint8_t indices[6]; // 16 x 3-bit palette indices
        };
        uint64_t data;          // whole block viewed as one 64-bit value
    };
};
155 
156 #pragma warning(pop)
157 
158 //-------------------------------------------------------------------------------------
159 // Convert a floating point value to an 8-bit SNORM
160 //-------------------------------------------------------------------------------------
FloatToSNorm(_In_ float fVal,_Out_ int8_t * piSNorm)161 static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm )
162 {
163     const uint32_t dwMostNeg = ( 1 << ( 8 * sizeof( int8_t ) - 1 ) );
164 
165     if( _isnan( fVal ) )
166         fVal = 0;
167     else
168         if( fVal > 1 )
169             fVal = 1;    // Clamp to 1
170         else
171             if( fVal < -1 )
172                 fVal = -1;    // Clamp to -1
173 
174     fVal = fVal * (int8_t) ( dwMostNeg - 1 );
175 
176     if( fVal >= 0 )
177         fVal += .5f;
178     else
179         fVal -= .5f;
180 
181     *piSNorm = (int8_t) (fVal);
182 }
183 
184 
185 //------------------------------------------------------------------------------
FindEndPointsBC4U(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_Out_ uint8_t & endpointU_0,_Out_ uint8_t & endpointU_1)186 static void FindEndPointsBC4U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1)
187 {
188     // The boundary of codec for signed/unsigned format
189     float MIN_NORM;
190     float MAX_NORM = 1.0f;
191     int8_t iStart, iEnd;
192     size_t i;
193 
194     MIN_NORM = 0.0f;
195 
196     // Find max/min of input texels
197     float fBlockMax = theTexelsU[0];
198     float fBlockMin = theTexelsU[0];
199     for (i = 0; i < BLOCK_SIZE; ++i)
200     {
201         if (theTexelsU[i]<fBlockMin)
202         {
203             fBlockMin = theTexelsU[i];
204         }
205         else if (theTexelsU[i]>fBlockMax)
206         {
207             fBlockMax = theTexelsU[i];
208         }
209     }
210 
211     //  If there are boundary values in input texels, Should use 4 block-codec to guarantee
212     //  the exact code of the boundary values.
213     bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
214 
215     // Using Optimize
216     float fStart, fEnd;
217 
218     if (!bUsing4BlockCodec)
219     {
220         OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 8);
221 
222         iStart = (uint8_t) (fStart * 255.0f);
223         iEnd   = (uint8_t) (fEnd   * 255.0f);
224 
225         endpointU_0 = iEnd;
226         endpointU_1 = iStart;
227     }
228     else
229     {
230         OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 6);
231 
232         iStart = (uint8_t) (fStart * 255.0f);
233         iEnd   = (uint8_t) (fEnd   * 255.0f);
234 
235         endpointU_1 = iEnd;
236         endpointU_0 = iStart;
237     }
238 }
239 
FindEndPointsBC4S(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_Out_ int8_t & endpointU_0,_Out_ int8_t & endpointU_1)240 static void FindEndPointsBC4S(_In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1)
241 {
242     //  The boundary of codec for signed/unsigned format
243     float MIN_NORM;
244     float MAX_NORM = 1.0f;
245     int8_t iStart, iEnd;
246     size_t i;
247 
248     MIN_NORM = -1.0f;
249 
250     // Find max/min of input texels
251     float fBlockMax = theTexelsU[0];
252     float fBlockMin = theTexelsU[0];
253     for (i = 0; i < BLOCK_SIZE; ++i)
254     {
255         if (theTexelsU[i]<fBlockMin)
256         {
257             fBlockMin = theTexelsU[i];
258         }
259         else if (theTexelsU[i]>fBlockMax)
260         {
261             fBlockMax = theTexelsU[i];
262         }
263     }
264 
265     //  If there are boundary values in input texels, Should use 4 block-codec to guarantee
266     //  the exact code of the boundary values.
267     bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
268 
269     // Using Optimize
270     float fStart, fEnd;
271 
272     if (!bUsing4BlockCodec)
273     {
274         OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 8);
275 
276         FloatToSNorm(fStart, &iStart);
277         FloatToSNorm(fEnd, &iEnd);
278 
279         endpointU_0 = iEnd;
280         endpointU_1 = iStart;
281     }
282     else
283     {
284         OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 6);
285 
286         FloatToSNorm(fStart, &iStart);
287         FloatToSNorm(fEnd, &iEnd);
288 
289         endpointU_1 = iEnd;
290         endpointU_0 = iStart;
291     }
292 }
293 
294 
295 //------------------------------------------------------------------------------
FindEndPointsBC5U(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_In_reads_ (BLOCK_SIZE)const float theTexelsV[],_Out_ uint8_t & endpointU_0,_Out_ uint8_t & endpointU_1,_Out_ uint8_t & endpointV_0,_Out_ uint8_t & endpointV_1)296 static inline void FindEndPointsBC5U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
297                                       _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1)
298 {
299     //Encoding the U and V channel by BC4 codec separately.
300     FindEndPointsBC4U( theTexelsU, endpointU_0, endpointU_1);
301     FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1);
302 }
303 
FindEndPointsBC5S(_In_reads_ (BLOCK_SIZE)const float theTexelsU[],_In_reads_ (BLOCK_SIZE)const float theTexelsV[],_Out_ int8_t & endpointU_0,_Out_ int8_t & endpointU_1,_Out_ int8_t & endpointV_0,_Out_ int8_t & endpointV_1)304 static inline void FindEndPointsBC5S( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
305                                       _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1)
306 {
307     //Encoding the U and V channel by BC4 codec separately.
308     FindEndPointsBC4S( theTexelsU, endpointU_0, endpointU_1);
309     FindEndPointsBC4S( theTexelsV, endpointV_0, endpointV_1);
310 }
311 
312 
313 //------------------------------------------------------------------------------
FindClosestUNORM(_Inout_ BC4_UNORM * pBC,_In_reads_ (NUM_PIXELS_PER_BLOCK)const float theTexelsU[])314 static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
315 {
316     float rGradient[8];
317     int i;
318     for (i = 0; i < 8; ++i)
319     {
320         rGradient[i] = pBC->DecodeFromIndex(i);
321     }
322     for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
323     {
324         size_t uBestIndex = 0;
325         float fBestDelta = 100000;
326         for (size_t uIndex = 0; uIndex < 8; uIndex++)
327         {
328             float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
329             if (fCurrentDelta < fBestDelta)
330             {
331                 uBestIndex = uIndex;
332                 fBestDelta = fCurrentDelta;
333             }
334         }
335         pBC->SetIndex(i, uBestIndex);
336     }
337 }
338 
FindClosestSNORM(_Inout_ BC4_SNORM * pBC,_In_reads_ (NUM_PIXELS_PER_BLOCK)const float theTexelsU[])339 static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
340 {
341     float rGradient[8];
342     int i;
343     for (i = 0; i < 8; ++i)
344     {
345         rGradient[i] = pBC->DecodeFromIndex(i);
346     }
347     for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
348     {
349         size_t uBestIndex = 0;
350         float fBestDelta = 100000;
351         for (size_t uIndex = 0; uIndex < 8; uIndex++)
352         {
353             float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
354             if (fCurrentDelta < fBestDelta)
355             {
356                 uBestIndex = uIndex;
357                 fBestDelta = fCurrentDelta;
358             }
359         }
360         pBC->SetIndex(i, uBestIndex);
361     }
362 }
363 
364 
365 //=====================================================================================
366 // Entry points
367 //=====================================================================================
368 
369 //-------------------------------------------------------------------------------------
370 // BC4 Compression
371 //-------------------------------------------------------------------------------------
372 _Use_decl_annotations_
D3DXDecodeBC4U(XMVECTOR * pColor,const uint8_t * pBC)373 void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC )
374 {
375     assert( pColor && pBC );
376     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
377 
378     auto pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC);
379 
380     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
381     {
382         #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
383         pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
384     }
385 }
386 
387 _Use_decl_annotations_
D3DXDecodeBC4S(XMVECTOR * pColor,const uint8_t * pBC)388 void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC)
389 {
390     assert( pColor && pBC );
391     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
392 
393     auto pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC);
394 
395     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
396     {
397         #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
398         pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
399     }
400 }
401 
402 _Use_decl_annotations_
D3DXEncodeBC4U(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)403 void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
404 {
405     UNREFERENCED_PARAMETER( flags );
406 
407     assert( pBC && pColor );
408     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
409 
410     memset(pBC, 0, sizeof(BC4_UNORM));
411     auto pBC4 = reinterpret_cast<BC4_UNORM*>(pBC);
412     float theTexelsU[NUM_PIXELS_PER_BLOCK];
413 
414     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
415     {
416         theTexelsU[i] = XMVectorGetX( pColor[i] );
417     }
418 
419     FindEndPointsBC4U(theTexelsU, pBC4->red_0, pBC4->red_1);
420     FindClosestUNORM(pBC4, theTexelsU);
421 }
422 
423 _Use_decl_annotations_
D3DXEncodeBC4S(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)424 void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
425 {
426     UNREFERENCED_PARAMETER( flags );
427 
428     assert( pBC && pColor );
429     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
430 
431     memset(pBC, 0, sizeof(BC4_UNORM));
432     auto pBC4 = reinterpret_cast<BC4_SNORM*>(pBC);
433     float theTexelsU[NUM_PIXELS_PER_BLOCK];
434 
435     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
436     {
437         theTexelsU[i] = XMVectorGetX( pColor[i] );
438     }
439 
440     FindEndPointsBC4S(theTexelsU, pBC4->red_0, pBC4->red_1);
441     FindClosestSNORM(pBC4, theTexelsU);
442 }
443 
444 
445 //-------------------------------------------------------------------------------------
446 // BC5 Compression
447 //-------------------------------------------------------------------------------------
448 _Use_decl_annotations_
D3DXDecodeBC5U(XMVECTOR * pColor,const uint8_t * pBC)449 void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC)
450 {
451     assert( pColor && pBC );
452     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
453 
454     auto pBCR = reinterpret_cast<const BC4_UNORM*>(pBC);
455     auto pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
456 
457     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
458     {
459         #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
460         pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
461     }
462 }
463 
464 _Use_decl_annotations_
D3DXDecodeBC5S(XMVECTOR * pColor,const uint8_t * pBC)465 void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC)
466 {
467     assert( pColor && pBC );
468     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
469 
470     auto pBCR = reinterpret_cast<const BC4_SNORM*>(pBC);
471     auto pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
472 
473     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
474     {
475         #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
476         pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
477     }
478 }
479 
480 _Use_decl_annotations_
D3DXEncodeBC5U(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)481 void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
482 {
483     UNREFERENCED_PARAMETER( flags );
484 
485     assert( pBC && pColor );
486     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
487 
488     memset(pBC, 0, sizeof(BC4_UNORM)*2);
489     auto pBCR = reinterpret_cast<BC4_UNORM*>(pBC);
490     auto pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
491     float theTexelsU[NUM_PIXELS_PER_BLOCK];
492     float theTexelsV[NUM_PIXELS_PER_BLOCK];
493 
494     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
495     {
496         XMFLOAT4A clr;
497         XMStoreFloat4A( &clr, pColor[i] );
498         theTexelsU[i] = clr.x;
499         theTexelsV[i] = clr.y;
500     }
501 
502     FindEndPointsBC5U(
503         theTexelsU,
504         theTexelsV,
505         pBCR->red_0,
506         pBCR->red_1,
507         pBCG->red_0,
508         pBCG->red_1);
509 
510     FindClosestUNORM(pBCR, theTexelsU);
511     FindClosestUNORM(pBCG, theTexelsV);
512 }
513 
514 _Use_decl_annotations_
D3DXEncodeBC5S(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)515 void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
516 {
517     UNREFERENCED_PARAMETER( flags );
518 
519     assert( pBC && pColor );
520     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
521 
522     memset(pBC, 0, sizeof(BC4_UNORM)*2);
523     auto pBCR = reinterpret_cast<BC4_SNORM*>(pBC);
524     auto pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
525     float theTexelsU[NUM_PIXELS_PER_BLOCK];
526     float theTexelsV[NUM_PIXELS_PER_BLOCK];
527 
528     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
529     {
530         XMFLOAT4A clr;
531         XMStoreFloat4A( &clr, pColor[i] );
532         theTexelsU[i] = clr.x;
533         theTexelsV[i] = clr.y;
534     }
535 
536     FindEndPointsBC5S(
537         theTexelsU,
538         theTexelsV,
539         pBCR->red_0,
540         pBCR->red_1,
541         pBCG->red_0,
542         pBCG->red_1);
543 
544     FindClosestSNORM(pBCR, theTexelsU);
545     FindClosestSNORM(pBCG, theTexelsV);
546 }
547 
548 } // namespace
549