//-------------------------------------------------------------------------------------
// BC6HBC7.cpp
//
// Block-compression (BC) functionality for BC6H and BC7 (DirectX 11 texture compression)
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkId=248926
//-------------------------------------------------------------------------------------
15 
16 #include "DirectXTexP.h"
17 
18 #include "BC.h"
19 
20 #ifndef USE_XNAMATH
21 using namespace DirectX::PackedVector;
22 #endif
23 
24 namespace DirectX
25 {
26 
//-------------------------------------------------------------------------------------
// Constants
//-------------------------------------------------------------------------------------
30 
// Squared tolerance: (0.25 / 64)^2 == 2^-16.
// NOTE(review): presumably compared against squared distances/errors - confirm at the use sites.
static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);

// Complementary weight ramps. pCn runs from 1 down to 0 and pDn from 0 up to 1
// in n equal steps (n = 3: halves, n = 4: thirds); entry i of pDn is the
// complement of entry i of pCn.
// NOTE(review): presumably the blend factors applied to the two endpoints of a
// 3- or 4-entry palette - confirm at the use sites.
static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f };
static const float pD3[] = { 0.0f/2.0f, 1.0f/2.0f, 2.0f/2.0f };
static const float pC4[] = { 3.0f/3.0f, 2.0f/3.0f, 1.0f/3.0f, 0.0f/3.0f };
static const float pD4[] = { 0.0f/3.0f, 1.0f/3.0f, 2.0f/3.0f, 3.0f/3.0f };
36 
// Integer weight tables with 4, 8 and 16 entries. Each rises monotonically
// from 0 to 64, so 64 represents a weight of 1.0.
// NOTE(review): the names suggest these are the interpolation weights for
// 2-, 3- and 4-bit palette indices - confirm at the use sites.
// Deliberately not marked 'static' here: the original declarations carry no
// storage-class specifier, and a header may refer to these names.
const int g_aWeights2[] = {0, 21, 43, 64};
const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
40 
// Partition, Shape, Pixel (index into 4x4 block)
//
// g_aPartitionTable[partition][shape][pixel] is the subset (region) number
// assigned to a pixel of the 4x4 block:
//   partition 0 - single region: every pixel belongs to subset 0
//   partition 1 - two subsets   (values 0..1)
//   partition 2 - three subsets (values 0..2)
// Each partition set holds 64 shapes of 16 pixels each. Pixel 0 is in subset 0
// for every shape.
static const uint8_t g_aPartitionTable[3][64][16] =
{
    {   // 1 Region case has no subsets (all 0)
        // All 64 shapes x 16 pixels are zero; the elements not listed here
        // are zero-initialized by aggregate initialization.
        { 0 }
    },

    {   // BC6H/BC7 Partition Set for 2 Subsets
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 0
        { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // Shape 1
        { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // Shape 2
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 4
        { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 5
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 7
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 8
        { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 9
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 10
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // Shape 11
        { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 12
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 13
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 14
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 15
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // Shape 16
        { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 17
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 18
        { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 19
        { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 20
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 21
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 22
        { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // Shape 23
        { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 24
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 25
        { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // Shape 26
        { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // Shape 27
        { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // Shape 28
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 29
        { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 30
        { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 31

        // BC7 Partition Set for 2 Subsets (second-half)
        { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // Shape 32
        { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 33
        { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // Shape 34
        { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // Shape 35
        { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // Shape 36
        { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // Shape 37
        { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // Shape 38
        { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // Shape 39
        { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 40
        { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // Shape 41
        { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // Shape 42
        { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // Shape 43
        { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // Shape 44
        { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // Shape 45
        { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // Shape 46
        { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // Shape 47
        { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // Shape 48
        { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // Shape 49
        { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // Shape 50
        { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // Shape 51
        { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // Shape 52
        { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 53
        { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 54
        { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // Shape 55
        { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 56
        { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // Shape 57
        { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // Shape 58
        { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // Shape 59
        { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 60
        { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 61
        { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // Shape 62
        { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 }  // Shape 63
    },

    {   // BC7 Partition Set for 3 Subsets
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // Shape 0
        { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 1
        { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 2
        { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 4
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // Shape 5
        { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
        { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 7
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 8
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 9
        { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 10
        { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // Shape 11
        { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // Shape 12
        { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 13
        { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 14
        { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // Shape 15
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // Shape 16
        { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // Shape 17
        { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 18
        { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // Shape 19
        { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // Shape 20
        { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // Shape 21
        { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 22
        { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // Shape 23
        { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // Shape 24
        { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // Shape 25
        { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // Shape 26
        { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // Shape 27
        { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // Shape 28
        { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // Shape 29
        { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // Shape 30
        { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 31
        { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 32
        { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // Shape 33
        { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // Shape 34
        { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // Shape 35
        { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // Shape 36
        { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // Shape 37
        { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // Shape 38
        { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // Shape 39
        { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // Shape 40
        { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 41
        { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 42
        { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // Shape 43
        { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // Shape 44
        { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // Shape 45
        { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // Shape 46
        { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 47
        { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // Shape 48
        { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // Shape 49
        { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // Shape 50
        { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 51
        { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // Shape 52
        { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // Shape 53
        { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // Shape 54
        { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 55
        { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 56
        { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // Shape 57
        { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // Shape 58
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // Shape 59
        { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // Shape 60
        { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // Shape 61
        { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 62
        { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 }  // Shape 63
    }
};
247 
// Partition, Shape, Fixup
//
// g_aFixUp[partition][shape][subset] holds one pixel position (0..15 within
// the 4x4 block) per subset, for the same partition/shape numbering as the
// partition table. The first entry is always 0; subsets a partition set does
// not use are also stored as 0.
// NOTE(review): presumably these are the "fix-up" (anchor) pixels whose index
// is stored with one fewer bit - confirm against the BC6H/BC7 format
// documentation and the use sites.
static const uint8_t g_aFixUp[3][64][3] =
{
    {   // No fix-ups for 1st subset for BC6H or BC7
        // All 64 shapes x 3 entries are zero; the elements not listed here
        // are zero-initialized by aggregate initialization.
        { 0 }
    },

    {   // BC6H/BC7 Partition Set Fixups for 2 Subsets
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 8, 0}, { 0,15, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0, 8, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},

        // BC7 Partition Set Fixups for 2 Subsets (second-half)
        { 0,15, 0}, { 0,15, 0}, { 0, 6, 0}, { 0, 8, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0,15, 0}, { 0,15, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0, 2, 0}, { 0,15, 0}, { 0,15, 0}, { 0, 6, 0},
        { 0, 6, 0}, { 0, 2, 0}, { 0, 6, 0}, { 0, 8, 0},
        { 0,15, 0}, { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0}, { 0,15, 0}
    },

    {   // BC7 Partition Set Fixups for 3 Subsets
        { 0, 3,15}, { 0, 3, 8}, { 0,15, 8}, { 0,15, 3},
        { 0, 8,15}, { 0, 3,15}, { 0,15, 3}, { 0,15, 8},
        { 0, 8,15}, { 0, 8,15}, { 0, 6,15}, { 0, 6,15},
        { 0, 6,15}, { 0, 5,15}, { 0, 3,15}, { 0, 3, 8},
        { 0, 3,15}, { 0, 3, 8}, { 0, 8,15}, { 0,15, 3},
        { 0, 3,15}, { 0, 3, 8}, { 0, 6,15}, { 0,10, 8},
        { 0, 5, 3}, { 0, 8,15}, { 0, 8, 6}, { 0, 6,10},
        { 0, 8,15}, { 0, 5,15}, { 0,15,10}, { 0,15, 8},
        { 0, 8,15}, { 0,15, 3}, { 0, 3,15}, { 0, 5,10},
        { 0, 6,10}, { 0,10, 8}, { 0, 8, 9}, { 0,15,10},
        { 0,15, 6}, { 0, 3,15}, { 0,15, 8}, { 0, 5,15},
        { 0,15, 3}, { 0,15, 6}, { 0,15, 6}, { 0,15, 8},
        { 0, 3,15}, { 0,15, 3}, { 0, 5,15}, { 0, 5,15},
        { 0, 5,15}, { 0, 8,15}, { 0, 5,15}, { 0,10,15},
        { 0, 5,15}, { 0,10,15}, { 0, 8,15}, { 0,13,15},
        { 0,15, 3}, { 0,12,15}, { 0, 3,15}, { 0, 3, 8}
    }
};
310 
311 // BC6H Compression
312 const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
313 {
314     {   // Mode 1 (0x00) - 10 5 5 5
315         { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
316         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
317         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
318         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
319         {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
320         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
321         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
322         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
323         { D, 3}, { D, 4},
324     },
325 
326     {   // Mode 2 (0x01) - 7 6 6 6
327         { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
328         {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
329         {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
330         {BW, 5}, {BW, 6}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
331         {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
332         {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
333         {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
334         {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
335         { D, 3}, { D, 4},
336     },
337 
338     {   // Mode 3 (0x02) - 11 5 4 4
339         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
340         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
341         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
342         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
343         {RW,10}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
344         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
345         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
346         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
347         { D, 3}, { D, 4},
348     },
349 
350     {   // Mode 4 (0x06) - 11 4 5 4
351         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
352         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
353         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
354         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
355         {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
356         {GW,10}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
357         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 0},
358         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {GY, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
359         { D, 3}, { D, 4},
360     },
361 
362     {   // Mode 5 (0x0a) - 11 4 4 5
363         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
364         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
365         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
366         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
367         {BY, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
368         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
369         {BW,10}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 1},
370         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {BZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
371         { D, 3}, { D, 4},
372     },
373 
374     {   // Mode 6 (0x0e) - 9 5 5 5
375         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
376         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
377         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
378         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
379         {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
380         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
381         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
382         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
383         { D, 3}, { D, 4},
384     },
385 
386     {   // Mode 7 (0x12) - 8 6 5 5
387         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
388         {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
389         {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
390         {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 3}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
391         {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
392         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
393         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
394         {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
395         { D, 3}, { D, 4},
396     },
397 
398     {   // Mode 8 (0x16) - 8 5 6 5
399         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
400         {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
401         {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
402         {BW, 5}, {BW, 6}, {BW, 7}, {GZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
403         {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
404         {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
405         {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
406         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
407         { D, 3}, { D, 4},
408     },
409 
410     {   // Mode 9 (0x1a) - 8 5 5 6
411         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
412         {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
413         {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
414         {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
415         {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
416         {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
417         {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
418         {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
419         { D, 3}, { D, 4},
420     },
421 
422     {   // Mode 10 (0x1e) - 6 6 6 6
423         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
424         {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
425         {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
426         {BW, 5}, {GZ, 5}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
427         {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
428         {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
429         {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
430         {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
431         { D, 3}, { D, 4},
432     },
433 
434     {   // Mode 11 (0x03) - 10 10
435         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
436         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
437         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
438         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
439         {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RX, 9}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
440         {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GX, 9}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
441         {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BX, 9}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
442         {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
443         {NA, 0}, {NA, 0},
444     },
445 
446     {   // Mode 12 (0x07) - 11 9
447         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
448         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
449         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
450         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
451         {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
452         {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
453         {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
454         {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
455         {NA, 0}, {NA, 0},
456     },
457 
458     {   // Mode 13 (0x0b) - 12 8
459         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
460         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
461         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
462         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
463         {RX, 5}, {RX, 6}, {RX, 7}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
464         {GX, 5}, {GX, 6}, {GX, 7}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
465         {BX, 5}, {BX, 6}, {BX, 7}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
466         {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
467         {NA, 0}, {NA, 0},
468     },
469 
470     {   // Mode 14 (0x0f) - 16 4
471         { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
472         {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
473         {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
474         {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,15},
475         {RW,14}, {RW,13}, {RW,12}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,15},
476         {GW,14}, {GW,13}, {GW,12}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,15},
477         {BW,14}, {BW,13}, {BW,12}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
478         {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
479         {NA, 0}, {NA, 0},
480     },
481 };
482 
// Per-mode parameters for BC6H, one row per valid mode (Mode 1 .. Mode 14). Rows are looked
// up through ms_aModeToInfo, so the row order here must match the indices stored there.
//
// Fields: Mode, Partitions, Transformed, IndexPrec, RGBAPrec
//   Mode        - mode bit pattern as it appears in the block (see ms_aModeToInfo)
//   Partitions  - 0 for a single region, 1 for two regions; Decode uses this to pick the
//                 header length (65 vs. 82 bits) and the weight table
//   Transformed - when true, Decode runs TransformInverse on the endpoints
//   IndexPrec   - bits per pixel index (fix-up pixels store one bit less)
//   RGBAPrec    - four LDRColorA values holding per-channel bit counts; Decode reads them as
//                 RGBAPrec[region][0] (endpoint A) and RGBAPrec[region][1] (endpoint B)
const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
{
    {0x00, 1, true,  3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 1
    {0x01, 1, true,  3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 2
    {0x02, 1, true,  3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 3
    {0x06, 1, true,  3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 4
    {0x0a, 1, true,  3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 5
    {0x0e, 1, true,  3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 6
    {0x12, 1, true,  3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 7
    {0x16, 1, true,  3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 8
    {0x1a, 1, true,  3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 9
    {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 10
    {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11
    {0x07, 0, true,  4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12
    {0x0b, 0, true,  4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13
    {0x0f, 0, true,  4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 14
};
501 
// Maps the mode value assembled by Decode (0..31) to a row index shared by ms_aInfo and
// ms_aDesc. A value of -1 marks a pattern with no mode behind it; Decode outputs opaque
// black for those.
//
// Decode first reads two bits and only reads three more when those are neither 00 nor 01,
// so patterns whose low two bits are 00/01 (other than 0x00 and 0x01 themselves) are never
// produced by it and are listed as "Invalid". The "Reserved" entries are five-bit patterns
// that Decode can read but that have no mode assigned.
const int D3DX_BC6H::ms_aModeToInfo[] =
{
     0, // Mode 1   - 0x00
     1, // Mode 2   - 0x01
     2, // Mode 3   - 0x02
    10, // Mode 11  - 0x03
    -1, // Invalid  - 0x04
    -1, // Invalid  - 0x05
     3, // Mode 4   - 0x06
    11, // Mode 12  - 0x07
    -1, // Invalid  - 0x08
    -1, // Invalid  - 0x09
     4, // Mode 5   - 0x0a
    12, // Mode 13  - 0x0b
    -1, // Invalid  - 0x0c
    -1, // Invalid  - 0x0d
     5, // Mode 6   - 0x0e
    13, // Mode 14  - 0x0f
    -1, // Invalid  - 0x10
    -1, // Invalid  - 0x11
     6, // Mode 7   - 0x12
    -1, // Reserved - 0x13
    -1, // Invalid  - 0x14
    -1, // Invalid  - 0x15
     7, // Mode 8   - 0x16
    -1, // Reserved - 0x17
    -1, // Invalid  - 0x18
    -1, // Invalid  - 0x19
     8, // Mode 9   - 0x1a
    -1, // Reserved - 0x1b
    -1, // Invalid  - 0x1c
    -1, // Invalid  - 0x1d
     9, // Mode 10  - 0x1e
    -1, // Reserved - 0x1f
};
537 
// Per-mode parameters for BC7, one row per mode (Mode 0 .. Mode 7); the comment under each
// row summarizes that mode's layout.
// Fields: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP
// Note that uPartitions is the subset count minus one (e.g. 2 for the 3-subset modes).
const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] =
{
    {2, 4, 6, 0, 0, 3, 0, LDRColorA(4,4,4,0), LDRColorA(5,5,5,0)},
        // Mode 0: Color only, 3 Subsets, RGBP 4441 (unique P-bit), 3-bit indices, 16 partitions
    {1, 6, 2, 0, 0, 3, 0, LDRColorA(6,6,6,0), LDRColorA(7,7,7,0)},
        // Mode 1: Color only, 2 Subsets, RGBP 6661 (shared P-bit), 3-bit indices, 64 partitions
    {2, 6, 0, 0, 0, 2, 0, LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)},
        // Mode 2: Color only, 3 Subsets, RGB 555, 2-bit indices, 64 partitions
    {1, 6, 4, 0, 0, 2, 0, LDRColorA(7,7,7,0), LDRColorA(8,8,8,0)},
        // Mode 3: Color only, 2 Subsets, RGBP 7771 (unique P-bit), 2-bit indices, 64 partitions
    {0, 0, 0, 2, 1, 2, 3, LDRColorA(5,5,5,6), LDRColorA(5,5,5,6)},
        // Mode 4: Color w/ Separate Alpha, 1 Subset, RGB 555, A6, 16x2/16x3-bit indices, 2-bit rotation, 1-bit index selector
    {0, 0, 0, 2, 0, 2, 2, LDRColorA(7,7,7,8), LDRColorA(7,7,7,8)},
        // Mode 5: Color w/ Separate Alpha, 1 Subset, RGB 777, A8, 16x2/16x2-bit indices, 2-bit rotation
    {0, 0, 2, 0, 0, 4, 0, LDRColorA(7,7,7,7), LDRColorA(8,8,8,8)},
        // Mode 6: Color+Alpha, 1 Subset, RGBAP 77771 (unique P-bit), 16x4-bit indices
    {1, 6, 4, 0, 0, 2, 0, LDRColorA(5,5,5,5), LDRColorA(6,6,6,6)}
        // Mode 7: Color+Alpha, 2 Subsets, RGBAP 55551 (unique P-bit), 2-bit indices, 64 partitions
};
558 
559 
560 //-------------------------------------------------------------------------------------
561 // Helper functions
562 //-------------------------------------------------------------------------------------
563 inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_(0,63) size_t uShape, _In_range_(0,15) size_t uOffset)
564 {
565     assert(uPartitions < 3 && uShape < 64 && uOffset < 16);
566     _Analysis_assume_(uPartitions < 3 && uShape < 64 && uOffset < 16);
567     for(size_t p = 0; p <= uPartitions; p++)
568     {
569         if(uOffset == g_aFixUp[uPartitions][uShape][p])
570         {
571             return true;
572         }
573     }
574     return false;
575 }
576 
TransformForward(_Inout_updates_all_ (BC6H_MAX_REGIONS)INTEndPntPair aEndPts[])577 inline static void TransformForward(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[])
578 {
579     aEndPts[0].B -= aEndPts[0].A;
580     aEndPts[1].A -= aEndPts[0].A;
581     aEndPts[1].B -= aEndPts[0].A;
582 }
583 
TransformInverse(_Inout_updates_all_ (BC6H_MAX_REGIONS)INTEndPntPair aEndPts[],_In_ const LDRColorA & Prec,_In_ bool bSigned)584 inline static void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned)
585 {
586     INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
587     aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask;
588     aEndPts[1].A += aEndPts[0].A; aEndPts[1].A &= WrapMask;
589     aEndPts[1].B += aEndPts[0].A; aEndPts[1].B &= WrapMask;
590     if(bSigned)
591     {
592         aEndPts[0].B.SignExtend(Prec);
593         aEndPts[1].A.SignExtend(Prec);
594         aEndPts[1].B.SignExtend(Prec);
595     }
596 }
597 
Norm(_In_ const INTColor & a,_In_ const INTColor & b)598 inline static float Norm(_In_ const INTColor& a, _In_ const INTColor& b)
599 {
600     float dr = float(a.r) - float(b.r);
601     float dg = float(a.g) - float(b.g);
602     float db = float(a.b) - float(b.b);
603     return dr * dr + dg * dg + db * db;
604 }
605 
// Returns the number of bits needed to store n.
//   n == 0 : 0 bits, signed or not
//   n  > 0 : bit length of n, plus one sign bit when bIsSigned
//   n  < 0 : two's-complement width including the sign bit (bIsSigned must be true)
inline static int NBits(_In_ int n, _In_ bool bIsSigned)
{
    if(n == 0)
    {
        return 0;
    }

    int cBits = 0;

    if(n < 0)
    {
        assert(bIsSigned);
        // Shift until only the sign (-1) remains; the +1 accounts for the sign bit
        while(n < -1)
        {
            n >>= 1;
            ++cBits;
        }
        return cBits + 1;
    }

    while(n != 0)
    {
        n >>= 1;
        ++cBits;
    }
    return bIsSigned ? cBits + 1 : cBits;
}
626 
627 
628 //-------------------------------------------------------------------------------------
OptimizeRGB(_In_reads_ (NUM_PIXELS_PER_BLOCK)const HDRColorA * const pPoints,_Out_ HDRColorA * pX,_Out_ HDRColorA * pY,_In_ size_t cSteps,_In_ size_t cPixels,_In_reads_ (cPixels)const size_t * pIndex)629 static float OptimizeRGB(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
630                          _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
631                          _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
632 {
633     float fError = FLT_MAX;
634     const float *pC = (3 == cSteps) ? pC3 : pC4;
635     const float *pD = (3 == cSteps) ? pD3 : pD4;
636 
637     // Find Min and Max points, as starting point
638     HDRColorA X(1.0f, 1.0f, 1.0f, 0.0f);
639     HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
640 
641     for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
642     {
643         if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
644         if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
645         if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
646         if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
647         if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
648         if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
649     }
650 
651     // Diagonal axis
652     HDRColorA AB;
653     AB.r = Y.r - X.r;
654     AB.g = Y.g - X.g;
655     AB.b = Y.b - X.b;
656 
657     float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
658 
659     // Single color block.. no need to root-find
660     if(fAB < FLT_MIN)
661     {
662         pX->r = X.r; pX->g = X.g; pX->b = X.b;
663         pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
664         return 0.0f;
665     }
666 
667     // Try all four axis directions, to determine which diagonal best fits data
668     float fABInv = 1.0f / fAB;
669 
670     HDRColorA Dir;
671     Dir.r = AB.r * fABInv;
672     Dir.g = AB.g * fABInv;
673     Dir.b = AB.b * fABInv;
674 
675     HDRColorA Mid;
676     Mid.r = (X.r + Y.r) * 0.5f;
677     Mid.g = (X.g + Y.g) * 0.5f;
678     Mid.b = (X.b + Y.b) * 0.5f;
679 
680     float fDir[4];
681     fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
682 
683     for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
684     {
685         HDRColorA Pt;
686         Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
687         Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
688         Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
689 
690         float f;
691         f = Pt.r + Pt.g + Pt.b; fDir[0] += f * f;
692         f = Pt.r + Pt.g - Pt.b; fDir[1] += f * f;
693         f = Pt.r - Pt.g + Pt.b; fDir[2] += f * f;
694         f = Pt.r - Pt.g - Pt.b; fDir[3] += f * f;
695     }
696 
697     float fDirMax = fDir[0];
698     size_t  iDirMax = 0;
699 
700     for(size_t iDir = 1; iDir < 4; iDir++)
701     {
702         if(fDir[iDir] > fDirMax)
703         {
704             fDirMax = fDir[iDir];
705             iDirMax = iDir;
706         }
707     }
708 
709     if(iDirMax & 2) std::swap( X.g, Y.g );
710     if(iDirMax & 1) std::swap( X.b, Y.b );
711 
712     // Two color block.. no need to root-find
713     if(fAB < 1.0f / 4096.0f)
714     {
715         pX->r = X.r; pX->g = X.g; pX->b = X.b;
716         pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
717         return 0.0f;
718     }
719 
720     // Use Newton's Method to find local minima of sum-of-squares error.
721     float fSteps = (float) (cSteps - 1);
722 
723     for(size_t iIteration = 0; iIteration < 8; iIteration++)
724     {
725         // Calculate new steps
726         HDRColorA pSteps[4] = {};
727 
728         for(size_t iStep = 0; iStep < cSteps; iStep++)
729         {
730             pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
731             pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
732             pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
733         }
734 
735         // Calculate color direction
736         Dir.r = Y.r - X.r;
737         Dir.g = Y.g - X.g;
738         Dir.b = Y.b - X.b;
739 
740         float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
741 
742         if(fLen < (1.0f / 4096.0f))
743             break;
744 
745         float fScale = fSteps / fLen;
746 
747         Dir.r *= fScale;
748         Dir.g *= fScale;
749         Dir.b *= fScale;
750 
751         // Evaluate function, and derivatives
752         float d2X = 0.0f, d2Y = 0.0f;
753         HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
754 
755         for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
756         {
757             float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
758                 (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
759                 (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
760 
761             size_t iStep;
762             if(fDot <= 0.0f)
763                 iStep = 0;
764             if(fDot >= fSteps)
765                 iStep = cSteps - 1;
766             else
767                 iStep = size_t(fDot + 0.5f);
768 
769             HDRColorA Diff;
770             Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
771             Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
772             Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
773 
774             float fC = pC[iStep] * (1.0f / 8.0f);
775             float fD = pD[iStep] * (1.0f / 8.0f);
776 
777             d2X  += fC * pC[iStep];
778             dX.r += fC * Diff.r;
779             dX.g += fC * Diff.g;
780             dX.b += fC * Diff.b;
781 
782             d2Y  += fD * pD[iStep];
783             dY.r += fD * Diff.r;
784             dY.g += fD * Diff.g;
785             dY.b += fD * Diff.b;
786         }
787 
788         // Move endpoints
789         if(d2X > 0.0f)
790         {
791             float f = -1.0f / d2X;
792 
793             X.r += dX.r * f;
794             X.g += dX.g * f;
795             X.b += dX.b * f;
796         }
797 
798         if(d2Y > 0.0f)
799         {
800             float f = -1.0f / d2Y;
801 
802             Y.r += dY.r * f;
803             Y.g += dY.g * f;
804             Y.b += dY.b * f;
805         }
806 
807         if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
808             (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
809         {
810             break;
811         }
812     }
813 
814     pX->r = X.r; pX->g = X.g; pX->b = X.b;
815     pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
816     return fError;
817 }
818 
819 
820 //-------------------------------------------------------------------------------------
OptimizeRGBA(_In_reads_ (NUM_PIXELS_PER_BLOCK)const HDRColorA * const pPoints,_Out_ HDRColorA * pX,_Out_ HDRColorA * pY,_In_ size_t cSteps,_In_ size_t cPixels,_In_reads_ (cPixels)const size_t * pIndex)821 static float OptimizeRGBA(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
822                           _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
823                           _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
824 {
825     float fError = FLT_MAX;
826     const float *pC = (3 == cSteps) ? pC3 : pC4;
827     const float *pD = (3 == cSteps) ? pD3 : pD4;
828 
829     // Find Min and Max points, as starting point
830     HDRColorA X(1.0f, 1.0f, 1.0f, 1.0f);
831     HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
832 
833     for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
834     {
835         if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
836         if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
837         if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
838         if(pPoints[pIndex[iPoint]].a < X.a) X.a = pPoints[pIndex[iPoint]].a;
839         if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
840         if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
841         if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
842         if(pPoints[pIndex[iPoint]].a > Y.a) Y.a = pPoints[pIndex[iPoint]].a;
843     }
844 
845     // Diagonal axis
846     HDRColorA AB = Y - X;
847     float fAB = AB * AB;
848 
849     // Single color block.. no need to root-find
850     if(fAB < FLT_MIN)
851     {
852         *pX = X;
853         *pY = Y;
854         return 0.0f;
855     }
856 
857     // Try all four axis directions, to determine which diagonal best fits data
858     float fABInv = 1.0f / fAB;
859     HDRColorA Dir = AB * fABInv;
860     HDRColorA Mid = (X + Y) * 0.5f;
861 
862     float fDir[8];
863     fDir[0] = fDir[1] = fDir[2] = fDir[3] = fDir[4] = fDir[5] = fDir[6] = fDir[7] = 0.0f;
864 
865     for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
866     {
867         HDRColorA Pt;
868         Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
869         Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
870         Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
871         Pt.a = (pPoints[pIndex[iPoint]].a - Mid.a) * Dir.a;
872 
873         float f;
874         f = Pt.r + Pt.g + Pt.b + Pt.a; fDir[0] += f * f;
875         f = Pt.r + Pt.g + Pt.b - Pt.a; fDir[1] += f * f;
876         f = Pt.r + Pt.g - Pt.b + Pt.a; fDir[2] += f * f;
877         f = Pt.r + Pt.g - Pt.b - Pt.a; fDir[3] += f * f;
878         f = Pt.r - Pt.g + Pt.b + Pt.a; fDir[4] += f * f;
879         f = Pt.r - Pt.g + Pt.b - Pt.a; fDir[5] += f * f;
880         f = Pt.r - Pt.g - Pt.b + Pt.a; fDir[6] += f * f;
881         f = Pt.r - Pt.g - Pt.b - Pt.a; fDir[7] += f * f;
882     }
883 
884     float fDirMax = fDir[0];
885     size_t  iDirMax = 0;
886 
887     for(size_t iDir = 1; iDir < 8; iDir++)
888     {
889         if(fDir[iDir] > fDirMax)
890         {
891             fDirMax = fDir[iDir];
892             iDirMax = iDir;
893         }
894     }
895 
896     if(iDirMax & 4) std::swap(X.g, Y.g);
897     if(iDirMax & 2) std::swap(X.b, Y.b);
898     if(iDirMax & 1) std::swap(X.a, Y.a);
899 
900     // Two color block.. no need to root-find
901     if(fAB < 1.0f / 4096.0f)
902     {
903         *pX = X;
904         *pY = Y;
905         return 0.0f;
906     }
907 
908     // Use Newton's Method to find local minima of sum-of-squares error.
909     float fSteps = (float) (cSteps - 1);
910 
911     for(size_t iIteration = 0; iIteration < 8 && fError > 0.0f; iIteration++)
912     {
913         // Calculate new steps
914         HDRColorA pSteps[BC7_MAX_INDICES];
915 
916         LDRColorA lX, lY;
917         lX = (X * 255.0f).ToLDRColorA();
918         lY = (Y * 255.0f).ToLDRColorA();
919 
920         for(size_t iStep = 0; iStep < cSteps; iStep++)
921         {
922             pSteps[iStep] = X * pC[iStep] + Y * pD[iStep];
923             //LDRColorA::Interpolate(lX, lY, i, i, wcprec, waprec, aSteps[i]);
924         }
925 
926         // Calculate color direction
927         Dir = Y - X;
928         float fLen = Dir * Dir;
929         if(fLen < (1.0f / 4096.0f))
930             break;
931 
932         float fScale = fSteps / fLen;
933         Dir *= fScale;
934 
935         // Evaluate function, and derivatives
936         float d2X = 0.0f, d2Y = 0.0f;
937         HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
938 
939         for(size_t iPoint = 0; iPoint < cPixels; ++iPoint)
940         {
941             float fDot = (pPoints[pIndex[iPoint]] - X) * Dir;
942             size_t iStep;
943             if(fDot <= 0.0f)
944                 iStep = 0;
945             if(fDot >= fSteps)
946                 iStep = cSteps - 1;
947             else
948                 iStep = size_t(fDot + 0.5f);
949 
950             HDRColorA Diff = pSteps[iStep] - pPoints[pIndex[iPoint]];
951             float fC = pC[iStep] * (1.0f / 8.0f);
952             float fD = pD[iStep] * (1.0f / 8.0f);
953 
954             d2X  += fC * pC[iStep];
955             dX += Diff * fC;
956 
957             d2Y  += fD * pD[iStep];
958             dY += Diff * fD;
959         }
960 
961         // Move endpoints
962         if(d2X > 0.0f)
963         {
964             float f = -1.0f / d2X;
965             X += dX * f;
966         }
967 
968         if(d2Y > 0.0f)
969         {
970             float f = -1.0f / d2Y;
971             Y += dY * f;
972         }
973 
974         if((dX * dX < fEpsilon) && (dY * dY < fEpsilon))
975             break;
976     }
977 
978     *pX = X;
979     *pY = Y;
980     return fError;
981 }
982 
983 
984 //-------------------------------------------------------------------------------------
985 
986 static float ComputeError(_Inout_ const LDRColorA& pixel, _In_reads_(1 << uIndexPrec) const LDRColorA aPalette[],
987                           _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t* pBestIndex = nullptr, _Out_opt_ size_t* pBestIndex2 = nullptr)
988 {
989     const size_t uNumIndices = size_t(1) << uIndexPrec;
990     const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
991     float fTotalErr = 0;
992     float fBestErr = FLT_MAX;
993 
994     if(pBestIndex)
995         *pBestIndex = 0;
996     if(pBestIndex2)
997         *pBestIndex2 = 0;
998 
999     XMVECTOR vpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &pixel ) );
1000 
1001     if(uIndexPrec2 == 0)
1002     {
1003         for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1004         {
1005             XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1006             // Compute ErrorMetric
1007             tpixel = XMVectorSubtract( vpixel, tpixel );
1008             float fErr = XMVectorGetX( XMVector4Dot( tpixel, tpixel ) );
1009             if(fErr > fBestErr)	// error increased, so we're done searching
1010                 break;
1011             if(fErr < fBestErr)
1012             {
1013                 fBestErr = fErr;
1014                 if(pBestIndex)
1015                     *pBestIndex = i;
1016             }
1017         }
1018         fTotalErr += fBestErr;
1019     }
1020     else
1021     {
1022         for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1023         {
1024             XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1025             // Compute ErrorMetricRGB
1026             tpixel = XMVectorSubtract( vpixel, tpixel );
1027             float fErr = XMVectorGetX( XMVector3Dot( tpixel, tpixel ) );
1028             if(fErr > fBestErr)	// error increased, so we're done searching
1029                 break;
1030             if(fErr < fBestErr)
1031             {
1032                 fBestErr = fErr;
1033                 if(pBestIndex)
1034                     *pBestIndex = i;
1035             }
1036         }
1037         fTotalErr += fBestErr;
1038         fBestErr = FLT_MAX;
1039         for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++)
1040         {
1041             // Compute ErrorMetricAlpha
1042             float ea = float(pixel.a) - float(aPalette[i].a);
1043             float fErr = ea*ea;
1044             if(fErr > fBestErr)	// error increased, so we're done searching
1045                 break;
1046             if(fErr < fBestErr)
1047             {
1048                 fBestErr = fErr;
1049                 if(pBestIndex2)
1050                     *pBestIndex2 = i;
1051             }
1052         }
1053         fTotalErr += fBestErr;
1054     }
1055 
1056     return fTotalErr;
1057 }
1058 
1059 
FillWithErrorColors(_Out_writes_ (NUM_PIXELS_PER_BLOCK)HDRColorA * pOut)1060 inline static void FillWithErrorColors( _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut )
1061 {
1062     for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1063     {
1064 #ifdef _DEBUG
1065         // Use Magenta in debug as a highly-visible error color
1066         pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
1067 #else
1068         // In production use, default to black
1069         pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1070 #endif
1071     }
1072 }
1073 
1074 
1075 //-------------------------------------------------------------------------------------
1076 // BC6H Compression
1077 //-------------------------------------------------------------------------------------
1078 _Use_decl_annotations_
Decode(bool bSigned,HDRColorA * pOut) const1079 void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const
1080 {
1081     assert(pOut );
1082 
1083     size_t uStartBit = 0;
1084     uint8_t uMode = GetBits(uStartBit, 2);
1085     if(uMode != 0x00 && uMode != 0x01)
1086     {
1087         uMode = (GetBits(uStartBit, 3) << 2) | uMode;
1088     }
1089 
1090     assert( uMode < 32 );
1091     _Analysis_assume_( uMode < 32 );
1092 
1093     if ( ms_aModeToInfo[uMode] >= 0 )
1094     {
1095         assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1096         _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1097         const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
1098 
1099         assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1100         _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1101         const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
1102 
1103         INTEndPntPair aEndPts[BC6H_MAX_REGIONS];
1104         memset(aEndPts, 0, BC6H_MAX_REGIONS * 2 * sizeof(INTColor));
1105         uint32_t uShape = 0;
1106 
1107         // Read header
1108         const size_t uHeaderBits = info.uPartitions > 0 ? 82 : 65;
1109         while(uStartBit < uHeaderBits)
1110         {
1111             size_t uCurBit = uStartBit;
1112             if(GetBit(uStartBit))
1113             {
1114                 switch(desc[uCurBit].m_eField)
1115                 {
1116                 case D:  uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1117                 case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1118                 case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1119                 case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1120                 case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1121                 case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1122                 case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1123                 case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1124                 case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1125                 case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1126                 case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1127                 case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1128                 case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1129                 default:
1130                     {
1131 #ifdef _DEBUG
1132                         OutputDebugStringA( "BC6H: Invalid header bits encountered during decoding\n" );
1133 #endif
1134                         FillWithErrorColors( pOut );
1135                         return;
1136                     }
1137                 }
1138             }
1139         }
1140 
1141         assert( uShape < 64 );
1142         _Analysis_assume_( uShape < 64 );
1143 
1144         // Sign extend necessary end points
1145         if(bSigned)
1146         {
1147             aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
1148         }
1149         if(bSigned || info.bTransformed)
1150         {
1151             assert( info.uPartitions < BC6H_MAX_REGIONS );
1152             _Analysis_assume_( info.uPartitions < BC6H_MAX_REGIONS );
1153             for(size_t p = 0; p <= info.uPartitions; ++p)
1154             {
1155                 if(p != 0)
1156                 {
1157                     aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
1158                 }
1159                 aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
1160             }
1161         }
1162 
1163         // Inverse transform the end points
1164         if(info.bTransformed)
1165         {
1166             TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
1167         }
1168 
1169         // Read indices
1170         for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1171         {
1172             size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec-1 : info.uIndexPrec;
1173             if ( uStartBit + uNumBits > 128 )
1174             {
1175 #ifdef _DEBUG
1176                 OutputDebugStringA( "BC6H: Invalid block encountered during decoding\n" );
1177 #endif
1178                 FillWithErrorColors( pOut );
1179                 return;
1180             }
1181             uint8_t uIndex = GetBits(uStartBit, uNumBits);
1182 
1183             if ( uIndex >= ((info.uPartitions > 0) ? 8 : 16) )
1184             {
1185 #ifdef _DEBUG
1186                 OutputDebugStringA( "BC6H: Invalid index encountered during decoding\n" );
1187 #endif
1188                 FillWithErrorColors( pOut );
1189                 return;
1190             }
1191 
1192             size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
1193             assert( uRegion < BC6H_MAX_REGIONS );
1194             _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1195 
1196             // Unquantize endpoints and interpolate
1197             int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
1198             int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
1199             int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
1200             int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
1201             int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
1202             int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
1203             const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
1204             INTColor fc;
1205             fc.r = FinishUnquantize((r1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1206             fc.g = FinishUnquantize((g1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1207             fc.b = FinishUnquantize((b1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1208 
1209             HALF rgb[3];
1210             fc.ToF16(rgb, bSigned);
1211 
1212             pOut[i].r = XMConvertHalfToFloat( rgb[0] );
1213             pOut[i].g = XMConvertHalfToFloat( rgb[1] );
1214             pOut[i].b = XMConvertHalfToFloat( rgb[2] );
1215             pOut[i].a = 1.0f;
1216         }
1217     }
1218     else
1219     {
1220 #ifdef _DEBUG
1221         const char* warnstr = "BC6H: Invalid mode encountered during decoding\n";
1222         switch( uMode )
1223         {
1224         case 0x13:  warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break;
1225         case 0x17:  warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break;
1226         case 0x1B:  warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break;
1227         case 0x1F:  warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break;
1228         }
1229         OutputDebugStringA( warnstr );
1230 #endif
1231         // Per the BC6H format spec, we must return opaque black
1232         for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1233         {
1234             pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1235         }
1236     }
1237 }
1238 
1239 _Use_decl_annotations_
Encode(bool bSigned,const HDRColorA * const pIn)1240 void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn)
1241 {
1242     assert( pIn );
1243 
1244     EncodeParams EP(pIn, bSigned);
1245 
1246     for(EP.uMode = 0; EP.uMode < ARRAYSIZE(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
1247     {
1248         const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32 : 1;
1249         // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
1250         // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
1251         const size_t uItems = std::max<size_t>(1, uShapes >> 2);
1252         float afRoughMSE[BC6H_MAX_SHAPES];
1253         uint8_t auShape[BC6H_MAX_SHAPES];
1254 
1255         // pick the best uItems shapes and refine these.
1256         for(EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
1257         {
1258             size_t uShape = EP.uShape;
1259             afRoughMSE[uShape] = RoughMSE(&EP);
1260             auShape[uShape] = static_cast<uint8_t>(uShape);
1261         }
1262 
1263         // Bubble up the first uItems items
1264         for(register size_t i = 0; i < uItems; i++)
1265         {
1266             for(register size_t j = i + 1; j < uShapes; j++)
1267             {
1268                 if(afRoughMSE[i] > afRoughMSE[j])
1269                 {
1270                     std::swap(afRoughMSE[i], afRoughMSE[j]);
1271                     std::swap(auShape[i], auShape[j]);
1272                 }
1273             }
1274         }
1275 
1276         for(size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
1277         {
1278             EP.uShape = auShape[i];
1279             Refine(&EP);
1280         }
1281     }
1282 }
1283 
1284 
1285 //-------------------------------------------------------------------------------------
1286 _Use_decl_annotations_
Quantize(int iValue,int prec,bool bSigned)1287 int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned)
1288 {
1289     assert(prec > 1);	// didn't bother to make it work for 1
1290     int q, s = 0;
1291     if(bSigned)
1292     {
1293         assert(iValue >= -F16MAX && iValue <= F16MAX);
1294         if(iValue < 0)
1295         {
1296             s = 1;
1297             iValue = -iValue;
1298         }
1299         q = (prec >= 16) ? iValue : (iValue << (prec-1)) / (F16MAX+1);
1300         if(s)
1301             q = -q;
1302         assert (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
1303     }
1304     else
1305     {
1306         assert(iValue >= 0 && iValue <= F16MAX);
1307         q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX+1);
1308         assert (q >= 0 && q < (1 << prec));
1309     }
1310 
1311     return q;
1312 }
1313 
1314 _Use_decl_annotations_
Unquantize(int comp,uint8_t uBitsPerComp,bool bSigned)1315 int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned)
1316 {
1317     int unq = 0, s = 0;
1318     if(bSigned)
1319     {
1320         if(uBitsPerComp >= 16)
1321         {
1322             unq = comp;
1323         }
1324         else
1325         {
1326             if(comp < 0)
1327             {
1328                 s = 1;
1329                 comp = -comp;
1330             }
1331 
1332             if(comp == 0) unq = 0;
1333             else if(comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
1334             else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp-1);
1335 
1336             if(s) unq = -unq;
1337         }
1338     }
1339     else
1340     {
1341         if(uBitsPerComp >= 15) unq = comp;
1342         else if(comp == 0) unq = 0;
1343         else if(comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
1344         else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
1345     }
1346 
1347     return unq;
1348 }
1349 
1350 _Use_decl_annotations_
FinishUnquantize(int comp,bool bSigned)1351 int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned)
1352 {
1353     if(bSigned)
1354     {
1355         return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5;  // scale the magnitude by 31/32
1356     }
1357     else
1358     {
1359         return (comp * 31) >> 6;                                        // scale the magnitude by 31/64
1360     }
1361 }
1362 
1363 
1364 //-------------------------------------------------------------------------------------
1365 _Use_decl_annotations_
EndPointsFit(const EncodeParams * pEP,const INTEndPntPair aEndPts[])1366 bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[])
1367 {
1368     assert( pEP );
1369     const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1370     const bool bIsSigned = pEP->bSigned;
1371     const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1372     const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
1373     const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
1374     const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
1375 
1376     INTColor aBits[4];
1377     aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
1378     aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
1379     aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
1380     aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
1381     aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
1382     aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
1383     if(aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
1384        aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
1385        aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
1386         return false;
1387 
1388     if(ms_aInfo[pEP->uMode].uPartitions)
1389     {
1390         aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
1391         aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
1392         aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
1393         aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
1394         aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
1395         aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
1396 
1397         if(aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
1398            aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
1399            aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
1400             return false;
1401     }
1402 
1403     return true;
1404 }
1405 
1406 _Use_decl_annotations_
GeneratePaletteQuantized(const EncodeParams * pEP,const INTEndPntPair & endPts,INTColor aPalette[]) const1407 void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const
1408 {
1409     assert( pEP );
1410     const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1411     const size_t uNumIndices = size_t(1) << uIndexPrec;
1412     assert( uNumIndices > 0 );
1413     _Analysis_assume_( uNumIndices > 0 );
1414     const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1415 
1416     // scale endpoints
1417     INTEndPntPair unqEndPts;
1418     unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
1419     unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
1420     unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
1421     unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
1422     unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
1423     unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
1424 
1425     // interpolate
1426     const int* aWeights = nullptr;
1427     switch(uIndexPrec)
1428     {
1429     case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1430     case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1431     default:
1432         assert(false);
1433         for(size_t i = 0; i < uNumIndices; ++i)
1434         {
1435             #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
1436             aPalette[i] = INTColor(0,0,0);
1437         }
1438         return;
1439     }
1440 
1441     for (size_t i = 0; i < uNumIndices; ++i)
1442     {
1443         aPalette[i].r = FinishUnquantize(
1444             (unqEndPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1445             pEP->bSigned);
1446         aPalette[i].g = FinishUnquantize(
1447             (unqEndPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1448             pEP->bSigned);
1449         aPalette[i].b = FinishUnquantize(
1450             (unqEndPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1451             pEP->bSigned);
1452     }
1453 }
1454 
1455 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
1456 _Use_decl_annotations_
MapColorsQuantized(const EncodeParams * pEP,const INTColor aColors[],size_t np,const INTEndPntPair & endPts) const1457 float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const
1458 {
1459     assert( pEP );
1460 
1461     const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1462     const uint8_t uNumIndices = 1 << uIndexPrec;
1463     INTColor aPalette[BC6H_MAX_INDICES];
1464     GeneratePaletteQuantized(pEP, endPts, aPalette);
1465 
1466     float fTotErr = 0;
1467     for(size_t i = 0; i < np; ++i)
1468     {
1469         XMVECTOR vcolors = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aColors[i] ) );
1470 
1471         // Compute ErrorMetricRGB
1472         XMVECTOR tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[0] ) );
1473         tpal = XMVectorSubtract( vcolors, tpal );
1474         float fBestErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1475 
1476         for(int j = 1; j < uNumIndices && fBestErr > 0; ++j)
1477         {
1478             // Compute ErrorMetricRGB
1479             tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[j] ) );
1480             tpal = XMVectorSubtract( vcolors, tpal );
1481             float fErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1482             if(fErr > fBestErr) break;     // error increased, so we're done searching
1483             if(fErr < fBestErr) fBestErr = fErr;
1484         }
1485         fTotErr += fBestErr;
1486     }
1487     return fTotErr;
1488 }
1489 
1490 _Use_decl_annotations_
PerturbOne(const EncodeParams * pEP,const INTColor aColors[],size_t np,uint8_t ch,const INTEndPntPair & oldEndPts,INTEndPntPair & newEndPts,float fOldErr,int do_b) const1491 float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch,
1492                             const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const
1493 {
1494     assert( pEP );
1495     uint8_t uPrec;
1496     switch(ch)
1497     {
1498     case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
1499     case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
1500     case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
1501     default: assert(false); newEndPts = oldEndPts; return FLT_MAX;
1502     }
1503     INTEndPntPair tmpEndPts;
1504     float fMinErr = fOldErr;
1505     int beststep = 0;
1506 
1507     // copy real endpoints so we can perturb them
1508     tmpEndPts = newEndPts = oldEndPts;
1509 
1510     // do a logarithmic search for the best error for this endpoint (which)
1511     for(int step = 1 << (uPrec-1); step; step >>= 1)
1512     {
1513         bool bImproved = false;
1514         for(int sign = -1; sign <= 1; sign += 2)
1515         {
1516             if(do_b == 0)
1517             {
1518                 tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
1519                 if(tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
1520                     continue;
1521             }
1522             else
1523             {
1524                 tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
1525                 if(tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
1526                     continue;
1527             }
1528 
1529             float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
1530 
1531             if(fErr < fMinErr)
1532             {
1533                 bImproved = true;
1534                 fMinErr = fErr;
1535                 beststep = sign * step;
1536             }
1537         }
1538         // if this was an improvement, move the endpoint and continue search from there
1539         if(bImproved)
1540         {
1541             if(do_b == 0)
1542                 newEndPts.A[ch] += beststep;
1543             else
1544                 newEndPts.B[ch] += beststep;
1545         }
1546     }
1547     return fMinErr;
1548 }
1549 
1550 _Use_decl_annotations_
OptimizeOne(const EncodeParams * pEP,const INTColor aColors[],size_t np,float aOrgErr,const INTEndPntPair & aOrgEndPts,INTEndPntPair & aOptEndPts) const1551 void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr,
1552                             const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const
1553 {
1554     assert( pEP );
1555     float aOptErr = aOrgErr;
1556     aOptEndPts.A = aOrgEndPts.A;
1557     aOptEndPts.B = aOrgEndPts.B;
1558 
1559     INTEndPntPair new_a, new_b;
1560     INTEndPntPair newEndPts;
1561     int do_b;
1562 
1563     // now optimize each channel separately
1564     for(uint8_t ch = 0; ch < 3; ++ch)
1565     {
1566         // figure out which endpoint when perturbed gives the most improvement and start there
1567         // if we just alternate, we can easily end up in a local minima
1568         float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0);	// perturb endpt A
1569         float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1);	// perturb endpt B
1570 
1571         if(fErr0 < fErr1)
1572         {
1573             if(fErr0 >= aOptErr) continue;
1574             aOptEndPts.A[ch] = new_a.A[ch];
1575             aOptErr = fErr0;
1576             do_b = 1;		// do B next
1577         }
1578         else
1579         {
1580             if(fErr1 >= aOptErr) continue;
1581             aOptEndPts.B[ch] = new_b.B[ch];
1582             aOptErr = fErr1;
1583             do_b = 0;		// do A next
1584         }
1585 
1586         // now alternate endpoints and keep trying until there is no improvement
1587         for(;;)
1588         {
1589             float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
1590             if(fErr >= aOptErr)
1591                 break;
1592             if(do_b == 0)
1593                 aOptEndPts.A[ch] = newEndPts.A[ch];
1594             else
1595                 aOptEndPts.B[ch] = newEndPts.B[ch];
1596             aOptErr = fErr;
1597             do_b = 1 - do_b;	// now move the other endpoint
1598         }
1599     }
1600 }
1601 
1602 _Use_decl_annotations_
OptimizeEndPoints(const EncodeParams * pEP,const float aOrgErr[],const INTEndPntPair aOrgEndPts[],INTEndPntPair aOptEndPts[]) const1603 void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const
1604 {
1605     assert( pEP );
1606     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1607     assert( uPartitions < BC6H_MAX_REGIONS );
1608     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1609     INTColor aPixels[NUM_PIXELS_PER_BLOCK];
1610 
1611     for(size_t p = 0; p <= uPartitions; ++p)
1612     {
1613         // collect the pixels in the region
1614         size_t np = 0;
1615         for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1616         {
1617             if(g_aPartitionTable[p][pEP->uShape][i] == p)
1618             {
1619                 aPixels[np++] = pEP->aIPixels[i];
1620             }
1621         }
1622 
1623         OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
1624     }
1625 }
1626 
1627 // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit
1628 _Use_decl_annotations_
SwapIndices(const EncodeParams * pEP,INTEndPntPair aEndPts[],size_t aIndices[])1629 void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[])
1630 {
1631     assert( pEP );
1632     const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1633     const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
1634     const size_t uHighIndexBit = uNumIndices >> 1;
1635 
1636     assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1637     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1638 
1639     for(size_t p = 0; p <= uPartitions; ++p)
1640     {
1641         size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
1642         assert(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
1643         if(aIndices[i] & uHighIndexBit)
1644         {
1645             // high bit is set, swap the aEndPts and indices for this region
1646             std::swap(aEndPts[p].A, aEndPts[p].B);
1647 
1648             for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j)
1649                 if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
1650                     aIndices[j] = uNumIndices - 1 - aIndices[j];
1651         }
1652     }
1653 }
1654 
1655 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
1656 _Use_decl_annotations_
AssignIndices(const EncodeParams * pEP,const INTEndPntPair aEndPts[],size_t aIndices[],float aTotErr[]) const1657 void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const
1658 {
1659     assert( pEP );
1660     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1661     const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
1662 
1663     assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1664     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1665 
1666     // build list of possibles
1667     INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
1668 
1669     for(size_t p = 0; p <= uPartitions; ++p)
1670     {
1671         GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
1672         aTotErr[p] = 0;
1673     }
1674 
1675     for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1676     {
1677         const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
1678         assert( uRegion < BC6H_MAX_REGIONS );
1679         _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1680         float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
1681         aIndices[i] = 0;
1682 
1683         for(uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
1684         {
1685             float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
1686             if(fErr > fBestErr) break;	// error increased, so we're done searching
1687             if(fErr < fBestErr)
1688             {
1689                 fBestErr = fErr;
1690                 aIndices[i] = j;
1691             }
1692         }
1693         aTotErr[uRegion] += fBestErr;
1694     }
1695 }
1696 
1697 _Use_decl_annotations_
QuantizeEndPts(const EncodeParams * pEP,INTEndPntPair * aQntEndPts) const1698 void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const
1699 {
1700     assert( pEP && aQntEndPts );
1701     const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
1702     const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1703     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1704     assert( uPartitions < BC6H_MAX_REGIONS );
1705     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1706 
1707     for(size_t p = 0; p <= uPartitions; ++p)
1708     {
1709         aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
1710         aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
1711         aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
1712         aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
1713         aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
1714         aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
1715     }
1716 }
1717 
1718 _Use_decl_annotations_
EmitBlock(const EncodeParams * pEP,const INTEndPntPair aEndPts[],const size_t aIndices[])1719 void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[])
1720 {
1721     assert( pEP );
1722     const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
1723     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1724     const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1725     const size_t uHeaderBits = uPartitions > 0 ? 82 : 65;
1726     const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
1727     size_t uStartBit = 0;
1728 
1729     while(uStartBit < uHeaderBits)
1730     {
1731         switch(desc[uStartBit].m_eField)
1732         {
1733         case M:  SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01); break;
1734         case D:  SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01); break;
1735         case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1736         case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1737         case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1738         case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1739         case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1740         case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1741         case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1742         case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1743         case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1744         case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1745         case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1746         case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1747         default: assert(false);
1748         }
1749     }
1750 
1751     for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1752     {
1753         if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
1754             SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aIndices[i] ));
1755         else
1756             SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aIndices[i] ));
1757     }
1758     assert(uStartBit == 128);
1759 }
1760 
1761 _Use_decl_annotations_
Refine(EncodeParams * pEP)1762 void D3DX_BC6H::Refine(EncodeParams* pEP)
1763 {
1764     assert( pEP );
1765     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1766     assert( uPartitions < BC6H_MAX_REGIONS );
1767     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1768 
1769     const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1770     float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
1771     INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
1772     size_t aOrgIdx[NUM_PIXELS_PER_BLOCK], aOptIdx[NUM_PIXELS_PER_BLOCK];
1773 
1774     QuantizeEndPts(pEP, aOrgEndPts);
1775     AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
1776     SwapIndices(pEP, aOrgEndPts, aOrgIdx);
1777 
1778     if(bTransformed) TransformForward(aOrgEndPts);
1779     if(EndPointsFit(pEP, aOrgEndPts))
1780     {
1781         if(bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
1782         OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
1783         AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
1784         SwapIndices(pEP, aOptEndPts, aOptIdx);
1785 
1786         float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
1787         for(size_t p = 0; p <= uPartitions; ++p)
1788         {
1789             fOrgTotErr += aOrgErr[p];
1790             fOptTotErr += aOptErr[p];
1791         }
1792 
1793         if(bTransformed) TransformForward(aOptEndPts);
1794         if(EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
1795         {
1796             pEP->fBestErr = fOptTotErr;
1797             EmitBlock(pEP, aOptEndPts, aOptIdx);
1798         }
1799         else if(fOrgTotErr < pEP->fBestErr)
1800         {
1801             // either it stopped fitting when we optimized it, or there was no improvement
1802             // so go back to the unoptimized endpoints which we know will fit
1803             if(bTransformed) TransformForward(aOrgEndPts);
1804             pEP->fBestErr = fOrgTotErr;
1805             EmitBlock(pEP, aOrgEndPts, aOrgIdx);
1806         }
1807     }
1808 }
1809 
1810 _Use_decl_annotations_
GeneratePaletteUnquantized(const EncodeParams * pEP,size_t uRegion,INTColor aPalette[])1811 void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[])
1812 {
1813     assert( pEP );
1814     assert( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1815     _Analysis_assume_( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1816     const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
1817     const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1818     const uint8_t uNumIndices = 1 << uIndexPrec;
1819     assert(uNumIndices > 0);
1820     _Analysis_assume_(uNumIndices > 0);
1821 
1822     const int* aWeights = nullptr;
1823     switch(uIndexPrec)
1824     {
1825     case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1826     case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1827     default:
1828         assert(false);
1829         for(size_t i = 0; i < uNumIndices; ++i)
1830         {
1831             #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
1832             aPalette[i] = INTColor(0,0,0);
1833         }
1834         return;
1835     }
1836 
1837     for(register size_t i = 0; i < uNumIndices; ++i)
1838     {
1839         aPalette[i].r = (endPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1840         aPalette[i].g = (endPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1841         aPalette[i].b = (endPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1842     }
1843 }
1844 
1845 _Use_decl_annotations_
MapColors(const EncodeParams * pEP,size_t uRegion,size_t np,const size_t * auIndex) const1846 float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const
1847 {
1848     assert( pEP );
1849     const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1850     const uint8_t uNumIndices = 1 << uIndexPrec;
1851     INTColor aPalette[BC6H_MAX_INDICES];
1852     GeneratePaletteUnquantized(pEP, uRegion, aPalette);
1853 
1854     float fTotalErr = 0.0f;
1855     for(size_t i = 0; i < np; ++i)
1856     {
1857         float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
1858         for(uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
1859         {
1860             float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
1861             if(fErr > fBestErr) break;      // error increased, so we're done searching
1862             if(fErr < fBestErr) fBestErr = fErr;
1863         }
1864         fTotalErr += fBestErr;
1865     }
1866 
1867     return fTotalErr;
1868 }
1869 
1870 _Use_decl_annotations_
RoughMSE(EncodeParams * pEP) const1871 float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const
1872 {
1873     assert( pEP );
1874     assert( pEP->uShape < BC6H_MAX_SHAPES);
1875     _Analysis_assume_( pEP->uShape < BC6H_MAX_SHAPES);
1876 
1877     INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
1878 
1879     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1880     assert( uPartitions < BC6H_MAX_REGIONS );
1881     _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1882 
1883     size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
1884 
1885     float fError = 0.0f;
1886     for(size_t p = 0; p <= uPartitions; ++p)
1887     {
1888         size_t np = 0;
1889         for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1890         {
1891             if(g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
1892             {
1893                 auPixIdx[np++] = i;
1894             }
1895         }
1896 
1897         // handle simple cases
1898         assert(np > 0);
1899         if(np == 1)
1900         {
1901             aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1902             aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
1903             continue;
1904         }
1905         else if(np == 2)
1906         {
1907             aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1908             aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
1909             continue;
1910         }
1911 
1912         HDRColorA epA, epB;
1913         OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
1914         aEndPts[p].A.Set(epA, pEP->bSigned);
1915         aEndPts[p].B.Set(epB, pEP->bSigned);
1916         if(pEP->bSigned)
1917         {
1918             aEndPts[p].A.Clamp(-F16MAX, F16MAX);
1919             aEndPts[p].B.Clamp(-F16MAX, F16MAX);
1920         }
1921         else
1922         {
1923             aEndPts[p].A.Clamp(0, F16MAX);
1924             aEndPts[p].B.Clamp(0, F16MAX);
1925         }
1926 
1927         fError += MapColors(pEP, p, np, auPixIdx);
1928     }
1929 
1930     return fError;
1931 }
1932 
1933 
1934 
1935 //-------------------------------------------------------------------------------------
1936 // BC7 Compression
1937 //-------------------------------------------------------------------------------------
1938 _Use_decl_annotations_
Decode(HDRColorA * pOut) const1939 void D3DX_BC7::Decode(HDRColorA* pOut) const
1940 {
1941     assert( pOut );
1942 
1943     size_t uFirst = 0;
1944     while(uFirst < 128 && !GetBit(uFirst)) {}
1945     uint8_t uMode = uint8_t(uFirst - 1);
1946 
1947     if(uMode < 8)
1948     {
1949         const uint8_t uPartitions = ms_aInfo[uMode].uPartitions;
1950         assert( uPartitions < BC7_MAX_REGIONS );
1951         _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
1952 
1953         const uint8_t uNumEndPts = (uPartitions + 1) << 1;
1954         const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec;
1955         const uint8_t uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
1956         register size_t i;
1957         size_t uStartBit = uMode + 1;
1958         uint8_t P[6];
1959         uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits);
1960         assert( uShape < BC7_MAX_SHAPES );
1961         _Analysis_assume_( uShape < BC7_MAX_SHAPES );
1962 
1963         uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits);
1964         assert( uRotation < 4 );
1965 
1966         uint8_t uIndexMode = GetBits(uStartBit, ms_aInfo[uMode].uIndexModeBits);
1967         assert( uIndexMode < 2 );
1968 
1969         LDRColorA c[BC7_MAX_REGIONS << 1];
1970         const LDRColorA RGBAPrec = ms_aInfo[uMode].RGBAPrec;
1971         const LDRColorA RGBAPrecWithP = ms_aInfo[uMode].RGBAPrecWithP;
1972 
1973         assert( uNumEndPts <= (BC7_MAX_REGIONS << 1) );
1974 
1975         // Red channel
1976         for(i = 0; i < uNumEndPts; i++)
1977         {
1978             if ( uStartBit + RGBAPrec.r > 128 )
1979             {
1980 #ifdef _DEBUG
1981                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1982 #endif
1983                 FillWithErrorColors( pOut );
1984                 return;
1985             }
1986 
1987             c[i].r = GetBits(uStartBit, RGBAPrec.r);
1988         }
1989 
1990         // Green channel
1991         for(i = 0; i < uNumEndPts; i++)
1992         {
1993             if ( uStartBit + RGBAPrec.g > 128 )
1994             {
1995 #ifdef _DEBUG
1996                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1997 #endif
1998                 FillWithErrorColors( pOut );
1999                 return;
2000             }
2001 
2002              c[i].g = GetBits(uStartBit, RGBAPrec.g);
2003         }
2004 
2005         // Blue channel
2006         for(i = 0; i < uNumEndPts; i++)
2007         {
2008             if ( uStartBit + RGBAPrec.b > 128 )
2009             {
2010 #ifdef _DEBUG
2011                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2012 #endif
2013                 FillWithErrorColors( pOut );
2014                 return;
2015             }
2016 
2017             c[i].b = GetBits(uStartBit, RGBAPrec.b);
2018         }
2019 
2020         // Alpha channel
2021         for(i = 0; i < uNumEndPts; i++)
2022         {
2023             if ( uStartBit + RGBAPrec.a > 128 )
2024             {
2025 #ifdef _DEBUG
2026                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2027 #endif
2028                 FillWithErrorColors( pOut );
2029                 return;
2030             }
2031 
2032             c[i].a = RGBAPrec.a ? GetBits(uStartBit, RGBAPrec.a) : 255;
2033         }
2034 
2035         // P-bits
2036         assert( ms_aInfo[uMode].uPBits <= 6 );
2037         _Analysis_assume_( ms_aInfo[uMode].uPBits <= 6 );
2038         for(i = 0; i < ms_aInfo[uMode].uPBits; i++)
2039         {
2040             if ( uStartBit > 127 )
2041             {
2042 #ifdef _DEBUG
2043                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2044 #endif
2045                 FillWithErrorColors( pOut );
2046                 return;
2047             }
2048 
2049             P[i] = GetBit(uStartBit);
2050         }
2051 
2052         if(ms_aInfo[uMode].uPBits)
2053         {
2054             for(i = 0; i < uNumEndPts; i++)
2055             {
2056                 size_t pi = i * ms_aInfo[uMode].uPBits / uNumEndPts;
2057                 for(register uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2058                 {
2059                     if(RGBAPrec[ch] != RGBAPrecWithP[ch])
2060                     {
2061                         c[i][ch] = (c[i][ch] << 1) | P[pi];
2062                     }
2063                 }
2064             }
2065         }
2066 
2067         for(i = 0; i < uNumEndPts; i++)
2068         {
2069             c[i] = Unquantize(c[i], RGBAPrecWithP);
2070         }
2071 
2072         uint8_t w1[NUM_PIXELS_PER_BLOCK], w2[NUM_PIXELS_PER_BLOCK];
2073 
2074         // read color indices
2075         for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2076         {
2077             size_t uNumBits = IsFixUpOffset(ms_aInfo[uMode].uPartitions, uShape, i) ? uIndexPrec - 1 : uIndexPrec;
2078             if ( uStartBit + uNumBits > 128 )
2079             {
2080 #ifdef _DEBUG
2081                 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2082 #endif
2083                 FillWithErrorColors( pOut );
2084                 return;
2085             }
2086             w1[i] = GetBits(uStartBit, uNumBits);
2087         }
2088 
2089         // read alpha indices
2090         if(uIndexPrec2)
2091         {
2092             for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2093             {
2094                 size_t uNumBits = i ? uIndexPrec2 : uIndexPrec2 - 1;
2095                 if ( uStartBit + uNumBits > 128 )
2096                 {
2097 #ifdef _DEBUG
2098                     OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2099 #endif
2100                     FillWithErrorColors( pOut );
2101                     return;
2102                 }
2103                 w2[i] = GetBits(uStartBit, uNumBits );
2104             }
2105         }
2106 
2107         for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2108         {
2109             uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2110             LDRColorA outPixel;
2111             if(uIndexPrec2 == 0)
2112             {
2113                 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w1[i], uIndexPrec, uIndexPrec, outPixel);
2114             }
2115             else
2116             {
2117                 if(uIndexMode == 0)
2118                 {
2119                     LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w2[i], uIndexPrec, uIndexPrec2, outPixel);
2120                 }
2121                 else
2122                 {
2123                     LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w2[i], w1[i], uIndexPrec2, uIndexPrec, outPixel);
2124                 }
2125             }
2126 
2127             switch(uRotation)
2128             {
2129             case 1: std::swap(outPixel.r, outPixel.a); break;
2130             case 2: std::swap(outPixel.g, outPixel.a); break;
2131             case 3: std::swap(outPixel.b, outPixel.a); break;
2132             }
2133 
2134             pOut[i] = HDRColorA(outPixel);
2135         }
2136     }
2137     else
2138     {
2139 #ifdef _DEBUG
2140         OutputDebugStringA( "BC7: Reserved mode 8 encountered during decoding\n" );
2141 #endif
2142         // Per the BC7 format spec, we must return transparent black
2143         memset( pOut, 0, sizeof(HDRColorA) * NUM_PIXELS_PER_BLOCK );
2144     }
2145 }
2146 
2147 _Use_decl_annotations_
Encode(bool skip3subsets,const HDRColorA * const pIn)2148 void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
2149 {
2150     assert( pIn );
2151 
2152     D3DX_BC7 final = *this;
2153     EncodeParams EP(pIn);
2154     float fMSEBest = FLT_MAX;
2155 
2156     for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2157     {
2158         EP.aLDRPixels[i].r = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].r * 255.0f + 0.01f ) ) );
2159         EP.aLDRPixels[i].g = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].g * 255.0f + 0.01f ) ) );
2160         EP.aLDRPixels[i].b = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].b * 255.0f + 0.01f ) ) );
2161         EP.aLDRPixels[i].a = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].a * 255.0f + 0.01f ) ) );
2162     }
2163 
2164     for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
2165     {
2166         if ( skip3subsets && (EP.uMode == 0 || EP.uMode == 2) )
2167         {
2168             // 3 subset modes tend to be used rarely and add significant compression time
2169             continue;
2170         }
2171 
2172         const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
2173         assert( uShapes <= BC7_MAX_SHAPES );
2174         _Analysis_assume_( uShapes <= BC7_MAX_SHAPES );
2175 
2176         const size_t uNumRots = size_t(1) << ms_aInfo[EP.uMode].uRotationBits;
2177         const size_t uNumIdxMode = size_t(1) << ms_aInfo[EP.uMode].uIndexModeBits;
2178         // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
2179         // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
2180         const size_t uItems = std::max<size_t>(1, uShapes >> 2);
2181         float afRoughMSE[BC7_MAX_SHAPES];
2182         size_t auShape[BC7_MAX_SHAPES];
2183 
2184         for(size_t r = 0; r < uNumRots && fMSEBest > 0; ++r)
2185         {
2186             switch(r)
2187             {
2188             case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2189             case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2190             case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2191             }
2192 
2193             for(size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im)
2194             {
2195                 // pick the best uItems shapes and refine these.
2196                 for(size_t s = 0; s < uShapes; s++)
2197                 {
2198                     afRoughMSE[s] = RoughMSE(&EP, s, im);
2199                     auShape[s] = s;
2200                 }
2201 
2202                 // Bubble up the first uItems items
2203                 for(size_t i = 0; i < uItems; i++)
2204                 {
2205                     for(size_t j = i + 1; j < uShapes; j++)
2206                     {
2207                         if(afRoughMSE[i] > afRoughMSE[j])
2208                         {
2209                             std::swap(afRoughMSE[i], afRoughMSE[j]);
2210                             std::swap(auShape[i], auShape[j]);
2211                         }
2212                     }
2213                 }
2214 
2215                 for(size_t i = 0; i < uItems && fMSEBest > 0; i++)
2216                 {
2217                     float fMSE = Refine(&EP, auShape[i], r, im);
2218                     if(fMSE < fMSEBest)
2219                     {
2220                         final = *this;
2221                         fMSEBest = fMSE;
2222                     }
2223                 }
2224             }
2225 
2226             switch(r)
2227             {
2228             case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2229             case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2230             case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2231             }
2232         }
2233     }
2234 
2235     *this = final;
2236 }
2237 
2238 
2239 //-------------------------------------------------------------------------------------
2240 _Use_decl_annotations_
GeneratePaletteQuantized(const EncodeParams * pEP,size_t uIndexMode,const LDREndPntPair & endPts,LDRColorA aPalette[]) const2241 void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const
2242 {
2243     assert( pEP );
2244     const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2245     const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2246     const size_t uNumIndices = size_t(1) << uIndexPrec;
2247     const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
2248     assert( uNumIndices > 0 && uNumIndices2 > 0 );
2249     _Analysis_assume_( uNumIndices > 0 && uNumIndices2 > 0 );
2250     assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2251     _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2252 
2253     LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2254     LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2255     if(uIndexPrec2 == 0)
2256     {
2257         for(register size_t i = 0; i < uNumIndices; i++)
2258             LDRColorA::Interpolate(a, b, i, i, uIndexPrec, uIndexPrec, aPalette[i]);
2259     }
2260     else
2261     {
2262         for(register size_t i = 0; i < uNumIndices; i++)
2263             LDRColorA::InterpolateRGB(a, b, i, uIndexPrec, aPalette[i]);
2264         for(register size_t i = 0; i < uNumIndices2; i++)
2265             LDRColorA::InterpolateA(a, b, i, uIndexPrec2, aPalette[i]);
2266     }
2267 }
2268 
2269 _Use_decl_annotations_
PerturbOne(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,size_t ch,const LDREndPntPair & oldEndPts,LDREndPntPair & newEndPts,float fOldErr,uint8_t do_b) const2270 float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2271                            const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const
2272 {
2273     assert( pEP );
2274     const int prec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2275     LDREndPntPair tmp_endPts = newEndPts = oldEndPts;
2276     float fMinErr = fOldErr;
2277     uint8_t* pnew_c = (do_b ? &newEndPts.B[ch] : &newEndPts.A[ch]);
2278     uint8_t* ptmp_c = (do_b ? &tmp_endPts.B[ch] : &tmp_endPts.A[ch]);
2279 
2280     // do a logarithmic search for the best error for this endpoint (which)
2281     for(int step = 1 << (prec-1); step; step >>= 1)
2282     {
2283         bool bImproved = false;
2284         int beststep = 0;
2285         for(int sign = -1; sign <= 1; sign += 2)
2286         {
2287             int tmp = int(*pnew_c) + sign * step;
2288             if(tmp < 0 || tmp >= (1 << prec))
2289                 continue;
2290             else
2291                 *ptmp_c = (uint8_t) tmp;
2292 
2293             float fTotalErr = MapColors(pEP, aColors, np, uIndexMode, tmp_endPts, fMinErr);
2294             if(fTotalErr < fMinErr)
2295             {
2296                 bImproved = true;
2297                 fMinErr = fTotalErr;
2298                 beststep = sign * step;
2299             }
2300         }
2301 
2302         // if this was an improvement, move the endpoint and continue search from there
2303         if(bImproved)
2304             *pnew_c = uint8_t(int(*pnew_c) + beststep);
2305     }
2306     return fMinErr;
2307 }
2308 
2309 // perturb the endpoints at least -3 to 3.
2310 // always ensure endpoint ordering is preserved (no need to overlap the scan)
2311 _Use_decl_annotations_
Exhaustive(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,size_t ch,float & fOrgErr,LDREndPntPair & optEndPt) const2312 void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2313                           float& fOrgErr, LDREndPntPair& optEndPt) const
2314 {
2315     assert( pEP );
2316     const uint8_t uPrec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2317     LDREndPntPair tmpEndPt;
2318     if(fOrgErr == 0)
2319         return;
2320 
2321     int delta = 5;
2322 
2323     // ok figure out the range of A and B
2324     tmpEndPt = optEndPt;
2325     int alow = std::max<int>(0, int(optEndPt.A[ch]) - delta);
2326     int ahigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.A[ch]) + delta);
2327     int blow = std::max<int>(0, int(optEndPt.B[ch]) - delta);
2328     int bhigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.B[ch]) + delta);
2329     int amin = 0;
2330     int bmin = 0;
2331 
2332     float fBestErr = fOrgErr;
2333     if(optEndPt.A[ch] <= optEndPt.B[ch])
2334     {
2335         // keep a <= b
2336         for(int a = alow; a <= ahigh; ++a)
2337         {
2338             for(int b = std::max<int>(a, blow); b < bhigh; ++b)
2339             {
2340                 tmpEndPt.A[ch] = (uint8_t) a;
2341                 tmpEndPt.B[ch] = (uint8_t) b;
2342 
2343                 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2344                 if(fErr < fBestErr)
2345                 {
2346                     amin = a;
2347                     bmin = b;
2348                     fBestErr = fErr;
2349                 }
2350             }
2351         }
2352     }
2353     else
2354     {
2355         // keep b <= a
2356         for(int b = blow; b < bhigh; ++b)
2357         {
2358             for(int a = std::max<int>(b, alow); a <= ahigh; ++a)
2359             {
2360                 tmpEndPt.A[ch] = (uint8_t) a;
2361                 tmpEndPt.B[ch] = (uint8_t) b;
2362 
2363                 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2364                 if(fErr < fBestErr)
2365                 {
2366                     amin = a;
2367                     bmin = b;
2368                     fBestErr = fErr;
2369                 }
2370             }
2371         }
2372     }
2373 
2374     if(fBestErr < fOrgErr)
2375     {
2376         optEndPt.A[ch] = (uint8_t) amin;
2377         optEndPt.B[ch] = (uint8_t) bmin;
2378         fOrgErr = fBestErr;
2379     }
2380 }
2381 
2382 _Use_decl_annotations_
OptimizeOne(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,float fOrgErr,const LDREndPntPair & org,LDREndPntPair & opt) const2383 void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode,
2384                            float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const
2385 {
2386     assert( pEP );
2387 
2388     float fOptErr = fOrgErr;
2389     opt = org;
2390 
2391     LDREndPntPair new_a, new_b;
2392     LDREndPntPair newEndPts;
2393     uint8_t do_b;
2394 
2395     // now optimize each channel separately
2396     for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ++ch)
2397     {
2398         if(ms_aInfo[pEP->uMode].RGBAPrecWithP[ch] == 0)
2399             continue;
2400 
2401         // figure out which endpoint when perturbed gives the most improvement and start there
2402         // if we just alternate, we can easily end up in a local minima
2403         float fErr0 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_a, fOptErr, 0);	// perturb endpt A
2404         float fErr1 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_b, fOptErr, 1);	// perturb endpt B
2405 
2406         uint8_t& copt_a = opt.A[ch];
2407         uint8_t& copt_b = opt.B[ch];
2408         uint8_t& cnew_a = new_a.A[ch];
2409         uint8_t& cnew_b = new_a.B[ch];
2410 
2411         if(fErr0 < fErr1)
2412         {
2413             if(fErr0 >= fOptErr)
2414                 continue;
2415             copt_a = cnew_a;
2416             fOptErr = fErr0;
2417             do_b = 1;		// do B next
2418         }
2419         else
2420         {
2421             if(fErr1 >= fOptErr)
2422                 continue;
2423             copt_b = cnew_b;
2424             fOptErr = fErr1;
2425             do_b = 0;		// do A next
2426         }
2427 
2428         // now alternate endpoints and keep trying until there is no improvement
2429         for( ; ; )
2430         {
2431             float fErr = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, newEndPts, fOptErr, do_b);
2432             if(fErr >= fOptErr)
2433                 break;
2434             if(do_b == 0)
2435                 copt_a = cnew_a;
2436             else
2437                 copt_b = cnew_b;
2438             fOptErr = fErr;
2439             do_b = 1 - do_b;	// now move the other endpoint
2440         }
2441     }
2442 
2443     // finally, do a small exhaustive search around what we think is the global minima to be sure
2444     for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2445         Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt);
2446 }
2447 
2448 _Use_decl_annotations_
OptimizeEndPoints(const EncodeParams * pEP,size_t uShape,size_t uIndexMode,const float afOrgErr[],const LDREndPntPair aOrgEndPts[],LDREndPntPair aOptEndPts[]) const2449 void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[],
2450                                  const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const
2451 {
2452     assert( pEP );
2453     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2454     assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2455     _Analysis_assume_( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2456 
2457     LDRColorA aPixels[NUM_PIXELS_PER_BLOCK];
2458 
2459     for(size_t p = 0; p <= uPartitions; ++p)
2460     {
2461         // collect the pixels in the region
2462         size_t np = 0;
2463         for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2464             if(g_aPartitionTable[uPartitions][uShape][i] == p)
2465                 aPixels[np++] = pEP->aLDRPixels[i];
2466 
2467         OptimizeOne(pEP, aPixels, np, uIndexMode, afOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
2468     }
2469 }
2470 
2471 _Use_decl_annotations_
AssignIndices(const EncodeParams * pEP,size_t uShape,size_t uIndexMode,LDREndPntPair endPts[],size_t aIndices[],size_t aIndices2[],float afTotErr[]) const2472 void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[],
2473                              float afTotErr[]) const
2474 {
2475     assert( pEP );
2476     assert( uShape < BC7_MAX_SHAPES );
2477     _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2478 
2479     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2480     assert( uPartitions < BC7_MAX_REGIONS );
2481     _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2482 
2483     const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2484     const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2485     const uint8_t uNumIndices = 1 << uIndexPrec;
2486     const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2487 
2488     assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2489     _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2490 
2491     const uint8_t uHighestIndexBit = uNumIndices >> 1;
2492     const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1;
2493     LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2494 
2495     // build list of possibles
2496     for(size_t p = 0; p <= uPartitions; p++)
2497     {
2498         GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]);
2499         afTotErr[p] = 0;
2500     }
2501 
2502     for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2503     {
2504         uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2505         assert( uRegion < BC7_MAX_REGIONS );
2506         _Analysis_assume_( uRegion < BC7_MAX_REGIONS );
2507         afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i]));
2508     }
2509 
2510     // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
2511     if(uIndexPrec2 == 0)
2512     {
2513         for(register size_t p = 0; p <= uPartitions; p++)
2514         {
2515             if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2516             {
2517                 std::swap(endPts[p].A, endPts[p].B);
2518                 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2519                     if(g_aPartitionTable[uPartitions][uShape][i] == p)
2520                         aIndices[i] = uNumIndices - 1 - aIndices[i];
2521             }
2522             assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2523         }
2524     }
2525     else
2526     {
2527         for(register size_t p = 0; p <= uPartitions; p++)
2528         {
2529             if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2530             {
2531                 std::swap(endPts[p].A.r, endPts[p].B.r);
2532                 std::swap(endPts[p].A.g, endPts[p].B.g);
2533                 std::swap(endPts[p].A.b, endPts[p].B.b);
2534                 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2535                     if(g_aPartitionTable[uPartitions][uShape][i] == p)
2536                         aIndices[i] = uNumIndices - 1 - aIndices[i];
2537             }
2538             assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2539 
2540             if(aIndices2[0] & uHighestIndexBit2)
2541             {
2542                 std::swap(endPts[p].A.a, endPts[p].B.a);
2543                 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2544                     aIndices2[i] = uNumIndices2 - 1 - aIndices2[i];
2545             }
2546             assert((aIndices2[0] & uHighestIndexBit2) == 0);
2547         }
2548     }
2549 }
2550 
2551 _Use_decl_annotations_
EmitBlock(const EncodeParams * pEP,size_t uShape,size_t uRotation,size_t uIndexMode,const LDREndPntPair aEndPts[],const size_t aIndex[],const size_t aIndex2[])2552 void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[])
2553 {
2554     assert( pEP );
2555     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2556     assert( uPartitions < BC7_MAX_REGIONS );
2557     _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2558 
2559     const size_t uPBits = ms_aInfo[pEP->uMode].uPBits;
2560     const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
2561     const size_t uIndexPrec2 = ms_aInfo[pEP->uMode].uIndexPrec2;
2562     const LDRColorA RGBAPrec = ms_aInfo[pEP->uMode].RGBAPrec;
2563     const LDRColorA RGBAPrecWithP = ms_aInfo[pEP->uMode].RGBAPrecWithP;
2564     register size_t i;
2565     size_t uStartBit = 0;
2566     SetBits(uStartBit, pEP->uMode, 0);
2567     SetBits(uStartBit, 1, 1);
2568     SetBits(uStartBit, ms_aInfo[pEP->uMode].uRotationBits, static_cast<uint8_t>( uRotation ));
2569     SetBits(uStartBit, ms_aInfo[pEP->uMode].uIndexModeBits, static_cast<uint8_t>( uIndexMode ));
2570     SetBits(uStartBit, ms_aInfo[pEP->uMode].uPartitionBits, static_cast<uint8_t>( uShape ));
2571 
2572     if(uPBits)
2573     {
2574         const size_t uNumEP = size_t(1 + uPartitions) << 1;
2575         uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2576         uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2577         for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2578         {
2579             uint8_t ep = 0;
2580             for(i = 0; i <= uPartitions; i++)
2581             {
2582                 if(RGBAPrec[ch] == RGBAPrecWithP[ch])
2583                 {
2584                     SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch]);
2585                     SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch]);
2586                 }
2587                 else
2588                 {
2589                     SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] >> 1);
2590                     SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1);
2591                     size_t idx = ep++ * uPBits / uNumEP;
2592                     assert(idx < (BC7_MAX_REGIONS << 1));
2593                     _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2594                     aPVote[idx] += aEndPts[i].A[ch] & 0x01;
2595                     aCount[idx]++;
2596                     idx = ep++ * uPBits / uNumEP;
2597                     assert(idx < (BC7_MAX_REGIONS << 1));
2598                     _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2599                     aPVote[idx] += aEndPts[i].B[ch] & 0x01;
2600                     aCount[idx]++;
2601                 }
2602             }
2603         }
2604 
2605         for(i = 0; i < uPBits; i++)
2606         {
2607             SetBits(uStartBit, 1, aPVote[i] > (aCount[i] >> 1) ? 1 : 0);
2608         }
2609     }
2610     else
2611     {
2612         for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2613         {
2614             for(i = 0; i <= uPartitions; i++)
2615             {
2616                 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] );
2617                 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] );
2618             }
2619         }
2620     }
2621 
2622     const size_t* aI1 = uIndexMode ? aIndex2 : aIndex;
2623     const size_t* aI2 = uIndexMode ? aIndex : aIndex2;
2624     for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2625     {
2626         if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, uShape, i))
2627             SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aI1[i] ));
2628         else
2629             SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aI1[i] ));
2630     }
2631     if(uIndexPrec2)
2632         for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2633             SetBits(uStartBit, i ? uIndexPrec2 : uIndexPrec2 - 1, static_cast<uint8_t>( aI2[i] ));
2634 
2635     assert(uStartBit == 128);
2636 }
2637 
2638 _Use_decl_annotations_
Refine(const EncodeParams * pEP,size_t uShape,size_t uRotation,size_t uIndexMode)2639 float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode)
2640 {
2641     assert( pEP );
2642     assert( uShape < BC7_MAX_SHAPES );
2643     _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2644     const LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2645 
2646     const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2647     assert( uPartitions < BC7_MAX_REGIONS );
2648     _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2649 
2650     LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS];
2651     LDREndPntPair aOptEndPts[BC7_MAX_REGIONS];
2652     size_t aOrgIdx[NUM_PIXELS_PER_BLOCK];
2653     size_t aOrgIdx2[NUM_PIXELS_PER_BLOCK];
2654     size_t aOptIdx[NUM_PIXELS_PER_BLOCK];
2655     size_t aOptIdx2[NUM_PIXELS_PER_BLOCK];
2656     float aOrgErr[BC7_MAX_REGIONS];
2657     float aOptErr[BC7_MAX_REGIONS];
2658 
2659     for(register size_t p = 0; p <= uPartitions; p++)
2660     {
2661         aOrgEndPts[p].A = Quantize(aEndPts[p].A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2662         aOrgEndPts[p].B = Quantize(aEndPts[p].B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2663     }
2664 
2665     AssignIndices(pEP, uShape, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2, aOrgErr);
2666     OptimizeEndPoints(pEP, uShape, uIndexMode, aOrgErr, aOrgEndPts, aOptEndPts);
2667     AssignIndices(pEP, uShape, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2, aOptErr);
2668 
2669     float fOrgTotErr = 0, fOptTotErr = 0;
2670     for(register size_t p = 0; p <= uPartitions; p++)
2671     {
2672         fOrgTotErr += aOrgErr[p];
2673         fOptTotErr += aOptErr[p];
2674     }
2675     if(fOptTotErr < fOrgTotErr)
2676     {
2677         EmitBlock(pEP, uShape, uRotation, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2);
2678         return fOptTotErr;
2679     }
2680     else
2681     {
2682         EmitBlock(pEP, uShape, uRotation, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2);
2683         return fOrgTotErr;
2684     }
2685 }
2686 
2687 _Use_decl_annotations_
MapColors(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,const LDREndPntPair & endPts,float fMinErr) const2688 float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const
2689 {
2690     assert( pEP );
2691     const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2692     const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2693     LDRColorA aPalette[BC7_MAX_INDICES];
2694     float fTotalErr = 0;
2695 
2696     GeneratePaletteQuantized(pEP, uIndexMode, endPts, aPalette);
2697     for(register size_t i = 0; i < np; ++i)
2698     {
2699         fTotalErr += ComputeError(aColors[i], aPalette, uIndexPrec, uIndexPrec2);
2700         if(fTotalErr > fMinErr)   // check for early exit
2701         {
2702             fTotalErr = FLT_MAX;
2703             break;
2704         }
2705     }
2706 
2707     return fTotalErr;
2708 }
2709 
2710 _Use_decl_annotations_
RoughMSE(EncodeParams * pEP,size_t uShape,size_t uIndexMode)2711 float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode)
2712 {
2713     assert( pEP );
2714     assert( uShape < BC7_MAX_SHAPES );
2715     _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2716     LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2717 
2718     const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2719     assert( uPartitions < BC7_MAX_REGIONS );
2720     _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2721 
2722     const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2723     const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2724     const uint8_t uNumIndices = 1 << uIndexPrec;
2725     const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2726     size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
2727     LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2728 
2729     for(size_t p = 0; p <= uPartitions; p++)
2730     {
2731         size_t np = 0;
2732         for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2733         {
2734             if (g_aPartitionTable[uPartitions][uShape][i] == p)
2735             {
2736                 auPixIdx[np++] = i;
2737             }
2738         }
2739 
2740         // handle simple cases
2741         assert(np > 0);
2742         if(np == 1)
2743         {
2744             aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2745             aEndPts[p].B = pEP->aLDRPixels[auPixIdx[0]];
2746             continue;
2747         }
2748         else if(np == 2)
2749         {
2750             aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2751             aEndPts[p].B = pEP->aLDRPixels[auPixIdx[1]];
2752             continue;
2753         }
2754 
2755         if(uIndexPrec2 == 0)
2756         {
2757             HDRColorA epA, epB;
2758             OptimizeRGBA(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2759             epA.Clamp(0.0f, 1.0f);
2760             epB.Clamp(0.0f, 1.0f);
2761             epA *= 255.0f;
2762             epB *= 255.0f;
2763             aEndPts[p].A = epA.ToLDRColorA();
2764             aEndPts[p].B = epB.ToLDRColorA();
2765         }
2766         else
2767         {
2768             uint8_t uMinAlpha = 255, uMaxAlpha = 0;
2769             for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2770             {
2771                 uMinAlpha = std::min<uint8_t>(uMinAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2772                 uMaxAlpha = std::max<uint8_t>(uMaxAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2773             }
2774 
2775             HDRColorA epA, epB;
2776             OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2777             epA.Clamp(0.0f, 1.0f);
2778             epB.Clamp(0.0f, 1.0f);
2779             epA *= 255.0f;
2780             epB *= 255.0f;
2781             aEndPts[p].A = epA.ToLDRColorA();
2782             aEndPts[p].B = epB.ToLDRColorA();
2783             aEndPts[p].A.a = uMinAlpha;
2784             aEndPts[p].B.a = uMaxAlpha;
2785         }
2786     }
2787 
2788     if(uIndexPrec2 == 0)
2789     {
2790         for(size_t p = 0; p <= uPartitions; p++)
2791             for(register size_t i = 0; i < uNumIndices; i++)
2792                 LDRColorA::Interpolate(aEndPts[p].A, aEndPts[p].B, i, i, uIndexPrec, uIndexPrec, aPalette[p][i]);
2793     }
2794     else
2795     {
2796         for(size_t p = 0; p <= uPartitions; p++)
2797         {
2798             for(register size_t i = 0; i < uNumIndices; i++)
2799                 LDRColorA::InterpolateRGB(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec, aPalette[p][i]);
2800             for(register size_t i = 0; i < uNumIndices2; i++)
2801                 LDRColorA::InterpolateA(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec2, aPalette[p][i]);
2802         }
2803     }
2804 
2805     float fTotalErr = 0;
2806     for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2807     {
2808         uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2809         fTotalErr += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2);
2810     }
2811 
2812     return fTotalErr;
2813 }
2814 
2815 //=====================================================================================
2816 // Entry points
2817 //=====================================================================================
2818 
2819 //-------------------------------------------------------------------------------------
2820 // BC6H Compression
2821 //-------------------------------------------------------------------------------------
2822 _Use_decl_annotations_
D3DXDecodeBC6HU(XMVECTOR * pColor,const uint8_t * pBC)2823 void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC)
2824 {
2825     assert( pColor && pBC );
2826     static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2827     reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(false, reinterpret_cast<HDRColorA*>(pColor));
2828 }
2829 
2830 _Use_decl_annotations_
D3DXDecodeBC6HS(XMVECTOR * pColor,const uint8_t * pBC)2831 void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC)
2832 {
2833     assert( pColor && pBC );
2834     static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2835     reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor));
2836 }
2837 
2838 _Use_decl_annotations_
D3DXEncodeBC6HU(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2839 void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2840 {
2841     UNREFERENCED_PARAMETER(flags);
2842     assert( pBC && pColor );
2843     static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2844     reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(false, reinterpret_cast<const HDRColorA*>(pColor));
2845 }
2846 
2847 _Use_decl_annotations_
D3DXEncodeBC6HS(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2848 void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2849 {
2850     UNREFERENCED_PARAMETER(flags);
2851     assert( pBC && pColor );
2852     static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2853     reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(true, reinterpret_cast<const HDRColorA*>(pColor));
2854 }
2855 
2856 
2857 //-------------------------------------------------------------------------------------
2858 // BC7 Compression
2859 //-------------------------------------------------------------------------------------
2860 _Use_decl_annotations_
D3DXDecodeBC7(XMVECTOR * pColor,const uint8_t * pBC)2861 void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC)
2862 {
2863     assert( pColor && pBC );
2864     static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2865     reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(reinterpret_cast<HDRColorA*>(pColor));
2866 }
2867 
2868 _Use_decl_annotations_
D3DXEncodeBC7(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2869 void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2870 {
2871     assert( pBC && pColor );
2872     static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2873     reinterpret_cast< D3DX_BC7* >( pBC )->Encode( !(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor));
2874 }
2875 
2876 } // namespace
2877