1 //-------------------------------------------------------------------------------------
2 // BC6HBC7.cpp
3 //
4 // Block-compression (BC) functionality for BC6H and BC7 (DirectX 11 texture compression)
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15
16 #include "DirectXTexP.h"
17
18 #include "BC.h"
19
20 #ifndef USE_XNAMATH
21 using namespace DirectX::PackedVector;
22 #endif
23
24 namespace DirectX
25 {
26
27 //-------------------------------------------------------------------------------------
28 // Constants
29 //-------------------------------------------------------------------------------------
30
// Small squared tolerance: (0.25 / 64)^2, i.e. exactly 2^-16.
static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);

// Complementary 3-entry ramps: pC3[i] + pD3[i] == 1 for every i.
// NOTE(review): presumably blend weights for the two endpoints of a 3-step palette - confirm at the use sites.
static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f };
static const float pD3[] = { 0.0f/2.0f, 1.0f/2.0f, 2.0f/2.0f };

// Complementary 4-entry ramps: pC4[i] + pD4[i] sums to 1 (up to rounding of the thirds).
static const float pC4[] = { 3.0f/3.0f, 2.0f/3.0f, 1.0f/3.0f, 0.0f/3.0f };
static const float pD4[] = { 0.0f/3.0f, 1.0f/3.0f, 2.0f/3.0f, 3.0f/3.0f };

// Integer weight ramps running from 0 to 64 with 4, 8 and 16 entries.
// NOTE(review): the names suggest these are indexed by 2-, 3- and 4-bit palette indices - confirm against BC.h.
const int g_aWeights2[] = {0, 21, 43, 64};
const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
40
// Subset assignment of every pixel in a 4x4 block.
// Indexed as [Partition][Shape][Pixel]:
//   Partition - 0, 1 or 2, selecting the 1-, 2- or 3-subset partition set
//   Shape     - one of 64 shapes within that set
//   Pixel     - index (0..15) into the 4x4 block
// The stored value is the subset (0, 1 or 2) the pixel belongs to.
// Pixel 0 belongs to subset 0 in every shape.
static const uint8_t g_aPartitionTable[3][64][16] =
{
    {   // 1 Region case has no subsets (all 0) - the empty initialiser zero-fills all 64 shapes
    },

    {   // BC6H/BC7 Partition Set for 2 Subsets
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 0
        { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // Shape 1
        { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // Shape 2
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 4
        { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 5
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 7
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 8
        { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 9
        { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 10
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // Shape 11
        { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 12
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 13
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 14
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 15
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // Shape 16
        { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 17
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 18
        { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 19
        { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 20
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 21
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 22
        { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // Shape 23
        { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 24
        { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 25
        { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // Shape 26
        { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // Shape 27
        { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // Shape 28
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 29
        { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 30
        { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 31

        // BC7 Partition Set for 2 Subsets (second-half)
        { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // Shape 32
        { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 33
        { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // Shape 34
        { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // Shape 35
        { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // Shape 36
        { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // Shape 37
        { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // Shape 38
        { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // Shape 39
        { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 40
        { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // Shape 41
        { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // Shape 42
        { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // Shape 43
        { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // Shape 44
        { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // Shape 45
        { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // Shape 46
        { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // Shape 47
        { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // Shape 48
        { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // Shape 49
        { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // Shape 50
        { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // Shape 51
        { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // Shape 52
        { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 53
        { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 54
        { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // Shape 55
        { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 56
        { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // Shape 57
        { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // Shape 58
        { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // Shape 59
        { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 60
        { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 61
        { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // Shape 62
        { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 }  // Shape 63
    },

    {   // BC7 Partition Set for 3 Subsets
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // Shape 0
        { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 1
        { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 2
        { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 4
        { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // Shape 5
        { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
        { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 7
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 8
        { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 9
        { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 10
        { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // Shape 11
        { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // Shape 12
        { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 13
        { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 14
        { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // Shape 15
        { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // Shape 16
        { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // Shape 17
        { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 18
        { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // Shape 19
        { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // Shape 20
        { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // Shape 21
        { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 22
        { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // Shape 23
        { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // Shape 24
        { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // Shape 25
        { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // Shape 26
        { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // Shape 27
        { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // Shape 28
        { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // Shape 29
        { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // Shape 30
        { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 31
        { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 32
        { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // Shape 33
        { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // Shape 34
        { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // Shape 35
        { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // Shape 36
        { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // Shape 37
        { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // Shape 38
        { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // Shape 39
        { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // Shape 40
        { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 41
        { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 42
        { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // Shape 43
        { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // Shape 44
        { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // Shape 45
        { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // Shape 46
        { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 47
        { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // Shape 48
        { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // Shape 49
        { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // Shape 50
        { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 51
        { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // Shape 52
        { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // Shape 53
        { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // Shape 54
        { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 55
        { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 56
        { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // Shape 57
        { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // Shape 58
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // Shape 59
        { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // Shape 60
        { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // Shape 61
        { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 62
        { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 }  // Shape 63
    }
};
247
// Fix-up pixel positions, indexed as [Partition][Shape][Fixup].
// Each entry holds up to three pixel offsets (0..15) within the 4x4 block; entry 0 is always
// pixel 0, and slots that the partition set does not use are left at 0.
// IsFixUpOffset() compares a pixel offset against entries 0..Partition of the selected row.
static const uint8_t g_aFixUp[3][64][3] =
{
    {   // No fix-ups for 1st subset for BC6H or BC7 - the empty initialiser zero-fills all 64 shapes
    },

    {   // BC6H/BC7 Partition Set Fixups for 2 Subsets
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 8, 0}, { 0,15, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0, 8, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},

        // BC7 Partition Set Fixups for 2 Subsets (second-half)
        { 0,15, 0}, { 0,15, 0}, { 0, 6, 0}, { 0, 8, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0,15, 0}, { 0,15, 0},
        { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0, 2, 0}, { 0,15, 0}, { 0,15, 0}, { 0, 6, 0},
        { 0, 6, 0}, { 0, 2, 0}, { 0, 6, 0}, { 0, 8, 0},
        { 0,15, 0}, { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0},
        { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
        { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0}, { 0,15, 0}
    },

    {   // BC7 Partition Set Fixups for 3 Subsets
        { 0, 3,15}, { 0, 3, 8}, { 0,15, 8}, { 0,15, 3},
        { 0, 8,15}, { 0, 3,15}, { 0,15, 3}, { 0,15, 8},
        { 0, 8,15}, { 0, 8,15}, { 0, 6,15}, { 0, 6,15},
        { 0, 6,15}, { 0, 5,15}, { 0, 3,15}, { 0, 3, 8},
        { 0, 3,15}, { 0, 3, 8}, { 0, 8,15}, { 0,15, 3},
        { 0, 3,15}, { 0, 3, 8}, { 0, 6,15}, { 0,10, 8},
        { 0, 5, 3}, { 0, 8,15}, { 0, 8, 6}, { 0, 6,10},
        { 0, 8,15}, { 0, 5,15}, { 0,15,10}, { 0,15, 8},
        { 0, 8,15}, { 0,15, 3}, { 0, 3,15}, { 0, 5,10},
        { 0, 6,10}, { 0,10, 8}, { 0, 8, 9}, { 0,15,10},
        { 0,15, 6}, { 0, 3,15}, { 0,15, 8}, { 0, 5,15},
        { 0,15, 3}, { 0,15, 6}, { 0,15, 6}, { 0,15, 8},
        { 0, 3,15}, { 0,15, 3}, { 0, 5,15}, { 0, 5,15},
        { 0, 5,15}, { 0, 8,15}, { 0, 5,15}, { 0,10,15},
        { 0, 5,15}, { 0,10,15}, { 0, 8,15}, { 0,13,15},
        { 0,15, 3}, { 0,12,15}, { 0, 3,15}, { 0, 3, 8}
    }
};
310
311 // BC6H Compression
312 const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
313 {
314 { // Mode 1 (0x00) - 10 5 5 5
315 { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
316 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
317 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
318 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
319 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
320 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
321 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
322 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
323 { D, 3}, { D, 4},
324 },
325
326 { // Mode 2 (0x01) - 7 6 6 6
327 { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
328 {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
329 {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
330 {BW, 5}, {BW, 6}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
331 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
332 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
333 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
334 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
335 { D, 3}, { D, 4},
336 },
337
338 { // Mode 3 (0x02) - 11 5 4 4
339 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
340 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
341 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
342 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
343 {RW,10}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
344 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
345 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
346 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
347 { D, 3}, { D, 4},
348 },
349
350 { // Mode 4 (0x06) - 11 4 5 4
351 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
352 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
353 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
354 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
355 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
356 {GW,10}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
357 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 0},
358 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {GY, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
359 { D, 3}, { D, 4},
360 },
361
362 { // Mode 5 (0x0a) - 11 4 4 5
363 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
364 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
365 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
366 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
367 {BY, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
368 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
369 {BW,10}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 1},
370 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {BZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
371 { D, 3}, { D, 4},
372 },
373
374 { // Mode 6 (0x0e) - 9 5 5 5
375 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
376 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
377 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
378 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
379 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
380 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
381 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
382 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
383 { D, 3}, { D, 4},
384 },
385
386 { // Mode 7 (0x12) - 8 6 5 5
387 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
388 {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
389 {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
390 {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 3}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
391 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
392 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
393 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
394 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
395 { D, 3}, { D, 4},
396 },
397
398 { // Mode 8 (0x16) - 8 5 6 5
399 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
400 {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
401 {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
402 {BW, 5}, {BW, 6}, {BW, 7}, {GZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
403 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
404 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
405 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
406 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
407 { D, 3}, { D, 4},
408 },
409
410 { // Mode 9 (0x1a) - 8 5 5 6
411 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
412 {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
413 {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
414 {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
415 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
416 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
417 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
418 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
419 { D, 3}, { D, 4},
420 },
421
422 { // Mode 10 (0x1e) - 6 6 6 6
423 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
424 {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
425 {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
426 {BW, 5}, {GZ, 5}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
427 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
428 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
429 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
430 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
431 { D, 3}, { D, 4},
432 },
433
434 { // Mode 11 (0x03) - 10 10
435 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
436 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
437 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
438 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
439 {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RX, 9}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
440 {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GX, 9}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
441 {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BX, 9}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
442 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
443 {NA, 0}, {NA, 0},
444 },
445
446 { // Mode 12 (0x07) - 11 9
447 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
448 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
449 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
450 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
451 {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
452 {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
453 {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
454 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
455 {NA, 0}, {NA, 0},
456 },
457
458 { // Mode 13 (0x0b) - 12 8
459 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
460 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
461 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
462 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
463 {RX, 5}, {RX, 6}, {RX, 7}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
464 {GX, 5}, {GX, 6}, {GX, 7}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
465 {BX, 5}, {BX, 6}, {BX, 7}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
466 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
467 {NA, 0}, {NA, 0},
468 },
469
470 { // Mode 14 (0x0f) - 16 4
471 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
472 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
473 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
474 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,15},
475 {RW,14}, {RW,13}, {RW,12}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,15},
476 {GW,14}, {GW,13}, {GW,12}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,15},
477 {BW,14}, {BW,13}, {BW,12}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
478 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
479 {NA, 0}, {NA, 0},
480 },
481 };
482
483 // Mode, Partitions, Transformed, IndexPrec, RGBAPrec
484 const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
485 {
486 {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 1
487 {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 2
488 {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 3
489 {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 4
490 {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 5
491 {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 6
492 {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 7
493 {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 8
494 {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 9
495 {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 10
496 {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11
497 {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12
498 {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13
499 {0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 14
500 };
501
502 const int D3DX_BC6H::ms_aModeToInfo[] =
503 {
504 0, // Mode 1 - 0x00
505 1, // Mode 2 - 0x01
506 2, // Mode 3 - 0x02
507 10, // Mode 11 - 0x03
508 -1, // Invalid - 0x04
509 -1, // Invalid - 0x05
510 3, // Mode 4 - 0x06
511 11, // Mode 12 - 0x07
512 -1, // Invalid - 0x08
513 -1, // Invalid - 0x09
514 4, // Mode 5 - 0x0a
515 12, // Mode 13 - 0x0b
516 -1, // Invalid - 0x0c
517 -1, // Invalid - 0x0d
518 5, // Mode 6 - 0x0e
519 13, // Mode 14 - 0x0f
520 -1, // Invalid - 0x10
521 -1, // Invalid - 0x11
522 6, // Mode 7 - 0x12
523 -1, // Reserved - 0x13
524 -1, // Invalid - 0x14
525 -1, // Invalid - 0x15
526 7, // Mode 8 - 0x16
527 -1, // Reserved - 0x17
528 -1, // Invalid - 0x18
529 -1, // Invalid - 0x19
530 8, // Mode 9 - 0x1a
531 -1, // Reserved - 0x1b
532 -1, // Invalid - 0x1c
533 -1, // Invalid - 0x1d
534 9, // Mode 10 - 0x1e
535 -1, // Resreved - 0x1f
536 };
537
// BC7 compression: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP
// One row per BC7 mode (0-7); the comment below each row describes that mode.
// Judging by those per-mode comments, uPartitions holds the subset count minus one
// (e.g. 2 for the "3 Subsets" modes, 0 for the "1 Subset" modes).
const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] =
{
    {2, 4, 6, 0, 0, 3, 0, LDRColorA(4,4,4,0), LDRColorA(5,5,5,0)},
        // Mode 0: Color only, 3 Subsets, RGBP 4441 (unique P-bit), 3-bit indices, 16 partitions
    {1, 6, 2, 0, 0, 3, 0, LDRColorA(6,6,6,0), LDRColorA(7,7,7,0)},
        // Mode 1: Color only, 2 Subsets, RGBP 6661 (shared P-bit), 3-bit indices, 64 partitions
    {2, 6, 0, 0, 0, 2, 0, LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)},
        // Mode 2: Color only, 3 Subsets, RGB 555, 2-bit indices, 64 partitions
    {1, 6, 4, 0, 0, 2, 0, LDRColorA(7,7,7,0), LDRColorA(8,8,8,0)},
        // Mode 3: Color only, 2 Subsets, RGBP 7771 (unique P-bit), 2-bit indices, 64 partitions
    {0, 0, 0, 2, 1, 2, 3, LDRColorA(5,5,5,6), LDRColorA(5,5,5,6)},
        // Mode 4: Color w/ Separate Alpha, 1 Subset, RGB 555, A6, 16x2/16x3-bit indices, 2-bit rotation, 1-bit index selector
    {0, 0, 0, 2, 0, 2, 2, LDRColorA(7,7,7,8), LDRColorA(7,7,7,8)},
        // Mode 5: Color w/ Separate Alpha, 1 Subset, RGB 777, A8, 16x2/16x2-bit indices, 2-bit rotation
    {0, 0, 2, 0, 0, 4, 0, LDRColorA(7,7,7,7), LDRColorA(8,8,8,8)},
        // Mode 6: Color+Alpha, 1 Subset, RGBAP 77771 (unique P-bit), 16x4-bit indices
    {1, 6, 4, 0, 0, 2, 0, LDRColorA(5,5,5,5), LDRColorA(6,6,6,6)}
        // Mode 7: Color+Alpha, 2 Subsets, RGBAP 55551 (unique P-bit), 2-bit indices, 64 partitions
};
558
559
560 //-------------------------------------------------------------------------------------
561 // Helper functions
562 //-------------------------------------------------------------------------------------
563 inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_(0,63) size_t uShape, _In_range_(0,15) size_t uOffset)
564 {
565 assert(uPartitions < 3 && uShape < 64 && uOffset < 16);
566 _Analysis_assume_(uPartitions < 3 && uShape < 64 && uOffset < 16);
567 for(size_t p = 0; p <= uPartitions; p++)
568 {
569 if(uOffset == g_aFixUp[uPartitions][uShape][p])
570 {
571 return true;
572 }
573 }
574 return false;
575 }
576
TransformForward(_Inout_updates_all_ (BC6H_MAX_REGIONS)INTEndPntPair aEndPts[])577 inline static void TransformForward(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[])
578 {
579 aEndPts[0].B -= aEndPts[0].A;
580 aEndPts[1].A -= aEndPts[0].A;
581 aEndPts[1].B -= aEndPts[0].A;
582 }
583
TransformInverse(_Inout_updates_all_ (BC6H_MAX_REGIONS)INTEndPntPair aEndPts[],_In_ const LDRColorA & Prec,_In_ bool bSigned)584 inline static void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned)
585 {
586 INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
587 aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask;
588 aEndPts[1].A += aEndPts[0].A; aEndPts[1].A &= WrapMask;
589 aEndPts[1].B += aEndPts[0].A; aEndPts[1].B &= WrapMask;
590 if(bSigned)
591 {
592 aEndPts[0].B.SignExtend(Prec);
593 aEndPts[1].A.SignExtend(Prec);
594 aEndPts[1].B.SignExtend(Prec);
595 }
596 }
597
Norm(_In_ const INTColor & a,_In_ const INTColor & b)598 inline static float Norm(_In_ const INTColor& a, _In_ const INTColor& b)
599 {
600 float dr = float(a.r) - float(b.r);
601 float dg = float(a.g) - float(b.g);
602 float db = float(a.b) - float(b.b);
603 return dr * dr + dg * dg + db * db;
604 }
605
// Returns the number of bits needed to store n.
//   n == 0 : 0 bits, signed or not.
//   n  > 0 : position of the highest set bit, plus one extra (sign) bit when bIsSigned.
//   n  < 0 : two's-complement width including the sign bit; callers must pass
//            bIsSigned == true for negative values (asserted).
inline static int NBits(_In_ int n, _In_ bool bIsSigned)
{
    if(n == 0)
    {
        return 0;
    }

    int nb = 0;

    if(n < 0)
    {
        assert(bIsSigned);
        // Shift until only the sign (-1) remains, then account for the sign bit.
        while(n < -1)
        {
            n >>= 1;
            ++nb;
        }
        return nb + 1;
    }

    // Positive value: count magnitude bits.
    while(n != 0)
    {
        n >>= 1;
        ++nb;
    }
    return bIsSigned ? nb + 1 : nb;
}
626
627
628 //-------------------------------------------------------------------------------------
OptimizeRGB(_In_reads_ (NUM_PIXELS_PER_BLOCK)const HDRColorA * const pPoints,_Out_ HDRColorA * pX,_Out_ HDRColorA * pY,_In_ size_t cSteps,_In_ size_t cPixels,_In_reads_ (cPixels)const size_t * pIndex)629 static float OptimizeRGB(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
630 _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
631 _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
632 {
633 float fError = FLT_MAX;
634 const float *pC = (3 == cSteps) ? pC3 : pC4;
635 const float *pD = (3 == cSteps) ? pD3 : pD4;
636
637 // Find Min and Max points, as starting point
638 HDRColorA X(1.0f, 1.0f, 1.0f, 0.0f);
639 HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
640
641 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
642 {
643 if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
644 if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
645 if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
646 if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
647 if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
648 if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
649 }
650
651 // Diagonal axis
652 HDRColorA AB;
653 AB.r = Y.r - X.r;
654 AB.g = Y.g - X.g;
655 AB.b = Y.b - X.b;
656
657 float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
658
659 // Single color block.. no need to root-find
660 if(fAB < FLT_MIN)
661 {
662 pX->r = X.r; pX->g = X.g; pX->b = X.b;
663 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
664 return 0.0f;
665 }
666
667 // Try all four axis directions, to determine which diagonal best fits data
668 float fABInv = 1.0f / fAB;
669
670 HDRColorA Dir;
671 Dir.r = AB.r * fABInv;
672 Dir.g = AB.g * fABInv;
673 Dir.b = AB.b * fABInv;
674
675 HDRColorA Mid;
676 Mid.r = (X.r + Y.r) * 0.5f;
677 Mid.g = (X.g + Y.g) * 0.5f;
678 Mid.b = (X.b + Y.b) * 0.5f;
679
680 float fDir[4];
681 fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
682
683 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
684 {
685 HDRColorA Pt;
686 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
687 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
688 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
689
690 float f;
691 f = Pt.r + Pt.g + Pt.b; fDir[0] += f * f;
692 f = Pt.r + Pt.g - Pt.b; fDir[1] += f * f;
693 f = Pt.r - Pt.g + Pt.b; fDir[2] += f * f;
694 f = Pt.r - Pt.g - Pt.b; fDir[3] += f * f;
695 }
696
697 float fDirMax = fDir[0];
698 size_t iDirMax = 0;
699
700 for(size_t iDir = 1; iDir < 4; iDir++)
701 {
702 if(fDir[iDir] > fDirMax)
703 {
704 fDirMax = fDir[iDir];
705 iDirMax = iDir;
706 }
707 }
708
709 if(iDirMax & 2) std::swap( X.g, Y.g );
710 if(iDirMax & 1) std::swap( X.b, Y.b );
711
712 // Two color block.. no need to root-find
713 if(fAB < 1.0f / 4096.0f)
714 {
715 pX->r = X.r; pX->g = X.g; pX->b = X.b;
716 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
717 return 0.0f;
718 }
719
720 // Use Newton's Method to find local minima of sum-of-squares error.
721 float fSteps = (float) (cSteps - 1);
722
723 for(size_t iIteration = 0; iIteration < 8; iIteration++)
724 {
725 // Calculate new steps
726 HDRColorA pSteps[4] = {};
727
728 for(size_t iStep = 0; iStep < cSteps; iStep++)
729 {
730 pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
731 pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
732 pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
733 }
734
735 // Calculate color direction
736 Dir.r = Y.r - X.r;
737 Dir.g = Y.g - X.g;
738 Dir.b = Y.b - X.b;
739
740 float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
741
742 if(fLen < (1.0f / 4096.0f))
743 break;
744
745 float fScale = fSteps / fLen;
746
747 Dir.r *= fScale;
748 Dir.g *= fScale;
749 Dir.b *= fScale;
750
751 // Evaluate function, and derivatives
752 float d2X = 0.0f, d2Y = 0.0f;
753 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
754
755 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
756 {
757 float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
758 (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
759 (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
760
761 size_t iStep;
762 if(fDot <= 0.0f)
763 iStep = 0;
764 if(fDot >= fSteps)
765 iStep = cSteps - 1;
766 else
767 iStep = size_t(fDot + 0.5f);
768
769 HDRColorA Diff;
770 Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
771 Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
772 Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
773
774 float fC = pC[iStep] * (1.0f / 8.0f);
775 float fD = pD[iStep] * (1.0f / 8.0f);
776
777 d2X += fC * pC[iStep];
778 dX.r += fC * Diff.r;
779 dX.g += fC * Diff.g;
780 dX.b += fC * Diff.b;
781
782 d2Y += fD * pD[iStep];
783 dY.r += fD * Diff.r;
784 dY.g += fD * Diff.g;
785 dY.b += fD * Diff.b;
786 }
787
788 // Move endpoints
789 if(d2X > 0.0f)
790 {
791 float f = -1.0f / d2X;
792
793 X.r += dX.r * f;
794 X.g += dX.g * f;
795 X.b += dX.b * f;
796 }
797
798 if(d2Y > 0.0f)
799 {
800 float f = -1.0f / d2Y;
801
802 Y.r += dY.r * f;
803 Y.g += dY.g * f;
804 Y.b += dY.b * f;
805 }
806
807 if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
808 (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
809 {
810 break;
811 }
812 }
813
814 pX->r = X.r; pX->g = X.g; pX->b = X.b;
815 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
816 return fError;
817 }
818
819
820 //-------------------------------------------------------------------------------------
OptimizeRGBA(_In_reads_ (NUM_PIXELS_PER_BLOCK)const HDRColorA * const pPoints,_Out_ HDRColorA * pX,_Out_ HDRColorA * pY,_In_ size_t cSteps,_In_ size_t cPixels,_In_reads_ (cPixels)const size_t * pIndex)821 static float OptimizeRGBA(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
822 _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
823 _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
824 {
825 float fError = FLT_MAX;
826 const float *pC = (3 == cSteps) ? pC3 : pC4;
827 const float *pD = (3 == cSteps) ? pD3 : pD4;
828
829 // Find Min and Max points, as starting point
830 HDRColorA X(1.0f, 1.0f, 1.0f, 1.0f);
831 HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
832
833 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
834 {
835 if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
836 if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
837 if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
838 if(pPoints[pIndex[iPoint]].a < X.a) X.a = pPoints[pIndex[iPoint]].a;
839 if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
840 if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
841 if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
842 if(pPoints[pIndex[iPoint]].a > Y.a) Y.a = pPoints[pIndex[iPoint]].a;
843 }
844
845 // Diagonal axis
846 HDRColorA AB = Y - X;
847 float fAB = AB * AB;
848
849 // Single color block.. no need to root-find
850 if(fAB < FLT_MIN)
851 {
852 *pX = X;
853 *pY = Y;
854 return 0.0f;
855 }
856
857 // Try all four axis directions, to determine which diagonal best fits data
858 float fABInv = 1.0f / fAB;
859 HDRColorA Dir = AB * fABInv;
860 HDRColorA Mid = (X + Y) * 0.5f;
861
862 float fDir[8];
863 fDir[0] = fDir[1] = fDir[2] = fDir[3] = fDir[4] = fDir[5] = fDir[6] = fDir[7] = 0.0f;
864
865 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
866 {
867 HDRColorA Pt;
868 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
869 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
870 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
871 Pt.a = (pPoints[pIndex[iPoint]].a - Mid.a) * Dir.a;
872
873 float f;
874 f = Pt.r + Pt.g + Pt.b + Pt.a; fDir[0] += f * f;
875 f = Pt.r + Pt.g + Pt.b - Pt.a; fDir[1] += f * f;
876 f = Pt.r + Pt.g - Pt.b + Pt.a; fDir[2] += f * f;
877 f = Pt.r + Pt.g - Pt.b - Pt.a; fDir[3] += f * f;
878 f = Pt.r - Pt.g + Pt.b + Pt.a; fDir[4] += f * f;
879 f = Pt.r - Pt.g + Pt.b - Pt.a; fDir[5] += f * f;
880 f = Pt.r - Pt.g - Pt.b + Pt.a; fDir[6] += f * f;
881 f = Pt.r - Pt.g - Pt.b - Pt.a; fDir[7] += f * f;
882 }
883
884 float fDirMax = fDir[0];
885 size_t iDirMax = 0;
886
887 for(size_t iDir = 1; iDir < 8; iDir++)
888 {
889 if(fDir[iDir] > fDirMax)
890 {
891 fDirMax = fDir[iDir];
892 iDirMax = iDir;
893 }
894 }
895
896 if(iDirMax & 4) std::swap(X.g, Y.g);
897 if(iDirMax & 2) std::swap(X.b, Y.b);
898 if(iDirMax & 1) std::swap(X.a, Y.a);
899
900 // Two color block.. no need to root-find
901 if(fAB < 1.0f / 4096.0f)
902 {
903 *pX = X;
904 *pY = Y;
905 return 0.0f;
906 }
907
908 // Use Newton's Method to find local minima of sum-of-squares error.
909 float fSteps = (float) (cSteps - 1);
910
911 for(size_t iIteration = 0; iIteration < 8 && fError > 0.0f; iIteration++)
912 {
913 // Calculate new steps
914 HDRColorA pSteps[BC7_MAX_INDICES];
915
916 LDRColorA lX, lY;
917 lX = (X * 255.0f).ToLDRColorA();
918 lY = (Y * 255.0f).ToLDRColorA();
919
920 for(size_t iStep = 0; iStep < cSteps; iStep++)
921 {
922 pSteps[iStep] = X * pC[iStep] + Y * pD[iStep];
923 //LDRColorA::Interpolate(lX, lY, i, i, wcprec, waprec, aSteps[i]);
924 }
925
926 // Calculate color direction
927 Dir = Y - X;
928 float fLen = Dir * Dir;
929 if(fLen < (1.0f / 4096.0f))
930 break;
931
932 float fScale = fSteps / fLen;
933 Dir *= fScale;
934
935 // Evaluate function, and derivatives
936 float d2X = 0.0f, d2Y = 0.0f;
937 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
938
939 for(size_t iPoint = 0; iPoint < cPixels; ++iPoint)
940 {
941 float fDot = (pPoints[pIndex[iPoint]] - X) * Dir;
942 size_t iStep;
943 if(fDot <= 0.0f)
944 iStep = 0;
945 if(fDot >= fSteps)
946 iStep = cSteps - 1;
947 else
948 iStep = size_t(fDot + 0.5f);
949
950 HDRColorA Diff = pSteps[iStep] - pPoints[pIndex[iPoint]];
951 float fC = pC[iStep] * (1.0f / 8.0f);
952 float fD = pD[iStep] * (1.0f / 8.0f);
953
954 d2X += fC * pC[iStep];
955 dX += Diff * fC;
956
957 d2Y += fD * pD[iStep];
958 dY += Diff * fD;
959 }
960
961 // Move endpoints
962 if(d2X > 0.0f)
963 {
964 float f = -1.0f / d2X;
965 X += dX * f;
966 }
967
968 if(d2Y > 0.0f)
969 {
970 float f = -1.0f / d2Y;
971 Y += dY * f;
972 }
973
974 if((dX * dX < fEpsilon) && (dY * dY < fEpsilon))
975 break;
976 }
977
978 *pX = X;
979 *pY = Y;
980 return fError;
981 }
982
983
984 //-------------------------------------------------------------------------------------
985
986 static float ComputeError(_Inout_ const LDRColorA& pixel, _In_reads_(1 << uIndexPrec) const LDRColorA aPalette[],
987 _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t* pBestIndex = nullptr, _Out_opt_ size_t* pBestIndex2 = nullptr)
988 {
989 const size_t uNumIndices = size_t(1) << uIndexPrec;
990 const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
991 float fTotalErr = 0;
992 float fBestErr = FLT_MAX;
993
994 if(pBestIndex)
995 *pBestIndex = 0;
996 if(pBestIndex2)
997 *pBestIndex2 = 0;
998
999 XMVECTOR vpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &pixel ) );
1000
1001 if(uIndexPrec2 == 0)
1002 {
1003 for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1004 {
1005 XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1006 // Compute ErrorMetric
1007 tpixel = XMVectorSubtract( vpixel, tpixel );
1008 float fErr = XMVectorGetX( XMVector4Dot( tpixel, tpixel ) );
1009 if(fErr > fBestErr) // error increased, so we're done searching
1010 break;
1011 if(fErr < fBestErr)
1012 {
1013 fBestErr = fErr;
1014 if(pBestIndex)
1015 *pBestIndex = i;
1016 }
1017 }
1018 fTotalErr += fBestErr;
1019 }
1020 else
1021 {
1022 for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1023 {
1024 XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1025 // Compute ErrorMetricRGB
1026 tpixel = XMVectorSubtract( vpixel, tpixel );
1027 float fErr = XMVectorGetX( XMVector3Dot( tpixel, tpixel ) );
1028 if(fErr > fBestErr) // error increased, so we're done searching
1029 break;
1030 if(fErr < fBestErr)
1031 {
1032 fBestErr = fErr;
1033 if(pBestIndex)
1034 *pBestIndex = i;
1035 }
1036 }
1037 fTotalErr += fBestErr;
1038 fBestErr = FLT_MAX;
1039 for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++)
1040 {
1041 // Compute ErrorMetricAlpha
1042 float ea = float(pixel.a) - float(aPalette[i].a);
1043 float fErr = ea*ea;
1044 if(fErr > fBestErr) // error increased, so we're done searching
1045 break;
1046 if(fErr < fBestErr)
1047 {
1048 fBestErr = fErr;
1049 if(pBestIndex2)
1050 *pBestIndex2 = i;
1051 }
1052 }
1053 fTotalErr += fBestErr;
1054 }
1055
1056 return fTotalErr;
1057 }
1058
1059
FillWithErrorColors(_Out_writes_ (NUM_PIXELS_PER_BLOCK)HDRColorA * pOut)1060 inline static void FillWithErrorColors( _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut )
1061 {
1062 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1063 {
1064 #ifdef _DEBUG
1065 // Use Magenta in debug as a highly-visible error color
1066 pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
1067 #else
1068 // In production use, default to black
1069 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1070 #endif
1071 }
1072 }
1073
1074
1075 //-------------------------------------------------------------------------------------
1076 // BC6H Compression
1077 //-------------------------------------------------------------------------------------
1078 _Use_decl_annotations_
Decode(bool bSigned,HDRColorA * pOut) const1079 void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const
1080 {
1081 assert(pOut );
1082
1083 size_t uStartBit = 0;
1084 uint8_t uMode = GetBits(uStartBit, 2);
1085 if(uMode != 0x00 && uMode != 0x01)
1086 {
1087 uMode = (GetBits(uStartBit, 3) << 2) | uMode;
1088 }
1089
1090 assert( uMode < 32 );
1091 _Analysis_assume_( uMode < 32 );
1092
1093 if ( ms_aModeToInfo[uMode] >= 0 )
1094 {
1095 assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1096 _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1097 const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
1098
1099 assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1100 _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1101 const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
1102
1103 INTEndPntPair aEndPts[BC6H_MAX_REGIONS];
1104 memset(aEndPts, 0, BC6H_MAX_REGIONS * 2 * sizeof(INTColor));
1105 uint32_t uShape = 0;
1106
1107 // Read header
1108 const size_t uHeaderBits = info.uPartitions > 0 ? 82 : 65;
1109 while(uStartBit < uHeaderBits)
1110 {
1111 size_t uCurBit = uStartBit;
1112 if(GetBit(uStartBit))
1113 {
1114 switch(desc[uCurBit].m_eField)
1115 {
1116 case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1117 case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1118 case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1119 case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1120 case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1121 case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1122 case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1123 case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1124 case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1125 case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1126 case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1127 case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1128 case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1129 default:
1130 {
1131 #ifdef _DEBUG
1132 OutputDebugStringA( "BC6H: Invalid header bits encountered during decoding\n" );
1133 #endif
1134 FillWithErrorColors( pOut );
1135 return;
1136 }
1137 }
1138 }
1139 }
1140
1141 assert( uShape < 64 );
1142 _Analysis_assume_( uShape < 64 );
1143
1144 // Sign extend necessary end points
1145 if(bSigned)
1146 {
1147 aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
1148 }
1149 if(bSigned || info.bTransformed)
1150 {
1151 assert( info.uPartitions < BC6H_MAX_REGIONS );
1152 _Analysis_assume_( info.uPartitions < BC6H_MAX_REGIONS );
1153 for(size_t p = 0; p <= info.uPartitions; ++p)
1154 {
1155 if(p != 0)
1156 {
1157 aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
1158 }
1159 aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
1160 }
1161 }
1162
1163 // Inverse transform the end points
1164 if(info.bTransformed)
1165 {
1166 TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
1167 }
1168
1169 // Read indices
1170 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1171 {
1172 size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec-1 : info.uIndexPrec;
1173 if ( uStartBit + uNumBits > 128 )
1174 {
1175 #ifdef _DEBUG
1176 OutputDebugStringA( "BC6H: Invalid block encountered during decoding\n" );
1177 #endif
1178 FillWithErrorColors( pOut );
1179 return;
1180 }
1181 uint8_t uIndex = GetBits(uStartBit, uNumBits);
1182
1183 if ( uIndex >= ((info.uPartitions > 0) ? 8 : 16) )
1184 {
1185 #ifdef _DEBUG
1186 OutputDebugStringA( "BC6H: Invalid index encountered during decoding\n" );
1187 #endif
1188 FillWithErrorColors( pOut );
1189 return;
1190 }
1191
1192 size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
1193 assert( uRegion < BC6H_MAX_REGIONS );
1194 _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1195
1196 // Unquantize endpoints and interpolate
1197 int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
1198 int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
1199 int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
1200 int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
1201 int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
1202 int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
1203 const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
1204 INTColor fc;
1205 fc.r = FinishUnquantize((r1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1206 fc.g = FinishUnquantize((g1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1207 fc.b = FinishUnquantize((b1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1208
1209 HALF rgb[3];
1210 fc.ToF16(rgb, bSigned);
1211
1212 pOut[i].r = XMConvertHalfToFloat( rgb[0] );
1213 pOut[i].g = XMConvertHalfToFloat( rgb[1] );
1214 pOut[i].b = XMConvertHalfToFloat( rgb[2] );
1215 pOut[i].a = 1.0f;
1216 }
1217 }
1218 else
1219 {
1220 #ifdef _DEBUG
1221 const char* warnstr = "BC6H: Invalid mode encountered during decoding\n";
1222 switch( uMode )
1223 {
1224 case 0x13: warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break;
1225 case 0x17: warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break;
1226 case 0x1B: warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break;
1227 case 0x1F: warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break;
1228 }
1229 OutputDebugStringA( warnstr );
1230 #endif
1231 // Per the BC6H format spec, we must return opaque black
1232 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1233 {
1234 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1235 }
1236 }
1237 }
1238
1239 _Use_decl_annotations_
Encode(bool bSigned,const HDRColorA * const pIn)1240 void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn)
1241 {
1242 assert( pIn );
1243
1244 EncodeParams EP(pIn, bSigned);
1245
1246 for(EP.uMode = 0; EP.uMode < ARRAYSIZE(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
1247 {
1248 const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32 : 1;
1249 // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
1250 // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
1251 const size_t uItems = std::max<size_t>(1, uShapes >> 2);
1252 float afRoughMSE[BC6H_MAX_SHAPES];
1253 uint8_t auShape[BC6H_MAX_SHAPES];
1254
1255 // pick the best uItems shapes and refine these.
1256 for(EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
1257 {
1258 size_t uShape = EP.uShape;
1259 afRoughMSE[uShape] = RoughMSE(&EP);
1260 auShape[uShape] = static_cast<uint8_t>(uShape);
1261 }
1262
1263 // Bubble up the first uItems items
1264 for(register size_t i = 0; i < uItems; i++)
1265 {
1266 for(register size_t j = i + 1; j < uShapes; j++)
1267 {
1268 if(afRoughMSE[i] > afRoughMSE[j])
1269 {
1270 std::swap(afRoughMSE[i], afRoughMSE[j]);
1271 std::swap(auShape[i], auShape[j]);
1272 }
1273 }
1274 }
1275
1276 for(size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
1277 {
1278 EP.uShape = auShape[i];
1279 Refine(&EP);
1280 }
1281 }
1282 }
1283
1284
1285 //-------------------------------------------------------------------------------------
1286 _Use_decl_annotations_
Quantize(int iValue,int prec,bool bSigned)1287 int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned)
1288 {
1289 assert(prec > 1); // didn't bother to make it work for 1
1290 int q, s = 0;
1291 if(bSigned)
1292 {
1293 assert(iValue >= -F16MAX && iValue <= F16MAX);
1294 if(iValue < 0)
1295 {
1296 s = 1;
1297 iValue = -iValue;
1298 }
1299 q = (prec >= 16) ? iValue : (iValue << (prec-1)) / (F16MAX+1);
1300 if(s)
1301 q = -q;
1302 assert (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
1303 }
1304 else
1305 {
1306 assert(iValue >= 0 && iValue <= F16MAX);
1307 q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX+1);
1308 assert (q >= 0 && q < (1 << prec));
1309 }
1310
1311 return q;
1312 }
1313
1314 _Use_decl_annotations_
Unquantize(int comp,uint8_t uBitsPerComp,bool bSigned)1315 int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned)
1316 {
1317 int unq = 0, s = 0;
1318 if(bSigned)
1319 {
1320 if(uBitsPerComp >= 16)
1321 {
1322 unq = comp;
1323 }
1324 else
1325 {
1326 if(comp < 0)
1327 {
1328 s = 1;
1329 comp = -comp;
1330 }
1331
1332 if(comp == 0) unq = 0;
1333 else if(comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
1334 else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp-1);
1335
1336 if(s) unq = -unq;
1337 }
1338 }
1339 else
1340 {
1341 if(uBitsPerComp >= 15) unq = comp;
1342 else if(comp == 0) unq = 0;
1343 else if(comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
1344 else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
1345 }
1346
1347 return unq;
1348 }
1349
1350 _Use_decl_annotations_
FinishUnquantize(int comp,bool bSigned)1351 int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned)
1352 {
1353 if(bSigned)
1354 {
1355 return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5; // scale the magnitude by 31/32
1356 }
1357 else
1358 {
1359 return (comp * 31) >> 6; // scale the magnitude by 31/64
1360 }
1361 }
1362
1363
1364 //-------------------------------------------------------------------------------------
1365 _Use_decl_annotations_
EndPointsFit(const EncodeParams * pEP,const INTEndPntPair aEndPts[])1366 bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[])
1367 {
1368 assert( pEP );
1369 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1370 const bool bIsSigned = pEP->bSigned;
1371 const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1372 const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
1373 const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
1374 const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
1375
1376 INTColor aBits[4];
1377 aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
1378 aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
1379 aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
1380 aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
1381 aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
1382 aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
1383 if(aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
1384 aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
1385 aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
1386 return false;
1387
1388 if(ms_aInfo[pEP->uMode].uPartitions)
1389 {
1390 aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
1391 aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
1392 aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
1393 aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
1394 aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
1395 aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
1396
1397 if(aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
1398 aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
1399 aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
1400 return false;
1401 }
1402
1403 return true;
1404 }
1405
1406 _Use_decl_annotations_
GeneratePaletteQuantized(const EncodeParams * pEP,const INTEndPntPair & endPts,INTColor aPalette[]) const1407 void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const
1408 {
1409 assert( pEP );
1410 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1411 const size_t uNumIndices = size_t(1) << uIndexPrec;
1412 assert( uNumIndices > 0 );
1413 _Analysis_assume_( uNumIndices > 0 );
1414 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1415
1416 // scale endpoints
1417 INTEndPntPair unqEndPts;
1418 unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
1419 unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
1420 unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
1421 unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
1422 unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
1423 unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
1424
1425 // interpolate
1426 const int* aWeights = nullptr;
1427 switch(uIndexPrec)
1428 {
1429 case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1430 case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1431 default:
1432 assert(false);
1433 for(size_t i = 0; i < uNumIndices; ++i)
1434 {
1435 #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
1436 aPalette[i] = INTColor(0,0,0);
1437 }
1438 return;
1439 }
1440
1441 for (size_t i = 0; i < uNumIndices; ++i)
1442 {
1443 aPalette[i].r = FinishUnquantize(
1444 (unqEndPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1445 pEP->bSigned);
1446 aPalette[i].g = FinishUnquantize(
1447 (unqEndPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1448 pEP->bSigned);
1449 aPalette[i].b = FinishUnquantize(
1450 (unqEndPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1451 pEP->bSigned);
1452 }
1453 }
1454
1455 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
1456 _Use_decl_annotations_
MapColorsQuantized(const EncodeParams * pEP,const INTColor aColors[],size_t np,const INTEndPntPair & endPts) const1457 float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const
1458 {
1459 assert( pEP );
1460
1461 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1462 const uint8_t uNumIndices = 1 << uIndexPrec;
1463 INTColor aPalette[BC6H_MAX_INDICES];
1464 GeneratePaletteQuantized(pEP, endPts, aPalette);
1465
1466 float fTotErr = 0;
1467 for(size_t i = 0; i < np; ++i)
1468 {
1469 XMVECTOR vcolors = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aColors[i] ) );
1470
1471 // Compute ErrorMetricRGB
1472 XMVECTOR tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[0] ) );
1473 tpal = XMVectorSubtract( vcolors, tpal );
1474 float fBestErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1475
1476 for(int j = 1; j < uNumIndices && fBestErr > 0; ++j)
1477 {
1478 // Compute ErrorMetricRGB
1479 tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[j] ) );
1480 tpal = XMVectorSubtract( vcolors, tpal );
1481 float fErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1482 if(fErr > fBestErr) break; // error increased, so we're done searching
1483 if(fErr < fBestErr) fBestErr = fErr;
1484 }
1485 fTotErr += fBestErr;
1486 }
1487 return fTotErr;
1488 }
1489
1490 _Use_decl_annotations_
PerturbOne(const EncodeParams * pEP,const INTColor aColors[],size_t np,uint8_t ch,const INTEndPntPair & oldEndPts,INTEndPntPair & newEndPts,float fOldErr,int do_b) const1491 float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch,
1492 const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const
1493 {
1494 assert( pEP );
1495 uint8_t uPrec;
1496 switch(ch)
1497 {
1498 case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
1499 case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
1500 case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
1501 default: assert(false); newEndPts = oldEndPts; return FLT_MAX;
1502 }
1503 INTEndPntPair tmpEndPts;
1504 float fMinErr = fOldErr;
1505 int beststep = 0;
1506
1507 // copy real endpoints so we can perturb them
1508 tmpEndPts = newEndPts = oldEndPts;
1509
1510 // do a logarithmic search for the best error for this endpoint (which)
1511 for(int step = 1 << (uPrec-1); step; step >>= 1)
1512 {
1513 bool bImproved = false;
1514 for(int sign = -1; sign <= 1; sign += 2)
1515 {
1516 if(do_b == 0)
1517 {
1518 tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
1519 if(tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
1520 continue;
1521 }
1522 else
1523 {
1524 tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
1525 if(tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
1526 continue;
1527 }
1528
1529 float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
1530
1531 if(fErr < fMinErr)
1532 {
1533 bImproved = true;
1534 fMinErr = fErr;
1535 beststep = sign * step;
1536 }
1537 }
1538 // if this was an improvement, move the endpoint and continue search from there
1539 if(bImproved)
1540 {
1541 if(do_b == 0)
1542 newEndPts.A[ch] += beststep;
1543 else
1544 newEndPts.B[ch] += beststep;
1545 }
1546 }
1547 return fMinErr;
1548 }
1549
1550 _Use_decl_annotations_
OptimizeOne(const EncodeParams * pEP,const INTColor aColors[],size_t np,float aOrgErr,const INTEndPntPair & aOrgEndPts,INTEndPntPair & aOptEndPts) const1551 void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr,
1552 const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const
1553 {
1554 assert( pEP );
1555 float aOptErr = aOrgErr;
1556 aOptEndPts.A = aOrgEndPts.A;
1557 aOptEndPts.B = aOrgEndPts.B;
1558
1559 INTEndPntPair new_a, new_b;
1560 INTEndPntPair newEndPts;
1561 int do_b;
1562
1563 // now optimize each channel separately
1564 for(uint8_t ch = 0; ch < 3; ++ch)
1565 {
1566 // figure out which endpoint when perturbed gives the most improvement and start there
1567 // if we just alternate, we can easily end up in a local minima
1568 float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0); // perturb endpt A
1569 float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1); // perturb endpt B
1570
1571 if(fErr0 < fErr1)
1572 {
1573 if(fErr0 >= aOptErr) continue;
1574 aOptEndPts.A[ch] = new_a.A[ch];
1575 aOptErr = fErr0;
1576 do_b = 1; // do B next
1577 }
1578 else
1579 {
1580 if(fErr1 >= aOptErr) continue;
1581 aOptEndPts.B[ch] = new_b.B[ch];
1582 aOptErr = fErr1;
1583 do_b = 0; // do A next
1584 }
1585
1586 // now alternate endpoints and keep trying until there is no improvement
1587 for(;;)
1588 {
1589 float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
1590 if(fErr >= aOptErr)
1591 break;
1592 if(do_b == 0)
1593 aOptEndPts.A[ch] = newEndPts.A[ch];
1594 else
1595 aOptEndPts.B[ch] = newEndPts.B[ch];
1596 aOptErr = fErr;
1597 do_b = 1 - do_b; // now move the other endpoint
1598 }
1599 }
1600 }
1601
1602 _Use_decl_annotations_
OptimizeEndPoints(const EncodeParams * pEP,const float aOrgErr[],const INTEndPntPair aOrgEndPts[],INTEndPntPair aOptEndPts[]) const1603 void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const
1604 {
1605 assert( pEP );
1606 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1607 assert( uPartitions < BC6H_MAX_REGIONS );
1608 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1609 INTColor aPixels[NUM_PIXELS_PER_BLOCK];
1610
1611 for(size_t p = 0; p <= uPartitions; ++p)
1612 {
1613 // collect the pixels in the region
1614 size_t np = 0;
1615 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1616 {
1617 if(g_aPartitionTable[p][pEP->uShape][i] == p)
1618 {
1619 aPixels[np++] = pEP->aIPixels[i];
1620 }
1621 }
1622
1623 OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
1624 }
1625 }
1626
1627 // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit
1628 _Use_decl_annotations_
SwapIndices(const EncodeParams * pEP,INTEndPntPair aEndPts[],size_t aIndices[])1629 void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[])
1630 {
1631 assert( pEP );
1632 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1633 const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
1634 const size_t uHighIndexBit = uNumIndices >> 1;
1635
1636 assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1637 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1638
1639 for(size_t p = 0; p <= uPartitions; ++p)
1640 {
1641 size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
1642 assert(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
1643 if(aIndices[i] & uHighIndexBit)
1644 {
1645 // high bit is set, swap the aEndPts and indices for this region
1646 std::swap(aEndPts[p].A, aEndPts[p].B);
1647
1648 for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j)
1649 if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
1650 aIndices[j] = uNumIndices - 1 - aIndices[j];
1651 }
1652 }
1653 }
1654
1655 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
1656 _Use_decl_annotations_
AssignIndices(const EncodeParams * pEP,const INTEndPntPair aEndPts[],size_t aIndices[],float aTotErr[]) const1657 void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const
1658 {
1659 assert( pEP );
1660 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1661 const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
1662
1663 assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1664 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1665
1666 // build list of possibles
1667 INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
1668
1669 for(size_t p = 0; p <= uPartitions; ++p)
1670 {
1671 GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
1672 aTotErr[p] = 0;
1673 }
1674
1675 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1676 {
1677 const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
1678 assert( uRegion < BC6H_MAX_REGIONS );
1679 _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1680 float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
1681 aIndices[i] = 0;
1682
1683 for(uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
1684 {
1685 float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
1686 if(fErr > fBestErr) break; // error increased, so we're done searching
1687 if(fErr < fBestErr)
1688 {
1689 fBestErr = fErr;
1690 aIndices[i] = j;
1691 }
1692 }
1693 aTotErr[uRegion] += fBestErr;
1694 }
1695 }
1696
1697 _Use_decl_annotations_
QuantizeEndPts(const EncodeParams * pEP,INTEndPntPair * aQntEndPts) const1698 void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const
1699 {
1700 assert( pEP && aQntEndPts );
1701 const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
1702 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1703 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1704 assert( uPartitions < BC6H_MAX_REGIONS );
1705 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1706
1707 for(size_t p = 0; p <= uPartitions; ++p)
1708 {
1709 aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
1710 aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
1711 aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
1712 aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
1713 aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
1714 aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
1715 }
1716 }
1717
1718 _Use_decl_annotations_
EmitBlock(const EncodeParams * pEP,const INTEndPntPair aEndPts[],const size_t aIndices[])1719 void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[])
1720 {
1721 assert( pEP );
1722 const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
1723 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1724 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1725 const size_t uHeaderBits = uPartitions > 0 ? 82 : 65;
1726 const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
1727 size_t uStartBit = 0;
1728
1729 while(uStartBit < uHeaderBits)
1730 {
1731 switch(desc[uStartBit].m_eField)
1732 {
1733 case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01); break;
1734 case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01); break;
1735 case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1736 case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1737 case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1738 case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1739 case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1740 case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1741 case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1742 case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1743 case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1744 case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1745 case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1746 case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1747 default: assert(false);
1748 }
1749 }
1750
1751 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1752 {
1753 if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
1754 SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aIndices[i] ));
1755 else
1756 SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aIndices[i] ));
1757 }
1758 assert(uStartBit == 128);
1759 }
1760
1761 _Use_decl_annotations_
Refine(EncodeParams * pEP)1762 void D3DX_BC6H::Refine(EncodeParams* pEP)
1763 {
1764 assert( pEP );
1765 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1766 assert( uPartitions < BC6H_MAX_REGIONS );
1767 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1768
1769 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1770 float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
1771 INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
1772 size_t aOrgIdx[NUM_PIXELS_PER_BLOCK], aOptIdx[NUM_PIXELS_PER_BLOCK];
1773
1774 QuantizeEndPts(pEP, aOrgEndPts);
1775 AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
1776 SwapIndices(pEP, aOrgEndPts, aOrgIdx);
1777
1778 if(bTransformed) TransformForward(aOrgEndPts);
1779 if(EndPointsFit(pEP, aOrgEndPts))
1780 {
1781 if(bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
1782 OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
1783 AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
1784 SwapIndices(pEP, aOptEndPts, aOptIdx);
1785
1786 float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
1787 for(size_t p = 0; p <= uPartitions; ++p)
1788 {
1789 fOrgTotErr += aOrgErr[p];
1790 fOptTotErr += aOptErr[p];
1791 }
1792
1793 if(bTransformed) TransformForward(aOptEndPts);
1794 if(EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
1795 {
1796 pEP->fBestErr = fOptTotErr;
1797 EmitBlock(pEP, aOptEndPts, aOptIdx);
1798 }
1799 else if(fOrgTotErr < pEP->fBestErr)
1800 {
1801 // either it stopped fitting when we optimized it, or there was no improvement
1802 // so go back to the unoptimized endpoints which we know will fit
1803 if(bTransformed) TransformForward(aOrgEndPts);
1804 pEP->fBestErr = fOrgTotErr;
1805 EmitBlock(pEP, aOrgEndPts, aOrgIdx);
1806 }
1807 }
1808 }
1809
1810 _Use_decl_annotations_
GeneratePaletteUnquantized(const EncodeParams * pEP,size_t uRegion,INTColor aPalette[])1811 void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[])
1812 {
1813 assert( pEP );
1814 assert( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1815 _Analysis_assume_( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1816 const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
1817 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1818 const uint8_t uNumIndices = 1 << uIndexPrec;
1819 assert(uNumIndices > 0);
1820 _Analysis_assume_(uNumIndices > 0);
1821
1822 const int* aWeights = nullptr;
1823 switch(uIndexPrec)
1824 {
1825 case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1826 case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1827 default:
1828 assert(false);
1829 for(size_t i = 0; i < uNumIndices; ++i)
1830 {
1831 #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
1832 aPalette[i] = INTColor(0,0,0);
1833 }
1834 return;
1835 }
1836
1837 for(register size_t i = 0; i < uNumIndices; ++i)
1838 {
1839 aPalette[i].r = (endPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1840 aPalette[i].g = (endPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1841 aPalette[i].b = (endPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1842 }
1843 }
1844
1845 _Use_decl_annotations_
MapColors(const EncodeParams * pEP,size_t uRegion,size_t np,const size_t * auIndex) const1846 float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const
1847 {
1848 assert( pEP );
1849 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1850 const uint8_t uNumIndices = 1 << uIndexPrec;
1851 INTColor aPalette[BC6H_MAX_INDICES];
1852 GeneratePaletteUnquantized(pEP, uRegion, aPalette);
1853
1854 float fTotalErr = 0.0f;
1855 for(size_t i = 0; i < np; ++i)
1856 {
1857 float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
1858 for(uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
1859 {
1860 float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
1861 if(fErr > fBestErr) break; // error increased, so we're done searching
1862 if(fErr < fBestErr) fBestErr = fErr;
1863 }
1864 fTotalErr += fBestErr;
1865 }
1866
1867 return fTotalErr;
1868 }
1869
1870 _Use_decl_annotations_
RoughMSE(EncodeParams * pEP) const1871 float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const
1872 {
1873 assert( pEP );
1874 assert( pEP->uShape < BC6H_MAX_SHAPES);
1875 _Analysis_assume_( pEP->uShape < BC6H_MAX_SHAPES);
1876
1877 INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
1878
1879 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1880 assert( uPartitions < BC6H_MAX_REGIONS );
1881 _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1882
1883 size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
1884
1885 float fError = 0.0f;
1886 for(size_t p = 0; p <= uPartitions; ++p)
1887 {
1888 size_t np = 0;
1889 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1890 {
1891 if(g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
1892 {
1893 auPixIdx[np++] = i;
1894 }
1895 }
1896
1897 // handle simple cases
1898 assert(np > 0);
1899 if(np == 1)
1900 {
1901 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1902 aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
1903 continue;
1904 }
1905 else if(np == 2)
1906 {
1907 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1908 aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
1909 continue;
1910 }
1911
1912 HDRColorA epA, epB;
1913 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
1914 aEndPts[p].A.Set(epA, pEP->bSigned);
1915 aEndPts[p].B.Set(epB, pEP->bSigned);
1916 if(pEP->bSigned)
1917 {
1918 aEndPts[p].A.Clamp(-F16MAX, F16MAX);
1919 aEndPts[p].B.Clamp(-F16MAX, F16MAX);
1920 }
1921 else
1922 {
1923 aEndPts[p].A.Clamp(0, F16MAX);
1924 aEndPts[p].B.Clamp(0, F16MAX);
1925 }
1926
1927 fError += MapColors(pEP, p, np, auPixIdx);
1928 }
1929
1930 return fError;
1931 }
1932
1933
1934
1935 //-------------------------------------------------------------------------------------
1936 // BC7 Compression
1937 //-------------------------------------------------------------------------------------
1938 _Use_decl_annotations_
Decode(HDRColorA * pOut) const1939 void D3DX_BC7::Decode(HDRColorA* pOut) const
1940 {
1941 assert( pOut );
1942
1943 size_t uFirst = 0;
1944 while(uFirst < 128 && !GetBit(uFirst)) {}
1945 uint8_t uMode = uint8_t(uFirst - 1);
1946
1947 if(uMode < 8)
1948 {
1949 const uint8_t uPartitions = ms_aInfo[uMode].uPartitions;
1950 assert( uPartitions < BC7_MAX_REGIONS );
1951 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
1952
1953 const uint8_t uNumEndPts = (uPartitions + 1) << 1;
1954 const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec;
1955 const uint8_t uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
1956 register size_t i;
1957 size_t uStartBit = uMode + 1;
1958 uint8_t P[6];
1959 uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits);
1960 assert( uShape < BC7_MAX_SHAPES );
1961 _Analysis_assume_( uShape < BC7_MAX_SHAPES );
1962
1963 uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits);
1964 assert( uRotation < 4 );
1965
1966 uint8_t uIndexMode = GetBits(uStartBit, ms_aInfo[uMode].uIndexModeBits);
1967 assert( uIndexMode < 2 );
1968
1969 LDRColorA c[BC7_MAX_REGIONS << 1];
1970 const LDRColorA RGBAPrec = ms_aInfo[uMode].RGBAPrec;
1971 const LDRColorA RGBAPrecWithP = ms_aInfo[uMode].RGBAPrecWithP;
1972
1973 assert( uNumEndPts <= (BC7_MAX_REGIONS << 1) );
1974
1975 // Red channel
1976 for(i = 0; i < uNumEndPts; i++)
1977 {
1978 if ( uStartBit + RGBAPrec.r > 128 )
1979 {
1980 #ifdef _DEBUG
1981 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1982 #endif
1983 FillWithErrorColors( pOut );
1984 return;
1985 }
1986
1987 c[i].r = GetBits(uStartBit, RGBAPrec.r);
1988 }
1989
1990 // Green channel
1991 for(i = 0; i < uNumEndPts; i++)
1992 {
1993 if ( uStartBit + RGBAPrec.g > 128 )
1994 {
1995 #ifdef _DEBUG
1996 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1997 #endif
1998 FillWithErrorColors( pOut );
1999 return;
2000 }
2001
2002 c[i].g = GetBits(uStartBit, RGBAPrec.g);
2003 }
2004
2005 // Blue channel
2006 for(i = 0; i < uNumEndPts; i++)
2007 {
2008 if ( uStartBit + RGBAPrec.b > 128 )
2009 {
2010 #ifdef _DEBUG
2011 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2012 #endif
2013 FillWithErrorColors( pOut );
2014 return;
2015 }
2016
2017 c[i].b = GetBits(uStartBit, RGBAPrec.b);
2018 }
2019
2020 // Alpha channel
2021 for(i = 0; i < uNumEndPts; i++)
2022 {
2023 if ( uStartBit + RGBAPrec.a > 128 )
2024 {
2025 #ifdef _DEBUG
2026 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2027 #endif
2028 FillWithErrorColors( pOut );
2029 return;
2030 }
2031
2032 c[i].a = RGBAPrec.a ? GetBits(uStartBit, RGBAPrec.a) : 255;
2033 }
2034
2035 // P-bits
2036 assert( ms_aInfo[uMode].uPBits <= 6 );
2037 _Analysis_assume_( ms_aInfo[uMode].uPBits <= 6 );
2038 for(i = 0; i < ms_aInfo[uMode].uPBits; i++)
2039 {
2040 if ( uStartBit > 127 )
2041 {
2042 #ifdef _DEBUG
2043 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2044 #endif
2045 FillWithErrorColors( pOut );
2046 return;
2047 }
2048
2049 P[i] = GetBit(uStartBit);
2050 }
2051
2052 if(ms_aInfo[uMode].uPBits)
2053 {
2054 for(i = 0; i < uNumEndPts; i++)
2055 {
2056 size_t pi = i * ms_aInfo[uMode].uPBits / uNumEndPts;
2057 for(register uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2058 {
2059 if(RGBAPrec[ch] != RGBAPrecWithP[ch])
2060 {
2061 c[i][ch] = (c[i][ch] << 1) | P[pi];
2062 }
2063 }
2064 }
2065 }
2066
2067 for(i = 0; i < uNumEndPts; i++)
2068 {
2069 c[i] = Unquantize(c[i], RGBAPrecWithP);
2070 }
2071
2072 uint8_t w1[NUM_PIXELS_PER_BLOCK], w2[NUM_PIXELS_PER_BLOCK];
2073
2074 // read color indices
2075 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2076 {
2077 size_t uNumBits = IsFixUpOffset(ms_aInfo[uMode].uPartitions, uShape, i) ? uIndexPrec - 1 : uIndexPrec;
2078 if ( uStartBit + uNumBits > 128 )
2079 {
2080 #ifdef _DEBUG
2081 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2082 #endif
2083 FillWithErrorColors( pOut );
2084 return;
2085 }
2086 w1[i] = GetBits(uStartBit, uNumBits);
2087 }
2088
2089 // read alpha indices
2090 if(uIndexPrec2)
2091 {
2092 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2093 {
2094 size_t uNumBits = i ? uIndexPrec2 : uIndexPrec2 - 1;
2095 if ( uStartBit + uNumBits > 128 )
2096 {
2097 #ifdef _DEBUG
2098 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2099 #endif
2100 FillWithErrorColors( pOut );
2101 return;
2102 }
2103 w2[i] = GetBits(uStartBit, uNumBits );
2104 }
2105 }
2106
2107 for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2108 {
2109 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2110 LDRColorA outPixel;
2111 if(uIndexPrec2 == 0)
2112 {
2113 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w1[i], uIndexPrec, uIndexPrec, outPixel);
2114 }
2115 else
2116 {
2117 if(uIndexMode == 0)
2118 {
2119 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w2[i], uIndexPrec, uIndexPrec2, outPixel);
2120 }
2121 else
2122 {
2123 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w2[i], w1[i], uIndexPrec2, uIndexPrec, outPixel);
2124 }
2125 }
2126
2127 switch(uRotation)
2128 {
2129 case 1: std::swap(outPixel.r, outPixel.a); break;
2130 case 2: std::swap(outPixel.g, outPixel.a); break;
2131 case 3: std::swap(outPixel.b, outPixel.a); break;
2132 }
2133
2134 pOut[i] = HDRColorA(outPixel);
2135 }
2136 }
2137 else
2138 {
2139 #ifdef _DEBUG
2140 OutputDebugStringA( "BC7: Reserved mode 8 encountered during decoding\n" );
2141 #endif
2142 // Per the BC7 format spec, we must return transparent black
2143 memset( pOut, 0, sizeof(HDRColorA) * NUM_PIXELS_PER_BLOCK );
2144 }
2145 }
2146
2147 _Use_decl_annotations_
Encode(bool skip3subsets,const HDRColorA * const pIn)2148 void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
2149 {
2150 assert( pIn );
2151
2152 D3DX_BC7 final = *this;
2153 EncodeParams EP(pIn);
2154 float fMSEBest = FLT_MAX;
2155
2156 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2157 {
2158 EP.aLDRPixels[i].r = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].r * 255.0f + 0.01f ) ) );
2159 EP.aLDRPixels[i].g = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].g * 255.0f + 0.01f ) ) );
2160 EP.aLDRPixels[i].b = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].b * 255.0f + 0.01f ) ) );
2161 EP.aLDRPixels[i].a = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].a * 255.0f + 0.01f ) ) );
2162 }
2163
2164 for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
2165 {
2166 if ( skip3subsets && (EP.uMode == 0 || EP.uMode == 2) )
2167 {
2168 // 3 subset modes tend to be used rarely and add significant compression time
2169 continue;
2170 }
2171
2172 const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
2173 assert( uShapes <= BC7_MAX_SHAPES );
2174 _Analysis_assume_( uShapes <= BC7_MAX_SHAPES );
2175
2176 const size_t uNumRots = size_t(1) << ms_aInfo[EP.uMode].uRotationBits;
2177 const size_t uNumIdxMode = size_t(1) << ms_aInfo[EP.uMode].uIndexModeBits;
2178 // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
2179 // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
2180 const size_t uItems = std::max<size_t>(1, uShapes >> 2);
2181 float afRoughMSE[BC7_MAX_SHAPES];
2182 size_t auShape[BC7_MAX_SHAPES];
2183
2184 for(size_t r = 0; r < uNumRots && fMSEBest > 0; ++r)
2185 {
2186 switch(r)
2187 {
2188 case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2189 case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2190 case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2191 }
2192
2193 for(size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im)
2194 {
2195 // pick the best uItems shapes and refine these.
2196 for(size_t s = 0; s < uShapes; s++)
2197 {
2198 afRoughMSE[s] = RoughMSE(&EP, s, im);
2199 auShape[s] = s;
2200 }
2201
2202 // Bubble up the first uItems items
2203 for(size_t i = 0; i < uItems; i++)
2204 {
2205 for(size_t j = i + 1; j < uShapes; j++)
2206 {
2207 if(afRoughMSE[i] > afRoughMSE[j])
2208 {
2209 std::swap(afRoughMSE[i], afRoughMSE[j]);
2210 std::swap(auShape[i], auShape[j]);
2211 }
2212 }
2213 }
2214
2215 for(size_t i = 0; i < uItems && fMSEBest > 0; i++)
2216 {
2217 float fMSE = Refine(&EP, auShape[i], r, im);
2218 if(fMSE < fMSEBest)
2219 {
2220 final = *this;
2221 fMSEBest = fMSE;
2222 }
2223 }
2224 }
2225
2226 switch(r)
2227 {
2228 case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2229 case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2230 case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2231 }
2232 }
2233 }
2234
2235 *this = final;
2236 }
2237
2238
2239 //-------------------------------------------------------------------------------------
2240 _Use_decl_annotations_
GeneratePaletteQuantized(const EncodeParams * pEP,size_t uIndexMode,const LDREndPntPair & endPts,LDRColorA aPalette[]) const2241 void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const
2242 {
2243 assert( pEP );
2244 const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2245 const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2246 const size_t uNumIndices = size_t(1) << uIndexPrec;
2247 const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
2248 assert( uNumIndices > 0 && uNumIndices2 > 0 );
2249 _Analysis_assume_( uNumIndices > 0 && uNumIndices2 > 0 );
2250 assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2251 _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2252
2253 LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2254 LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2255 if(uIndexPrec2 == 0)
2256 {
2257 for(register size_t i = 0; i < uNumIndices; i++)
2258 LDRColorA::Interpolate(a, b, i, i, uIndexPrec, uIndexPrec, aPalette[i]);
2259 }
2260 else
2261 {
2262 for(register size_t i = 0; i < uNumIndices; i++)
2263 LDRColorA::InterpolateRGB(a, b, i, uIndexPrec, aPalette[i]);
2264 for(register size_t i = 0; i < uNumIndices2; i++)
2265 LDRColorA::InterpolateA(a, b, i, uIndexPrec2, aPalette[i]);
2266 }
2267 }
2268
2269 _Use_decl_annotations_
PerturbOne(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,size_t ch,const LDREndPntPair & oldEndPts,LDREndPntPair & newEndPts,float fOldErr,uint8_t do_b) const2270 float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2271 const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const
2272 {
2273 assert( pEP );
2274 const int prec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2275 LDREndPntPair tmp_endPts = newEndPts = oldEndPts;
2276 float fMinErr = fOldErr;
2277 uint8_t* pnew_c = (do_b ? &newEndPts.B[ch] : &newEndPts.A[ch]);
2278 uint8_t* ptmp_c = (do_b ? &tmp_endPts.B[ch] : &tmp_endPts.A[ch]);
2279
2280 // do a logarithmic search for the best error for this endpoint (which)
2281 for(int step = 1 << (prec-1); step; step >>= 1)
2282 {
2283 bool bImproved = false;
2284 int beststep = 0;
2285 for(int sign = -1; sign <= 1; sign += 2)
2286 {
2287 int tmp = int(*pnew_c) + sign * step;
2288 if(tmp < 0 || tmp >= (1 << prec))
2289 continue;
2290 else
2291 *ptmp_c = (uint8_t) tmp;
2292
2293 float fTotalErr = MapColors(pEP, aColors, np, uIndexMode, tmp_endPts, fMinErr);
2294 if(fTotalErr < fMinErr)
2295 {
2296 bImproved = true;
2297 fMinErr = fTotalErr;
2298 beststep = sign * step;
2299 }
2300 }
2301
2302 // if this was an improvement, move the endpoint and continue search from there
2303 if(bImproved)
2304 *pnew_c = uint8_t(int(*pnew_c) + beststep);
2305 }
2306 return fMinErr;
2307 }
2308
2309 // perturb the endpoints at least -3 to 3.
2310 // always ensure endpoint ordering is preserved (no need to overlap the scan)
2311 _Use_decl_annotations_
Exhaustive(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,size_t ch,float & fOrgErr,LDREndPntPair & optEndPt) const2312 void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2313 float& fOrgErr, LDREndPntPair& optEndPt) const
2314 {
2315 assert( pEP );
2316 const uint8_t uPrec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2317 LDREndPntPair tmpEndPt;
2318 if(fOrgErr == 0)
2319 return;
2320
2321 int delta = 5;
2322
2323 // ok figure out the range of A and B
2324 tmpEndPt = optEndPt;
2325 int alow = std::max<int>(0, int(optEndPt.A[ch]) - delta);
2326 int ahigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.A[ch]) + delta);
2327 int blow = std::max<int>(0, int(optEndPt.B[ch]) - delta);
2328 int bhigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.B[ch]) + delta);
2329 int amin = 0;
2330 int bmin = 0;
2331
2332 float fBestErr = fOrgErr;
2333 if(optEndPt.A[ch] <= optEndPt.B[ch])
2334 {
2335 // keep a <= b
2336 for(int a = alow; a <= ahigh; ++a)
2337 {
2338 for(int b = std::max<int>(a, blow); b < bhigh; ++b)
2339 {
2340 tmpEndPt.A[ch] = (uint8_t) a;
2341 tmpEndPt.B[ch] = (uint8_t) b;
2342
2343 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2344 if(fErr < fBestErr)
2345 {
2346 amin = a;
2347 bmin = b;
2348 fBestErr = fErr;
2349 }
2350 }
2351 }
2352 }
2353 else
2354 {
2355 // keep b <= a
2356 for(int b = blow; b < bhigh; ++b)
2357 {
2358 for(int a = std::max<int>(b, alow); a <= ahigh; ++a)
2359 {
2360 tmpEndPt.A[ch] = (uint8_t) a;
2361 tmpEndPt.B[ch] = (uint8_t) b;
2362
2363 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2364 if(fErr < fBestErr)
2365 {
2366 amin = a;
2367 bmin = b;
2368 fBestErr = fErr;
2369 }
2370 }
2371 }
2372 }
2373
2374 if(fBestErr < fOrgErr)
2375 {
2376 optEndPt.A[ch] = (uint8_t) amin;
2377 optEndPt.B[ch] = (uint8_t) bmin;
2378 fOrgErr = fBestErr;
2379 }
2380 }
2381
2382 _Use_decl_annotations_
OptimizeOne(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,float fOrgErr,const LDREndPntPair & org,LDREndPntPair & opt) const2383 void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode,
2384 float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const
2385 {
2386 assert( pEP );
2387
2388 float fOptErr = fOrgErr;
2389 opt = org;
2390
2391 LDREndPntPair new_a, new_b;
2392 LDREndPntPair newEndPts;
2393 uint8_t do_b;
2394
2395 // now optimize each channel separately
2396 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ++ch)
2397 {
2398 if(ms_aInfo[pEP->uMode].RGBAPrecWithP[ch] == 0)
2399 continue;
2400
2401 // figure out which endpoint when perturbed gives the most improvement and start there
2402 // if we just alternate, we can easily end up in a local minima
2403 float fErr0 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_a, fOptErr, 0); // perturb endpt A
2404 float fErr1 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_b, fOptErr, 1); // perturb endpt B
2405
2406 uint8_t& copt_a = opt.A[ch];
2407 uint8_t& copt_b = opt.B[ch];
2408 uint8_t& cnew_a = new_a.A[ch];
2409 uint8_t& cnew_b = new_a.B[ch];
2410
2411 if(fErr0 < fErr1)
2412 {
2413 if(fErr0 >= fOptErr)
2414 continue;
2415 copt_a = cnew_a;
2416 fOptErr = fErr0;
2417 do_b = 1; // do B next
2418 }
2419 else
2420 {
2421 if(fErr1 >= fOptErr)
2422 continue;
2423 copt_b = cnew_b;
2424 fOptErr = fErr1;
2425 do_b = 0; // do A next
2426 }
2427
2428 // now alternate endpoints and keep trying until there is no improvement
2429 for( ; ; )
2430 {
2431 float fErr = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, newEndPts, fOptErr, do_b);
2432 if(fErr >= fOptErr)
2433 break;
2434 if(do_b == 0)
2435 copt_a = cnew_a;
2436 else
2437 copt_b = cnew_b;
2438 fOptErr = fErr;
2439 do_b = 1 - do_b; // now move the other endpoint
2440 }
2441 }
2442
2443 // finally, do a small exhaustive search around what we think is the global minima to be sure
2444 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2445 Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt);
2446 }
2447
2448 _Use_decl_annotations_
OptimizeEndPoints(const EncodeParams * pEP,size_t uShape,size_t uIndexMode,const float afOrgErr[],const LDREndPntPair aOrgEndPts[],LDREndPntPair aOptEndPts[]) const2449 void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[],
2450 const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const
2451 {
2452 assert( pEP );
2453 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2454 assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2455 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2456
2457 LDRColorA aPixels[NUM_PIXELS_PER_BLOCK];
2458
2459 for(size_t p = 0; p <= uPartitions; ++p)
2460 {
2461 // collect the pixels in the region
2462 size_t np = 0;
2463 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2464 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2465 aPixels[np++] = pEP->aLDRPixels[i];
2466
2467 OptimizeOne(pEP, aPixels, np, uIndexMode, afOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
2468 }
2469 }
2470
2471 _Use_decl_annotations_
AssignIndices(const EncodeParams * pEP,size_t uShape,size_t uIndexMode,LDREndPntPair endPts[],size_t aIndices[],size_t aIndices2[],float afTotErr[]) const2472 void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[],
2473 float afTotErr[]) const
2474 {
2475 assert( pEP );
2476 assert( uShape < BC7_MAX_SHAPES );
2477 _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2478
2479 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2480 assert( uPartitions < BC7_MAX_REGIONS );
2481 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2482
2483 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2484 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2485 const uint8_t uNumIndices = 1 << uIndexPrec;
2486 const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2487
2488 assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2489 _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2490
2491 const uint8_t uHighestIndexBit = uNumIndices >> 1;
2492 const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1;
2493 LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2494
2495 // build list of possibles
2496 for(size_t p = 0; p <= uPartitions; p++)
2497 {
2498 GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]);
2499 afTotErr[p] = 0;
2500 }
2501
2502 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2503 {
2504 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2505 assert( uRegion < BC7_MAX_REGIONS );
2506 _Analysis_assume_( uRegion < BC7_MAX_REGIONS );
2507 afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i]));
2508 }
2509
2510 // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
2511 if(uIndexPrec2 == 0)
2512 {
2513 for(register size_t p = 0; p <= uPartitions; p++)
2514 {
2515 if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2516 {
2517 std::swap(endPts[p].A, endPts[p].B);
2518 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2519 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2520 aIndices[i] = uNumIndices - 1 - aIndices[i];
2521 }
2522 assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2523 }
2524 }
2525 else
2526 {
2527 for(register size_t p = 0; p <= uPartitions; p++)
2528 {
2529 if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2530 {
2531 std::swap(endPts[p].A.r, endPts[p].B.r);
2532 std::swap(endPts[p].A.g, endPts[p].B.g);
2533 std::swap(endPts[p].A.b, endPts[p].B.b);
2534 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2535 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2536 aIndices[i] = uNumIndices - 1 - aIndices[i];
2537 }
2538 assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2539
2540 if(aIndices2[0] & uHighestIndexBit2)
2541 {
2542 std::swap(endPts[p].A.a, endPts[p].B.a);
2543 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2544 aIndices2[i] = uNumIndices2 - 1 - aIndices2[i];
2545 }
2546 assert((aIndices2[0] & uHighestIndexBit2) == 0);
2547 }
2548 }
2549 }
2550
2551 _Use_decl_annotations_
EmitBlock(const EncodeParams * pEP,size_t uShape,size_t uRotation,size_t uIndexMode,const LDREndPntPair aEndPts[],const size_t aIndex[],const size_t aIndex2[])2552 void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[])
2553 {
2554 assert( pEP );
2555 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2556 assert( uPartitions < BC7_MAX_REGIONS );
2557 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2558
2559 const size_t uPBits = ms_aInfo[pEP->uMode].uPBits;
2560 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
2561 const size_t uIndexPrec2 = ms_aInfo[pEP->uMode].uIndexPrec2;
2562 const LDRColorA RGBAPrec = ms_aInfo[pEP->uMode].RGBAPrec;
2563 const LDRColorA RGBAPrecWithP = ms_aInfo[pEP->uMode].RGBAPrecWithP;
2564 register size_t i;
2565 size_t uStartBit = 0;
2566 SetBits(uStartBit, pEP->uMode, 0);
2567 SetBits(uStartBit, 1, 1);
2568 SetBits(uStartBit, ms_aInfo[pEP->uMode].uRotationBits, static_cast<uint8_t>( uRotation ));
2569 SetBits(uStartBit, ms_aInfo[pEP->uMode].uIndexModeBits, static_cast<uint8_t>( uIndexMode ));
2570 SetBits(uStartBit, ms_aInfo[pEP->uMode].uPartitionBits, static_cast<uint8_t>( uShape ));
2571
2572 if(uPBits)
2573 {
2574 const size_t uNumEP = size_t(1 + uPartitions) << 1;
2575 uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2576 uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2577 for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2578 {
2579 uint8_t ep = 0;
2580 for(i = 0; i <= uPartitions; i++)
2581 {
2582 if(RGBAPrec[ch] == RGBAPrecWithP[ch])
2583 {
2584 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch]);
2585 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch]);
2586 }
2587 else
2588 {
2589 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] >> 1);
2590 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1);
2591 size_t idx = ep++ * uPBits / uNumEP;
2592 assert(idx < (BC7_MAX_REGIONS << 1));
2593 _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2594 aPVote[idx] += aEndPts[i].A[ch] & 0x01;
2595 aCount[idx]++;
2596 idx = ep++ * uPBits / uNumEP;
2597 assert(idx < (BC7_MAX_REGIONS << 1));
2598 _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2599 aPVote[idx] += aEndPts[i].B[ch] & 0x01;
2600 aCount[idx]++;
2601 }
2602 }
2603 }
2604
2605 for(i = 0; i < uPBits; i++)
2606 {
2607 SetBits(uStartBit, 1, aPVote[i] > (aCount[i] >> 1) ? 1 : 0);
2608 }
2609 }
2610 else
2611 {
2612 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2613 {
2614 for(i = 0; i <= uPartitions; i++)
2615 {
2616 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] );
2617 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] );
2618 }
2619 }
2620 }
2621
2622 const size_t* aI1 = uIndexMode ? aIndex2 : aIndex;
2623 const size_t* aI2 = uIndexMode ? aIndex : aIndex2;
2624 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2625 {
2626 if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, uShape, i))
2627 SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aI1[i] ));
2628 else
2629 SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aI1[i] ));
2630 }
2631 if(uIndexPrec2)
2632 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2633 SetBits(uStartBit, i ? uIndexPrec2 : uIndexPrec2 - 1, static_cast<uint8_t>( aI2[i] ));
2634
2635 assert(uStartBit == 128);
2636 }
2637
2638 _Use_decl_annotations_
Refine(const EncodeParams * pEP,size_t uShape,size_t uRotation,size_t uIndexMode)2639 float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode)
2640 {
2641 assert( pEP );
2642 assert( uShape < BC7_MAX_SHAPES );
2643 _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2644 const LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2645
2646 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2647 assert( uPartitions < BC7_MAX_REGIONS );
2648 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2649
2650 LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS];
2651 LDREndPntPair aOptEndPts[BC7_MAX_REGIONS];
2652 size_t aOrgIdx[NUM_PIXELS_PER_BLOCK];
2653 size_t aOrgIdx2[NUM_PIXELS_PER_BLOCK];
2654 size_t aOptIdx[NUM_PIXELS_PER_BLOCK];
2655 size_t aOptIdx2[NUM_PIXELS_PER_BLOCK];
2656 float aOrgErr[BC7_MAX_REGIONS];
2657 float aOptErr[BC7_MAX_REGIONS];
2658
2659 for(register size_t p = 0; p <= uPartitions; p++)
2660 {
2661 aOrgEndPts[p].A = Quantize(aEndPts[p].A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2662 aOrgEndPts[p].B = Quantize(aEndPts[p].B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2663 }
2664
2665 AssignIndices(pEP, uShape, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2, aOrgErr);
2666 OptimizeEndPoints(pEP, uShape, uIndexMode, aOrgErr, aOrgEndPts, aOptEndPts);
2667 AssignIndices(pEP, uShape, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2, aOptErr);
2668
2669 float fOrgTotErr = 0, fOptTotErr = 0;
2670 for(register size_t p = 0; p <= uPartitions; p++)
2671 {
2672 fOrgTotErr += aOrgErr[p];
2673 fOptTotErr += aOptErr[p];
2674 }
2675 if(fOptTotErr < fOrgTotErr)
2676 {
2677 EmitBlock(pEP, uShape, uRotation, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2);
2678 return fOptTotErr;
2679 }
2680 else
2681 {
2682 EmitBlock(pEP, uShape, uRotation, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2);
2683 return fOrgTotErr;
2684 }
2685 }
2686
2687 _Use_decl_annotations_
MapColors(const EncodeParams * pEP,const LDRColorA aColors[],size_t np,size_t uIndexMode,const LDREndPntPair & endPts,float fMinErr) const2688 float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const
2689 {
2690 assert( pEP );
2691 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2692 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2693 LDRColorA aPalette[BC7_MAX_INDICES];
2694 float fTotalErr = 0;
2695
2696 GeneratePaletteQuantized(pEP, uIndexMode, endPts, aPalette);
2697 for(register size_t i = 0; i < np; ++i)
2698 {
2699 fTotalErr += ComputeError(aColors[i], aPalette, uIndexPrec, uIndexPrec2);
2700 if(fTotalErr > fMinErr) // check for early exit
2701 {
2702 fTotalErr = FLT_MAX;
2703 break;
2704 }
2705 }
2706
2707 return fTotalErr;
2708 }
2709
2710 _Use_decl_annotations_
RoughMSE(EncodeParams * pEP,size_t uShape,size_t uIndexMode)2711 float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode)
2712 {
2713 assert( pEP );
2714 assert( uShape < BC7_MAX_SHAPES );
2715 _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2716 LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2717
2718 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2719 assert( uPartitions < BC7_MAX_REGIONS );
2720 _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2721
2722 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2723 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2724 const uint8_t uNumIndices = 1 << uIndexPrec;
2725 const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2726 size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
2727 LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2728
2729 for(size_t p = 0; p <= uPartitions; p++)
2730 {
2731 size_t np = 0;
2732 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2733 {
2734 if (g_aPartitionTable[uPartitions][uShape][i] == p)
2735 {
2736 auPixIdx[np++] = i;
2737 }
2738 }
2739
2740 // handle simple cases
2741 assert(np > 0);
2742 if(np == 1)
2743 {
2744 aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2745 aEndPts[p].B = pEP->aLDRPixels[auPixIdx[0]];
2746 continue;
2747 }
2748 else if(np == 2)
2749 {
2750 aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2751 aEndPts[p].B = pEP->aLDRPixels[auPixIdx[1]];
2752 continue;
2753 }
2754
2755 if(uIndexPrec2 == 0)
2756 {
2757 HDRColorA epA, epB;
2758 OptimizeRGBA(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2759 epA.Clamp(0.0f, 1.0f);
2760 epB.Clamp(0.0f, 1.0f);
2761 epA *= 255.0f;
2762 epB *= 255.0f;
2763 aEndPts[p].A = epA.ToLDRColorA();
2764 aEndPts[p].B = epB.ToLDRColorA();
2765 }
2766 else
2767 {
2768 uint8_t uMinAlpha = 255, uMaxAlpha = 0;
2769 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2770 {
2771 uMinAlpha = std::min<uint8_t>(uMinAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2772 uMaxAlpha = std::max<uint8_t>(uMaxAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2773 }
2774
2775 HDRColorA epA, epB;
2776 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2777 epA.Clamp(0.0f, 1.0f);
2778 epB.Clamp(0.0f, 1.0f);
2779 epA *= 255.0f;
2780 epB *= 255.0f;
2781 aEndPts[p].A = epA.ToLDRColorA();
2782 aEndPts[p].B = epB.ToLDRColorA();
2783 aEndPts[p].A.a = uMinAlpha;
2784 aEndPts[p].B.a = uMaxAlpha;
2785 }
2786 }
2787
2788 if(uIndexPrec2 == 0)
2789 {
2790 for(size_t p = 0; p <= uPartitions; p++)
2791 for(register size_t i = 0; i < uNumIndices; i++)
2792 LDRColorA::Interpolate(aEndPts[p].A, aEndPts[p].B, i, i, uIndexPrec, uIndexPrec, aPalette[p][i]);
2793 }
2794 else
2795 {
2796 for(size_t p = 0; p <= uPartitions; p++)
2797 {
2798 for(register size_t i = 0; i < uNumIndices; i++)
2799 LDRColorA::InterpolateRGB(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec, aPalette[p][i]);
2800 for(register size_t i = 0; i < uNumIndices2; i++)
2801 LDRColorA::InterpolateA(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec2, aPalette[p][i]);
2802 }
2803 }
2804
2805 float fTotalErr = 0;
2806 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2807 {
2808 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2809 fTotalErr += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2);
2810 }
2811
2812 return fTotalErr;
2813 }
2814
2815 //=====================================================================================
2816 // Entry points
2817 //=====================================================================================
2818
2819 //-------------------------------------------------------------------------------------
2820 // BC6H Compression
2821 //-------------------------------------------------------------------------------------
2822 _Use_decl_annotations_
D3DXDecodeBC6HU(XMVECTOR * pColor,const uint8_t * pBC)2823 void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC)
2824 {
2825 assert( pColor && pBC );
2826 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2827 reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(false, reinterpret_cast<HDRColorA*>(pColor));
2828 }
2829
2830 _Use_decl_annotations_
D3DXDecodeBC6HS(XMVECTOR * pColor,const uint8_t * pBC)2831 void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC)
2832 {
2833 assert( pColor && pBC );
2834 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2835 reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor));
2836 }
2837
2838 _Use_decl_annotations_
D3DXEncodeBC6HU(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2839 void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2840 {
2841 UNREFERENCED_PARAMETER(flags);
2842 assert( pBC && pColor );
2843 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2844 reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(false, reinterpret_cast<const HDRColorA*>(pColor));
2845 }
2846
2847 _Use_decl_annotations_
D3DXEncodeBC6HS(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2848 void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2849 {
2850 UNREFERENCED_PARAMETER(flags);
2851 assert( pBC && pColor );
2852 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2853 reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(true, reinterpret_cast<const HDRColorA*>(pColor));
2854 }
2855
2856
2857 //-------------------------------------------------------------------------------------
2858 // BC7 Compression
2859 //-------------------------------------------------------------------------------------
2860 _Use_decl_annotations_
D3DXDecodeBC7(XMVECTOR * pColor,const uint8_t * pBC)2861 void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC)
2862 {
2863 assert( pColor && pBC );
2864 static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2865 reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(reinterpret_cast<HDRColorA*>(pColor));
2866 }
2867
2868 _Use_decl_annotations_
D3DXEncodeBC7(uint8_t * pBC,const XMVECTOR * pColor,DWORD flags)2869 void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2870 {
2871 assert( pBC && pColor );
2872 static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2873 reinterpret_cast< D3DX_BC7* >( pBC )->Encode( !(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor));
2874 }
2875
2876 } // namespace
2877