1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for generating partition tables on demand.
20  */
21 
22 #include "astc_codec_internals.h"
23 
24 /*
25 	Produce a canonicalized representation of a partition pattern
26 
27 	The largest possible such representation is 432 bits, equal to 7 uint64_t values.
28 */
gen_canonicalized_partition_table(int texel_count,const uint8_t * partition_table,uint64_t canonicalized[7])29 static void gen_canonicalized_partition_table(
30 	int texel_count,
31 	const uint8_t* partition_table,
32 	uint64_t canonicalized[7]
33 ) {
34 	int i;
35 	for (i = 0; i < 7; i++)
36 		canonicalized[i] = 0;
37 
38 	int mapped_index[4];
39 	int map_weight_count = 0;
40 	for (i = 0; i < 4; i++)
41 		mapped_index[i] = -1;
42 
43 	for (i = 0; i < texel_count; i++)
44 	{
45 		int index = partition_table[i];
46 		if (mapped_index[index] == -1)
47 			mapped_index[index] = map_weight_count++;
48 		uint64_t xlat_index = mapped_index[index];
49 		canonicalized[i >> 5] |= xlat_index << (2 * (i & 0x1F));
50 	}
51 }
52 
compare_canonicalized_partition_tables(const uint64_t part1[7],const uint64_t part2[7])53 static int compare_canonicalized_partition_tables(
54 	const uint64_t part1[7],
55 	const uint64_t part2[7]
56 ) {
57 	if (part1[0] != part2[0])
58 		return 0;
59 	if (part1[1] != part2[1])
60 		return 0;
61 	if (part1[2] != part2[2])
62 		return 0;
63 	if (part1[3] != part2[3])
64 		return 0;
65 	if (part1[4] != part2[4])
66 		return 0;
67 	if (part1[5] != part2[5])
68 		return 0;
69 	if (part1[6] != part2[6])
70 		return 0;
71 	return 1;
72 }
73 
74 /*
75    For a partition table, detect partitionss that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode
76    performance. */
partition_table_zap_equal_elements(int texel_count,partition_info * pi)77 static void partition_table_zap_equal_elements(
78 	int texel_count,
79 	partition_info* pi
80 ) {
81 	int partition_tables_zapped = 0;
82 	int i, j;
83 	uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];
84 
85 
86 	for (i = 0; i < PARTITION_COUNT; i++)
87 	{
88 		gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
89 	}
90 
91 	for (i = 0; i < PARTITION_COUNT; i++)
92 	{
93 		for (j = 0; j < i; j++)
94 		{
95 			if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
96 			{
97 				pi[i].partition_count = 0;
98 				partition_tables_zapped++;
99 				break;
100 			}
101 		}
102 	}
103 	delete[]canonicalizeds;
104 }
105 
hash52(uint32_t inp)106 static uint32_t hash52(uint32_t inp)
107 {
108 	inp ^= inp >> 15;
109 
110 	inp *= 0xEEDE0891;			// (2^4+1)*(2^7+1)*(2^17-1)
111 	inp ^= inp >> 5;
112 	inp += inp << 16;
113 	inp ^= inp >> 7;
114 	inp ^= inp >> 3;
115 	inp ^= inp << 6;
116 	inp ^= inp >> 17;
117 	return inp;
118 }
119 
select_partition(int seed,int x,int y,int z,int partitioncount,int small_block)120 static int select_partition(
121 	int seed,
122 	int x,
123 	int y,
124 	int z,
125 	int partitioncount,
126 	int small_block
127 ) {
128 	if (small_block)
129 	{
130 		x <<= 1;
131 		y <<= 1;
132 		z <<= 1;
133 	}
134 
135 	seed += (partitioncount - 1) * 1024;
136 
137 	uint32_t rnum = hash52(seed);
138 
139 	uint8_t seed1 = rnum & 0xF;
140 	uint8_t seed2 = (rnum >> 4) & 0xF;
141 	uint8_t seed3 = (rnum >> 8) & 0xF;
142 	uint8_t seed4 = (rnum >> 12) & 0xF;
143 	uint8_t seed5 = (rnum >> 16) & 0xF;
144 	uint8_t seed6 = (rnum >> 20) & 0xF;
145 	uint8_t seed7 = (rnum >> 24) & 0xF;
146 	uint8_t seed8 = (rnum >> 28) & 0xF;
147 	uint8_t seed9 = (rnum >> 18) & 0xF;
148 	uint8_t seed10 = (rnum >> 22) & 0xF;
149 	uint8_t seed11 = (rnum >> 26) & 0xF;
150 	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
151 
152 	// squaring all the seeds in order to bias their distribution
153 	// towards lower values.
154 	seed1 *= seed1;
155 	seed2 *= seed2;
156 	seed3 *= seed3;
157 	seed4 *= seed4;
158 	seed5 *= seed5;
159 	seed6 *= seed6;
160 	seed7 *= seed7;
161 	seed8 *= seed8;
162 	seed9 *= seed9;
163 	seed10 *= seed10;
164 	seed11 *= seed11;
165 	seed12 *= seed12;
166 
167 	int sh1, sh2, sh3;
168 	if (seed & 1)
169 	{
170 		sh1 = (seed & 2 ? 4 : 5);
171 		sh2 = (partitioncount == 3 ? 6 : 5);
172 	}
173 	else
174 	{
175 		sh1 = (partitioncount == 3 ? 6 : 5);
176 		sh2 = (seed & 2 ? 4 : 5);
177 	}
178 	sh3 = (seed & 0x10) ? sh1 : sh2;
179 
180 	seed1 >>= sh1;
181 	seed2 >>= sh2;
182 	seed3 >>= sh1;
183 	seed4 >>= sh2;
184 	seed5 >>= sh1;
185 	seed6 >>= sh2;
186 	seed7 >>= sh1;
187 	seed8 >>= sh2;
188 
189 	seed9 >>= sh3;
190 	seed10 >>= sh3;
191 	seed11 >>= sh3;
192 	seed12 >>= sh3;
193 
194 	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
195 	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
196 	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
197 	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
198 
199 	// apply the saw
200 	a &= 0x3F;
201 	b &= 0x3F;
202 	c &= 0x3F;
203 	d &= 0x3F;
204 
205 	// remove some of the components if we are to output < 4 partitions.
206 	if (partitioncount <= 3)
207 		d = 0;
208 	if (partitioncount <= 2)
209 		c = 0;
210 	if (partitioncount <= 1)
211 		b = 0;
212 
213 	int partition;
214 	if (a >= b && a >= c && a >= d)
215 		partition = 0;
216 	else if (b >= c && b >= d)
217 		partition = 1;
218 	else if (c >= d)
219 		partition = 2;
220 	else
221 		partition = 3;
222 	return partition;
223 }
224 
generate_one_partition_table(const block_size_descriptor * bsd,int partition_count,int partition_index,partition_info * pt)225 static void generate_one_partition_table(
226 	const block_size_descriptor* bsd,
227 	int partition_count,
228 	int partition_index,
229 	partition_info* pt
230 ) {
231 	int texels_per_block = bsd->texel_count;
232 	int small_block = texels_per_block < 32;
233 
234 	uint8_t *partition_of_texel = pt->partition_of_texel;
235 	int x, y, z, i;
236 
237 	for (z = 0; z < bsd->zdim; z++)
238 		for (y = 0; y <  bsd->ydim; y++)
239 			for (x = 0; x <  bsd->xdim; x++)
240 			{
241 				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
242 				*partition_of_texel++ = part;
243 			}
244 
245 	int counts[4];
246 	for (i = 0; i < 4; i++)
247 		counts[i] = 0;
248 
249 	for (i = 0; i < texels_per_block; i++)
250 	{
251 		int partition = pt->partition_of_texel[i];
252 		counts[partition]++;
253 	}
254 
255 	if (counts[0] == 0)
256 		pt->partition_count = 0;
257 	else if (counts[1] == 0)
258 		pt->partition_count = 1;
259 	else if (counts[2] == 0)
260 		pt->partition_count = 2;
261 	else if (counts[3] == 0)
262 		pt->partition_count = 3;
263 	else
264 		pt->partition_count = 4;
265 }
266 
267 /* Public function, see header file for detailed documentation */
init_partition_tables(block_size_descriptor * bsd)268 void init_partition_tables(
269 	block_size_descriptor* bsd
270 ) {
271 	partition_info *par_tab2 = bsd->partitions;
272 	partition_info *par_tab3 = par_tab2 + PARTITION_COUNT;
273 	partition_info *par_tab4 = par_tab3 + PARTITION_COUNT;
274 	partition_info *par_tab1 = par_tab4 + PARTITION_COUNT;
275 
276 	generate_one_partition_table(bsd, 1, 0, par_tab1);
277 	for (int i = 0; i < 1024; i++)
278 	{
279 		generate_one_partition_table(bsd, 2, i, par_tab2 + i);
280 		generate_one_partition_table(bsd, 3, i, par_tab3 + i);
281 		generate_one_partition_table(bsd, 4, i, par_tab4 + i);
282 	}
283 
284 	partition_table_zap_equal_elements(bsd->texel_count, par_tab2);
285 	partition_table_zap_equal_elements(bsd->texel_count, par_tab3);
286 	partition_table_zap_equal_elements(bsd->texel_count, par_tab4);
287 }
288