1 /*************************************************************************/
2 /* image_compress_cvtt.cpp */
3 /*************************************************************************/
4 /* This file is part of: */
5 /* GODOT ENGINE */
6 /* https://godotengine.org */
7 /*************************************************************************/
8 /* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */
9 /* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */
10 /* */
11 /* Permission is hereby granted, free of charge, to any person obtaining */
12 /* a copy of this software and associated documentation files (the */
13 /* "Software"), to deal in the Software without restriction, including */
14 /* without limitation the rights to use, copy, modify, merge, publish, */
15 /* distribute, sublicense, and/or sell copies of the Software, and to */
16 /* permit persons to whom the Software is furnished to do so, subject to */
17 /* the following conditions: */
18 /* */
19 /* The above copyright notice and this permission notice shall be */
20 /* included in all copies or substantial portions of the Software. */
21 /* */
22 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
25 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29 /*************************************************************************/
30
31 #include "image_compress_cvtt.h"
32
33 #include "core/os/os.h"
34 #include "core/os/thread.h"
35 #include "core/print_string.h"
36
37 #include <ConvectionKernels.h>
38
39 struct CVTTCompressionJobParams {
40 bool is_hdr;
41 bool is_signed;
42 int bytes_per_pixel;
43
44 cvtt::Options options;
45 };
46
47 struct CVTTCompressionRowTask {
48 const uint8_t *in_mm_bytes;
49 uint8_t *out_mm_bytes;
50 int y_start;
51 int width;
52 int height;
53 };
54
55 struct CVTTCompressionJobQueue {
56 CVTTCompressionJobParams job_params;
57 const CVTTCompressionRowTask *job_tasks;
58 uint32_t num_tasks;
59 uint32_t current_task;
60 };
61
_digest_row_task(const CVTTCompressionJobParams & p_job_params,const CVTTCompressionRowTask & p_row_task)62 static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const CVTTCompressionRowTask &p_row_task) {
63 const uint8_t *in_bytes = p_row_task.in_mm_bytes;
64 uint8_t *out_bytes = p_row_task.out_mm_bytes;
65 int w = p_row_task.width;
66 int h = p_row_task.height;
67
68 int y_start = p_row_task.y_start;
69 int y_end = y_start + 4;
70
71 int bytes_per_pixel = p_job_params.bytes_per_pixel;
72 bool is_hdr = p_job_params.is_hdr;
73 bool is_signed = p_job_params.is_signed;
74
75 cvtt::PixelBlockU8 input_blocks_ldr[cvtt::NumParallelBlocks];
76 cvtt::PixelBlockF16 input_blocks_hdr[cvtt::NumParallelBlocks];
77
78 for (int x_start = 0; x_start < w; x_start += 4 * cvtt::NumParallelBlocks) {
79 int x_end = x_start + 4 * cvtt::NumParallelBlocks;
80
81 for (int y = y_start; y < y_end; y++) {
82 int first_input_element = (y - y_start) * 4;
83 const uint8_t *row_start;
84 if (y >= h) {
85 row_start = in_bytes + (h - 1) * (w * bytes_per_pixel);
86 } else {
87 row_start = in_bytes + y * (w * bytes_per_pixel);
88 }
89
90 for (int x = x_start; x < x_end; x++) {
91 const uint8_t *pixel_start;
92 if (x >= w) {
93 pixel_start = row_start + (w - 1) * bytes_per_pixel;
94 } else {
95 pixel_start = row_start + x * bytes_per_pixel;
96 }
97
98 int block_index = (x - x_start) / 4;
99 int block_element = (x - x_start) % 4 + first_input_element;
100 if (is_hdr) {
101 memcpy(input_blocks_hdr[block_index].m_pixels[block_element], pixel_start, bytes_per_pixel);
102 input_blocks_hdr[block_index].m_pixels[block_element][3] = 0x3c00; // 1.0 (unused)
103 } else {
104 memcpy(input_blocks_ldr[block_index].m_pixels[block_element], pixel_start, bytes_per_pixel);
105 }
106 }
107 }
108
109 uint8_t output_blocks[16 * cvtt::NumParallelBlocks];
110
111 if (is_hdr) {
112 if (is_signed) {
113 cvtt::Kernels::EncodeBC6HS(output_blocks, input_blocks_hdr, p_job_params.options);
114 } else {
115 cvtt::Kernels::EncodeBC6HU(output_blocks, input_blocks_hdr, p_job_params.options);
116 }
117 } else {
118 cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options);
119 }
120
121 unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
122 if (num_real_blocks > cvtt::NumParallelBlocks) {
123 num_real_blocks = cvtt::NumParallelBlocks;
124 }
125
126 memcpy(out_bytes, output_blocks, 16 * num_real_blocks);
127 out_bytes += 16 * num_real_blocks;
128 }
129 }
130
_digest_job_queue(void * p_job_queue)131 static void _digest_job_queue(void *p_job_queue) {
132 CVTTCompressionJobQueue *job_queue = static_cast<CVTTCompressionJobQueue *>(p_job_queue);
133
134 for (uint32_t next_task = atomic_increment(&job_queue->current_task); next_task <= job_queue->num_tasks; next_task = atomic_increment(&job_queue->current_task)) {
135 _digest_row_task(job_queue->job_params, job_queue->job_tasks[next_task - 1]);
136 }
137 }
138
image_compress_cvtt(Image * p_image,float p_lossy_quality,Image::CompressSource p_source)139 void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::CompressSource p_source) {
140
141 if (p_image->get_format() >= Image::FORMAT_BPTC_RGBA)
142 return; //do not compress, already compressed
143
144 int w = p_image->get_width();
145 int h = p_image->get_height();
146
147 bool is_ldr = (p_image->get_format() <= Image::FORMAT_RGBA8);
148 bool is_hdr = (p_image->get_format() >= Image::FORMAT_RH) && (p_image->get_format() <= Image::FORMAT_RGBE9995);
149
150 if (!is_ldr && !is_hdr) {
151 return; // Not a usable source format
152 }
153
154 cvtt::Options options;
155 uint32_t flags = cvtt::Flags::Fastest;
156
157 if (p_lossy_quality > 0.85)
158 flags = cvtt::Flags::Ultra;
159 else if (p_lossy_quality > 0.75)
160 flags = cvtt::Flags::Better;
161 else if (p_lossy_quality > 0.55)
162 flags = cvtt::Flags::Default;
163 else if (p_lossy_quality > 0.35)
164 flags = cvtt::Flags::Fast;
165 else if (p_lossy_quality > 0.15)
166 flags = cvtt::Flags::Faster;
167
168 flags |= cvtt::Flags::BC7_RespectPunchThrough;
169
170 if (p_source == Image::COMPRESS_SOURCE_NORMAL) {
171 flags |= cvtt::Flags::Uniform;
172 }
173
174 Image::Format target_format = Image::FORMAT_BPTC_RGBA;
175
176 bool is_signed = false;
177 if (is_hdr) {
178 if (p_image->get_format() != Image::FORMAT_RGBH) {
179 p_image->convert(Image::FORMAT_RGBH);
180 }
181
182 PoolVector<uint8_t>::Read rb = p_image->get_data().read();
183
184 const uint16_t *source_data = reinterpret_cast<const uint16_t *>(&rb[0]);
185 int pixel_element_count = w * h * 3;
186 for (int i = 0; i < pixel_element_count; i++) {
187 if ((source_data[i] & 0x8000) != 0 && (source_data[i] & 0x7fff) != 0) {
188 is_signed = true;
189 break;
190 }
191 }
192
193 target_format = is_signed ? Image::FORMAT_BPTC_RGBF : Image::FORMAT_BPTC_RGBFU;
194 } else {
195 p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
196 }
197
198 PoolVector<uint8_t>::Read rb = p_image->get_data().read();
199
200 PoolVector<uint8_t> data;
201 int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
202 int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
203 data.resize(target_size);
204 int shift = Image::get_format_pixel_rshift(target_format);
205
206 PoolVector<uint8_t>::Write wb = data.write();
207
208 int dst_ofs = 0;
209
210 CVTTCompressionJobQueue job_queue;
211 job_queue.job_params.is_hdr = is_hdr;
212 job_queue.job_params.is_signed = is_signed;
213 job_queue.job_params.options = options;
214 job_queue.job_params.bytes_per_pixel = is_hdr ? 6 : 4;
215
216 #ifdef NO_THREADS
217 int num_job_threads = 0;
218 #else
219 int num_job_threads = OS::get_singleton()->can_use_threads() ? (OS::get_singleton()->get_processor_count() - 1) : 0;
220 #endif
221
222 PoolVector<CVTTCompressionRowTask> tasks;
223
224 for (int i = 0; i <= mm_count; i++) {
225
226 int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
227 int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
228
229 int src_ofs = p_image->get_mipmap_offset(i);
230
231 const uint8_t *in_bytes = &rb[src_ofs];
232 uint8_t *out_bytes = &wb[dst_ofs];
233
234 for (int y_start = 0; y_start < h; y_start += 4) {
235 CVTTCompressionRowTask row_task;
236 row_task.width = w;
237 row_task.height = h;
238 row_task.y_start = y_start;
239 row_task.in_mm_bytes = in_bytes;
240 row_task.out_mm_bytes = out_bytes;
241
242 if (num_job_threads > 0) {
243 tasks.push_back(row_task);
244 } else {
245 _digest_row_task(job_queue.job_params, row_task);
246 }
247
248 out_bytes += 16 * (bw / 4);
249 }
250
251 dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
252 w = MAX(w / 2, 1);
253 h = MAX(h / 2, 1);
254 }
255
256 if (num_job_threads > 0) {
257 PoolVector<Thread *> threads;
258 threads.resize(num_job_threads);
259
260 PoolVector<Thread *>::Write threads_wb = threads.write();
261
262 PoolVector<CVTTCompressionRowTask>::Read tasks_rb = tasks.read();
263
264 job_queue.job_tasks = &tasks_rb[0];
265 job_queue.current_task = 0;
266 job_queue.num_tasks = static_cast<uint32_t>(tasks.size());
267
268 for (int i = 0; i < num_job_threads; i++) {
269 threads_wb[i] = Thread::create(_digest_job_queue, &job_queue);
270 }
271 _digest_job_queue(&job_queue);
272
273 for (int i = 0; i < num_job_threads; i++) {
274 Thread::wait_to_finish(threads_wb[i]);
275 memdelete(threads_wb[i]);
276 }
277 }
278
279 p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
280 }
281
image_decompress_cvtt(Image * p_image)282 void image_decompress_cvtt(Image *p_image) {
283
284 Image::Format target_format;
285 bool is_signed = false;
286 bool is_hdr = false;
287
288 Image::Format input_format = p_image->get_format();
289
290 switch (input_format) {
291 case Image::FORMAT_BPTC_RGBA:
292 target_format = Image::FORMAT_RGBA8;
293 break;
294 case Image::FORMAT_BPTC_RGBF:
295 case Image::FORMAT_BPTC_RGBFU:
296 target_format = Image::FORMAT_RGBH;
297 is_signed = (input_format == Image::FORMAT_BPTC_RGBF);
298 is_hdr = true;
299 break;
300 default:
301 return; // Invalid input format
302 };
303
304 int w = p_image->get_width();
305 int h = p_image->get_height();
306
307 PoolVector<uint8_t>::Read rb = p_image->get_data().read();
308
309 PoolVector<uint8_t> data;
310 int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
311 int mm_count = p_image->get_mipmap_count();
312 data.resize(target_size);
313
314 PoolVector<uint8_t>::Write wb = data.write();
315
316 int bytes_per_pixel = is_hdr ? 6 : 4;
317
318 int dst_ofs = 0;
319
320 for (int i = 0; i <= mm_count; i++) {
321
322 int src_ofs = p_image->get_mipmap_offset(i);
323
324 const uint8_t *in_bytes = &rb[src_ofs];
325 uint8_t *out_bytes = &wb[dst_ofs];
326
327 cvtt::PixelBlockU8 output_blocks_ldr[cvtt::NumParallelBlocks];
328 cvtt::PixelBlockF16 output_blocks_hdr[cvtt::NumParallelBlocks];
329
330 for (int y_start = 0; y_start < h; y_start += 4) {
331 int y_end = y_start + 4;
332
333 for (int x_start = 0; x_start < w; x_start += 4 * cvtt::NumParallelBlocks) {
334 int x_end = x_start + 4 * cvtt::NumParallelBlocks;
335
336 uint8_t input_blocks[16 * cvtt::NumParallelBlocks];
337 memset(input_blocks, 0, sizeof(input_blocks));
338
339 unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
340 if (num_real_blocks > cvtt::NumParallelBlocks) {
341 num_real_blocks = cvtt::NumParallelBlocks;
342 }
343
344 memcpy(input_blocks, in_bytes, 16 * num_real_blocks);
345 in_bytes += 16 * num_real_blocks;
346
347 if (is_hdr) {
348 if (is_signed) {
349 cvtt::Kernels::DecodeBC6HS(output_blocks_hdr, input_blocks);
350 } else {
351 cvtt::Kernels::DecodeBC6HU(output_blocks_hdr, input_blocks);
352 }
353 } else {
354 cvtt::Kernels::DecodeBC7(output_blocks_ldr, input_blocks);
355 }
356
357 for (int y = y_start; y < y_end; y++) {
358 int first_input_element = (y - y_start) * 4;
359 uint8_t *row_start;
360 if (y >= h) {
361 row_start = out_bytes + (h - 1) * (w * bytes_per_pixel);
362 } else {
363 row_start = out_bytes + y * (w * bytes_per_pixel);
364 }
365
366 for (int x = x_start; x < x_end; x++) {
367 uint8_t *pixel_start;
368 if (x >= w) {
369 pixel_start = row_start + (w - 1) * bytes_per_pixel;
370 } else {
371 pixel_start = row_start + x * bytes_per_pixel;
372 }
373
374 int block_index = (x - x_start) / 4;
375 int block_element = (x - x_start) % 4 + first_input_element;
376 if (is_hdr) {
377 memcpy(pixel_start, output_blocks_hdr[block_index].m_pixels[block_element], bytes_per_pixel);
378 } else {
379 memcpy(pixel_start, output_blocks_ldr[block_index].m_pixels[block_element], bytes_per_pixel);
380 }
381 }
382 }
383 }
384 }
385
386 dst_ofs += w * h * bytes_per_pixel;
387 w >>= 1;
388 h >>= 1;
389 }
390
391 rb.release();
392 wb.release();
393
394 p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
395 }
396