1 /*************************************************************************/
2 /*  image_compress_cvtt.cpp                                              */
3 /*************************************************************************/
4 /*                       This file is part of:                           */
5 /*                           GODOT ENGINE                                */
6 /*                      https://godotengine.org                          */
7 /*************************************************************************/
8 /* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
9 /* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
10 /*                                                                       */
11 /* Permission is hereby granted, free of charge, to any person obtaining */
12 /* a copy of this software and associated documentation files (the       */
13 /* "Software"), to deal in the Software without restriction, including   */
14 /* without limitation the rights to use, copy, modify, merge, publish,   */
15 /* distribute, sublicense, and/or sell copies of the Software, and to    */
16 /* permit persons to whom the Software is furnished to do so, subject to */
17 /* the following conditions:                                             */
18 /*                                                                       */
19 /* The above copyright notice and this permission notice shall be        */
20 /* included in all copies or substantial portions of the Software.       */
21 /*                                                                       */
22 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
23 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
24 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
25 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
26 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
27 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
28 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
29 /*************************************************************************/
30 
31 #include "image_compress_cvtt.h"
32 
33 #include "core/os/os.h"
34 #include "core/os/thread.h"
35 #include "core/print_string.h"
36 
37 #include <ConvectionKernels.h>
38 
39 struct CVTTCompressionJobParams {
40 	bool is_hdr;
41 	bool is_signed;
42 	int bytes_per_pixel;
43 
44 	cvtt::Options options;
45 };
46 
47 struct CVTTCompressionRowTask {
48 	const uint8_t *in_mm_bytes;
49 	uint8_t *out_mm_bytes;
50 	int y_start;
51 	int width;
52 	int height;
53 };
54 
55 struct CVTTCompressionJobQueue {
56 	CVTTCompressionJobParams job_params;
57 	const CVTTCompressionRowTask *job_tasks;
58 	uint32_t num_tasks;
59 	uint32_t current_task;
60 };
61 
_digest_row_task(const CVTTCompressionJobParams & p_job_params,const CVTTCompressionRowTask & p_row_task)62 static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const CVTTCompressionRowTask &p_row_task) {
63 	const uint8_t *in_bytes = p_row_task.in_mm_bytes;
64 	uint8_t *out_bytes = p_row_task.out_mm_bytes;
65 	int w = p_row_task.width;
66 	int h = p_row_task.height;
67 
68 	int y_start = p_row_task.y_start;
69 	int y_end = y_start + 4;
70 
71 	int bytes_per_pixel = p_job_params.bytes_per_pixel;
72 	bool is_hdr = p_job_params.is_hdr;
73 	bool is_signed = p_job_params.is_signed;
74 
75 	cvtt::PixelBlockU8 input_blocks_ldr[cvtt::NumParallelBlocks];
76 	cvtt::PixelBlockF16 input_blocks_hdr[cvtt::NumParallelBlocks];
77 
78 	for (int x_start = 0; x_start < w; x_start += 4 * cvtt::NumParallelBlocks) {
79 		int x_end = x_start + 4 * cvtt::NumParallelBlocks;
80 
81 		for (int y = y_start; y < y_end; y++) {
82 			int first_input_element = (y - y_start) * 4;
83 			const uint8_t *row_start;
84 			if (y >= h) {
85 				row_start = in_bytes + (h - 1) * (w * bytes_per_pixel);
86 			} else {
87 				row_start = in_bytes + y * (w * bytes_per_pixel);
88 			}
89 
90 			for (int x = x_start; x < x_end; x++) {
91 				const uint8_t *pixel_start;
92 				if (x >= w) {
93 					pixel_start = row_start + (w - 1) * bytes_per_pixel;
94 				} else {
95 					pixel_start = row_start + x * bytes_per_pixel;
96 				}
97 
98 				int block_index = (x - x_start) / 4;
99 				int block_element = (x - x_start) % 4 + first_input_element;
100 				if (is_hdr) {
101 					memcpy(input_blocks_hdr[block_index].m_pixels[block_element], pixel_start, bytes_per_pixel);
102 					input_blocks_hdr[block_index].m_pixels[block_element][3] = 0x3c00; // 1.0 (unused)
103 				} else {
104 					memcpy(input_blocks_ldr[block_index].m_pixels[block_element], pixel_start, bytes_per_pixel);
105 				}
106 			}
107 		}
108 
109 		uint8_t output_blocks[16 * cvtt::NumParallelBlocks];
110 
111 		if (is_hdr) {
112 			if (is_signed) {
113 				cvtt::Kernels::EncodeBC6HS(output_blocks, input_blocks_hdr, p_job_params.options);
114 			} else {
115 				cvtt::Kernels::EncodeBC6HU(output_blocks, input_blocks_hdr, p_job_params.options);
116 			}
117 		} else {
118 			cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options);
119 		}
120 
121 		unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
122 		if (num_real_blocks > cvtt::NumParallelBlocks) {
123 			num_real_blocks = cvtt::NumParallelBlocks;
124 		}
125 
126 		memcpy(out_bytes, output_blocks, 16 * num_real_blocks);
127 		out_bytes += 16 * num_real_blocks;
128 	}
129 }
130 
_digest_job_queue(void * p_job_queue)131 static void _digest_job_queue(void *p_job_queue) {
132 	CVTTCompressionJobQueue *job_queue = static_cast<CVTTCompressionJobQueue *>(p_job_queue);
133 
134 	for (uint32_t next_task = atomic_increment(&job_queue->current_task); next_task <= job_queue->num_tasks; next_task = atomic_increment(&job_queue->current_task)) {
135 		_digest_row_task(job_queue->job_params, job_queue->job_tasks[next_task - 1]);
136 	}
137 }
138 
image_compress_cvtt(Image * p_image,float p_lossy_quality,Image::CompressSource p_source)139 void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::CompressSource p_source) {
140 
141 	if (p_image->get_format() >= Image::FORMAT_BPTC_RGBA)
142 		return; //do not compress, already compressed
143 
144 	int w = p_image->get_width();
145 	int h = p_image->get_height();
146 
147 	bool is_ldr = (p_image->get_format() <= Image::FORMAT_RGBA8);
148 	bool is_hdr = (p_image->get_format() >= Image::FORMAT_RH) && (p_image->get_format() <= Image::FORMAT_RGBE9995);
149 
150 	if (!is_ldr && !is_hdr) {
151 		return; // Not a usable source format
152 	}
153 
154 	cvtt::Options options;
155 	uint32_t flags = cvtt::Flags::Fastest;
156 
157 	if (p_lossy_quality > 0.85)
158 		flags = cvtt::Flags::Ultra;
159 	else if (p_lossy_quality > 0.75)
160 		flags = cvtt::Flags::Better;
161 	else if (p_lossy_quality > 0.55)
162 		flags = cvtt::Flags::Default;
163 	else if (p_lossy_quality > 0.35)
164 		flags = cvtt::Flags::Fast;
165 	else if (p_lossy_quality > 0.15)
166 		flags = cvtt::Flags::Faster;
167 
168 	flags |= cvtt::Flags::BC7_RespectPunchThrough;
169 
170 	if (p_source == Image::COMPRESS_SOURCE_NORMAL) {
171 		flags |= cvtt::Flags::Uniform;
172 	}
173 
174 	Image::Format target_format = Image::FORMAT_BPTC_RGBA;
175 
176 	bool is_signed = false;
177 	if (is_hdr) {
178 		if (p_image->get_format() != Image::FORMAT_RGBH) {
179 			p_image->convert(Image::FORMAT_RGBH);
180 		}
181 
182 		PoolVector<uint8_t>::Read rb = p_image->get_data().read();
183 
184 		const uint16_t *source_data = reinterpret_cast<const uint16_t *>(&rb[0]);
185 		int pixel_element_count = w * h * 3;
186 		for (int i = 0; i < pixel_element_count; i++) {
187 			if ((source_data[i] & 0x8000) != 0 && (source_data[i] & 0x7fff) != 0) {
188 				is_signed = true;
189 				break;
190 			}
191 		}
192 
193 		target_format = is_signed ? Image::FORMAT_BPTC_RGBF : Image::FORMAT_BPTC_RGBFU;
194 	} else {
195 		p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
196 	}
197 
198 	PoolVector<uint8_t>::Read rb = p_image->get_data().read();
199 
200 	PoolVector<uint8_t> data;
201 	int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
202 	int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
203 	data.resize(target_size);
204 	int shift = Image::get_format_pixel_rshift(target_format);
205 
206 	PoolVector<uint8_t>::Write wb = data.write();
207 
208 	int dst_ofs = 0;
209 
210 	CVTTCompressionJobQueue job_queue;
211 	job_queue.job_params.is_hdr = is_hdr;
212 	job_queue.job_params.is_signed = is_signed;
213 	job_queue.job_params.options = options;
214 	job_queue.job_params.bytes_per_pixel = is_hdr ? 6 : 4;
215 
216 #ifdef NO_THREADS
217 	int num_job_threads = 0;
218 #else
219 	int num_job_threads = OS::get_singleton()->can_use_threads() ? (OS::get_singleton()->get_processor_count() - 1) : 0;
220 #endif
221 
222 	PoolVector<CVTTCompressionRowTask> tasks;
223 
224 	for (int i = 0; i <= mm_count; i++) {
225 
226 		int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
227 		int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
228 
229 		int src_ofs = p_image->get_mipmap_offset(i);
230 
231 		const uint8_t *in_bytes = &rb[src_ofs];
232 		uint8_t *out_bytes = &wb[dst_ofs];
233 
234 		for (int y_start = 0; y_start < h; y_start += 4) {
235 			CVTTCompressionRowTask row_task;
236 			row_task.width = w;
237 			row_task.height = h;
238 			row_task.y_start = y_start;
239 			row_task.in_mm_bytes = in_bytes;
240 			row_task.out_mm_bytes = out_bytes;
241 
242 			if (num_job_threads > 0) {
243 				tasks.push_back(row_task);
244 			} else {
245 				_digest_row_task(job_queue.job_params, row_task);
246 			}
247 
248 			out_bytes += 16 * (bw / 4);
249 		}
250 
251 		dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
252 		w = MAX(w / 2, 1);
253 		h = MAX(h / 2, 1);
254 	}
255 
256 	if (num_job_threads > 0) {
257 		PoolVector<Thread *> threads;
258 		threads.resize(num_job_threads);
259 
260 		PoolVector<Thread *>::Write threads_wb = threads.write();
261 
262 		PoolVector<CVTTCompressionRowTask>::Read tasks_rb = tasks.read();
263 
264 		job_queue.job_tasks = &tasks_rb[0];
265 		job_queue.current_task = 0;
266 		job_queue.num_tasks = static_cast<uint32_t>(tasks.size());
267 
268 		for (int i = 0; i < num_job_threads; i++) {
269 			threads_wb[i] = Thread::create(_digest_job_queue, &job_queue);
270 		}
271 		_digest_job_queue(&job_queue);
272 
273 		for (int i = 0; i < num_job_threads; i++) {
274 			Thread::wait_to_finish(threads_wb[i]);
275 			memdelete(threads_wb[i]);
276 		}
277 	}
278 
279 	p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
280 }
281 
image_decompress_cvtt(Image * p_image)282 void image_decompress_cvtt(Image *p_image) {
283 
284 	Image::Format target_format;
285 	bool is_signed = false;
286 	bool is_hdr = false;
287 
288 	Image::Format input_format = p_image->get_format();
289 
290 	switch (input_format) {
291 		case Image::FORMAT_BPTC_RGBA:
292 			target_format = Image::FORMAT_RGBA8;
293 			break;
294 		case Image::FORMAT_BPTC_RGBF:
295 		case Image::FORMAT_BPTC_RGBFU:
296 			target_format = Image::FORMAT_RGBH;
297 			is_signed = (input_format == Image::FORMAT_BPTC_RGBF);
298 			is_hdr = true;
299 			break;
300 		default:
301 			return; // Invalid input format
302 	};
303 
304 	int w = p_image->get_width();
305 	int h = p_image->get_height();
306 
307 	PoolVector<uint8_t>::Read rb = p_image->get_data().read();
308 
309 	PoolVector<uint8_t> data;
310 	int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
311 	int mm_count = p_image->get_mipmap_count();
312 	data.resize(target_size);
313 
314 	PoolVector<uint8_t>::Write wb = data.write();
315 
316 	int bytes_per_pixel = is_hdr ? 6 : 4;
317 
318 	int dst_ofs = 0;
319 
320 	for (int i = 0; i <= mm_count; i++) {
321 
322 		int src_ofs = p_image->get_mipmap_offset(i);
323 
324 		const uint8_t *in_bytes = &rb[src_ofs];
325 		uint8_t *out_bytes = &wb[dst_ofs];
326 
327 		cvtt::PixelBlockU8 output_blocks_ldr[cvtt::NumParallelBlocks];
328 		cvtt::PixelBlockF16 output_blocks_hdr[cvtt::NumParallelBlocks];
329 
330 		for (int y_start = 0; y_start < h; y_start += 4) {
331 			int y_end = y_start + 4;
332 
333 			for (int x_start = 0; x_start < w; x_start += 4 * cvtt::NumParallelBlocks) {
334 				int x_end = x_start + 4 * cvtt::NumParallelBlocks;
335 
336 				uint8_t input_blocks[16 * cvtt::NumParallelBlocks];
337 				memset(input_blocks, 0, sizeof(input_blocks));
338 
339 				unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
340 				if (num_real_blocks > cvtt::NumParallelBlocks) {
341 					num_real_blocks = cvtt::NumParallelBlocks;
342 				}
343 
344 				memcpy(input_blocks, in_bytes, 16 * num_real_blocks);
345 				in_bytes += 16 * num_real_blocks;
346 
347 				if (is_hdr) {
348 					if (is_signed) {
349 						cvtt::Kernels::DecodeBC6HS(output_blocks_hdr, input_blocks);
350 					} else {
351 						cvtt::Kernels::DecodeBC6HU(output_blocks_hdr, input_blocks);
352 					}
353 				} else {
354 					cvtt::Kernels::DecodeBC7(output_blocks_ldr, input_blocks);
355 				}
356 
357 				for (int y = y_start; y < y_end; y++) {
358 					int first_input_element = (y - y_start) * 4;
359 					uint8_t *row_start;
360 					if (y >= h) {
361 						row_start = out_bytes + (h - 1) * (w * bytes_per_pixel);
362 					} else {
363 						row_start = out_bytes + y * (w * bytes_per_pixel);
364 					}
365 
366 					for (int x = x_start; x < x_end; x++) {
367 						uint8_t *pixel_start;
368 						if (x >= w) {
369 							pixel_start = row_start + (w - 1) * bytes_per_pixel;
370 						} else {
371 							pixel_start = row_start + x * bytes_per_pixel;
372 						}
373 
374 						int block_index = (x - x_start) / 4;
375 						int block_element = (x - x_start) % 4 + first_input_element;
376 						if (is_hdr) {
377 							memcpy(pixel_start, output_blocks_hdr[block_index].m_pixels[block_element], bytes_per_pixel);
378 						} else {
379 							memcpy(pixel_start, output_blocks_ldr[block_index].m_pixels[block_element], bytes_per_pixel);
380 						}
381 					}
382 				}
383 			}
384 		}
385 
386 		dst_ofs += w * h * bytes_per_pixel;
387 		w >>= 1;
388 		h >>= 1;
389 	}
390 
391 	rb.release();
392 	wb.release();
393 
394 	p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
395 }
396