1 //! Decoding of DXT (S3TC) compression
2 //!
3 //! DXT is an image format that supports lossy compression
4 //!
5 //! # Related Links
6 //! * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.
7 //!
8 //! Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9
10 use std::io::{self, Read, Seek, SeekFrom, Write};
11
12 use color::ColorType;
13 use image::{self, ImageDecoder, ImageDecoderExt, ImageError, ImageReadBuffer, ImageResult, Progress};
14
15 /// What version of DXT compression are we using?
16 /// Note that DXT2 and DXT4 are left away as they're
17 /// just DXT3 and DXT5 with premultiplied alpha
18 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
19 pub enum DXTVariant {
20 /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
21 /// compressed into an 8 byte block of DXT1 data
22 DXT1,
23 /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
24 /// compressed into a 16 byte block of DXT3 data
25 DXT3,
26 /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
27 /// compressed into a 16 byte block of DXT5 data
28 DXT5,
29 }
30
31 impl DXTVariant {
32 /// Returns the amount of bytes of raw image data
33 /// that is encoded in a single DXTn block
decoded_bytes_per_block(self) -> usize34 fn decoded_bytes_per_block(self) -> usize {
35 match self {
36 DXTVariant::DXT1 => 48,
37 DXTVariant::DXT3 | DXTVariant::DXT5 => 64,
38 }
39 }
40
41 /// Returns the amount of bytes per block of encoded DXTn data
encoded_bytes_per_block(self) -> usize42 fn encoded_bytes_per_block(self) -> usize {
43 match self {
44 DXTVariant::DXT1 => 8,
45 DXTVariant::DXT3 | DXTVariant::DXT5 => 16,
46 }
47 }
48
49 /// Returns the colortype that is stored in this DXT variant
colortype(self) -> ColorType50 pub fn colortype(self) -> ColorType {
51 match self {
52 DXTVariant::DXT1 => ColorType::RGB(8),
53 DXTVariant::DXT3 | DXTVariant::DXT5 => ColorType::RGBA(8),
54 }
55 }
56 }
57
58 /// DXT decoder
59 pub struct DXTDecoder<R: Read> {
60 inner: R,
61 width_blocks: u32,
62 height_blocks: u32,
63 variant: DXTVariant,
64 row: u32,
65 }
66
67 impl<R: Read> DXTDecoder<R> {
68 /// Create a new DXT decoder that decodes from the stream ```r```.
69 /// As DXT is often stored as raw buffers with the width/height
70 /// somewhere else the width and height of the image need
71 /// to be passed in ```width``` and ```height```, as well as the
72 /// DXT variant in ```variant```.
73 /// width and height are required to be powers of 2 and at least 4.
74 /// otherwise an error will be returned
new( r: R, width: u32, height: u32, variant: DXTVariant, ) -> Result<DXTDecoder<R>, ImageError>75 pub fn new(
76 r: R,
77 width: u32,
78 height: u32,
79 variant: DXTVariant,
80 ) -> Result<DXTDecoder<R>, ImageError> {
81 if width % 4 != 0 || height % 4 != 0 {
82 return Err(ImageError::DimensionError);
83 }
84 let width_blocks = width / 4;
85 let height_blocks = height / 4;
86 Ok(DXTDecoder {
87 inner: r,
88 width_blocks,
89 height_blocks,
90 variant,
91 row: 0,
92 })
93 }
94
read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize>95 fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
96 assert_eq!(buf.len() as u64, self.scanline_bytes());
97
98 let mut src =
99 vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
100 self.inner.read_exact(&mut src)?;
101 match self.variant {
102 DXTVariant::DXT1 => decode_dxt1_row(&src, buf),
103 DXTVariant::DXT3 => decode_dxt3_row(&src, buf),
104 DXTVariant::DXT5 => decode_dxt5_row(&src, buf),
105 }
106 self.row += 1;
107 Ok(buf.len())
108 }
109 }
110
111 // Note that, due to the way that DXT compression works, a scanline is considered to consist out of
112 // 4 lines of pixels.
113 impl<R: Read> ImageDecoder for DXTDecoder<R> {
114 type Reader = DXTReader<R>;
115
dimensions(&self) -> (u64, u64)116 fn dimensions(&self) -> (u64, u64) {
117 (self.width_blocks as u64 * 4, self.height_blocks as u64 * 4)
118 }
119
colortype(&self) -> ColorType120 fn colortype(&self) -> ColorType {
121 self.variant.colortype()
122 }
123
scanline_bytes(&self) -> u64124 fn scanline_bytes(&self) -> u64 {
125 self.variant.decoded_bytes_per_block() as u64 * self.width_blocks as u64
126 }
127
into_reader(self) -> ImageResult<Self::Reader>128 fn into_reader(self) -> ImageResult<Self::Reader> {
129 if self.total_bytes() > usize::max_value() as u64 {
130 return Err(ImageError::InsufficientMemory);
131 }
132
133 Ok(DXTReader {
134 buffer: ImageReadBuffer::new(self.scanline_bytes() as usize, self.total_bytes() as usize),
135 decoder: self,
136 })
137 }
138
read_image(mut self) -> ImageResult<Vec<u8>>139 fn read_image(mut self) -> ImageResult<Vec<u8>> {
140 if self.total_bytes() > usize::max_value() as u64 {
141 return Err(ImageError::InsufficientMemory);
142 }
143
144 let mut dest = vec![0u8; self.total_bytes() as usize];
145 for chunk in dest.chunks_mut(self.scanline_bytes() as usize) {
146 self.read_scanline(chunk)?;
147 }
148 Ok(dest)
149 }
150 }
151
152 impl<R: Read + Seek> ImageDecoderExt for DXTDecoder<R> {
read_rect_with_progress<F: Fn(Progress)>( &mut self, x: u64, y: u64, width: u64, height: u64, buf: &mut [u8], progress_callback: F, ) -> ImageResult<()>153 fn read_rect_with_progress<F: Fn(Progress)>(
154 &mut self,
155 x: u64,
156 y: u64,
157 width: u64,
158 height: u64,
159 buf: &mut [u8],
160 progress_callback: F,
161 ) -> ImageResult<()> {
162 let encoded_scanline_bytes = self.variant.encoded_bytes_per_block() as u64
163 * self.width_blocks as u64;
164
165 let start = self.inner.seek(SeekFrom::Current(0))?;
166 image::load_rect(x, y, width, height, buf, progress_callback, self,
167 |s, scanline| {
168 s.inner.seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
169 Ok(())
170 },
171 |s, buf| s.read_scanline(buf))?;
172 self.inner.seek(SeekFrom::Start(start))?;
173 Ok(())
174 }
175 }
176
177 /// DXT reader
178 pub struct DXTReader<R: Read> {
179 buffer: ImageReadBuffer,
180 decoder: DXTDecoder<R>,
181 }
182 impl<R: Read> Read for DXTReader<R> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>183 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
184 let ref mut decoder = &mut self.decoder;
185 self.buffer.read(buf, |buf| decoder.read_scanline(buf))
186 }
187 }
188
189 /// DXT encoder
190 pub struct DXTEncoder<W: Write> {
191 w: W,
192 }
193
194 impl<W: Write> DXTEncoder<W> {
195 /// Create a new encoder that writes its output to ```w```
new(w: W) -> DXTEncoder<W>196 pub fn new(w: W) -> DXTEncoder<W> {
197 DXTEncoder { w }
198 }
199
200 /// Encodes the image data ```data```
201 /// that has dimensions ```width``` and ```height```
202 /// in ```DXTVariant``` ```variant```
203 /// data is assumed to be in variant.colortype()
encode( mut self, data: &[u8], width: u32, height: u32, variant: DXTVariant, ) -> ImageResult<()>204 pub fn encode(
205 mut self,
206 data: &[u8],
207 width: u32,
208 height: u32,
209 variant: DXTVariant,
210 ) -> ImageResult<()> {
211 if width % 4 != 0 || height % 4 != 0 {
212 return Err(ImageError::DimensionError);
213 }
214 let width_blocks = width / 4;
215 let height_blocks = height / 4;
216
217 let stride = variant.decoded_bytes_per_block();
218
219 assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
220
221 for chunk in data.chunks(width_blocks as usize * stride) {
222 let data = match variant {
223 DXTVariant::DXT1 => encode_dxt1_row(chunk),
224 DXTVariant::DXT3 => encode_dxt3_row(chunk),
225 DXTVariant::DXT5 => encode_dxt5_row(chunk),
226 };
227 self.w.write_all(&data)?;
228 }
229 Ok(())
230 }
231 }
232
233 /**
234 * Actual encoding/decoding logic below.
235 */
236 use std::mem::swap;
237
238 type Rgb = [u8; 3];
239
240 /// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
241 /// mapping is done so min/max range values are preserved. So for 5-bit
242 /// values 0x00 -> 0x00 and 0x1F -> 0xFF
enc565_decode(value: u16) -> Rgb243 fn enc565_decode(value: u16) -> Rgb {
244 let red = (value >> 11) & 0x1F;
245 let green = (value >> 5) & 0x3F;
246 let blue = (value) & 0x1F;
247 [
248 (red * 0xFF / 0x1F) as u8,
249 (green * 0xFF / 0x3F) as u8,
250 (blue * 0xFF / 0x1F) as u8,
251 ]
252 }
253
254 /// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
255 /// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
enc565_encode(rgb: Rgb) -> u16256 fn enc565_encode(rgb: Rgb) -> u16 {
257 let red = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF;
258 let green = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF;
259 let blue = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF;
260 (red << 11) | (green << 5) | blue
261 }
262
263 /// utility function: squares a value
square(a: i32) -> i32264 fn square(a: i32) -> i32 {
265 a * a
266 }
267
268 /// returns the squared error between two RGB values
diff(a: Rgb, b: Rgb) -> i32269 fn diff(a: Rgb, b: Rgb) -> i32 {
270 square(i32::from(a[0]) - i32::from(b[0])) + square(i32::from(a[1]) - i32::from(b[1]))
271 + square(i32::from(a[2]) - i32::from(b[2]))
272 }
273
274 /*
275 * Functions for decoding DXT compression
276 */
277
278 /// Constructs the DXT5 alpha lookup table from the two alpha entries
279 /// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
280 /// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8]281 fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] {
282 let mut table = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF];
283 if alpha0 > alpha1 {
284 for i in 2..8u16 {
285 table[i as usize] =
286 (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8;
287 }
288 } else {
289 for i in 2..6u16 {
290 table[i as usize] =
291 (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8;
292 }
293 }
294 table
295 }
296
297 /// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
298 /// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
decode_dxt_colors(source: &[u8], dest: &mut [u8])299 fn decode_dxt_colors(source: &[u8], dest: &mut [u8]) {
300 // sanity checks, also enable the compiler to elide all following bound checks
301 assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64));
302 // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
303 let pitch = dest.len() / 16;
304
305 // extract color data
306 let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8);
307 let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8);
308 let color_table = u32::from(source[4]) | (u32::from(source[5]) << 8)
309 | (u32::from(source[6]) << 16) | (u32::from(source[7]) << 24);
310 // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32);
311
312 // decode the colors to rgb format
313 let mut colors = [[0; 3]; 4];
314 colors[0] = enc565_decode(color0);
315 colors[1] = enc565_decode(color1);
316
317 // determine color interpolation method
318 if color0 > color1 {
319 // linearly interpolate the other two color table entries
320 for i in 0..3 {
321 colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
322 colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
323 }
324 } else {
325 // linearly interpolate one other entry, keep the other at 0
326 for i in 0..3 {
327 colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
328 }
329 }
330
331 // serialize the result. Every color is determined by looking up
332 // two bits in color_table which identify which color to actually pick from the 4 possible colors
333 for i in 0..16 {
334 dest[i * pitch..i * pitch + 3]
335 .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]);
336 }
337 }
338
339 /// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
decode_dxt5_block(source: &[u8], dest: &mut [u8])340 fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
341 assert!(source.len() == 16 && dest.len() == 64);
342
343 // extract alpha index table (stored as little endian 64-bit value)
344 let alpha_table = source[2..8]
345 .iter()
346 .rev()
347 .fold(0, |t, &b| (t << 8) | u64::from(b));
348
349 // alhpa level decode
350 let alphas = alpha_table_dxt5(source[0], source[1]);
351
352 // serialize alpha
353 for i in 0..16 {
354 dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7];
355 }
356
357 // handle colors
358 decode_dxt_colors(&source[8..16], dest);
359 }
360
361 /// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
decode_dxt3_block(source: &[u8], dest: &mut [u8])362 fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
363 assert!(source.len() == 16 && dest.len() == 64);
364
365 // extract alpha index table (stored as little endian 64-bit value)
366 let alpha_table = source[0..8]
367 .iter()
368 .rev()
369 .fold(0, |t, &b| (t << 8) | u64::from(b));
370
371 // serialize alpha (stored as 4-bit values)
372 for i in 0..16 {
373 dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11;
374 }
375
376 // handle colors
377 decode_dxt_colors(&source[8..16], dest);
378 }
379
380 /// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
decode_dxt1_block(source: &[u8], dest: &mut [u8])381 fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
382 assert!(source.len() == 8 && dest.len() == 48);
383 decode_dxt_colors(&source, dest);
384 }
385
386 /// Decode a row of DXT1 data to four rows of RGBA data.
387 /// source.len() should be a multiple of 8, otherwise this panics.
decode_dxt1_row(source: &[u8], dest: &mut [u8])388 fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
389 assert!(source.len() % 8 == 0);
390 let block_count = source.len() / 8;
391 assert!(dest.len() >= block_count * 48);
392
393 // contains the 16 decoded pixels per block
394 let mut decoded_block = [0u8; 48];
395
396 for (x, encoded_block) in source.chunks(8).enumerate() {
397 decode_dxt1_block(encoded_block, &mut decoded_block);
398
399 // copy the values from the decoded block to linewise RGB layout
400 for line in 0..4 {
401 let offset = (block_count * line + x) * 12;
402 dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]);
403 }
404 }
405 }
406
407 /// Decode a row of DXT3 data to four rows of RGBA data.
408 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt3_row(source: &[u8], dest: &mut [u8])409 fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
410 assert!(source.len() % 16 == 0);
411 let block_count = source.len() / 16;
412 assert!(dest.len() >= block_count * 64);
413
414 // contains the 16 decoded pixels per block
415 let mut decoded_block = [0u8; 64];
416
417 for (x, encoded_block) in source.chunks(16).enumerate() {
418 decode_dxt3_block(encoded_block, &mut decoded_block);
419
420 // copy the values from the decoded block to linewise RGB layout
421 for line in 0..4 {
422 let offset = (block_count * line + x) * 16;
423 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
424 }
425 }
426 }
427
428 /// Decode a row of DXT5 data to four rows of RGBA data.
429 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt5_row(source: &[u8], dest: &mut [u8])430 fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
431 assert!(source.len() % 16 == 0);
432 let block_count = source.len() / 16;
433 assert!(dest.len() >= block_count * 64);
434
435 // contains the 16 decoded pixels per block
436 let mut decoded_block = [0u8; 64];
437
438 for (x, encoded_block) in source.chunks(16).enumerate() {
439 decode_dxt5_block(encoded_block, &mut decoded_block);
440
441 // copy the values from the decoded block to linewise RGB layout
442 for line in 0..4 {
443 let offset = (block_count * line + x) * 16;
444 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
445 }
446 }
447 }
448
449 /*
450 * Functions for encoding DXT compression
451 */
452
453 /// Tries to perform the color encoding part of dxt compression
454 /// the approach taken is simple, it picks unique combinations
455 /// of the colors present in the block, and attempts to encode the
456 /// block with each, picking the encoding that yields the least
457 /// squared error out of all of them.
458 ///
459 /// This could probably be faster but is already reasonably fast
460 /// and a good reference impl to optimize others against.
461 ///
462 /// Another way to perform this analysis would be to perform a
463 /// singular value decomposition of the different colors, and
464 /// then pick 2 points on this line as the base colors. But
465 /// this is still rather unwieldly math and has issues
466 /// with the 3-linear-colors-and-0 case, it's also worse
467 /// at conserving the original colors.
468 ///
469 /// source: should be RGBAx16 or RGBx16 bytes of data,
470 /// dest 8 bytes of resulting encoded color data
encode_dxt_colors(source: &[u8], dest: &mut [u8])471 fn encode_dxt_colors(source: &[u8], dest: &mut [u8]) {
472 // sanity checks and determine stride when parsing the source data
473 assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
474 let stride = source.len() / 16;
475
476 // reference colors array
477 let mut colors = [[0u8; 3]; 4];
478
479 // Put the colors we're going to be processing in an array with pure RGB layout
480 // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
481 let mut targets = [[0u8; 3]; 16];
482 for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
483 *d = [s[0], s[1], s[2]];
484 }
485
486 // and a set of colors to pick from.
487 let mut colorspace = targets.to_vec();
488
489 // roundtrip all colors through the r5g6b5 encoding
490 for rgb in &mut colorspace {
491 *rgb = enc565_decode(enc565_encode(*rgb));
492 }
493
494 // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
495 colorspace.dedup();
496
497 // in case of slight gradients it can happen that there's only one entry left in the color table.
498 // as the resulting banding can be quite bad if we would just left the block at the closest
499 // encodable color, we have a special path here that tries to emulate the wanted color
500 // using the linear interpolation between gradients
501 if colorspace.len() == 1 {
502 // the base color we got from colorspace reduction
503 let ref_rgb = colorspace[0];
504 // the unreduced color in this block that's the furthest away from the actual block
505 let mut rgb = targets
506 .iter()
507 .cloned()
508 .max_by_key(|rgb| diff(*rgb, ref_rgb))
509 .unwrap();
510 // amplify differences by 2.5, which should push them to the next quantized value
511 // if possible without overshoot
512 for i in 0..3 {
513 rgb[i] =
514 ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
515 }
516
517 // roundtrip it through quantization
518 let encoded = enc565_encode(rgb);
519 let rgb = enc565_decode(encoded);
520
521 // in case this didn't land us a different color the best way to represent this field is
522 // as a single color block
523 if rgb == ref_rgb {
524 dest[0] = encoded as u8;
525 dest[1] = (encoded >> 8) as u8;
526
527 for d in dest.iter_mut().take(8).skip(2) {
528 *d = 0;
529 }
530 return;
531 }
532
533 // we did find a separate value: add it to the options so after one round of quantization
534 // we're done
535 colorspace.push(rgb);
536 }
537
538 // block quantization loop: we basically just try every possible combination, returning
539 // the combination with the least squared error
540 // stores the best candidate colors
541 let mut chosen_colors = [[0; 3]; 4];
542 // did this index table use the [0,0,0] variant
543 let mut chosen_use_0 = false;
544 // error calculated for the last entry
545 let mut chosen_error = 0xFFFF_FFFFu32;
546
547 // loop through unique permutations of the colorspace, where c1 != c2
548 'search: for (i, &c1) in colorspace.iter().enumerate() {
549 colors[0] = c1;
550
551 for &c2 in &colorspace[0..i] {
552 colors[1] = c2;
553
554 // what's inside here is ran at most 120 times.
555 for use_0 in 0..2 {
556 // and 240 times here.
557
558 if use_0 != 0 {
559 // interpolate one color, set the other to 0
560 for i in 0..3 {
561 colors[2][i] =
562 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
563 }
564 colors[3] = [0, 0, 0];
565 } else {
566 // interpolate to get 2 more colors
567 for i in 0..3 {
568 colors[2][i] =
569 ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
570 colors[3][i] =
571 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
572 }
573 }
574
575 // calculate the total error if we were to quantize the block with these color combinations
576 // both these loops have statically known iteration counts and are well vectorizable
577 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
578 // pixel.
579 let total_error = targets
580 .iter()
581 .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
582 .sum();
583
584 // update the match if we found a better one
585 if total_error < chosen_error {
586 chosen_colors = colors;
587 chosen_use_0 = use_0 != 0;
588 chosen_error = total_error;
589
590 // if we've got a perfect or at most 1 LSB off match, we're done
591 if total_error < 4 {
592 break 'search;
593 }
594 }
595 }
596 }
597 }
598
599 // calculate the final indices
600 // note that targets is already in reverse pixel order, to make the index computation easy.
601 let mut chosen_indices = 0u32;
602 for t in &targets {
603 let (idx, _) = chosen_colors
604 .iter()
605 .enumerate()
606 .min_by_key(|&(_, c)| diff(*c, *t))
607 .unwrap();
608 chosen_indices = (chosen_indices << 2) | idx as u32;
609 }
610
611 // encode the colors
612 let mut color0 = enc565_encode(chosen_colors[0]);
613 let mut color1 = enc565_encode(chosen_colors[1]);
614
615 // determine encoding. Note that color0 == color1 is impossible at this point
616 if color0 > color1 {
617 if chosen_use_0 {
618 swap(&mut color0, &mut color1);
619 // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
620 let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
621 chosen_indices ^= filter ^ 0x5555_5555;
622 }
623 } else if !chosen_use_0 {
624 swap(&mut color0, &mut color1);
625 // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
626 chosen_indices ^= 0x5555_5555;
627 }
628
629 // encode everything.
630 dest[0] = color0 as u8;
631 dest[1] = (color0 >> 8) as u8;
632 dest[2] = color1 as u8;
633 dest[3] = (color1 >> 8) as u8;
634 for i in 0..4 {
635 dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
636 }
637 }
638
639 /// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
640 /// where the alpha table they are indexed against is created by
641 /// calling alpha_table_dxt5(alpha0, alpha1)
642 /// returns the resulting error and alpha table
encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64)643 fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) {
644 // create a table for the given alpha ranges
645 let table = alpha_table_dxt5(alpha0, alpha1);
646 let mut indices = 0u64;
647 let mut total_error = 0i32;
648
649 // least error brute force search
650 for (i, &a) in alphas.iter().enumerate() {
651 let (index, error) = table
652 .iter()
653 .enumerate()
654 .map(|(i, &e)| (i, square(i32::from(e) - i32::from(a))))
655 .min_by_key(|&(_, e)| e)
656 .unwrap();
657 total_error += error;
658 indices |= (index as u64) << (i * 3);
659 }
660
661 (total_error, indices)
662 }
663
664 /// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
encode_dxt5_block(source: &[u8], dest: &mut [u8])665 fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
666 assert!(source.len() == 64 && dest.len() == 16);
667
668 // perform dxt color encoding
669 encode_dxt_colors(source, &mut dest[8..16]);
670
671 // copy out the alpha bytes
672 let mut alphas = [0; 16];
673 for i in 0..16 {
674 alphas[i] = source[i * 4 + 3];
675 }
676
677 // try both alpha compression methods, see which has the least error.
678 let alpha07 = alphas.iter().cloned().min().unwrap();
679 let alpha17 = alphas.iter().cloned().max().unwrap();
680 let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
681
682 // if all alphas are 0 or 255 it doesn't particularly matter what we do here.
683 let alpha05 = alphas
684 .iter()
685 .cloned()
686 .filter(|&i| i != 255)
687 .max()
688 .unwrap_or(255);
689 let alpha15 = alphas
690 .iter()
691 .cloned()
692 .filter(|&i| i != 0)
693 .min()
694 .unwrap_or(0);
695 let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
696
697 // pick the best one, encode the min/max values
698 let mut alpha_table = if error5 < error7 {
699 dest[0] = alpha05;
700 dest[1] = alpha15;
701 indices5
702 } else {
703 dest[0] = alpha07;
704 dest[1] = alpha17;
705 indices7
706 };
707
708 // encode the alphas
709 for byte in dest[2..8].iter_mut() {
710 *byte = alpha_table as u8;
711 alpha_table >>= 8;
712 }
713 }
714
715 /// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
encode_dxt3_block(source: &[u8], dest: &mut [u8])716 fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
717 assert!(source.len() == 64 && dest.len() == 16);
718
719 // perform dxt color encoding
720 encode_dxt_colors(source, &mut dest[8..16]);
721
722 // DXT3 alpha compression is very simple, just round towards the nearest value
723
724 // index the alpha values into the 64bit alpha table
725 let mut alpha_table = 0u64;
726 for i in 0..16 {
727 let alpha = u64::from(source[i * 4 + 3]);
728 let alpha = (alpha + 0x8) / 0x11;
729 alpha_table |= alpha << (i * 4);
730 }
731
732 // encode the alpha values
733 for byte in &mut dest[0..8] {
734 *byte = alpha_table as u8;
735 alpha_table >>= 8;
736 }
737 }
738
739 /// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
encode_dxt1_block(source: &[u8], dest: &mut [u8])740 fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
741 assert!(source.len() == 48 && dest.len() == 8);
742
743 // perform dxt color encoding
744 encode_dxt_colors(source, dest);
745 }
746
747 /// Decode a row of DXT1 data to four rows of RGBA data.
748 /// source.len() should be a multiple of 8, otherwise this panics.
encode_dxt1_row(source: &[u8]) -> Vec<u8>749 fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
750 assert!(source.len() % 48 == 0);
751 let block_count = source.len() / 48;
752
753 let mut dest = vec![0u8; block_count * 8];
754 // contains the 16 decoded pixels per block
755 let mut decoded_block = [0u8; 48];
756
757 for (x, encoded_block) in dest.chunks_mut(8).enumerate() {
758 // copy the values from the decoded block to linewise RGB layout
759 for line in 0..4 {
760 let offset = (block_count * line + x) * 12;
761 decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]);
762 }
763
764 encode_dxt1_block(&decoded_block, encoded_block);
765 }
766 dest
767 }
768
769 /// Decode a row of DXT3 data to four rows of RGBA data.
770 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt3_row(source: &[u8]) -> Vec<u8>771 fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
772 assert!(source.len() % 64 == 0);
773 let block_count = source.len() / 64;
774
775 let mut dest = vec![0u8; block_count * 16];
776 // contains the 16 decoded pixels per block
777 let mut decoded_block = [0u8; 64];
778
779 for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
780 // copy the values from the decoded block to linewise RGB layout
781 for line in 0..4 {
782 let offset = (block_count * line + x) * 16;
783 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
784 }
785
786 encode_dxt3_block(&decoded_block, encoded_block);
787 }
788 dest
789 }
790
791 /// Decode a row of DXT5 data to four rows of RGBA data.
792 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt5_row(source: &[u8]) -> Vec<u8>793 fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
794 assert!(source.len() % 64 == 0);
795 let block_count = source.len() / 64;
796
797 let mut dest = vec![0u8; block_count * 16];
798 // contains the 16 decoded pixels per block
799 let mut decoded_block = [0u8; 64];
800
801 for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
802 // copy the values from the decoded block to linewise RGB layout
803 for line in 0..4 {
804 let offset = (block_count * line + x) * 16;
805 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
806 }
807
808 encode_dxt5_block(&decoded_block, encoded_block);
809 }
810 dest
811 }
812