1 //!  Decoding of DXT (S3TC) compression
2 //!
3 //!  DXT is an image format that supports lossy compression
4 //!
5 //!  # Related Links
6 //!  * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.
7 //!
8 //!  Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9 
10 use std::convert::TryFrom;
11 use std::io::{self, Read, Seek, SeekFrom, Write};
12 
13 use crate::color::ColorType;
14 use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind};
15 use crate::image::{self, ImageDecoder, ImageDecoderExt, ImageReadBuffer, Progress};
16 
17 /// What version of DXT compression are we using?
18 /// Note that DXT2 and DXT4 are left away as they're
19 /// just DXT3 and DXT5 with premultiplied alpha
20 ///
21 /// DEPRECATED: The name of this enum will be changed to [`DxtVariant`].
22 ///
23 /// TODO: rename to [`DxtVariant`]
24 ///
25 /// [`DxtVariant`]: type.DxtVariant.html
26 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
27 pub enum DXTVariant {
28     /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
29     /// compressed into an 8 byte block of DXT1 data
30     DXT1,
31     /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
32     /// compressed into a 16 byte block of DXT3 data
33     DXT3,
34     /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
35     /// compressed into a 16 byte block of DXT5 data
36     DXT5,
37 }
38 
39 /// DXT compression version.
40 ///
41 /// An alias of [`DXTVariant`].
42 ///
43 /// TODO: remove when [`DXTVariant`] is renamed.
44 ///
45 /// [`DXTVariant`]: enum.DXTVariant.html
46 #[allow(dead_code)]
47 pub type DxtVariant = DXTVariant;
48 
49 impl DXTVariant {
50     /// Returns the amount of bytes of raw image data
51     /// that is encoded in a single DXTn block
decoded_bytes_per_block(self) -> usize52     fn decoded_bytes_per_block(self) -> usize {
53         match self {
54             DXTVariant::DXT1 => 48,
55             DXTVariant::DXT3 | DXTVariant::DXT5 => 64,
56         }
57     }
58 
59     /// Returns the amount of bytes per block of encoded DXTn data
encoded_bytes_per_block(self) -> usize60     fn encoded_bytes_per_block(self) -> usize {
61         match self {
62             DXTVariant::DXT1 => 8,
63             DXTVariant::DXT3 | DXTVariant::DXT5 => 16,
64         }
65     }
66 
67     /// Returns the color type that is stored in this DXT variant
color_type(self) -> ColorType68     pub fn color_type(self) -> ColorType {
69         match self {
70             DXTVariant::DXT1 => ColorType::Rgb8,
71             DXTVariant::DXT3 | DXTVariant::DXT5 => ColorType::Rgba8,
72         }
73     }
74 }
75 
76 /// DXT decoder
77 pub struct DxtDecoder<R: Read> {
78     inner: R,
79     width_blocks: u32,
80     height_blocks: u32,
81     variant: DXTVariant,
82     row: u32,
83 }
84 
85 impl<R: Read> DxtDecoder<R> {
86     /// Create a new DXT decoder that decodes from the stream ```r```.
87     /// As DXT is often stored as raw buffers with the width/height
88     /// somewhere else the width and height of the image need
89     /// to be passed in ```width``` and ```height```, as well as the
90     /// DXT variant in ```variant```.
91     /// width and height are required to be powers of 2 and at least 4.
92     /// otherwise an error will be returned
new( r: R, width: u32, height: u32, variant: DXTVariant, ) -> Result<DxtDecoder<R>, ImageError>93     pub fn new(
94         r: R,
95         width: u32,
96         height: u32,
97         variant: DXTVariant,
98     ) -> Result<DxtDecoder<R>, ImageError> {
99         if width % 4 != 0 || height % 4 != 0 {
100             // TODO: this is actually a bit of a weird case. We could return `DecodingError` but
101             // it's not really the format that is wrong However, the encoder should surely return
102             // `EncodingError` so it would be the logical choice for symmetry.
103             return Err(ImageError::Parameter(ParameterError::from_kind(
104                 ParameterErrorKind::DimensionMismatch,
105             )));
106         }
107         let width_blocks = width / 4;
108         let height_blocks = height / 4;
109         Ok(DxtDecoder {
110             inner: r,
111             width_blocks,
112             height_blocks,
113             variant,
114             row: 0,
115         })
116     }
117 
read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize>118     fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
119         assert_eq!(u64::try_from(buf.len()), Ok(self.scanline_bytes()));
120 
121         let mut src =
122             vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
123         self.inner.read_exact(&mut src)?;
124         match self.variant {
125             DXTVariant::DXT1 => decode_dxt1_row(&src, buf),
126             DXTVariant::DXT3 => decode_dxt3_row(&src, buf),
127             DXTVariant::DXT5 => decode_dxt5_row(&src, buf),
128         }
129         self.row += 1;
130         Ok(buf.len())
131     }
132 }
133 
134 // Note that, due to the way that DXT compression works, a scanline is considered to consist out of
135 // 4 lines of pixels.
136 impl<'a, R: 'a + Read> ImageDecoder<'a> for DxtDecoder<R> {
137     type Reader = DxtReader<R>;
138 
dimensions(&self) -> (u32, u32)139     fn dimensions(&self) -> (u32, u32) {
140         (self.width_blocks * 4, self.height_blocks * 4)
141     }
142 
color_type(&self) -> ColorType143     fn color_type(&self) -> ColorType {
144         self.variant.color_type()
145     }
146 
scanline_bytes(&self) -> u64147     fn scanline_bytes(&self) -> u64 {
148         self.variant.decoded_bytes_per_block() as u64 * u64::from(self.width_blocks)
149     }
150 
into_reader(self) -> ImageResult<Self::Reader>151     fn into_reader(self) -> ImageResult<Self::Reader> {
152         Ok(DxtReader {
153             buffer: ImageReadBuffer::new(self.scanline_bytes(), self.total_bytes()),
154             decoder: self,
155         })
156     }
157 
read_image(mut self, buf: &mut [u8]) -> ImageResult<()>158     fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> {
159         assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes()));
160 
161         for chunk in buf.chunks_mut(self.scanline_bytes() as usize) {
162             self.read_scanline(chunk)?;
163         }
164         Ok(())
165     }
166 }
167 
168 impl<'a, R: 'a + Read + Seek> ImageDecoderExt<'a> for DxtDecoder<R> {
read_rect_with_progress<F: Fn(Progress)>( &mut self, x: u32, y: u32, width: u32, height: u32, buf: &mut [u8], progress_callback: F, ) -> ImageResult<()>169     fn read_rect_with_progress<F: Fn(Progress)>(
170         &mut self,
171         x: u32,
172         y: u32,
173         width: u32,
174         height: u32,
175         buf: &mut [u8],
176         progress_callback: F,
177     ) -> ImageResult<()> {
178         let encoded_scanline_bytes = self.variant.encoded_bytes_per_block() as u64
179             * u64::from(self.width_blocks);
180 
181         let start = self.inner.seek(SeekFrom::Current(0))?;
182         image::load_rect(x, y, width, height, buf, progress_callback, self,
183                          |s, scanline| {
184                              s.inner.seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
185                              Ok(())
186                          },
187                          |s, buf| s.read_scanline(buf).map(|_| ()))?;
188         self.inner.seek(SeekFrom::Start(start))?;
189         Ok(())
190     }
191 }
192 
193 /// DXT reader
194 pub struct DxtReader<R: Read> {
195     buffer: ImageReadBuffer,
196     decoder: DxtDecoder<R>,
197 }
198 
199 /// DXT reader
200 ///
201 /// An alias of [`DxtReader`].
202 ///
203 /// TODO: remove
204 ///
205 /// [`DxtReader`]: struct.DxtReader.html
206 #[allow(dead_code)]
207 #[deprecated(note = "Use `DxtReader` instead")]
208 pub type DXTReader<R> = DxtReader<R>;
209 
210 impl<R: Read> Read for DxtReader<R> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>211     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
212         let decoder = &mut self.decoder;
213         self.buffer.read(buf, |buf| decoder.read_scanline(buf))
214     }
215 }
216 
217 /// DXT encoder
218 pub struct DxtEncoder<W: Write> {
219     w: W,
220 }
221 
222 /// DXT encoder
223 ///
224 /// An alias of [`DxtEncoder`].
225 ///
226 /// TODO: remove
227 ///
228 /// [`DxtEncoder`]: struct.DxtEncoder.html
229 #[allow(dead_code)]
230 #[deprecated(note = "Use `DxtEncoder` instead")]
231 pub type DXTEncoder<W> = DxtEncoder<W>;
232 
233 impl<W: Write> DxtEncoder<W> {
234     /// Create a new encoder that writes its output to ```w```
new(w: W) -> DxtEncoder<W>235     pub fn new(w: W) -> DxtEncoder<W> {
236         DxtEncoder { w }
237     }
238 
239     /// Encodes the image data ```data```
240     /// that has dimensions ```width``` and ```height```
241     /// in ```DXTVariant``` ```variant```
242     /// data is assumed to be in variant.color_type()
encode( mut self, data: &[u8], width: u32, height: u32, variant: DXTVariant, ) -> ImageResult<()>243     pub fn encode(
244         mut self,
245         data: &[u8],
246         width: u32,
247         height: u32,
248         variant: DXTVariant,
249     ) -> ImageResult<()> {
250         if width % 4 != 0 || height % 4 != 0 {
251             // TODO: this is not very idiomatic yet. Should return an EncodingError.
252             return Err(ImageError::Parameter(ParameterError::from_kind(
253                 ParameterErrorKind::DimensionMismatch,
254             )));
255         }
256         let width_blocks = width / 4;
257         let height_blocks = height / 4;
258 
259         let stride = variant.decoded_bytes_per_block();
260 
261         assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
262 
263         for chunk in data.chunks(width_blocks as usize * stride) {
264             let data = match variant {
265                 DXTVariant::DXT1 => encode_dxt1_row(chunk),
266                 DXTVariant::DXT3 => encode_dxt3_row(chunk),
267                 DXTVariant::DXT5 => encode_dxt5_row(chunk),
268             };
269             self.w.write_all(&data)?;
270         }
271         Ok(())
272     }
273 }
274 
275 /**
276  * Actual encoding/decoding logic below.
277  */
278 use std::mem::swap;
279 
280 type Rgb = [u8; 3];
281 
282 /// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
283 /// mapping is done so min/max range values are preserved. So for 5-bit
284 /// values 0x00 -> 0x00 and 0x1F -> 0xFF
enc565_decode(value: u16) -> Rgb285 fn enc565_decode(value: u16) -> Rgb {
286     let red = (value >> 11) & 0x1F;
287     let green = (value >> 5) & 0x3F;
288     let blue = (value) & 0x1F;
289     [
290         (red * 0xFF / 0x1F) as u8,
291         (green * 0xFF / 0x3F) as u8,
292         (blue * 0xFF / 0x1F) as u8,
293     ]
294 }
295 
296 /// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
297 /// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
enc565_encode(rgb: Rgb) -> u16298 fn enc565_encode(rgb: Rgb) -> u16 {
299     let red = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF;
300     let green = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF;
301     let blue = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF;
302     (red << 11) | (green << 5) | blue
303 }
304 
305 /// utility function: squares a value
square(a: i32) -> i32306 fn square(a: i32) -> i32 {
307     a * a
308 }
309 
310 /// returns the squared error between two RGB values
diff(a: Rgb, b: Rgb) -> i32311 fn diff(a: Rgb, b: Rgb) -> i32 {
312     square(i32::from(a[0]) - i32::from(b[0])) + square(i32::from(a[1]) - i32::from(b[1]))
313         + square(i32::from(a[2]) - i32::from(b[2]))
314 }
315 
316 /*
317  * Functions for decoding DXT compression
318  */
319 
320 /// Constructs the DXT5 alpha lookup table from the two alpha entries
321 /// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
322 /// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8]323 fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] {
324     let mut table = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF];
325     if alpha0 > alpha1 {
326         for i in 2..8u16 {
327             table[i as usize] =
328                 (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8;
329         }
330     } else {
331         for i in 2..6u16 {
332             table[i as usize] =
333                 (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8;
334         }
335     }
336     table
337 }
338 
339 /// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
340 /// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
decode_dxt_colors(source: &[u8], dest: &mut [u8])341 fn decode_dxt_colors(source: &[u8], dest: &mut [u8]) {
342     // sanity checks, also enable the compiler to elide all following bound checks
343     assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64));
344     // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
345     let pitch = dest.len() / 16;
346 
347     // extract color data
348     let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8);
349     let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8);
350     let color_table = u32::from(source[4]) | (u32::from(source[5]) << 8)
351         | (u32::from(source[6]) << 16) | (u32::from(source[7]) << 24);
352     // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32);
353 
354     // decode the colors to rgb format
355     let mut colors = [[0; 3]; 4];
356     colors[0] = enc565_decode(color0);
357     colors[1] = enc565_decode(color1);
358 
359     // determine color interpolation method
360     if color0 > color1 {
361         // linearly interpolate the other two color table entries
362         for i in 0..3 {
363             colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
364             colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
365         }
366     } else {
367         // linearly interpolate one other entry, keep the other at 0
368         for i in 0..3 {
369             colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
370         }
371     }
372 
373     // serialize the result. Every color is determined by looking up
374     // two bits in color_table which identify which color to actually pick from the 4 possible colors
375     for i in 0..16 {
376         dest[i * pitch..i * pitch + 3]
377             .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]);
378     }
379 }
380 
381 /// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
decode_dxt5_block(source: &[u8], dest: &mut [u8])382 fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
383     assert!(source.len() == 16 && dest.len() == 64);
384 
385     // extract alpha index table (stored as little endian 64-bit value)
386     let alpha_table = source[2..8]
387         .iter()
388         .rev()
389         .fold(0, |t, &b| (t << 8) | u64::from(b));
390 
391     // alhpa level decode
392     let alphas = alpha_table_dxt5(source[0], source[1]);
393 
394     // serialize alpha
395     for i in 0..16 {
396         dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7];
397     }
398 
399     // handle colors
400     decode_dxt_colors(&source[8..16], dest);
401 }
402 
403 /// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
decode_dxt3_block(source: &[u8], dest: &mut [u8])404 fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
405     assert!(source.len() == 16 && dest.len() == 64);
406 
407     // extract alpha index table (stored as little endian 64-bit value)
408     let alpha_table = source[0..8]
409         .iter()
410         .rev()
411         .fold(0, |t, &b| (t << 8) | u64::from(b));
412 
413     // serialize alpha (stored as 4-bit values)
414     for i in 0..16 {
415         dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11;
416     }
417 
418     // handle colors
419     decode_dxt_colors(&source[8..16], dest);
420 }
421 
422 /// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
decode_dxt1_block(source: &[u8], dest: &mut [u8])423 fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
424     assert!(source.len() == 8 && dest.len() == 48);
425     decode_dxt_colors(&source, dest);
426 }
427 
428 /// Decode a row of DXT1 data to four rows of RGBA data.
429 /// source.len() should be a multiple of 8, otherwise this panics.
decode_dxt1_row(source: &[u8], dest: &mut [u8])430 fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
431     assert!(source.len() % 8 == 0);
432     let block_count = source.len() / 8;
433     assert!(dest.len() >= block_count * 48);
434 
435     // contains the 16 decoded pixels per block
436     let mut decoded_block = [0u8; 48];
437 
438     for (x, encoded_block) in source.chunks(8).enumerate() {
439         decode_dxt1_block(encoded_block, &mut decoded_block);
440 
441         // copy the values from the decoded block to linewise RGB layout
442         for line in 0..4 {
443             let offset = (block_count * line + x) * 12;
444             dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]);
445         }
446     }
447 }
448 
449 /// Decode a row of DXT3 data to four rows of RGBA data.
450 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt3_row(source: &[u8], dest: &mut [u8])451 fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
452     assert!(source.len() % 16 == 0);
453     let block_count = source.len() / 16;
454     assert!(dest.len() >= block_count * 64);
455 
456     // contains the 16 decoded pixels per block
457     let mut decoded_block = [0u8; 64];
458 
459     for (x, encoded_block) in source.chunks(16).enumerate() {
460         decode_dxt3_block(encoded_block, &mut decoded_block);
461 
462         // copy the values from the decoded block to linewise RGB layout
463         for line in 0..4 {
464             let offset = (block_count * line + x) * 16;
465             dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
466         }
467     }
468 }
469 
470 /// Decode a row of DXT5 data to four rows of RGBA data.
471 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt5_row(source: &[u8], dest: &mut [u8])472 fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
473     assert!(source.len() % 16 == 0);
474     let block_count = source.len() / 16;
475     assert!(dest.len() >= block_count * 64);
476 
477     // contains the 16 decoded pixels per block
478     let mut decoded_block = [0u8; 64];
479 
480     for (x, encoded_block) in source.chunks(16).enumerate() {
481         decode_dxt5_block(encoded_block, &mut decoded_block);
482 
483         // copy the values from the decoded block to linewise RGB layout
484         for line in 0..4 {
485             let offset = (block_count * line + x) * 16;
486             dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
487         }
488     }
489 }
490 
491 /*
492  * Functions for encoding DXT compression
493  */
494 
495 /// Tries to perform the color encoding part of dxt compression
496 /// the approach taken is simple, it picks unique combinations
497 /// of the colors present in the block, and attempts to encode the
498 /// block with each, picking the encoding that yields the least
499 /// squared error out of all of them.
500 ///
501 /// This could probably be faster but is already reasonably fast
502 /// and a good reference impl to optimize others against.
503 ///
504 /// Another way to perform this analysis would be to perform a
505 /// singular value decomposition of the different colors, and
506 /// then pick 2 points on this line as the base colors. But
507 /// this is still rather unwieldly math and has issues
508 /// with the 3-linear-colors-and-0 case, it's also worse
509 /// at conserving the original colors.
510 ///
511 /// source: should be RGBAx16 or RGBx16 bytes of data,
512 /// dest 8 bytes of resulting encoded color data
encode_dxt_colors(source: &[u8], dest: &mut [u8])513 fn encode_dxt_colors(source: &[u8], dest: &mut [u8]) {
514     // sanity checks and determine stride when parsing the source data
515     assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
516     let stride = source.len() / 16;
517 
518     // reference colors array
519     let mut colors = [[0u8; 3]; 4];
520 
521     // Put the colors we're going to be processing in an array with pure RGB layout
522     // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
523     let mut targets = [[0u8; 3]; 16];
524     for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
525         *d = [s[0], s[1], s[2]];
526     }
527 
528     // and a set of colors to pick from.
529     let mut colorspace = targets.to_vec();
530 
531     // roundtrip all colors through the r5g6b5 encoding
532     for rgb in &mut colorspace {
533         *rgb = enc565_decode(enc565_encode(*rgb));
534     }
535 
536     // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
537     colorspace.dedup();
538 
539     // in case of slight gradients it can happen that there's only one entry left in the color table.
540     // as the resulting banding can be quite bad if we would just left the block at the closest
541     // encodable color, we have a special path here that tries to emulate the wanted color
542     // using the linear interpolation between gradients
543     if colorspace.len() == 1 {
544         // the base color we got from colorspace reduction
545         let ref_rgb = colorspace[0];
546         // the unreduced color in this block that's the furthest away from the actual block
547         let mut rgb = targets
548             .iter()
549             .cloned()
550             .max_by_key(|rgb| diff(*rgb, ref_rgb))
551             .unwrap();
552         // amplify differences by 2.5, which should push them to the next quantized value
553         // if possible without overshoot
554         for i in 0..3 {
555             rgb[i] =
556                 ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
557         }
558 
559         // roundtrip it through quantization
560         let encoded = enc565_encode(rgb);
561         let rgb = enc565_decode(encoded);
562 
563         // in case this didn't land us a different color the best way to represent this field is
564         // as a single color block
565         if rgb == ref_rgb {
566             dest[0] = encoded as u8;
567             dest[1] = (encoded >> 8) as u8;
568 
569             for d in dest.iter_mut().take(8).skip(2) {
570                 *d = 0;
571             }
572             return;
573         }
574 
575         // we did find a separate value: add it to the options so after one round of quantization
576         // we're done
577         colorspace.push(rgb);
578     }
579 
580     // block quantization loop: we basically just try every possible combination, returning
581     // the combination with the least squared error
582     // stores the best candidate colors
583     let mut chosen_colors = [[0; 3]; 4];
584     // did this index table use the [0,0,0] variant
585     let mut chosen_use_0 = false;
586     // error calculated for the last entry
587     let mut chosen_error = 0xFFFF_FFFFu32;
588 
589     // loop through unique permutations of the colorspace, where c1 != c2
590     'search: for (i, &c1) in colorspace.iter().enumerate() {
591         colors[0] = c1;
592 
593         for &c2 in &colorspace[0..i] {
594             colors[1] = c2;
595 
596             // what's inside here is ran at most 120 times.
597             for use_0 in 0..2 {
598                 // and 240 times here.
599 
600                 if use_0 != 0 {
601                     // interpolate one color, set the other to 0
602                     for i in 0..3 {
603                         colors[2][i] =
604                             ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
605                     }
606                     colors[3] = [0, 0, 0];
607                 } else {
608                     // interpolate to get 2 more colors
609                     for i in 0..3 {
610                         colors[2][i] =
611                             ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
612                         colors[3][i] =
613                             ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
614                     }
615                 }
616 
617                 // calculate the total error if we were to quantize the block with these color combinations
618                 // both these loops have statically known iteration counts and are well vectorizable
619                 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
620                 // pixel.
621                 let total_error = targets
622                     .iter()
623                     .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
624                     .sum();
625 
626                 // update the match if we found a better one
627                 if total_error < chosen_error {
628                     chosen_colors = colors;
629                     chosen_use_0 = use_0 != 0;
630                     chosen_error = total_error;
631 
632                     // if we've got a perfect or at most 1 LSB off match, we're done
633                     if total_error < 4 {
634                         break 'search;
635                     }
636                 }
637             }
638         }
639     }
640 
641     // calculate the final indices
642     // note that targets is already in reverse pixel order, to make the index computation easy.
643     let mut chosen_indices = 0u32;
644     for t in &targets {
645         let (idx, _) = chosen_colors
646             .iter()
647             .enumerate()
648             .min_by_key(|&(_, c)| diff(*c, *t))
649             .unwrap();
650         chosen_indices = (chosen_indices << 2) | idx as u32;
651     }
652 
653     // encode the colors
654     let mut color0 = enc565_encode(chosen_colors[0]);
655     let mut color1 = enc565_encode(chosen_colors[1]);
656 
657     // determine encoding. Note that color0 == color1 is impossible at this point
658     if color0 > color1 {
659         if chosen_use_0 {
660             swap(&mut color0, &mut color1);
661             // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
662             let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
663             chosen_indices ^= filter ^ 0x5555_5555;
664         }
665     } else if !chosen_use_0 {
666         swap(&mut color0, &mut color1);
667         // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
668         chosen_indices ^= 0x5555_5555;
669     }
670 
671     // encode everything.
672     dest[0] = color0 as u8;
673     dest[1] = (color0 >> 8) as u8;
674     dest[2] = color1 as u8;
675     dest[3] = (color1 >> 8) as u8;
676     for i in 0..4 {
677         dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
678     }
679 }
680 
681 /// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
682 /// where the alpha table they are indexed against is created by
683 /// calling alpha_table_dxt5(alpha0, alpha1)
684 /// returns the resulting error and alpha table
encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64)685 fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) {
686     // create a table for the given alpha ranges
687     let table = alpha_table_dxt5(alpha0, alpha1);
688     let mut indices = 0u64;
689     let mut total_error = 0i32;
690 
691     // least error brute force search
692     for (i, &a) in alphas.iter().enumerate() {
693         let (index, error) = table
694             .iter()
695             .enumerate()
696             .map(|(i, &e)| (i, square(i32::from(e) - i32::from(a))))
697             .min_by_key(|&(_, e)| e)
698             .unwrap();
699         total_error += error;
700         indices |= (index as u64) << (i * 3);
701     }
702 
703     (total_error, indices)
704 }
705 
706 /// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
encode_dxt5_block(source: &[u8], dest: &mut [u8])707 fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
708     assert!(source.len() == 64 && dest.len() == 16);
709 
710     // perform dxt color encoding
711     encode_dxt_colors(source, &mut dest[8..16]);
712 
713     // copy out the alpha bytes
714     let mut alphas = [0; 16];
715     for i in 0..16 {
716         alphas[i] = source[i * 4 + 3];
717     }
718 
719     // try both alpha compression methods, see which has the least error.
720     let alpha07 = alphas.iter().cloned().min().unwrap();
721     let alpha17 = alphas.iter().cloned().max().unwrap();
722     let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
723 
724     // if all alphas are 0 or 255 it doesn't particularly matter what we do here.
725     let alpha05 = alphas
726         .iter()
727         .cloned()
728         .filter(|&i| i != 255)
729         .max()
730         .unwrap_or(255);
731     let alpha15 = alphas
732         .iter()
733         .cloned()
734         .filter(|&i| i != 0)
735         .min()
736         .unwrap_or(0);
737     let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
738 
739     // pick the best one, encode the min/max values
740     let mut alpha_table = if error5 < error7 {
741         dest[0] = alpha05;
742         dest[1] = alpha15;
743         indices5
744     } else {
745         dest[0] = alpha07;
746         dest[1] = alpha17;
747         indices7
748     };
749 
750     // encode the alphas
751     for byte in dest[2..8].iter_mut() {
752         *byte = alpha_table as u8;
753         alpha_table >>= 8;
754     }
755 }
756 
757 /// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
encode_dxt3_block(source: &[u8], dest: &mut [u8])758 fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
759     assert!(source.len() == 64 && dest.len() == 16);
760 
761     // perform dxt color encoding
762     encode_dxt_colors(source, &mut dest[8..16]);
763 
764     // DXT3 alpha compression is very simple, just round towards the nearest value
765 
766     // index the alpha values into the 64bit alpha table
767     let mut alpha_table = 0u64;
768     for i in 0..16 {
769         let alpha = u64::from(source[i * 4 + 3]);
770         let alpha = (alpha + 0x8) / 0x11;
771         alpha_table |= alpha << (i * 4);
772     }
773 
774     // encode the alpha values
775     for byte in &mut dest[0..8] {
776         *byte = alpha_table as u8;
777         alpha_table >>= 8;
778     }
779 }
780 
781 /// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
encode_dxt1_block(source: &[u8], dest: &mut [u8])782 fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
783     assert!(source.len() == 48 && dest.len() == 8);
784 
785     // perform dxt color encoding
786     encode_dxt_colors(source, dest);
787 }
788 
789 /// Decode a row of DXT1 data to four rows of RGBA data.
790 /// source.len() should be a multiple of 8, otherwise this panics.
encode_dxt1_row(source: &[u8]) -> Vec<u8>791 fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
792     assert!(source.len() % 48 == 0);
793     let block_count = source.len() / 48;
794 
795     let mut dest = vec![0u8; block_count * 8];
796     // contains the 16 decoded pixels per block
797     let mut decoded_block = [0u8; 48];
798 
799     for (x, encoded_block) in dest.chunks_mut(8).enumerate() {
800         // copy the values from the decoded block to linewise RGB layout
801         for line in 0..4 {
802             let offset = (block_count * line + x) * 12;
803             decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]);
804         }
805 
806         encode_dxt1_block(&decoded_block, encoded_block);
807     }
808     dest
809 }
810 
811 /// Decode a row of DXT3 data to four rows of RGBA data.
812 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt3_row(source: &[u8]) -> Vec<u8>813 fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
814     assert!(source.len() % 64 == 0);
815     let block_count = source.len() / 64;
816 
817     let mut dest = vec![0u8; block_count * 16];
818     // contains the 16 decoded pixels per block
819     let mut decoded_block = [0u8; 64];
820 
821     for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
822         // copy the values from the decoded block to linewise RGB layout
823         for line in 0..4 {
824             let offset = (block_count * line + x) * 16;
825             decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
826         }
827 
828         encode_dxt3_block(&decoded_block, encoded_block);
829     }
830     dest
831 }
832 
833 /// Decode a row of DXT5 data to four rows of RGBA data.
834 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt5_row(source: &[u8]) -> Vec<u8>835 fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
836     assert!(source.len() % 64 == 0);
837     let block_count = source.len() / 64;
838 
839     let mut dest = vec![0u8; block_count * 16];
840     // contains the 16 decoded pixels per block
841     let mut decoded_block = [0u8; 64];
842 
843     for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
844         // copy the values from the decoded block to linewise RGB layout
845         for line in 0..4 {
846             let offset = (block_count * line + x) * 16;
847             decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
848         }
849 
850         encode_dxt5_block(&decoded_block, encoded_block);
851     }
852     dest
853 }
854