1 //!  Decoding of DXT (S3TC) compression
2 //!
3 //!  DXT is an image format that supports lossy compression
4 //!
5 //!  # Related Links
6 //!  * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.
7 //!
8 //!  Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9 
10 use std::io::{self, Read, Seek, SeekFrom, Write};
11 
12 use color::ColorType;
13 use image::{self, ImageDecoder, ImageDecoderExt, ImageError, ImageReadBuffer, ImageResult, Progress};
14 
15 /// What version of DXT compression are we using?
16 /// Note that DXT2 and DXT4 are left away as they're
17 /// just DXT3 and DXT5 with premultiplied alpha
18 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
19 pub enum DXTVariant {
20     /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
21     /// compressed into an 8 byte block of DXT1 data
22     DXT1,
23     /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
24     /// compressed into a 16 byte block of DXT3 data
25     DXT3,
26     /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
27     /// compressed into a 16 byte block of DXT5 data
28     DXT5,
29 }
30 
31 impl DXTVariant {
32     /// Returns the amount of bytes of raw image data
33     /// that is encoded in a single DXTn block
decoded_bytes_per_block(self) -> usize34     fn decoded_bytes_per_block(self) -> usize {
35         match self {
36             DXTVariant::DXT1 => 48,
37             DXTVariant::DXT3 | DXTVariant::DXT5 => 64,
38         }
39     }
40 
41     /// Returns the amount of bytes per block of encoded DXTn data
encoded_bytes_per_block(self) -> usize42     fn encoded_bytes_per_block(self) -> usize {
43         match self {
44             DXTVariant::DXT1 => 8,
45             DXTVariant::DXT3 | DXTVariant::DXT5 => 16,
46         }
47     }
48 
49     /// Returns the colortype that is stored in this DXT variant
colortype(self) -> ColorType50     pub fn colortype(self) -> ColorType {
51         match self {
52             DXTVariant::DXT1 => ColorType::RGB(8),
53             DXTVariant::DXT3 | DXTVariant::DXT5 => ColorType::RGBA(8),
54         }
55     }
56 }
57 
58 /// DXT decoder
59 pub struct DXTDecoder<R: Read> {
60     inner: R,
61     width_blocks: u32,
62     height_blocks: u32,
63     variant: DXTVariant,
64     row: u32,
65 }
66 
67 impl<R: Read> DXTDecoder<R> {
68     /// Create a new DXT decoder that decodes from the stream ```r```.
69     /// As DXT is often stored as raw buffers with the width/height
70     /// somewhere else the width and height of the image need
71     /// to be passed in ```width``` and ```height```, as well as the
72     /// DXT variant in ```variant```.
73     /// width and height are required to be powers of 2 and at least 4.
74     /// otherwise an error will be returned
new( r: R, width: u32, height: u32, variant: DXTVariant, ) -> Result<DXTDecoder<R>, ImageError>75     pub fn new(
76         r: R,
77         width: u32,
78         height: u32,
79         variant: DXTVariant,
80     ) -> Result<DXTDecoder<R>, ImageError> {
81         if width % 4 != 0 || height % 4 != 0 {
82             return Err(ImageError::DimensionError);
83         }
84         let width_blocks = width / 4;
85         let height_blocks = height / 4;
86         Ok(DXTDecoder {
87             inner: r,
88             width_blocks,
89             height_blocks,
90             variant,
91             row: 0,
92         })
93     }
94 
read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize>95     fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
96         assert_eq!(buf.len() as u64, self.scanline_bytes());
97 
98         let mut src =
99             vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
100         self.inner.read_exact(&mut src)?;
101         match self.variant {
102             DXTVariant::DXT1 => decode_dxt1_row(&src, buf),
103             DXTVariant::DXT3 => decode_dxt3_row(&src, buf),
104             DXTVariant::DXT5 => decode_dxt5_row(&src, buf),
105         }
106         self.row += 1;
107         Ok(buf.len())
108     }
109 }
110 
111 // Note that, due to the way that DXT compression works, a scanline is considered to consist out of
112 // 4 lines of pixels.
113 impl<R: Read> ImageDecoder for DXTDecoder<R> {
114     type Reader = DXTReader<R>;
115 
dimensions(&self) -> (u64, u64)116     fn dimensions(&self) -> (u64, u64) {
117         (self.width_blocks as u64 * 4, self.height_blocks as u64 * 4)
118     }
119 
colortype(&self) -> ColorType120     fn colortype(&self) -> ColorType {
121         self.variant.colortype()
122     }
123 
scanline_bytes(&self) -> u64124     fn scanline_bytes(&self) -> u64 {
125         self.variant.decoded_bytes_per_block() as u64 * self.width_blocks as u64
126     }
127 
into_reader(self) -> ImageResult<Self::Reader>128     fn into_reader(self) -> ImageResult<Self::Reader> {
129         if self.total_bytes() > usize::max_value() as u64 {
130             return Err(ImageError::InsufficientMemory);
131         }
132 
133         Ok(DXTReader {
134             buffer: ImageReadBuffer::new(self.scanline_bytes() as usize, self.total_bytes() as usize),
135             decoder: self,
136         })
137     }
138 
read_image(mut self) -> ImageResult<Vec<u8>>139     fn read_image(mut self) -> ImageResult<Vec<u8>> {
140         if self.total_bytes() > usize::max_value() as u64 {
141             return Err(ImageError::InsufficientMemory);
142         }
143 
144         let mut dest = vec![0u8; self.total_bytes() as usize];
145         for chunk in dest.chunks_mut(self.scanline_bytes() as usize) {
146             self.read_scanline(chunk)?;
147         }
148         Ok(dest)
149     }
150 }
151 
152 impl<R: Read + Seek> ImageDecoderExt for DXTDecoder<R> {
read_rect_with_progress<F: Fn(Progress)>( &mut self, x: u64, y: u64, width: u64, height: u64, buf: &mut [u8], progress_callback: F, ) -> ImageResult<()>153     fn read_rect_with_progress<F: Fn(Progress)>(
154         &mut self,
155         x: u64,
156         y: u64,
157         width: u64,
158         height: u64,
159         buf: &mut [u8],
160         progress_callback: F,
161     ) -> ImageResult<()> {
162         let encoded_scanline_bytes = self.variant.encoded_bytes_per_block() as u64
163             * self.width_blocks as u64;
164 
165         let start = self.inner.seek(SeekFrom::Current(0))?;
166         image::load_rect(x, y, width, height, buf, progress_callback, self,
167                          |s, scanline| {
168                              s.inner.seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
169                              Ok(())
170                          },
171                          |s, buf| s.read_scanline(buf))?;
172         self.inner.seek(SeekFrom::Start(start))?;
173         Ok(())
174     }
175 }
176 
177 /// DXT reader
178 pub struct DXTReader<R: Read> {
179     buffer: ImageReadBuffer,
180     decoder: DXTDecoder<R>,
181 }
182 impl<R: Read> Read for DXTReader<R> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>183     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
184         let ref mut decoder = &mut self.decoder;
185         self.buffer.read(buf, |buf| decoder.read_scanline(buf))
186     }
187 }
188 
189 /// DXT encoder
190 pub struct DXTEncoder<W: Write> {
191     w: W,
192 }
193 
194 impl<W: Write> DXTEncoder<W> {
195     /// Create a new encoder that writes its output to ```w```
new(w: W) -> DXTEncoder<W>196     pub fn new(w: W) -> DXTEncoder<W> {
197         DXTEncoder { w }
198     }
199 
200     /// Encodes the image data ```data```
201     /// that has dimensions ```width``` and ```height```
202     /// in ```DXTVariant``` ```variant```
203     /// data is assumed to be in variant.colortype()
encode( mut self, data: &[u8], width: u32, height: u32, variant: DXTVariant, ) -> ImageResult<()>204     pub fn encode(
205         mut self,
206         data: &[u8],
207         width: u32,
208         height: u32,
209         variant: DXTVariant,
210     ) -> ImageResult<()> {
211         if width % 4 != 0 || height % 4 != 0 {
212             return Err(ImageError::DimensionError);
213         }
214         let width_blocks = width / 4;
215         let height_blocks = height / 4;
216 
217         let stride = variant.decoded_bytes_per_block();
218 
219         assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
220 
221         for chunk in data.chunks(width_blocks as usize * stride) {
222             let data = match variant {
223                 DXTVariant::DXT1 => encode_dxt1_row(chunk),
224                 DXTVariant::DXT3 => encode_dxt3_row(chunk),
225                 DXTVariant::DXT5 => encode_dxt5_row(chunk),
226             };
227             self.w.write_all(&data)?;
228         }
229         Ok(())
230     }
231 }
232 
233 /**
234  * Actual encoding/decoding logic below.
235  */
236 use std::mem::swap;
237 
238 type Rgb = [u8; 3];
239 
240 /// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
241 /// mapping is done so min/max range values are preserved. So for 5-bit
242 /// values 0x00 -> 0x00 and 0x1F -> 0xFF
enc565_decode(value: u16) -> Rgb243 fn enc565_decode(value: u16) -> Rgb {
244     let red = (value >> 11) & 0x1F;
245     let green = (value >> 5) & 0x3F;
246     let blue = (value) & 0x1F;
247     [
248         (red * 0xFF / 0x1F) as u8,
249         (green * 0xFF / 0x3F) as u8,
250         (blue * 0xFF / 0x1F) as u8,
251     ]
252 }
253 
254 /// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
255 /// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
enc565_encode(rgb: Rgb) -> u16256 fn enc565_encode(rgb: Rgb) -> u16 {
257     let red = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF;
258     let green = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF;
259     let blue = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF;
260     (red << 11) | (green << 5) | blue
261 }
262 
263 /// utility function: squares a value
square(a: i32) -> i32264 fn square(a: i32) -> i32 {
265     a * a
266 }
267 
268 /// returns the squared error between two RGB values
diff(a: Rgb, b: Rgb) -> i32269 fn diff(a: Rgb, b: Rgb) -> i32 {
270     square(i32::from(a[0]) - i32::from(b[0])) + square(i32::from(a[1]) - i32::from(b[1]))
271         + square(i32::from(a[2]) - i32::from(b[2]))
272 }
273 
274 /*
275  * Functions for decoding DXT compression
276  */
277 
278 /// Constructs the DXT5 alpha lookup table from the two alpha entries
279 /// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
280 /// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8]281 fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] {
282     let mut table = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF];
283     if alpha0 > alpha1 {
284         for i in 2..8u16 {
285             table[i as usize] =
286                 (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8;
287         }
288     } else {
289         for i in 2..6u16 {
290             table[i as usize] =
291                 (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8;
292         }
293     }
294     table
295 }
296 
297 /// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
298 /// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
decode_dxt_colors(source: &[u8], dest: &mut [u8])299 fn decode_dxt_colors(source: &[u8], dest: &mut [u8]) {
300     // sanity checks, also enable the compiler to elide all following bound checks
301     assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64));
302     // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
303     let pitch = dest.len() / 16;
304 
305     // extract color data
306     let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8);
307     let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8);
308     let color_table = u32::from(source[4]) | (u32::from(source[5]) << 8)
309         | (u32::from(source[6]) << 16) | (u32::from(source[7]) << 24);
310     // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32);
311 
312     // decode the colors to rgb format
313     let mut colors = [[0; 3]; 4];
314     colors[0] = enc565_decode(color0);
315     colors[1] = enc565_decode(color1);
316 
317     // determine color interpolation method
318     if color0 > color1 {
319         // linearly interpolate the other two color table entries
320         for i in 0..3 {
321             colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
322             colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
323         }
324     } else {
325         // linearly interpolate one other entry, keep the other at 0
326         for i in 0..3 {
327             colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
328         }
329     }
330 
331     // serialize the result. Every color is determined by looking up
332     // two bits in color_table which identify which color to actually pick from the 4 possible colors
333     for i in 0..16 {
334         dest[i * pitch..i * pitch + 3]
335             .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]);
336     }
337 }
338 
339 /// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
decode_dxt5_block(source: &[u8], dest: &mut [u8])340 fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
341     assert!(source.len() == 16 && dest.len() == 64);
342 
343     // extract alpha index table (stored as little endian 64-bit value)
344     let alpha_table = source[2..8]
345         .iter()
346         .rev()
347         .fold(0, |t, &b| (t << 8) | u64::from(b));
348 
349     // alhpa level decode
350     let alphas = alpha_table_dxt5(source[0], source[1]);
351 
352     // serialize alpha
353     for i in 0..16 {
354         dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7];
355     }
356 
357     // handle colors
358     decode_dxt_colors(&source[8..16], dest);
359 }
360 
361 /// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
decode_dxt3_block(source: &[u8], dest: &mut [u8])362 fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
363     assert!(source.len() == 16 && dest.len() == 64);
364 
365     // extract alpha index table (stored as little endian 64-bit value)
366     let alpha_table = source[0..8]
367         .iter()
368         .rev()
369         .fold(0, |t, &b| (t << 8) | u64::from(b));
370 
371     // serialize alpha (stored as 4-bit values)
372     for i in 0..16 {
373         dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11;
374     }
375 
376     // handle colors
377     decode_dxt_colors(&source[8..16], dest);
378 }
379 
380 /// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
decode_dxt1_block(source: &[u8], dest: &mut [u8])381 fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
382     assert!(source.len() == 8 && dest.len() == 48);
383     decode_dxt_colors(&source, dest);
384 }
385 
386 /// Decode a row of DXT1 data to four rows of RGBA data.
387 /// source.len() should be a multiple of 8, otherwise this panics.
decode_dxt1_row(source: &[u8], dest: &mut [u8])388 fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
389     assert!(source.len() % 8 == 0);
390     let block_count = source.len() / 8;
391     assert!(dest.len() >= block_count * 48);
392 
393     // contains the 16 decoded pixels per block
394     let mut decoded_block = [0u8; 48];
395 
396     for (x, encoded_block) in source.chunks(8).enumerate() {
397         decode_dxt1_block(encoded_block, &mut decoded_block);
398 
399         // copy the values from the decoded block to linewise RGB layout
400         for line in 0..4 {
401             let offset = (block_count * line + x) * 12;
402             dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]);
403         }
404     }
405 }
406 
407 /// Decode a row of DXT3 data to four rows of RGBA data.
408 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt3_row(source: &[u8], dest: &mut [u8])409 fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
410     assert!(source.len() % 16 == 0);
411     let block_count = source.len() / 16;
412     assert!(dest.len() >= block_count * 64);
413 
414     // contains the 16 decoded pixels per block
415     let mut decoded_block = [0u8; 64];
416 
417     for (x, encoded_block) in source.chunks(16).enumerate() {
418         decode_dxt3_block(encoded_block, &mut decoded_block);
419 
420         // copy the values from the decoded block to linewise RGB layout
421         for line in 0..4 {
422             let offset = (block_count * line + x) * 16;
423             dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
424         }
425     }
426 }
427 
428 /// Decode a row of DXT5 data to four rows of RGBA data.
429 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt5_row(source: &[u8], dest: &mut [u8])430 fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
431     assert!(source.len() % 16 == 0);
432     let block_count = source.len() / 16;
433     assert!(dest.len() >= block_count * 64);
434 
435     // contains the 16 decoded pixels per block
436     let mut decoded_block = [0u8; 64];
437 
438     for (x, encoded_block) in source.chunks(16).enumerate() {
439         decode_dxt5_block(encoded_block, &mut decoded_block);
440 
441         // copy the values from the decoded block to linewise RGB layout
442         for line in 0..4 {
443             let offset = (block_count * line + x) * 16;
444             dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
445         }
446     }
447 }
448 
449 /*
450  * Functions for encoding DXT compression
451  */
452 
453 /// Tries to perform the color encoding part of dxt compression
454 /// the approach taken is simple, it picks unique combinations
455 /// of the colors present in the block, and attempts to encode the
456 /// block with each, picking the encoding that yields the least
457 /// squared error out of all of them.
458 ///
459 /// This could probably be faster but is already reasonably fast
460 /// and a good reference impl to optimize others against.
461 ///
462 /// Another way to perform this analysis would be to perform a
463 /// singular value decomposition of the different colors, and
464 /// then pick 2 points on this line as the base colors. But
465 /// this is still rather unwieldly math and has issues
466 /// with the 3-linear-colors-and-0 case, it's also worse
467 /// at conserving the original colors.
468 ///
469 /// source: should be RGBAx16 or RGBx16 bytes of data,
470 /// dest 8 bytes of resulting encoded color data
encode_dxt_colors(source: &[u8], dest: &mut [u8])471 fn encode_dxt_colors(source: &[u8], dest: &mut [u8]) {
472     // sanity checks and determine stride when parsing the source data
473     assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
474     let stride = source.len() / 16;
475 
476     // reference colors array
477     let mut colors = [[0u8; 3]; 4];
478 
479     // Put the colors we're going to be processing in an array with pure RGB layout
480     // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
481     let mut targets = [[0u8; 3]; 16];
482     for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
483         *d = [s[0], s[1], s[2]];
484     }
485 
486     // and a set of colors to pick from.
487     let mut colorspace = targets.to_vec();
488 
489     // roundtrip all colors through the r5g6b5 encoding
490     for rgb in &mut colorspace {
491         *rgb = enc565_decode(enc565_encode(*rgb));
492     }
493 
494     // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
495     colorspace.dedup();
496 
497     // in case of slight gradients it can happen that there's only one entry left in the color table.
498     // as the resulting banding can be quite bad if we would just left the block at the closest
499     // encodable color, we have a special path here that tries to emulate the wanted color
500     // using the linear interpolation between gradients
501     if colorspace.len() == 1 {
502         // the base color we got from colorspace reduction
503         let ref_rgb = colorspace[0];
504         // the unreduced color in this block that's the furthest away from the actual block
505         let mut rgb = targets
506             .iter()
507             .cloned()
508             .max_by_key(|rgb| diff(*rgb, ref_rgb))
509             .unwrap();
510         // amplify differences by 2.5, which should push them to the next quantized value
511         // if possible without overshoot
512         for i in 0..3 {
513             rgb[i] =
514                 ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
515         }
516 
517         // roundtrip it through quantization
518         let encoded = enc565_encode(rgb);
519         let rgb = enc565_decode(encoded);
520 
521         // in case this didn't land us a different color the best way to represent this field is
522         // as a single color block
523         if rgb == ref_rgb {
524             dest[0] = encoded as u8;
525             dest[1] = (encoded >> 8) as u8;
526 
527             for d in dest.iter_mut().take(8).skip(2) {
528                 *d = 0;
529             }
530             return;
531         }
532 
533         // we did find a separate value: add it to the options so after one round of quantization
534         // we're done
535         colorspace.push(rgb);
536     }
537 
538     // block quantization loop: we basically just try every possible combination, returning
539     // the combination with the least squared error
540     // stores the best candidate colors
541     let mut chosen_colors = [[0; 3]; 4];
542     // did this index table use the [0,0,0] variant
543     let mut chosen_use_0 = false;
544     // error calculated for the last entry
545     let mut chosen_error = 0xFFFF_FFFFu32;
546 
547     // loop through unique permutations of the colorspace, where c1 != c2
548     'search: for (i, &c1) in colorspace.iter().enumerate() {
549         colors[0] = c1;
550 
551         for &c2 in &colorspace[0..i] {
552             colors[1] = c2;
553 
554             // what's inside here is ran at most 120 times.
555             for use_0 in 0..2 {
556                 // and 240 times here.
557 
558                 if use_0 != 0 {
559                     // interpolate one color, set the other to 0
560                     for i in 0..3 {
561                         colors[2][i] =
562                             ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
563                     }
564                     colors[3] = [0, 0, 0];
565                 } else {
566                     // interpolate to get 2 more colors
567                     for i in 0..3 {
568                         colors[2][i] =
569                             ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
570                         colors[3][i] =
571                             ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
572                     }
573                 }
574 
575                 // calculate the total error if we were to quantize the block with these color combinations
576                 // both these loops have statically known iteration counts and are well vectorizable
577                 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
578                 // pixel.
579                 let total_error = targets
580                     .iter()
581                     .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
582                     .sum();
583 
584                 // update the match if we found a better one
585                 if total_error < chosen_error {
586                     chosen_colors = colors;
587                     chosen_use_0 = use_0 != 0;
588                     chosen_error = total_error;
589 
590                     // if we've got a perfect or at most 1 LSB off match, we're done
591                     if total_error < 4 {
592                         break 'search;
593                     }
594                 }
595             }
596         }
597     }
598 
599     // calculate the final indices
600     // note that targets is already in reverse pixel order, to make the index computation easy.
601     let mut chosen_indices = 0u32;
602     for t in &targets {
603         let (idx, _) = chosen_colors
604             .iter()
605             .enumerate()
606             .min_by_key(|&(_, c)| diff(*c, *t))
607             .unwrap();
608         chosen_indices = (chosen_indices << 2) | idx as u32;
609     }
610 
611     // encode the colors
612     let mut color0 = enc565_encode(chosen_colors[0]);
613     let mut color1 = enc565_encode(chosen_colors[1]);
614 
615     // determine encoding. Note that color0 == color1 is impossible at this point
616     if color0 > color1 {
617         if chosen_use_0 {
618             swap(&mut color0, &mut color1);
619             // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
620             let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
621             chosen_indices ^= filter ^ 0x5555_5555;
622         }
623     } else if !chosen_use_0 {
624         swap(&mut color0, &mut color1);
625         // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
626         chosen_indices ^= 0x5555_5555;
627     }
628 
629     // encode everything.
630     dest[0] = color0 as u8;
631     dest[1] = (color0 >> 8) as u8;
632     dest[2] = color1 as u8;
633     dest[3] = (color1 >> 8) as u8;
634     for i in 0..4 {
635         dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
636     }
637 }
638 
639 /// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
640 /// where the alpha table they are indexed against is created by
641 /// calling alpha_table_dxt5(alpha0, alpha1)
642 /// returns the resulting error and alpha table
encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64)643 fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) {
644     // create a table for the given alpha ranges
645     let table = alpha_table_dxt5(alpha0, alpha1);
646     let mut indices = 0u64;
647     let mut total_error = 0i32;
648 
649     // least error brute force search
650     for (i, &a) in alphas.iter().enumerate() {
651         let (index, error) = table
652             .iter()
653             .enumerate()
654             .map(|(i, &e)| (i, square(i32::from(e) - i32::from(a))))
655             .min_by_key(|&(_, e)| e)
656             .unwrap();
657         total_error += error;
658         indices |= (index as u64) << (i * 3);
659     }
660 
661     (total_error, indices)
662 }
663 
664 /// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
encode_dxt5_block(source: &[u8], dest: &mut [u8])665 fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
666     assert!(source.len() == 64 && dest.len() == 16);
667 
668     // perform dxt color encoding
669     encode_dxt_colors(source, &mut dest[8..16]);
670 
671     // copy out the alpha bytes
672     let mut alphas = [0; 16];
673     for i in 0..16 {
674         alphas[i] = source[i * 4 + 3];
675     }
676 
677     // try both alpha compression methods, see which has the least error.
678     let alpha07 = alphas.iter().cloned().min().unwrap();
679     let alpha17 = alphas.iter().cloned().max().unwrap();
680     let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
681 
682     // if all alphas are 0 or 255 it doesn't particularly matter what we do here.
683     let alpha05 = alphas
684         .iter()
685         .cloned()
686         .filter(|&i| i != 255)
687         .max()
688         .unwrap_or(255);
689     let alpha15 = alphas
690         .iter()
691         .cloned()
692         .filter(|&i| i != 0)
693         .min()
694         .unwrap_or(0);
695     let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
696 
697     // pick the best one, encode the min/max values
698     let mut alpha_table = if error5 < error7 {
699         dest[0] = alpha05;
700         dest[1] = alpha15;
701         indices5
702     } else {
703         dest[0] = alpha07;
704         dest[1] = alpha17;
705         indices7
706     };
707 
708     // encode the alphas
709     for byte in dest[2..8].iter_mut() {
710         *byte = alpha_table as u8;
711         alpha_table >>= 8;
712     }
713 }
714 
715 /// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
encode_dxt3_block(source: &[u8], dest: &mut [u8])716 fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
717     assert!(source.len() == 64 && dest.len() == 16);
718 
719     // perform dxt color encoding
720     encode_dxt_colors(source, &mut dest[8..16]);
721 
722     // DXT3 alpha compression is very simple, just round towards the nearest value
723 
724     // index the alpha values into the 64bit alpha table
725     let mut alpha_table = 0u64;
726     for i in 0..16 {
727         let alpha = u64::from(source[i * 4 + 3]);
728         let alpha = (alpha + 0x8) / 0x11;
729         alpha_table |= alpha << (i * 4);
730     }
731 
732     // encode the alpha values
733     for byte in &mut dest[0..8] {
734         *byte = alpha_table as u8;
735         alpha_table >>= 8;
736     }
737 }
738 
739 /// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
encode_dxt1_block(source: &[u8], dest: &mut [u8])740 fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
741     assert!(source.len() == 48 && dest.len() == 8);
742 
743     // perform dxt color encoding
744     encode_dxt_colors(source, dest);
745 }
746 
747 /// Decode a row of DXT1 data to four rows of RGBA data.
748 /// source.len() should be a multiple of 8, otherwise this panics.
encode_dxt1_row(source: &[u8]) -> Vec<u8>749 fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
750     assert!(source.len() % 48 == 0);
751     let block_count = source.len() / 48;
752 
753     let mut dest = vec![0u8; block_count * 8];
754     // contains the 16 decoded pixels per block
755     let mut decoded_block = [0u8; 48];
756 
757     for (x, encoded_block) in dest.chunks_mut(8).enumerate() {
758         // copy the values from the decoded block to linewise RGB layout
759         for line in 0..4 {
760             let offset = (block_count * line + x) * 12;
761             decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]);
762         }
763 
764         encode_dxt1_block(&decoded_block, encoded_block);
765     }
766     dest
767 }
768 
769 /// Decode a row of DXT3 data to four rows of RGBA data.
770 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt3_row(source: &[u8]) -> Vec<u8>771 fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
772     assert!(source.len() % 64 == 0);
773     let block_count = source.len() / 64;
774 
775     let mut dest = vec![0u8; block_count * 16];
776     // contains the 16 decoded pixels per block
777     let mut decoded_block = [0u8; 64];
778 
779     for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
780         // copy the values from the decoded block to linewise RGB layout
781         for line in 0..4 {
782             let offset = (block_count * line + x) * 16;
783             decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
784         }
785 
786         encode_dxt3_block(&decoded_block, encoded_block);
787     }
788     dest
789 }
790 
791 /// Decode a row of DXT5 data to four rows of RGBA data.
792 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt5_row(source: &[u8]) -> Vec<u8>793 fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
794     assert!(source.len() % 64 == 0);
795     let block_count = source.len() / 64;
796 
797     let mut dest = vec![0u8; block_count * 16];
798     // contains the 16 decoded pixels per block
799     let mut decoded_block = [0u8; 64];
800 
801     for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
802         // copy the values from the decoded block to linewise RGB layout
803         for line in 0..4 {
804             let offset = (block_count * line + x) * 16;
805             decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
806         }
807 
808         encode_dxt5_block(&decoded_block, encoded_block);
809     }
810     dest
811 }
812