1 //! Decoding of DXT (S3TC) compression
2 //!
3 //! DXT is an image format that supports lossy compression
4 //!
5 //! # Related Links
6 //! * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.
7 //!
8 //! Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9
10 use std::convert::TryFrom;
11 use std::io::{self, Read, Seek, SeekFrom, Write};
12
13 use crate::color::ColorType;
14 use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind};
15 use crate::image::{self, ImageDecoder, ImageDecoderExt, ImageReadBuffer, Progress};
16
17 /// What version of DXT compression are we using?
18 /// Note that DXT2 and DXT4 are left away as they're
19 /// just DXT3 and DXT5 with premultiplied alpha
20 ///
21 /// DEPRECATED: The name of this enum will be changed to [`DxtVariant`].
22 ///
23 /// TODO: rename to [`DxtVariant`]
24 ///
25 /// [`DxtVariant`]: type.DxtVariant.html
26 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
27 pub enum DXTVariant {
28 /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
29 /// compressed into an 8 byte block of DXT1 data
30 DXT1,
31 /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
32 /// compressed into a 16 byte block of DXT3 data
33 DXT3,
34 /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
35 /// compressed into a 16 byte block of DXT5 data
36 DXT5,
37 }
38
39 /// DXT compression version.
40 ///
41 /// An alias of [`DXTVariant`].
42 ///
43 /// TODO: remove when [`DXTVariant`] is renamed.
44 ///
45 /// [`DXTVariant`]: enum.DXTVariant.html
46 #[allow(dead_code)]
47 pub type DxtVariant = DXTVariant;
48
49 impl DXTVariant {
50 /// Returns the amount of bytes of raw image data
51 /// that is encoded in a single DXTn block
decoded_bytes_per_block(self) -> usize52 fn decoded_bytes_per_block(self) -> usize {
53 match self {
54 DXTVariant::DXT1 => 48,
55 DXTVariant::DXT3 | DXTVariant::DXT5 => 64,
56 }
57 }
58
59 /// Returns the amount of bytes per block of encoded DXTn data
encoded_bytes_per_block(self) -> usize60 fn encoded_bytes_per_block(self) -> usize {
61 match self {
62 DXTVariant::DXT1 => 8,
63 DXTVariant::DXT3 | DXTVariant::DXT5 => 16,
64 }
65 }
66
67 /// Returns the color type that is stored in this DXT variant
color_type(self) -> ColorType68 pub fn color_type(self) -> ColorType {
69 match self {
70 DXTVariant::DXT1 => ColorType::Rgb8,
71 DXTVariant::DXT3 | DXTVariant::DXT5 => ColorType::Rgba8,
72 }
73 }
74 }
75
76 /// DXT decoder
77 pub struct DxtDecoder<R: Read> {
78 inner: R,
79 width_blocks: u32,
80 height_blocks: u32,
81 variant: DXTVariant,
82 row: u32,
83 }
84
85 impl<R: Read> DxtDecoder<R> {
86 /// Create a new DXT decoder that decodes from the stream ```r```.
87 /// As DXT is often stored as raw buffers with the width/height
88 /// somewhere else the width and height of the image need
89 /// to be passed in ```width``` and ```height```, as well as the
90 /// DXT variant in ```variant```.
91 /// width and height are required to be powers of 2 and at least 4.
92 /// otherwise an error will be returned
new( r: R, width: u32, height: u32, variant: DXTVariant, ) -> Result<DxtDecoder<R>, ImageError>93 pub fn new(
94 r: R,
95 width: u32,
96 height: u32,
97 variant: DXTVariant,
98 ) -> Result<DxtDecoder<R>, ImageError> {
99 if width % 4 != 0 || height % 4 != 0 {
100 // TODO: this is actually a bit of a weird case. We could return `DecodingError` but
101 // it's not really the format that is wrong However, the encoder should surely return
102 // `EncodingError` so it would be the logical choice for symmetry.
103 return Err(ImageError::Parameter(ParameterError::from_kind(
104 ParameterErrorKind::DimensionMismatch,
105 )));
106 }
107 let width_blocks = width / 4;
108 let height_blocks = height / 4;
109 Ok(DxtDecoder {
110 inner: r,
111 width_blocks,
112 height_blocks,
113 variant,
114 row: 0,
115 })
116 }
117
read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize>118 fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
119 assert_eq!(u64::try_from(buf.len()), Ok(self.scanline_bytes()));
120
121 let mut src =
122 vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
123 self.inner.read_exact(&mut src)?;
124 match self.variant {
125 DXTVariant::DXT1 => decode_dxt1_row(&src, buf),
126 DXTVariant::DXT3 => decode_dxt3_row(&src, buf),
127 DXTVariant::DXT5 => decode_dxt5_row(&src, buf),
128 }
129 self.row += 1;
130 Ok(buf.len())
131 }
132 }
133
134 // Note that, due to the way that DXT compression works, a scanline is considered to consist out of
135 // 4 lines of pixels.
136 impl<'a, R: 'a + Read> ImageDecoder<'a> for DxtDecoder<R> {
137 type Reader = DxtReader<R>;
138
dimensions(&self) -> (u32, u32)139 fn dimensions(&self) -> (u32, u32) {
140 (self.width_blocks * 4, self.height_blocks * 4)
141 }
142
color_type(&self) -> ColorType143 fn color_type(&self) -> ColorType {
144 self.variant.color_type()
145 }
146
scanline_bytes(&self) -> u64147 fn scanline_bytes(&self) -> u64 {
148 self.variant.decoded_bytes_per_block() as u64 * u64::from(self.width_blocks)
149 }
150
into_reader(self) -> ImageResult<Self::Reader>151 fn into_reader(self) -> ImageResult<Self::Reader> {
152 Ok(DxtReader {
153 buffer: ImageReadBuffer::new(self.scanline_bytes(), self.total_bytes()),
154 decoder: self,
155 })
156 }
157
read_image(mut self, buf: &mut [u8]) -> ImageResult<()>158 fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> {
159 assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes()));
160
161 for chunk in buf.chunks_mut(self.scanline_bytes() as usize) {
162 self.read_scanline(chunk)?;
163 }
164 Ok(())
165 }
166 }
167
168 impl<'a, R: 'a + Read + Seek> ImageDecoderExt<'a> for DxtDecoder<R> {
read_rect_with_progress<F: Fn(Progress)>( &mut self, x: u32, y: u32, width: u32, height: u32, buf: &mut [u8], progress_callback: F, ) -> ImageResult<()>169 fn read_rect_with_progress<F: Fn(Progress)>(
170 &mut self,
171 x: u32,
172 y: u32,
173 width: u32,
174 height: u32,
175 buf: &mut [u8],
176 progress_callback: F,
177 ) -> ImageResult<()> {
178 let encoded_scanline_bytes = self.variant.encoded_bytes_per_block() as u64
179 * u64::from(self.width_blocks);
180
181 let start = self.inner.seek(SeekFrom::Current(0))?;
182 image::load_rect(x, y, width, height, buf, progress_callback, self,
183 |s, scanline| {
184 s.inner.seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
185 Ok(())
186 },
187 |s, buf| s.read_scanline(buf).map(|_| ()))?;
188 self.inner.seek(SeekFrom::Start(start))?;
189 Ok(())
190 }
191 }
192
193 /// DXT reader
194 pub struct DxtReader<R: Read> {
195 buffer: ImageReadBuffer,
196 decoder: DxtDecoder<R>,
197 }
198
199 /// DXT reader
200 ///
201 /// An alias of [`DxtReader`].
202 ///
203 /// TODO: remove
204 ///
205 /// [`DxtReader`]: struct.DxtReader.html
206 #[allow(dead_code)]
207 #[deprecated(note = "Use `DxtReader` instead")]
208 pub type DXTReader<R> = DxtReader<R>;
209
210 impl<R: Read> Read for DxtReader<R> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>211 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
212 let decoder = &mut self.decoder;
213 self.buffer.read(buf, |buf| decoder.read_scanline(buf))
214 }
215 }
216
217 /// DXT encoder
218 pub struct DxtEncoder<W: Write> {
219 w: W,
220 }
221
222 /// DXT encoder
223 ///
224 /// An alias of [`DxtEncoder`].
225 ///
226 /// TODO: remove
227 ///
228 /// [`DxtEncoder`]: struct.DxtEncoder.html
229 #[allow(dead_code)]
230 #[deprecated(note = "Use `DxtEncoder` instead")]
231 pub type DXTEncoder<W> = DxtEncoder<W>;
232
233 impl<W: Write> DxtEncoder<W> {
234 /// Create a new encoder that writes its output to ```w```
new(w: W) -> DxtEncoder<W>235 pub fn new(w: W) -> DxtEncoder<W> {
236 DxtEncoder { w }
237 }
238
239 /// Encodes the image data ```data```
240 /// that has dimensions ```width``` and ```height```
241 /// in ```DXTVariant``` ```variant```
242 /// data is assumed to be in variant.color_type()
encode( mut self, data: &[u8], width: u32, height: u32, variant: DXTVariant, ) -> ImageResult<()>243 pub fn encode(
244 mut self,
245 data: &[u8],
246 width: u32,
247 height: u32,
248 variant: DXTVariant,
249 ) -> ImageResult<()> {
250 if width % 4 != 0 || height % 4 != 0 {
251 // TODO: this is not very idiomatic yet. Should return an EncodingError.
252 return Err(ImageError::Parameter(ParameterError::from_kind(
253 ParameterErrorKind::DimensionMismatch,
254 )));
255 }
256 let width_blocks = width / 4;
257 let height_blocks = height / 4;
258
259 let stride = variant.decoded_bytes_per_block();
260
261 assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
262
263 for chunk in data.chunks(width_blocks as usize * stride) {
264 let data = match variant {
265 DXTVariant::DXT1 => encode_dxt1_row(chunk),
266 DXTVariant::DXT3 => encode_dxt3_row(chunk),
267 DXTVariant::DXT5 => encode_dxt5_row(chunk),
268 };
269 self.w.write_all(&data)?;
270 }
271 Ok(())
272 }
273 }
274
275 /**
276 * Actual encoding/decoding logic below.
277 */
278 use std::mem::swap;
279
280 type Rgb = [u8; 3];
281
282 /// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
283 /// mapping is done so min/max range values are preserved. So for 5-bit
284 /// values 0x00 -> 0x00 and 0x1F -> 0xFF
enc565_decode(value: u16) -> Rgb285 fn enc565_decode(value: u16) -> Rgb {
286 let red = (value >> 11) & 0x1F;
287 let green = (value >> 5) & 0x3F;
288 let blue = (value) & 0x1F;
289 [
290 (red * 0xFF / 0x1F) as u8,
291 (green * 0xFF / 0x3F) as u8,
292 (blue * 0xFF / 0x1F) as u8,
293 ]
294 }
295
296 /// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
297 /// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
enc565_encode(rgb: Rgb) -> u16298 fn enc565_encode(rgb: Rgb) -> u16 {
299 let red = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF;
300 let green = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF;
301 let blue = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF;
302 (red << 11) | (green << 5) | blue
303 }
304
305 /// utility function: squares a value
square(a: i32) -> i32306 fn square(a: i32) -> i32 {
307 a * a
308 }
309
310 /// returns the squared error between two RGB values
diff(a: Rgb, b: Rgb) -> i32311 fn diff(a: Rgb, b: Rgb) -> i32 {
312 square(i32::from(a[0]) - i32::from(b[0])) + square(i32::from(a[1]) - i32::from(b[1]))
313 + square(i32::from(a[2]) - i32::from(b[2]))
314 }
315
316 /*
317 * Functions for decoding DXT compression
318 */
319
320 /// Constructs the DXT5 alpha lookup table from the two alpha entries
321 /// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
322 /// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8]323 fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] {
324 let mut table = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF];
325 if alpha0 > alpha1 {
326 for i in 2..8u16 {
327 table[i as usize] =
328 (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8;
329 }
330 } else {
331 for i in 2..6u16 {
332 table[i as usize] =
333 (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8;
334 }
335 }
336 table
337 }
338
339 /// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
340 /// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
decode_dxt_colors(source: &[u8], dest: &mut [u8])341 fn decode_dxt_colors(source: &[u8], dest: &mut [u8]) {
342 // sanity checks, also enable the compiler to elide all following bound checks
343 assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64));
344 // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
345 let pitch = dest.len() / 16;
346
347 // extract color data
348 let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8);
349 let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8);
350 let color_table = u32::from(source[4]) | (u32::from(source[5]) << 8)
351 | (u32::from(source[6]) << 16) | (u32::from(source[7]) << 24);
352 // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32);
353
354 // decode the colors to rgb format
355 let mut colors = [[0; 3]; 4];
356 colors[0] = enc565_decode(color0);
357 colors[1] = enc565_decode(color1);
358
359 // determine color interpolation method
360 if color0 > color1 {
361 // linearly interpolate the other two color table entries
362 for i in 0..3 {
363 colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
364 colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
365 }
366 } else {
367 // linearly interpolate one other entry, keep the other at 0
368 for i in 0..3 {
369 colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
370 }
371 }
372
373 // serialize the result. Every color is determined by looking up
374 // two bits in color_table which identify which color to actually pick from the 4 possible colors
375 for i in 0..16 {
376 dest[i * pitch..i * pitch + 3]
377 .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]);
378 }
379 }
380
381 /// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
decode_dxt5_block(source: &[u8], dest: &mut [u8])382 fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
383 assert!(source.len() == 16 && dest.len() == 64);
384
385 // extract alpha index table (stored as little endian 64-bit value)
386 let alpha_table = source[2..8]
387 .iter()
388 .rev()
389 .fold(0, |t, &b| (t << 8) | u64::from(b));
390
391 // alhpa level decode
392 let alphas = alpha_table_dxt5(source[0], source[1]);
393
394 // serialize alpha
395 for i in 0..16 {
396 dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7];
397 }
398
399 // handle colors
400 decode_dxt_colors(&source[8..16], dest);
401 }
402
403 /// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
decode_dxt3_block(source: &[u8], dest: &mut [u8])404 fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
405 assert!(source.len() == 16 && dest.len() == 64);
406
407 // extract alpha index table (stored as little endian 64-bit value)
408 let alpha_table = source[0..8]
409 .iter()
410 .rev()
411 .fold(0, |t, &b| (t << 8) | u64::from(b));
412
413 // serialize alpha (stored as 4-bit values)
414 for i in 0..16 {
415 dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11;
416 }
417
418 // handle colors
419 decode_dxt_colors(&source[8..16], dest);
420 }
421
422 /// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
decode_dxt1_block(source: &[u8], dest: &mut [u8])423 fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
424 assert!(source.len() == 8 && dest.len() == 48);
425 decode_dxt_colors(&source, dest);
426 }
427
428 /// Decode a row of DXT1 data to four rows of RGBA data.
429 /// source.len() should be a multiple of 8, otherwise this panics.
decode_dxt1_row(source: &[u8], dest: &mut [u8])430 fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
431 assert!(source.len() % 8 == 0);
432 let block_count = source.len() / 8;
433 assert!(dest.len() >= block_count * 48);
434
435 // contains the 16 decoded pixels per block
436 let mut decoded_block = [0u8; 48];
437
438 for (x, encoded_block) in source.chunks(8).enumerate() {
439 decode_dxt1_block(encoded_block, &mut decoded_block);
440
441 // copy the values from the decoded block to linewise RGB layout
442 for line in 0..4 {
443 let offset = (block_count * line + x) * 12;
444 dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]);
445 }
446 }
447 }
448
449 /// Decode a row of DXT3 data to four rows of RGBA data.
450 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt3_row(source: &[u8], dest: &mut [u8])451 fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
452 assert!(source.len() % 16 == 0);
453 let block_count = source.len() / 16;
454 assert!(dest.len() >= block_count * 64);
455
456 // contains the 16 decoded pixels per block
457 let mut decoded_block = [0u8; 64];
458
459 for (x, encoded_block) in source.chunks(16).enumerate() {
460 decode_dxt3_block(encoded_block, &mut decoded_block);
461
462 // copy the values from the decoded block to linewise RGB layout
463 for line in 0..4 {
464 let offset = (block_count * line + x) * 16;
465 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
466 }
467 }
468 }
469
470 /// Decode a row of DXT5 data to four rows of RGBA data.
471 /// source.len() should be a multiple of 16, otherwise this panics.
decode_dxt5_row(source: &[u8], dest: &mut [u8])472 fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
473 assert!(source.len() % 16 == 0);
474 let block_count = source.len() / 16;
475 assert!(dest.len() >= block_count * 64);
476
477 // contains the 16 decoded pixels per block
478 let mut decoded_block = [0u8; 64];
479
480 for (x, encoded_block) in source.chunks(16).enumerate() {
481 decode_dxt5_block(encoded_block, &mut decoded_block);
482
483 // copy the values from the decoded block to linewise RGB layout
484 for line in 0..4 {
485 let offset = (block_count * line + x) * 16;
486 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
487 }
488 }
489 }
490
491 /*
492 * Functions for encoding DXT compression
493 */
494
495 /// Tries to perform the color encoding part of dxt compression
496 /// the approach taken is simple, it picks unique combinations
497 /// of the colors present in the block, and attempts to encode the
498 /// block with each, picking the encoding that yields the least
499 /// squared error out of all of them.
500 ///
501 /// This could probably be faster but is already reasonably fast
502 /// and a good reference impl to optimize others against.
503 ///
504 /// Another way to perform this analysis would be to perform a
505 /// singular value decomposition of the different colors, and
506 /// then pick 2 points on this line as the base colors. But
507 /// this is still rather unwieldly math and has issues
508 /// with the 3-linear-colors-and-0 case, it's also worse
509 /// at conserving the original colors.
510 ///
511 /// source: should be RGBAx16 or RGBx16 bytes of data,
512 /// dest 8 bytes of resulting encoded color data
encode_dxt_colors(source: &[u8], dest: &mut [u8])513 fn encode_dxt_colors(source: &[u8], dest: &mut [u8]) {
514 // sanity checks and determine stride when parsing the source data
515 assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
516 let stride = source.len() / 16;
517
518 // reference colors array
519 let mut colors = [[0u8; 3]; 4];
520
521 // Put the colors we're going to be processing in an array with pure RGB layout
522 // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
523 let mut targets = [[0u8; 3]; 16];
524 for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
525 *d = [s[0], s[1], s[2]];
526 }
527
528 // and a set of colors to pick from.
529 let mut colorspace = targets.to_vec();
530
531 // roundtrip all colors through the r5g6b5 encoding
532 for rgb in &mut colorspace {
533 *rgb = enc565_decode(enc565_encode(*rgb));
534 }
535
536 // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
537 colorspace.dedup();
538
539 // in case of slight gradients it can happen that there's only one entry left in the color table.
540 // as the resulting banding can be quite bad if we would just left the block at the closest
541 // encodable color, we have a special path here that tries to emulate the wanted color
542 // using the linear interpolation between gradients
543 if colorspace.len() == 1 {
544 // the base color we got from colorspace reduction
545 let ref_rgb = colorspace[0];
546 // the unreduced color in this block that's the furthest away from the actual block
547 let mut rgb = targets
548 .iter()
549 .cloned()
550 .max_by_key(|rgb| diff(*rgb, ref_rgb))
551 .unwrap();
552 // amplify differences by 2.5, which should push them to the next quantized value
553 // if possible without overshoot
554 for i in 0..3 {
555 rgb[i] =
556 ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
557 }
558
559 // roundtrip it through quantization
560 let encoded = enc565_encode(rgb);
561 let rgb = enc565_decode(encoded);
562
563 // in case this didn't land us a different color the best way to represent this field is
564 // as a single color block
565 if rgb == ref_rgb {
566 dest[0] = encoded as u8;
567 dest[1] = (encoded >> 8) as u8;
568
569 for d in dest.iter_mut().take(8).skip(2) {
570 *d = 0;
571 }
572 return;
573 }
574
575 // we did find a separate value: add it to the options so after one round of quantization
576 // we're done
577 colorspace.push(rgb);
578 }
579
580 // block quantization loop: we basically just try every possible combination, returning
581 // the combination with the least squared error
582 // stores the best candidate colors
583 let mut chosen_colors = [[0; 3]; 4];
584 // did this index table use the [0,0,0] variant
585 let mut chosen_use_0 = false;
586 // error calculated for the last entry
587 let mut chosen_error = 0xFFFF_FFFFu32;
588
589 // loop through unique permutations of the colorspace, where c1 != c2
590 'search: for (i, &c1) in colorspace.iter().enumerate() {
591 colors[0] = c1;
592
593 for &c2 in &colorspace[0..i] {
594 colors[1] = c2;
595
596 // what's inside here is ran at most 120 times.
597 for use_0 in 0..2 {
598 // and 240 times here.
599
600 if use_0 != 0 {
601 // interpolate one color, set the other to 0
602 for i in 0..3 {
603 colors[2][i] =
604 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
605 }
606 colors[3] = [0, 0, 0];
607 } else {
608 // interpolate to get 2 more colors
609 for i in 0..3 {
610 colors[2][i] =
611 ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
612 colors[3][i] =
613 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
614 }
615 }
616
617 // calculate the total error if we were to quantize the block with these color combinations
618 // both these loops have statically known iteration counts and are well vectorizable
619 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
620 // pixel.
621 let total_error = targets
622 .iter()
623 .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
624 .sum();
625
626 // update the match if we found a better one
627 if total_error < chosen_error {
628 chosen_colors = colors;
629 chosen_use_0 = use_0 != 0;
630 chosen_error = total_error;
631
632 // if we've got a perfect or at most 1 LSB off match, we're done
633 if total_error < 4 {
634 break 'search;
635 }
636 }
637 }
638 }
639 }
640
641 // calculate the final indices
642 // note that targets is already in reverse pixel order, to make the index computation easy.
643 let mut chosen_indices = 0u32;
644 for t in &targets {
645 let (idx, _) = chosen_colors
646 .iter()
647 .enumerate()
648 .min_by_key(|&(_, c)| diff(*c, *t))
649 .unwrap();
650 chosen_indices = (chosen_indices << 2) | idx as u32;
651 }
652
653 // encode the colors
654 let mut color0 = enc565_encode(chosen_colors[0]);
655 let mut color1 = enc565_encode(chosen_colors[1]);
656
657 // determine encoding. Note that color0 == color1 is impossible at this point
658 if color0 > color1 {
659 if chosen_use_0 {
660 swap(&mut color0, &mut color1);
661 // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
662 let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
663 chosen_indices ^= filter ^ 0x5555_5555;
664 }
665 } else if !chosen_use_0 {
666 swap(&mut color0, &mut color1);
667 // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
668 chosen_indices ^= 0x5555_5555;
669 }
670
671 // encode everything.
672 dest[0] = color0 as u8;
673 dest[1] = (color0 >> 8) as u8;
674 dest[2] = color1 as u8;
675 dest[3] = (color1 >> 8) as u8;
676 for i in 0..4 {
677 dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
678 }
679 }
680
681 /// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
682 /// where the alpha table they are indexed against is created by
683 /// calling alpha_table_dxt5(alpha0, alpha1)
684 /// returns the resulting error and alpha table
encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64)685 fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) {
686 // create a table for the given alpha ranges
687 let table = alpha_table_dxt5(alpha0, alpha1);
688 let mut indices = 0u64;
689 let mut total_error = 0i32;
690
691 // least error brute force search
692 for (i, &a) in alphas.iter().enumerate() {
693 let (index, error) = table
694 .iter()
695 .enumerate()
696 .map(|(i, &e)| (i, square(i32::from(e) - i32::from(a))))
697 .min_by_key(|&(_, e)| e)
698 .unwrap();
699 total_error += error;
700 indices |= (index as u64) << (i * 3);
701 }
702
703 (total_error, indices)
704 }
705
706 /// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
encode_dxt5_block(source: &[u8], dest: &mut [u8])707 fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
708 assert!(source.len() == 64 && dest.len() == 16);
709
710 // perform dxt color encoding
711 encode_dxt_colors(source, &mut dest[8..16]);
712
713 // copy out the alpha bytes
714 let mut alphas = [0; 16];
715 for i in 0..16 {
716 alphas[i] = source[i * 4 + 3];
717 }
718
719 // try both alpha compression methods, see which has the least error.
720 let alpha07 = alphas.iter().cloned().min().unwrap();
721 let alpha17 = alphas.iter().cloned().max().unwrap();
722 let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
723
724 // if all alphas are 0 or 255 it doesn't particularly matter what we do here.
725 let alpha05 = alphas
726 .iter()
727 .cloned()
728 .filter(|&i| i != 255)
729 .max()
730 .unwrap_or(255);
731 let alpha15 = alphas
732 .iter()
733 .cloned()
734 .filter(|&i| i != 0)
735 .min()
736 .unwrap_or(0);
737 let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
738
739 // pick the best one, encode the min/max values
740 let mut alpha_table = if error5 < error7 {
741 dest[0] = alpha05;
742 dest[1] = alpha15;
743 indices5
744 } else {
745 dest[0] = alpha07;
746 dest[1] = alpha17;
747 indices7
748 };
749
750 // encode the alphas
751 for byte in dest[2..8].iter_mut() {
752 *byte = alpha_table as u8;
753 alpha_table >>= 8;
754 }
755 }
756
757 /// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
encode_dxt3_block(source: &[u8], dest: &mut [u8])758 fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
759 assert!(source.len() == 64 && dest.len() == 16);
760
761 // perform dxt color encoding
762 encode_dxt_colors(source, &mut dest[8..16]);
763
764 // DXT3 alpha compression is very simple, just round towards the nearest value
765
766 // index the alpha values into the 64bit alpha table
767 let mut alpha_table = 0u64;
768 for i in 0..16 {
769 let alpha = u64::from(source[i * 4 + 3]);
770 let alpha = (alpha + 0x8) / 0x11;
771 alpha_table |= alpha << (i * 4);
772 }
773
774 // encode the alpha values
775 for byte in &mut dest[0..8] {
776 *byte = alpha_table as u8;
777 alpha_table >>= 8;
778 }
779 }
780
781 /// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
encode_dxt1_block(source: &[u8], dest: &mut [u8])782 fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
783 assert!(source.len() == 48 && dest.len() == 8);
784
785 // perform dxt color encoding
786 encode_dxt_colors(source, dest);
787 }
788
789 /// Decode a row of DXT1 data to four rows of RGBA data.
790 /// source.len() should be a multiple of 8, otherwise this panics.
encode_dxt1_row(source: &[u8]) -> Vec<u8>791 fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
792 assert!(source.len() % 48 == 0);
793 let block_count = source.len() / 48;
794
795 let mut dest = vec![0u8; block_count * 8];
796 // contains the 16 decoded pixels per block
797 let mut decoded_block = [0u8; 48];
798
799 for (x, encoded_block) in dest.chunks_mut(8).enumerate() {
800 // copy the values from the decoded block to linewise RGB layout
801 for line in 0..4 {
802 let offset = (block_count * line + x) * 12;
803 decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]);
804 }
805
806 encode_dxt1_block(&decoded_block, encoded_block);
807 }
808 dest
809 }
810
811 /// Decode a row of DXT3 data to four rows of RGBA data.
812 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt3_row(source: &[u8]) -> Vec<u8>813 fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
814 assert!(source.len() % 64 == 0);
815 let block_count = source.len() / 64;
816
817 let mut dest = vec![0u8; block_count * 16];
818 // contains the 16 decoded pixels per block
819 let mut decoded_block = [0u8; 64];
820
821 for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
822 // copy the values from the decoded block to linewise RGB layout
823 for line in 0..4 {
824 let offset = (block_count * line + x) * 16;
825 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
826 }
827
828 encode_dxt3_block(&decoded_block, encoded_block);
829 }
830 dest
831 }
832
833 /// Decode a row of DXT5 data to four rows of RGBA data.
834 /// source.len() should be a multiple of 16, otherwise this panics.
encode_dxt5_row(source: &[u8]) -> Vec<u8>835 fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
836 assert!(source.len() % 64 == 0);
837 let block_count = source.len() / 64;
838
839 let mut dest = vec![0u8; block_count * 16];
840 // contains the 16 decoded pixels per block
841 let mut decoded_block = [0u8; 64];
842
843 for (x, encoded_block) in dest.chunks_mut(16).enumerate() {
844 // copy the values from the decoded block to linewise RGB layout
845 for line in 0..4 {
846 let offset = (block_count * line + x) * 16;
847 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
848 }
849
850 encode_dxt5_block(&decoded_block, encoded_block);
851 }
852 dest
853 }
854