1 //! A module for all encoding needs.
2 use crate::error::{BufferResult, LzwError, LzwStatus};
3 use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
4 
5 use crate::alloc::{boxed::Box, vec::Vec};
6 #[cfg(feature = "std")]
7 use crate::error::StreamResult;
8 #[cfg(feature = "std")]
9 use std::io::{self, BufRead, Write};
10 
11 /// The state for encoding data with an LZW algorithm.
12 ///
13 /// The same structure can be utilized with streams as well as your own buffers and driver logic.
14 /// It may even be possible to mix them if you are sufficiently careful not to lose any written
15 /// data in the process.
16 pub struct Encoder {
17     /// Internally dispatch via a dynamic trait object. This did not have any significant
18     /// performance impact as we batch data internally and this pointer does not change after
19     /// creation!
20     state: Box<dyn Stateful + Send + 'static>,
21 }
22 
23 /// A encoding stream sink.
24 ///
25 /// See [`Encoder::into_stream`] on how to create this type.
26 ///
27 /// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream
28 #[cfg_attr(
29     not(feature = "std"),
30     deprecated = "This type is only useful with the `std` feature."
31 )]
32 #[cfg_attr(not(feature = "std"), allow(dead_code))]
33 pub struct IntoStream<'d, W> {
34     encoder: &'d mut Encoder,
35     writer: W,
36     buffer: Option<StreamBuf<'d>>,
37     default_size: usize,
38 }
39 
40 trait Stateful {
advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult41     fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
mark_ended(&mut self) -> bool42     fn mark_ended(&mut self) -> bool;
43     /// Reset the state tracking if end code has been written.
restart(&mut self)44     fn restart(&mut self);
45     /// Reset the decoder to the beginning, dropping all buffers etc.
reset(&mut self)46     fn reset(&mut self);
47 }
48 
49 struct EncodeState<B: Buffer> {
50     /// The configured minimal code size.
51     min_size: u8,
52     /// The current encoding symbol tree.
53     tree: Tree,
54     /// If we have pushed the end code.
55     has_ended: bool,
56     /// If tiff then bumps are a single code sooner.
57     is_tiff: bool,
58     /// The code corresponding to the currently read characters.
59     current_code: Code,
60     /// The clear code for resetting the dictionary.
61     clear_code: Code,
62     /// The bit buffer for encoding.
63     buffer: B,
64 }
65 
66 struct MsbBuffer {
67     /// The current code length.
68     code_size: u8,
69     /// The buffer bits.
70     buffer: u64,
71     /// The number of valid buffer bits.
72     bits_in_buffer: u8,
73 }
74 
75 struct LsbBuffer {
76     /// The current code length.
77     code_size: u8,
78     /// The buffer bits.
79     buffer: u64,
80     /// The number of valid buffer bits.
81     bits_in_buffer: u8,
82 }
83 
84 trait Buffer {
new(size: u8) -> Self85     fn new(size: u8) -> Self;
86     /// Reset the code size in the buffer.
reset(&mut self, min_size: u8)87     fn reset(&mut self, min_size: u8);
88     /// Apply effects of a Clear Code.
clear(&mut self, min_size: u8)89     fn clear(&mut self, min_size: u8);
90     /// Insert a code into the buffer.
buffer_code(&mut self, code: Code)91     fn buffer_code(&mut self, code: Code);
92     /// Push bytes if the buffer space is getting small.
push_out(&mut self, out: &mut &mut [u8]) -> bool93     fn push_out(&mut self, out: &mut &mut [u8]) -> bool;
94     /// Flush all full bytes, returning if at least one more byte remains.
flush_out(&mut self, out: &mut &mut [u8]) -> bool95     fn flush_out(&mut self, out: &mut &mut [u8]) -> bool;
96     /// Pad the buffer to a full byte.
buffer_pad(&mut self)97     fn buffer_pad(&mut self);
98     /// Increase the maximum code size.
bump_code_size(&mut self)99     fn bump_code_size(&mut self);
100     /// Return the maximum code with the current code size.
max_code(&self) -> Code101     fn max_code(&self) -> Code;
102     /// Return the current code size in bits.
code_size(&self) -> u8103     fn code_size(&self) -> u8;
104 }
105 
106 /// One tree node for at most each code.
107 /// To avoid using too much memory we keep nodes with few successors in optimized form. This form
108 /// doesn't offer lookup by indexing but instead does a linear search.
109 #[derive(Default)]
110 struct Tree {
111     simples: Vec<Simple>,
112     complex: Vec<Full>,
113     keys: Vec<CompressedKey>,
114 }
115 
116 #[derive(Clone, Copy)]
117 enum FullKey {
118     NoSuccessor,
119     Simple(u16),
120     Full(u16),
121 }
122 
123 #[derive(Clone, Copy)]
124 struct CompressedKey(u16);
125 
126 const SHORT: usize = 16;
127 
128 #[derive(Clone, Copy)]
129 struct Simple {
130     codes: [Code; SHORT],
131     chars: [u8; SHORT],
132     count: u8,
133 }
134 
135 #[derive(Clone, Copy)]
136 struct Full {
137     char_continuation: [Code; 256],
138 }
139 
140 impl Encoder {
141     /// Create a new encoder with the specified bit order and symbol size.
142     ///
143     /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
144     /// original specification. In particular you will need to specify an `Lsb` bit oder to encode
145     /// the data portion of a compressed `gif` image.
146     ///
147     /// # Panics
148     ///
149     /// The `size` needs to be in the interval `2..=12`.
new(order: BitOrder, size: u8) -> Self150     pub fn new(order: BitOrder, size: u8) -> Self {
151         type Boxed = Box<dyn Stateful + Send + 'static>;
152         super::assert_code_size(size);
153         let state = match order {
154             BitOrder::Lsb => Box::new(EncodeState::<LsbBuffer>::new(size)) as Boxed,
155             BitOrder::Msb => Box::new(EncodeState::<MsbBuffer>::new(size)) as Boxed,
156         };
157 
158         Encoder { state }
159     }
160 
161     /// Create a TIFF compatible encoder with the specified bit order and symbol size.
162     ///
163     /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
164     /// TIFF specification, which is a misinterpretation of the original algorithm for increasing
165     /// the code size. It switches one symbol sooner.
166     ///
167     /// # Panics
168     ///
169     /// The `size` needs to be in the interval `2..=12`.
with_tiff_size_switch(order: BitOrder, size: u8) -> Self170     pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
171         type Boxed = Box<dyn Stateful + Send + 'static>;
172         super::assert_code_size(size);
173         let state = match order {
174             BitOrder::Lsb => {
175                 let mut state = Box::new(EncodeState::<LsbBuffer>::new(size));
176                 state.is_tiff = true;
177                 state as Boxed
178             }
179             BitOrder::Msb => {
180                 let mut state = Box::new(EncodeState::<MsbBuffer>::new(size));
181                 state.is_tiff = true;
182                 state as Boxed
183             }
184         };
185 
186         Encoder { state }
187     }
188 
189     /// Encode some bytes from `inp` into `out`.
190     ///
191     /// See [`into_stream`] for high-level functions (this interface is only available with the
192     /// `std` feature) and [`finish`] for marking the input data as complete.
193     ///
194     /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and
195     /// all following ones will _not_ be consumed and the `status` of the result will signal an
196     /// error. The result will also indicate that all bytes up to but not including the offending
197     /// byte have been consumed. You may try again with a fixed byte.
198     ///
199     /// [`into_stream`]: #method.into_stream
200     /// [`finish`]: #method.finish
encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult201     pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
202         self.state.advance(inp, out)
203     }
204 
205     /// Construct a encoder into a writer.
206     #[cfg(feature = "std")]
into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W>207     pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
208         IntoStream {
209             encoder: self,
210             writer,
211             buffer: None,
212             default_size: STREAM_BUF_SIZE,
213         }
214     }
215 
216     /// Mark the encoding as in the process of finishing.
217     ///
218     /// The next following call to `encode_bytes` which is able to consume the complete input will
219     /// also try to emit an end code. It's not recommended, but also not unsound, to use different
220     /// byte slices in different calls from this point forward and thus to 'delay' the actual end
221     /// of the data stream. The behaviour after the end marker has been written is unspecified but
222     /// sound.
finish(&mut self)223     pub fn finish(&mut self) {
224         self.state.mark_ended();
225     }
226 
227     /// Undo marking this data stream as ending.
228     /// FIXME: clarify how this interacts with padding introduced after end code.
229     #[allow(dead_code)]
restart(&mut self)230     pub(crate) fn restart(&mut self) {
231         self.state.restart()
232     }
233 
234     /// Reset all internal state.
235     ///
236     /// This produce an encoder as if just constructed with `new` but taking slightly less work. In
237     /// particular it will not deallocate any internal allocations. It will also avoid some
238     /// duplicate setup work.
reset(&mut self)239     pub fn reset(&mut self) {
240         self.state.reset()
241     }
242 }
243 
244 #[cfg(feature = "std")]
245 impl<'d, W: Write> IntoStream<'d, W> {
246     /// Encode data from a reader.
247     ///
248     /// This will drain the supplied reader. It will not encode an end marker after all data has
249     /// been processed.
encode(&mut self, read: impl BufRead) -> StreamResult250     pub fn encode(&mut self, read: impl BufRead) -> StreamResult {
251         self.encode_part(read, false)
252     }
253 
254     /// Encode data from a reader and an end marker.
encode_all(mut self, read: impl BufRead) -> StreamResult255     pub fn encode_all(mut self, read: impl BufRead) -> StreamResult {
256         self.encode_part(read, true)
257     }
258 
259     /// Set the size of the intermediate encode buffer.
260     ///
261     /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is
262     /// available and any encoding method is called. No buffer is allocated if `set_buffer` has
263     /// been called. The buffer is reused.
264     ///
265     /// # Panics
266     /// This method panics if `size` is `0`.
set_buffer_size(&mut self, size: usize)267     pub fn set_buffer_size(&mut self, size: usize) {
268         assert_ne!(size, 0, "Attempted to set empty buffer");
269         self.default_size = size;
270     }
271 
272     /// Use a particular buffer as an intermediate encode buffer.
273     ///
274     /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
275     /// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant
276     /// for efficient encoding as there is additional overhead from `write` calls each time the
277     /// buffer has been filled.
278     ///
279     /// # Panics
280     /// This method panics if the `buffer` is empty.
set_buffer(&mut self, buffer: &'d mut [u8])281     pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
282         assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
283         self.buffer = Some(StreamBuf::Borrowed(buffer));
284     }
285 
encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult286     fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult {
287         let IntoStream {
288             encoder,
289             writer,
290             buffer,
291             default_size,
292         } = self;
293         enum Progress {
294             Ok,
295             Done,
296         }
297 
298         let mut bytes_read = 0;
299         let mut bytes_written = 0;
300 
301         let read_bytes = &mut bytes_read;
302         let write_bytes = &mut bytes_written;
303 
304         let outbuf: &mut [u8] =
305             match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
306                 StreamBuf::Borrowed(slice) => &mut *slice,
307                 StreamBuf::Owned(vec) => &mut *vec,
308             };
309         assert!(!outbuf.is_empty());
310 
311         let once = move || {
312             let data = read.fill_buf()?;
313 
314             if data.is_empty() {
315                 if finish {
316                     encoder.finish();
317                 } else {
318                     return Ok(Progress::Done);
319                 }
320             }
321 
322             let result = encoder.encode_bytes(data, &mut outbuf[..]);
323             *read_bytes += result.consumed_in;
324             *write_bytes += result.consumed_out;
325             read.consume(result.consumed_in);
326 
327             let done = result.status.map_err(|err| {
328                 io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
329             })?;
330 
331             if let LzwStatus::Done = done {
332                 writer.write_all(&outbuf[..result.consumed_out])?;
333                 return Ok(Progress::Done);
334             }
335 
336             if let LzwStatus::NoProgress = done {
337                 return Err(io::Error::new(
338                     io::ErrorKind::UnexpectedEof,
339                     "No more data but no end marker detected",
340                 ));
341             }
342 
343             writer.write_all(&outbuf[..result.consumed_out])?;
344             Ok(Progress::Ok)
345         };
346 
347         let status = core::iter::repeat_with(once)
348             // scan+fuse can be replaced with map_while
349             .scan((), |(), result| match result {
350                 Ok(Progress::Ok) => Some(Ok(())),
351                 Err(err) => Some(Err(err)),
352                 Ok(Progress::Done) => None,
353             })
354             .fuse()
355             .collect();
356 
357         StreamResult {
358             bytes_read,
359             bytes_written,
360             status,
361         }
362     }
363 }
364 
365 impl<B: Buffer> EncodeState<B> {
new(min_size: u8) -> Self366     fn new(min_size: u8) -> Self {
367         let clear_code = 1 << min_size;
368         let mut tree = Tree::default();
369         tree.init(min_size);
370         let mut state = EncodeState {
371             min_size,
372             tree,
373             has_ended: false,
374             is_tiff: false,
375             current_code: clear_code,
376             clear_code,
377             buffer: B::new(min_size),
378         };
379         state.buffer_code(clear_code);
380         state
381     }
382 }
383 
384 impl<B: Buffer> Stateful for EncodeState<B> {
advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult385     fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
386         let c_in = inp.len();
387         let c_out = out.len();
388         let mut status = Ok(LzwStatus::Ok);
389 
390         'encoding: loop {
391             if self.push_out(&mut out) {
392                 break;
393             }
394 
395             if inp.is_empty() && self.has_ended {
396                 let end = self.end_code();
397                 if self.current_code != end {
398                     if self.current_code != self.clear_code {
399                         self.buffer_code(self.current_code);
400 
401                         // When reading this code, the decoder will add an extra entry to its table
402                         // before reading th end code. Thusly, it may increase its code size based
403                         // on this additional entry.
404                         if self.tree.keys.len() + usize::from(self.is_tiff)
405                             > usize::from(self.buffer.max_code())
406                             && self.buffer.code_size() < MAX_CODESIZE
407                         {
408                             self.buffer.bump_code_size();
409                         }
410                     }
411                     self.buffer_code(end);
412                     self.current_code = end;
413                     self.buffer_pad();
414                 }
415 
416                 break;
417             }
418 
419             let mut next_code = None;
420             let mut bytes = inp.iter();
421             while let Some(&byte) = bytes.next() {
422                 if self.min_size < 8 && byte >= 1 << self.min_size {
423                     status = Err(LzwError::InvalidCode);
424                     break 'encoding;
425                 }
426 
427                 inp = bytes.as_slice();
428                 match self.tree.iterate(self.current_code, byte) {
429                     Ok(code) => self.current_code = code,
430                     Err(_) => {
431                         next_code = Some(self.current_code);
432 
433                         self.current_code = u16::from(byte);
434                         break;
435                     }
436                 }
437             }
438 
439             match next_code {
440                 // No more bytes, no code produced.
441                 None => break,
442                 Some(code) => {
443                     self.buffer_code(code);
444 
445                     if self.tree.keys.len() + usize::from(self.is_tiff)
446                         > usize::from(self.buffer.max_code()) + 1
447                         && self.buffer.code_size() < MAX_CODESIZE
448                     {
449                         self.buffer.bump_code_size();
450                     }
451 
452                     if self.tree.keys.len() > MAX_ENTRIES {
453                         self.buffer_code(self.clear_code);
454                         self.tree.reset(self.min_size);
455                         self.buffer.clear(self.min_size);
456                     }
457                 }
458             }
459         }
460 
461         if inp.is_empty() && self.current_code == self.end_code() {
462             if !self.flush_out(&mut out) {
463                 status = Ok(LzwStatus::Done);
464             }
465         }
466 
467         BufferResult {
468             consumed_in: c_in - inp.len(),
469             consumed_out: c_out - out.len(),
470             status,
471         }
472     }
473 
mark_ended(&mut self) -> bool474     fn mark_ended(&mut self) -> bool {
475         core::mem::replace(&mut self.has_ended, true)
476     }
477 
restart(&mut self)478     fn restart(&mut self) {
479         self.has_ended = false;
480     }
481 
reset(&mut self)482     fn reset(&mut self) {
483         self.restart();
484         self.current_code = self.clear_code;
485         self.tree.reset(self.min_size);
486         self.buffer.reset(self.min_size);
487         self.buffer_code(self.clear_code);
488     }
489 }
490 
491 impl<B: Buffer> EncodeState<B> {
push_out(&mut self, out: &mut &mut [u8]) -> bool492     fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
493         self.buffer.push_out(out)
494     }
495 
flush_out(&mut self, out: &mut &mut [u8]) -> bool496     fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
497         self.buffer.flush_out(out)
498     }
499 
end_code(&self) -> Code500     fn end_code(&self) -> Code {
501         self.clear_code + 1
502     }
503 
buffer_pad(&mut self)504     fn buffer_pad(&mut self) {
505         self.buffer.buffer_pad();
506     }
507 
buffer_code(&mut self, code: Code)508     fn buffer_code(&mut self, code: Code) {
509         self.buffer.buffer_code(code);
510     }
511 }
512 
513 impl Buffer for MsbBuffer {
new(min_size: u8) -> Self514     fn new(min_size: u8) -> Self {
515         MsbBuffer {
516             code_size: min_size + 1,
517             buffer: 0,
518             bits_in_buffer: 0,
519         }
520     }
521 
reset(&mut self, min_size: u8)522     fn reset(&mut self, min_size: u8) {
523         self.code_size = min_size + 1;
524         self.buffer = 0;
525         self.bits_in_buffer = 0;
526     }
527 
clear(&mut self, min_size: u8)528     fn clear(&mut self, min_size: u8) {
529         self.code_size = min_size + 1;
530     }
531 
buffer_code(&mut self, code: Code)532     fn buffer_code(&mut self, code: Code) {
533         let shift = 64 - self.bits_in_buffer - self.code_size;
534         self.buffer |= u64::from(code) << shift;
535         self.bits_in_buffer += self.code_size;
536     }
537 
push_out(&mut self, out: &mut &mut [u8]) -> bool538     fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
539         if self.bits_in_buffer + 2 * self.code_size < 64 {
540             return false;
541         }
542 
543         self.flush_out(out)
544     }
545 
flush_out(&mut self, out: &mut &mut [u8]) -> bool546     fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
547         let want = usize::from(self.bits_in_buffer / 8);
548         let count = want.min((*out).len());
549         let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
550         *out = tail;
551 
552         for b in bytes {
553             *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8;
554             self.buffer <<= 8;
555             self.bits_in_buffer -= 8;
556         }
557 
558         count < want
559     }
560 
buffer_pad(&mut self)561     fn buffer_pad(&mut self) {
562         let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
563         self.bits_in_buffer += to_byte;
564     }
565 
bump_code_size(&mut self)566     fn bump_code_size(&mut self) {
567         self.code_size += 1;
568     }
569 
max_code(&self) -> Code570     fn max_code(&self) -> Code {
571         (1 << self.code_size) - 1
572     }
573 
code_size(&self) -> u8574     fn code_size(&self) -> u8 {
575         self.code_size
576     }
577 }
578 
579 impl Buffer for LsbBuffer {
new(min_size: u8) -> Self580     fn new(min_size: u8) -> Self {
581         LsbBuffer {
582             code_size: min_size + 1,
583             buffer: 0,
584             bits_in_buffer: 0,
585         }
586     }
587 
reset(&mut self, min_size: u8)588     fn reset(&mut self, min_size: u8) {
589         self.code_size = min_size + 1;
590         self.buffer = 0;
591         self.bits_in_buffer = 0;
592     }
593 
clear(&mut self, min_size: u8)594     fn clear(&mut self, min_size: u8) {
595         self.code_size = min_size + 1;
596     }
597 
buffer_code(&mut self, code: Code)598     fn buffer_code(&mut self, code: Code) {
599         self.buffer |= u64::from(code) << self.bits_in_buffer;
600         self.bits_in_buffer += self.code_size;
601     }
602 
push_out(&mut self, out: &mut &mut [u8]) -> bool603     fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
604         if self.bits_in_buffer + 2 * self.code_size < 64 {
605             return false;
606         }
607 
608         self.flush_out(out)
609     }
610 
flush_out(&mut self, out: &mut &mut [u8]) -> bool611     fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
612         let want = usize::from(self.bits_in_buffer / 8);
613         let count = want.min((*out).len());
614         let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
615         *out = tail;
616 
617         for b in bytes {
618             *b = (self.buffer & 0x0000_0000_0000_00ff) as u8;
619             self.buffer >>= 8;
620             self.bits_in_buffer -= 8;
621         }
622 
623         count < want
624     }
625 
buffer_pad(&mut self)626     fn buffer_pad(&mut self) {
627         let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
628         self.bits_in_buffer += to_byte;
629     }
630 
bump_code_size(&mut self)631     fn bump_code_size(&mut self) {
632         self.code_size += 1;
633     }
634 
max_code(&self) -> Code635     fn max_code(&self) -> Code {
636         (1 << self.code_size) - 1
637     }
638 
code_size(&self) -> u8639     fn code_size(&self) -> u8 {
640         self.code_size
641     }
642 }
643 
644 impl Tree {
init(&mut self, min_size: u8)645     fn init(&mut self, min_size: u8) {
646         // We need a way to represent the state of a currently empty buffer. We use the clear code
647         // for this, thus create one complex mapping that leads to the one-char base codes.
648         self.keys
649             .resize((1 << min_size) + 2, FullKey::NoSuccessor.into());
650         self.complex.push(Full {
651             char_continuation: [0; 256],
652         });
653         let map_of_begin = self.complex.last_mut().unwrap();
654         for ch in 0u16..256 {
655             map_of_begin.char_continuation[usize::from(ch)] = ch;
656         }
657         self.keys[1 << min_size] = FullKey::Full(0).into();
658     }
659 
reset(&mut self, min_size: u8)660     fn reset(&mut self, min_size: u8) {
661         self.simples.clear();
662         self.keys.truncate((1 << min_size) + 2);
663         // Keep entry for clear code.
664         self.complex.truncate(1);
665         // The first complex is not changed..
666         for k in self.keys[..(1 << min_size) + 2].iter_mut() {
667             *k = FullKey::NoSuccessor.into();
668         }
669         self.keys[1 << min_size] = FullKey::Full(0).into();
670     }
671 
at_key(&self, code: Code, ch: u8) -> Option<Code>672     fn at_key(&self, code: Code, ch: u8) -> Option<Code> {
673         let key = self.keys[usize::from(code)];
674         match FullKey::from(key) {
675             FullKey::NoSuccessor => None,
676             FullKey::Simple(idx) => {
677                 let nexts = &self.simples[usize::from(idx)];
678                 let successors = nexts
679                     .codes
680                     .iter()
681                     .zip(nexts.chars.iter())
682                     .take(usize::from(nexts.count));
683                 for (&scode, &sch) in successors {
684                     if sch == ch {
685                         return Some(scode);
686                     }
687                 }
688 
689                 None
690             }
691             FullKey::Full(idx) => {
692                 let full = &self.complex[usize::from(idx)];
693                 let precode = full.char_continuation[usize::from(ch)];
694                 if usize::from(precode) < MAX_ENTRIES {
695                     Some(precode)
696                 } else {
697                     None
698                 }
699             }
700         }
701     }
702 
703     /// Iterate to the next char.
704     /// Return Ok when it was already in the tree or creates a new entry for it and returns Err.
iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code>705     fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> {
706         if let Some(next) = self.at_key(code, ch) {
707             Ok(next)
708         } else {
709             Err(self.append(code, ch))
710         }
711     }
712 
append(&mut self, code: Code, ch: u8) -> Code713     fn append(&mut self, code: Code, ch: u8) -> Code {
714         let next: Code = self.keys.len() as u16;
715         let key = self.keys[usize::from(code)];
716         // TODO: with debug assertions, check for non-existence
717         match FullKey::from(key) {
718             FullKey::NoSuccessor => {
719                 let new_key = FullKey::Simple(self.simples.len() as u16);
720                 self.simples.push(Simple::default());
721                 let simples = self.simples.last_mut().unwrap();
722                 simples.codes[0] = next;
723                 simples.chars[0] = ch;
724                 simples.count = 1;
725                 self.keys[usize::from(code)] = new_key.into();
726             }
727             FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => {
728                 let nexts = &mut self.simples[usize::from(idx)];
729                 let nidx = usize::from(nexts.count);
730                 nexts.chars[nidx] = ch;
731                 nexts.codes[nidx] = next;
732                 nexts.count += 1;
733             }
734             FullKey::Simple(idx) => {
735                 let new_key = FullKey::Full(self.complex.len() as u16);
736                 let simples = &self.simples[usize::from(idx)];
737                 self.complex.push(Full {
738                     char_continuation: [Code::max_value(); 256],
739                 });
740                 let full = self.complex.last_mut().unwrap();
741                 for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) {
742                     full.char_continuation[usize::from(pch)] = pcont;
743                 }
744                 self.keys[usize::from(code)] = new_key.into();
745             }
746             FullKey::Full(idx) => {
747                 let full = &mut self.complex[usize::from(idx)];
748                 full.char_continuation[usize::from(ch)] = next;
749             }
750         }
751         self.keys.push(FullKey::NoSuccessor.into());
752         next
753     }
754 }
755 
756 impl Default for FullKey {
default() -> Self757     fn default() -> Self {
758         FullKey::NoSuccessor
759     }
760 }
761 
762 impl Default for Simple {
default() -> Self763     fn default() -> Self {
764         Simple {
765             codes: [0; SHORT],
766             chars: [0; SHORT],
767             count: 0,
768         }
769     }
770 }
771 
772 impl From<CompressedKey> for FullKey {
from(CompressedKey(key): CompressedKey) -> Self773     fn from(CompressedKey(key): CompressedKey) -> Self {
774         match (key >> MAX_CODESIZE) & 0xf {
775             0 => FullKey::Full(key & 0xfff),
776             1 => FullKey::Simple(key & 0xfff),
777             _ => FullKey::NoSuccessor,
778         }
779     }
780 }
781 
782 impl From<FullKey> for CompressedKey {
from(full: FullKey) -> Self783     fn from(full: FullKey) -> Self {
784         CompressedKey(match full {
785             FullKey::NoSuccessor => 0x2000,
786             FullKey::Simple(code) => 0x1000 | code,
787             FullKey::Full(code) => code,
788         })
789     }
790 }
791 
792 #[cfg(test)]
793 mod tests {
794     use super::{BitOrder, Encoder, LzwError, LzwStatus};
795     use crate::alloc::vec::Vec;
796     use crate::decode::Decoder;
797     #[cfg(feature = "std")]
798     use crate::StreamBuf;
799 
800     #[test]
invalid_input_rejected()801     fn invalid_input_rejected() {
802         const BIT_LEN: u8 = 2;
803         let ref input = [0, 1 << BIT_LEN /* invalid */, 0];
804         let ref mut target = [0u8; 128];
805         let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN);
806 
807         encoder.finish();
808         // We require simulation of normality, that is byte-for-byte compression.
809         let result = encoder.encode_bytes(input, target);
810         assert!(if let Err(LzwError::InvalidCode) = result.status {
811             true
812         } else {
813             false
814         });
815         assert_eq!(result.consumed_in, 1);
816 
817         let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]);
818         assert!(if let Ok(LzwStatus::Done) = fixed.status {
819             true
820         } else {
821             false
822         });
823         assert_eq!(fixed.consumed_in, 2);
824 
825         // Okay, now test we actually fixed it.
826         let ref mut compare = [0u8; 4];
827         let mut todo = &target[..result.consumed_out + fixed.consumed_out];
828         let mut free = &mut compare[..];
829         let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN);
830 
831         // Decode with up to 16 rounds, far too much but inconsequential.
832         for _ in 0..16 {
833             if decoder.has_ended() {
834                 break;
835             }
836 
837             let result = decoder.decode_bytes(todo, free);
838             assert!(result.status.is_ok());
839             todo = &todo[result.consumed_in..];
840             free = &mut free[result.consumed_out..];
841         }
842 
843         let remaining = { free }.len();
844         let len = compare.len() - remaining;
845         assert_eq!(todo, &[]);
846         assert_eq!(compare[..len], [0, 1, 0]);
847     }
848 
849     #[test]
850     #[should_panic]
invalid_code_size_low()851     fn invalid_code_size_low() {
852         let _ = Encoder::new(BitOrder::Msb, 1);
853     }
854 
855     #[test]
856     #[should_panic]
invalid_code_size_high()857     fn invalid_code_size_high() {
858         let _ = Encoder::new(BitOrder::Msb, 14);
859     }
860 
make_decoded() -> Vec<u8>861     fn make_decoded() -> Vec<u8> {
862         const FILE: &'static [u8] =
863             include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock"));
864         return Vec::from(FILE);
865     }
866 
867     #[test]
868     #[cfg(feature = "std")]
into_stream_buffer_no_alloc()869     fn into_stream_buffer_no_alloc() {
870         let encoded = make_decoded();
871         let mut encoder = Encoder::new(BitOrder::Msb, 8);
872 
873         let mut output = vec![];
874         let mut buffer = [0; 512];
875         let mut istream = encoder.into_stream(&mut output);
876         istream.set_buffer(&mut buffer[..]);
877         istream.encode(&encoded[..]).status.unwrap();
878 
879         match istream.buffer {
880             Some(StreamBuf::Borrowed(_)) => {}
881             None => panic!("Decoded without buffer??"),
882             Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
883         }
884     }
885 
886     #[test]
887     #[cfg(feature = "std")]
into_stream_buffer_small_alloc()888     fn into_stream_buffer_small_alloc() {
889         struct WriteTap<W: std::io::Write>(W);
890         const BUF_SIZE: usize = 512;
891 
892         impl<W: std::io::Write> std::io::Write for WriteTap<W> {
893             fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
894                 assert!(buf.len() <= BUF_SIZE);
895                 self.0.write(buf)
896             }
897             fn flush(&mut self) -> std::io::Result<()> {
898                 self.0.flush()
899             }
900         }
901 
902         let encoded = make_decoded();
903         let mut encoder = Encoder::new(BitOrder::Msb, 8);
904 
905         let mut output = vec![];
906         let mut istream = encoder.into_stream(WriteTap(&mut output));
907         istream.set_buffer_size(512);
908         istream.encode(&encoded[..]).status.unwrap();
909 
910         match istream.buffer {
911             Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
912             Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
913             None => panic!("Decoded without buffer??"),
914         }
915     }
916 
917     #[test]
918     #[cfg(feature = "std")]
reset()919     fn reset() {
920         let encoded = make_decoded();
921         let mut encoder = Encoder::new(BitOrder::Msb, 8);
922         let mut reference = None;
923 
924         for _ in 0..2 {
925             let mut output = vec![];
926             let mut buffer = [0; 512];
927             let mut istream = encoder.into_stream(&mut output);
928             istream.set_buffer(&mut buffer[..]);
929             istream.encode_all(&encoded[..]).status.unwrap();
930 
931             encoder.reset();
932             if let Some(reference) = &reference {
933                 assert_eq!(output, *reference);
934             } else {
935                 reference = Some(output);
936             }
937         }
938     }
939 }
940