1 //! A module for all encoding needs. 2 use crate::error::{BufferResult, LzwError, LzwStatus}; 3 use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; 4 5 use crate::alloc::{boxed::Box, vec::Vec}; 6 #[cfg(feature = "std")] 7 use crate::error::StreamResult; 8 #[cfg(feature = "std")] 9 use std::io::{self, BufRead, Write}; 10 11 /// The state for encoding data with an LZW algorithm. 12 /// 13 /// The same structure can be utilized with streams as well as your own buffers and driver logic. 14 /// It may even be possible to mix them if you are sufficiently careful not to lose any written 15 /// data in the process. 16 pub struct Encoder { 17 /// Internally dispatch via a dynamic trait object. This did not have any significant 18 /// performance impact as we batch data internally and this pointer does not change after 19 /// creation! 20 state: Box<dyn Stateful + Send + 'static>, 21 } 22 23 /// A encoding stream sink. 24 /// 25 /// See [`Encoder::into_stream`] on how to create this type. 26 /// 27 /// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream 28 #[cfg_attr( 29 not(feature = "std"), 30 deprecated = "This type is only useful with the `std` feature." 31 )] 32 #[cfg_attr(not(feature = "std"), allow(dead_code))] 33 pub struct IntoStream<'d, W> { 34 encoder: &'d mut Encoder, 35 writer: W, 36 buffer: Option<StreamBuf<'d>>, 37 default_size: usize, 38 } 39 40 trait Stateful { advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult41 fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; mark_ended(&mut self) -> bool42 fn mark_ended(&mut self) -> bool; 43 /// Reset the state tracking if end code has been written. restart(&mut self)44 fn restart(&mut self); 45 /// Reset the decoder to the beginning, dropping all buffers etc. reset(&mut self)46 fn reset(&mut self); 47 } 48 49 struct EncodeState<B: Buffer> { 50 /// The configured minimal code size. 51 min_size: u8, 52 /// The current encoding symbol tree. 53 tree: Tree, 54 /// If we have pushed the end code. 55 has_ended: bool, 56 /// If tiff then bumps are a single code sooner. 57 is_tiff: bool, 58 /// The code corresponding to the currently read characters. 59 current_code: Code, 60 /// The clear code for resetting the dictionary. 61 clear_code: Code, 62 /// The bit buffer for encoding. 63 buffer: B, 64 } 65 66 struct MsbBuffer { 67 /// The current code length. 68 code_size: u8, 69 /// The buffer bits. 70 buffer: u64, 71 /// The number of valid buffer bits. 72 bits_in_buffer: u8, 73 } 74 75 struct LsbBuffer { 76 /// The current code length. 77 code_size: u8, 78 /// The buffer bits. 79 buffer: u64, 80 /// The number of valid buffer bits. 81 bits_in_buffer: u8, 82 } 83 84 trait Buffer { new(size: u8) -> Self85 fn new(size: u8) -> Self; 86 /// Reset the code size in the buffer. reset(&mut self, min_size: u8)87 fn reset(&mut self, min_size: u8); 88 /// Apply effects of a Clear Code. clear(&mut self, min_size: u8)89 fn clear(&mut self, min_size: u8); 90 /// Insert a code into the buffer. buffer_code(&mut self, code: Code)91 fn buffer_code(&mut self, code: Code); 92 /// Push bytes if the buffer space is getting small. push_out(&mut self, out: &mut &mut [u8]) -> bool93 fn push_out(&mut self, out: &mut &mut [u8]) -> bool; 94 /// Flush all full bytes, returning if at least one more byte remains. flush_out(&mut self, out: &mut &mut [u8]) -> bool95 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool; 96 /// Pad the buffer to a full byte. buffer_pad(&mut self)97 fn buffer_pad(&mut self); 98 /// Increase the maximum code size. bump_code_size(&mut self)99 fn bump_code_size(&mut self); 100 /// Return the maximum code with the current code size. max_code(&self) -> Code101 fn max_code(&self) -> Code; 102 /// Return the current code size in bits. code_size(&self) -> u8103 fn code_size(&self) -> u8; 104 } 105 106 /// One tree node for at most each code. 107 /// To avoid using too much memory we keep nodes with few successors in optimized form. This form 108 /// doesn't offer lookup by indexing but instead does a linear search. 109 #[derive(Default)] 110 struct Tree { 111 simples: Vec<Simple>, 112 complex: Vec<Full>, 113 keys: Vec<CompressedKey>, 114 } 115 116 #[derive(Clone, Copy)] 117 enum FullKey { 118 NoSuccessor, 119 Simple(u16), 120 Full(u16), 121 } 122 123 #[derive(Clone, Copy)] 124 struct CompressedKey(u16); 125 126 const SHORT: usize = 16; 127 128 #[derive(Clone, Copy)] 129 struct Simple { 130 codes: [Code; SHORT], 131 chars: [u8; SHORT], 132 count: u8, 133 } 134 135 #[derive(Clone, Copy)] 136 struct Full { 137 char_continuation: [Code; 256], 138 } 139 140 impl Encoder { 141 /// Create a new encoder with the specified bit order and symbol size. 142 /// 143 /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 144 /// original specification. In particular you will need to specify an `Lsb` bit oder to encode 145 /// the data portion of a compressed `gif` image. 146 /// 147 /// # Panics 148 /// 149 /// The `size` needs to be in the interval `2..=12`. new(order: BitOrder, size: u8) -> Self150 pub fn new(order: BitOrder, size: u8) -> Self { 151 type Boxed = Box<dyn Stateful + Send + 'static>; 152 super::assert_code_size(size); 153 let state = match order { 154 BitOrder::Lsb => Box::new(EncodeState::<LsbBuffer>::new(size)) as Boxed, 155 BitOrder::Msb => Box::new(EncodeState::<MsbBuffer>::new(size)) as Boxed, 156 }; 157 158 Encoder { state } 159 } 160 161 /// Create a TIFF compatible encoder with the specified bit order and symbol size. 162 /// 163 /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 164 /// TIFF specification, which is a misinterpretation of the original algorithm for increasing 165 /// the code size. It switches one symbol sooner. 166 /// 167 /// # Panics 168 /// 169 /// The `size` needs to be in the interval `2..=12`. with_tiff_size_switch(order: BitOrder, size: u8) -> Self170 pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 171 type Boxed = Box<dyn Stateful + Send + 'static>; 172 super::assert_code_size(size); 173 let state = match order { 174 BitOrder::Lsb => { 175 let mut state = Box::new(EncodeState::<LsbBuffer>::new(size)); 176 state.is_tiff = true; 177 state as Boxed 178 } 179 BitOrder::Msb => { 180 let mut state = Box::new(EncodeState::<MsbBuffer>::new(size)); 181 state.is_tiff = true; 182 state as Boxed 183 } 184 }; 185 186 Encoder { state } 187 } 188 189 /// Encode some bytes from `inp` into `out`. 190 /// 191 /// See [`into_stream`] for high-level functions (this interface is only available with the 192 /// `std` feature) and [`finish`] for marking the input data as complete. 193 /// 194 /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and 195 /// all following ones will _not_ be consumed and the `status` of the result will signal an 196 /// error. The result will also indicate that all bytes up to but not including the offending 197 /// byte have been consumed. You may try again with a fixed byte. 198 /// 199 /// [`into_stream`]: #method.into_stream 200 /// [`finish`]: #method.finish encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult201 pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { 202 self.state.advance(inp, out) 203 } 204 205 /// Construct a encoder into a writer. 206 #[cfg(feature = "std")] into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W>207 pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> { 208 IntoStream { 209 encoder: self, 210 writer, 211 buffer: None, 212 default_size: STREAM_BUF_SIZE, 213 } 214 } 215 216 /// Mark the encoding as in the process of finishing. 217 /// 218 /// The next following call to `encode_bytes` which is able to consume the complete input will 219 /// also try to emit an end code. It's not recommended, but also not unsound, to use different 220 /// byte slices in different calls from this point forward and thus to 'delay' the actual end 221 /// of the data stream. The behaviour after the end marker has been written is unspecified but 222 /// sound. finish(&mut self)223 pub fn finish(&mut self) { 224 self.state.mark_ended(); 225 } 226 227 /// Undo marking this data stream as ending. 228 /// FIXME: clarify how this interacts with padding introduced after end code. 229 #[allow(dead_code)] restart(&mut self)230 pub(crate) fn restart(&mut self) { 231 self.state.restart() 232 } 233 234 /// Reset all internal state. 235 /// 236 /// This produce an encoder as if just constructed with `new` but taking slightly less work. In 237 /// particular it will not deallocate any internal allocations. It will also avoid some 238 /// duplicate setup work. reset(&mut self)239 pub fn reset(&mut self) { 240 self.state.reset() 241 } 242 } 243 244 #[cfg(feature = "std")] 245 impl<'d, W: Write> IntoStream<'d, W> { 246 /// Encode data from a reader. 247 /// 248 /// This will drain the supplied reader. It will not encode an end marker after all data has 249 /// been processed. encode(&mut self, read: impl BufRead) -> StreamResult250 pub fn encode(&mut self, read: impl BufRead) -> StreamResult { 251 self.encode_part(read, false) 252 } 253 254 /// Encode data from a reader and an end marker. encode_all(mut self, read: impl BufRead) -> StreamResult255 pub fn encode_all(mut self, read: impl BufRead) -> StreamResult { 256 self.encode_part(read, true) 257 } 258 259 /// Set the size of the intermediate encode buffer. 260 /// 261 /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is 262 /// available and any encoding method is called. No buffer is allocated if `set_buffer` has 263 /// been called. The buffer is reused. 264 /// 265 /// # Panics 266 /// This method panics if `size` is `0`. set_buffer_size(&mut self, size: usize)267 pub fn set_buffer_size(&mut self, size: usize) { 268 assert_ne!(size, 0, "Attempted to set empty buffer"); 269 self.default_size = size; 270 } 271 272 /// Use a particular buffer as an intermediate encode buffer. 273 /// 274 /// Calling this sets or replaces the buffer. When a buffer has been set then it is used 275 /// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant 276 /// for efficient encoding as there is additional overhead from `write` calls each time the 277 /// buffer has been filled. 278 /// 279 /// # Panics 280 /// This method panics if the `buffer` is empty. set_buffer(&mut self, buffer: &'d mut [u8])281 pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 282 assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 283 self.buffer = Some(StreamBuf::Borrowed(buffer)); 284 } 285 encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult286 fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult { 287 let IntoStream { 288 encoder, 289 writer, 290 buffer, 291 default_size, 292 } = self; 293 enum Progress { 294 Ok, 295 Done, 296 } 297 298 let mut bytes_read = 0; 299 let mut bytes_written = 0; 300 301 let read_bytes = &mut bytes_read; 302 let write_bytes = &mut bytes_written; 303 304 let outbuf: &mut [u8] = 305 match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 306 StreamBuf::Borrowed(slice) => &mut *slice, 307 StreamBuf::Owned(vec) => &mut *vec, 308 }; 309 assert!(!outbuf.is_empty()); 310 311 let once = move || { 312 let data = read.fill_buf()?; 313 314 if data.is_empty() { 315 if finish { 316 encoder.finish(); 317 } else { 318 return Ok(Progress::Done); 319 } 320 } 321 322 let result = encoder.encode_bytes(data, &mut outbuf[..]); 323 *read_bytes += result.consumed_in; 324 *write_bytes += result.consumed_out; 325 read.consume(result.consumed_in); 326 327 let done = result.status.map_err(|err| { 328 io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err)) 329 })?; 330 331 if let LzwStatus::Done = done { 332 writer.write_all(&outbuf[..result.consumed_out])?; 333 return Ok(Progress::Done); 334 } 335 336 if let LzwStatus::NoProgress = done { 337 return Err(io::Error::new( 338 io::ErrorKind::UnexpectedEof, 339 "No more data but no end marker detected", 340 )); 341 } 342 343 writer.write_all(&outbuf[..result.consumed_out])?; 344 Ok(Progress::Ok) 345 }; 346 347 let status = core::iter::repeat_with(once) 348 // scan+fuse can be replaced with map_while 349 .scan((), |(), result| match result { 350 Ok(Progress::Ok) => Some(Ok(())), 351 Err(err) => Some(Err(err)), 352 Ok(Progress::Done) => None, 353 }) 354 .fuse() 355 .collect(); 356 357 StreamResult { 358 bytes_read, 359 bytes_written, 360 status, 361 } 362 } 363 } 364 365 impl<B: Buffer> EncodeState<B> { new(min_size: u8) -> Self366 fn new(min_size: u8) -> Self { 367 let clear_code = 1 << min_size; 368 let mut tree = Tree::default(); 369 tree.init(min_size); 370 let mut state = EncodeState { 371 min_size, 372 tree, 373 has_ended: false, 374 is_tiff: false, 375 current_code: clear_code, 376 clear_code, 377 buffer: B::new(min_size), 378 }; 379 state.buffer_code(clear_code); 380 state 381 } 382 } 383 384 impl<B: Buffer> Stateful for EncodeState<B> { advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult385 fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { 386 let c_in = inp.len(); 387 let c_out = out.len(); 388 let mut status = Ok(LzwStatus::Ok); 389 390 'encoding: loop { 391 if self.push_out(&mut out) { 392 break; 393 } 394 395 if inp.is_empty() && self.has_ended { 396 let end = self.end_code(); 397 if self.current_code != end { 398 if self.current_code != self.clear_code { 399 self.buffer_code(self.current_code); 400 401 // When reading this code, the decoder will add an extra entry to its table 402 // before reading th end code. Thusly, it may increase its code size based 403 // on this additional entry. 404 if self.tree.keys.len() + usize::from(self.is_tiff) 405 > usize::from(self.buffer.max_code()) 406 && self.buffer.code_size() < MAX_CODESIZE 407 { 408 self.buffer.bump_code_size(); 409 } 410 } 411 self.buffer_code(end); 412 self.current_code = end; 413 self.buffer_pad(); 414 } 415 416 break; 417 } 418 419 let mut next_code = None; 420 let mut bytes = inp.iter(); 421 while let Some(&byte) = bytes.next() { 422 if self.min_size < 8 && byte >= 1 << self.min_size { 423 status = Err(LzwError::InvalidCode); 424 break 'encoding; 425 } 426 427 inp = bytes.as_slice(); 428 match self.tree.iterate(self.current_code, byte) { 429 Ok(code) => self.current_code = code, 430 Err(_) => { 431 next_code = Some(self.current_code); 432 433 self.current_code = u16::from(byte); 434 break; 435 } 436 } 437 } 438 439 match next_code { 440 // No more bytes, no code produced. 441 None => break, 442 Some(code) => { 443 self.buffer_code(code); 444 445 if self.tree.keys.len() + usize::from(self.is_tiff) 446 > usize::from(self.buffer.max_code()) + 1 447 && self.buffer.code_size() < MAX_CODESIZE 448 { 449 self.buffer.bump_code_size(); 450 } 451 452 if self.tree.keys.len() > MAX_ENTRIES { 453 self.buffer_code(self.clear_code); 454 self.tree.reset(self.min_size); 455 self.buffer.clear(self.min_size); 456 } 457 } 458 } 459 } 460 461 if inp.is_empty() && self.current_code == self.end_code() { 462 if !self.flush_out(&mut out) { 463 status = Ok(LzwStatus::Done); 464 } 465 } 466 467 BufferResult { 468 consumed_in: c_in - inp.len(), 469 consumed_out: c_out - out.len(), 470 status, 471 } 472 } 473 mark_ended(&mut self) -> bool474 fn mark_ended(&mut self) -> bool { 475 core::mem::replace(&mut self.has_ended, true) 476 } 477 restart(&mut self)478 fn restart(&mut self) { 479 self.has_ended = false; 480 } 481 reset(&mut self)482 fn reset(&mut self) { 483 self.restart(); 484 self.current_code = self.clear_code; 485 self.tree.reset(self.min_size); 486 self.buffer.reset(self.min_size); 487 self.buffer_code(self.clear_code); 488 } 489 } 490 491 impl<B: Buffer> EncodeState<B> { push_out(&mut self, out: &mut &mut [u8]) -> bool492 fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 493 self.buffer.push_out(out) 494 } 495 flush_out(&mut self, out: &mut &mut [u8]) -> bool496 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 497 self.buffer.flush_out(out) 498 } 499 end_code(&self) -> Code500 fn end_code(&self) -> Code { 501 self.clear_code + 1 502 } 503 buffer_pad(&mut self)504 fn buffer_pad(&mut self) { 505 self.buffer.buffer_pad(); 506 } 507 buffer_code(&mut self, code: Code)508 fn buffer_code(&mut self, code: Code) { 509 self.buffer.buffer_code(code); 510 } 511 } 512 513 impl Buffer for MsbBuffer { new(min_size: u8) -> Self514 fn new(min_size: u8) -> Self { 515 MsbBuffer { 516 code_size: min_size + 1, 517 buffer: 0, 518 bits_in_buffer: 0, 519 } 520 } 521 reset(&mut self, min_size: u8)522 fn reset(&mut self, min_size: u8) { 523 self.code_size = min_size + 1; 524 self.buffer = 0; 525 self.bits_in_buffer = 0; 526 } 527 clear(&mut self, min_size: u8)528 fn clear(&mut self, min_size: u8) { 529 self.code_size = min_size + 1; 530 } 531 buffer_code(&mut self, code: Code)532 fn buffer_code(&mut self, code: Code) { 533 let shift = 64 - self.bits_in_buffer - self.code_size; 534 self.buffer |= u64::from(code) << shift; 535 self.bits_in_buffer += self.code_size; 536 } 537 push_out(&mut self, out: &mut &mut [u8]) -> bool538 fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 539 if self.bits_in_buffer + 2 * self.code_size < 64 { 540 return false; 541 } 542 543 self.flush_out(out) 544 } 545 flush_out(&mut self, out: &mut &mut [u8]) -> bool546 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 547 let want = usize::from(self.bits_in_buffer / 8); 548 let count = want.min((*out).len()); 549 let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 550 *out = tail; 551 552 for b in bytes { 553 *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8; 554 self.buffer <<= 8; 555 self.bits_in_buffer -= 8; 556 } 557 558 count < want 559 } 560 buffer_pad(&mut self)561 fn buffer_pad(&mut self) { 562 let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; 563 self.bits_in_buffer += to_byte; 564 } 565 bump_code_size(&mut self)566 fn bump_code_size(&mut self) { 567 self.code_size += 1; 568 } 569 max_code(&self) -> Code570 fn max_code(&self) -> Code { 571 (1 << self.code_size) - 1 572 } 573 code_size(&self) -> u8574 fn code_size(&self) -> u8 { 575 self.code_size 576 } 577 } 578 579 impl Buffer for LsbBuffer { new(min_size: u8) -> Self580 fn new(min_size: u8) -> Self { 581 LsbBuffer { 582 code_size: min_size + 1, 583 buffer: 0, 584 bits_in_buffer: 0, 585 } 586 } 587 reset(&mut self, min_size: u8)588 fn reset(&mut self, min_size: u8) { 589 self.code_size = min_size + 1; 590 self.buffer = 0; 591 self.bits_in_buffer = 0; 592 } 593 clear(&mut self, min_size: u8)594 fn clear(&mut self, min_size: u8) { 595 self.code_size = min_size + 1; 596 } 597 buffer_code(&mut self, code: Code)598 fn buffer_code(&mut self, code: Code) { 599 self.buffer |= u64::from(code) << self.bits_in_buffer; 600 self.bits_in_buffer += self.code_size; 601 } 602 push_out(&mut self, out: &mut &mut [u8]) -> bool603 fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 604 if self.bits_in_buffer + 2 * self.code_size < 64 { 605 return false; 606 } 607 608 self.flush_out(out) 609 } 610 flush_out(&mut self, out: &mut &mut [u8]) -> bool611 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 612 let want = usize::from(self.bits_in_buffer / 8); 613 let count = want.min((*out).len()); 614 let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 615 *out = tail; 616 617 for b in bytes { 618 *b = (self.buffer & 0x0000_0000_0000_00ff) as u8; 619 self.buffer >>= 8; 620 self.bits_in_buffer -= 8; 621 } 622 623 count < want 624 } 625 buffer_pad(&mut self)626 fn buffer_pad(&mut self) { 627 let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; 628 self.bits_in_buffer += to_byte; 629 } 630 bump_code_size(&mut self)631 fn bump_code_size(&mut self) { 632 self.code_size += 1; 633 } 634 max_code(&self) -> Code635 fn max_code(&self) -> Code { 636 (1 << self.code_size) - 1 637 } 638 code_size(&self) -> u8639 fn code_size(&self) -> u8 { 640 self.code_size 641 } 642 } 643 644 impl Tree { init(&mut self, min_size: u8)645 fn init(&mut self, min_size: u8) { 646 // We need a way to represent the state of a currently empty buffer. We use the clear code 647 // for this, thus create one complex mapping that leads to the one-char base codes. 648 self.keys 649 .resize((1 << min_size) + 2, FullKey::NoSuccessor.into()); 650 self.complex.push(Full { 651 char_continuation: [0; 256], 652 }); 653 let map_of_begin = self.complex.last_mut().unwrap(); 654 for ch in 0u16..256 { 655 map_of_begin.char_continuation[usize::from(ch)] = ch; 656 } 657 self.keys[1 << min_size] = FullKey::Full(0).into(); 658 } 659 reset(&mut self, min_size: u8)660 fn reset(&mut self, min_size: u8) { 661 self.simples.clear(); 662 self.keys.truncate((1 << min_size) + 2); 663 // Keep entry for clear code. 664 self.complex.truncate(1); 665 // The first complex is not changed.. 666 for k in self.keys[..(1 << min_size) + 2].iter_mut() { 667 *k = FullKey::NoSuccessor.into(); 668 } 669 self.keys[1 << min_size] = FullKey::Full(0).into(); 670 } 671 at_key(&self, code: Code, ch: u8) -> Option<Code>672 fn at_key(&self, code: Code, ch: u8) -> Option<Code> { 673 let key = self.keys[usize::from(code)]; 674 match FullKey::from(key) { 675 FullKey::NoSuccessor => None, 676 FullKey::Simple(idx) => { 677 let nexts = &self.simples[usize::from(idx)]; 678 let successors = nexts 679 .codes 680 .iter() 681 .zip(nexts.chars.iter()) 682 .take(usize::from(nexts.count)); 683 for (&scode, &sch) in successors { 684 if sch == ch { 685 return Some(scode); 686 } 687 } 688 689 None 690 } 691 FullKey::Full(idx) => { 692 let full = &self.complex[usize::from(idx)]; 693 let precode = full.char_continuation[usize::from(ch)]; 694 if usize::from(precode) < MAX_ENTRIES { 695 Some(precode) 696 } else { 697 None 698 } 699 } 700 } 701 } 702 703 /// Iterate to the next char. 704 /// Return Ok when it was already in the tree or creates a new entry for it and returns Err. iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code>705 fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> { 706 if let Some(next) = self.at_key(code, ch) { 707 Ok(next) 708 } else { 709 Err(self.append(code, ch)) 710 } 711 } 712 append(&mut self, code: Code, ch: u8) -> Code713 fn append(&mut self, code: Code, ch: u8) -> Code { 714 let next: Code = self.keys.len() as u16; 715 let key = self.keys[usize::from(code)]; 716 // TODO: with debug assertions, check for non-existence 717 match FullKey::from(key) { 718 FullKey::NoSuccessor => { 719 let new_key = FullKey::Simple(self.simples.len() as u16); 720 self.simples.push(Simple::default()); 721 let simples = self.simples.last_mut().unwrap(); 722 simples.codes[0] = next; 723 simples.chars[0] = ch; 724 simples.count = 1; 725 self.keys[usize::from(code)] = new_key.into(); 726 } 727 FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => { 728 let nexts = &mut self.simples[usize::from(idx)]; 729 let nidx = usize::from(nexts.count); 730 nexts.chars[nidx] = ch; 731 nexts.codes[nidx] = next; 732 nexts.count += 1; 733 } 734 FullKey::Simple(idx) => { 735 let new_key = FullKey::Full(self.complex.len() as u16); 736 let simples = &self.simples[usize::from(idx)]; 737 self.complex.push(Full { 738 char_continuation: [Code::max_value(); 256], 739 }); 740 let full = self.complex.last_mut().unwrap(); 741 for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) { 742 full.char_continuation[usize::from(pch)] = pcont; 743 } 744 self.keys[usize::from(code)] = new_key.into(); 745 } 746 FullKey::Full(idx) => { 747 let full = &mut self.complex[usize::from(idx)]; 748 full.char_continuation[usize::from(ch)] = next; 749 } 750 } 751 self.keys.push(FullKey::NoSuccessor.into()); 752 next 753 } 754 } 755 756 impl Default for FullKey { default() -> Self757 fn default() -> Self { 758 FullKey::NoSuccessor 759 } 760 } 761 762 impl Default for Simple { default() -> Self763 fn default() -> Self { 764 Simple { 765 codes: [0; SHORT], 766 chars: [0; SHORT], 767 count: 0, 768 } 769 } 770 } 771 772 impl From<CompressedKey> for FullKey { from(CompressedKey(key): CompressedKey) -> Self773 fn from(CompressedKey(key): CompressedKey) -> Self { 774 match (key >> MAX_CODESIZE) & 0xf { 775 0 => FullKey::Full(key & 0xfff), 776 1 => FullKey::Simple(key & 0xfff), 777 _ => FullKey::NoSuccessor, 778 } 779 } 780 } 781 782 impl From<FullKey> for CompressedKey { from(full: FullKey) -> Self783 fn from(full: FullKey) -> Self { 784 CompressedKey(match full { 785 FullKey::NoSuccessor => 0x2000, 786 FullKey::Simple(code) => 0x1000 | code, 787 FullKey::Full(code) => code, 788 }) 789 } 790 } 791 792 #[cfg(test)] 793 mod tests { 794 use super::{BitOrder, Encoder, LzwError, LzwStatus}; 795 use crate::alloc::vec::Vec; 796 use crate::decode::Decoder; 797 #[cfg(feature = "std")] 798 use crate::StreamBuf; 799 800 #[test] invalid_input_rejected()801 fn invalid_input_rejected() { 802 const BIT_LEN: u8 = 2; 803 let ref input = [0, 1 << BIT_LEN /* invalid */, 0]; 804 let ref mut target = [0u8; 128]; 805 let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN); 806 807 encoder.finish(); 808 // We require simulation of normality, that is byte-for-byte compression. 809 let result = encoder.encode_bytes(input, target); 810 assert!(if let Err(LzwError::InvalidCode) = result.status { 811 true 812 } else { 813 false 814 }); 815 assert_eq!(result.consumed_in, 1); 816 817 let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]); 818 assert!(if let Ok(LzwStatus::Done) = fixed.status { 819 true 820 } else { 821 false 822 }); 823 assert_eq!(fixed.consumed_in, 2); 824 825 // Okay, now test we actually fixed it. 826 let ref mut compare = [0u8; 4]; 827 let mut todo = &target[..result.consumed_out + fixed.consumed_out]; 828 let mut free = &mut compare[..]; 829 let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN); 830 831 // Decode with up to 16 rounds, far too much but inconsequential. 832 for _ in 0..16 { 833 if decoder.has_ended() { 834 break; 835 } 836 837 let result = decoder.decode_bytes(todo, free); 838 assert!(result.status.is_ok()); 839 todo = &todo[result.consumed_in..]; 840 free = &mut free[result.consumed_out..]; 841 } 842 843 let remaining = { free }.len(); 844 let len = compare.len() - remaining; 845 assert_eq!(todo, &[]); 846 assert_eq!(compare[..len], [0, 1, 0]); 847 } 848 849 #[test] 850 #[should_panic] invalid_code_size_low()851 fn invalid_code_size_low() { 852 let _ = Encoder::new(BitOrder::Msb, 1); 853 } 854 855 #[test] 856 #[should_panic] invalid_code_size_high()857 fn invalid_code_size_high() { 858 let _ = Encoder::new(BitOrder::Msb, 14); 859 } 860 make_decoded() -> Vec<u8>861 fn make_decoded() -> Vec<u8> { 862 const FILE: &'static [u8] = 863 include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock")); 864 return Vec::from(FILE); 865 } 866 867 #[test] 868 #[cfg(feature = "std")] into_stream_buffer_no_alloc()869 fn into_stream_buffer_no_alloc() { 870 let encoded = make_decoded(); 871 let mut encoder = Encoder::new(BitOrder::Msb, 8); 872 873 let mut output = vec![]; 874 let mut buffer = [0; 512]; 875 let mut istream = encoder.into_stream(&mut output); 876 istream.set_buffer(&mut buffer[..]); 877 istream.encode(&encoded[..]).status.unwrap(); 878 879 match istream.buffer { 880 Some(StreamBuf::Borrowed(_)) => {} 881 None => panic!("Decoded without buffer??"), 882 Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), 883 } 884 } 885 886 #[test] 887 #[cfg(feature = "std")] into_stream_buffer_small_alloc()888 fn into_stream_buffer_small_alloc() { 889 struct WriteTap<W: std::io::Write>(W); 890 const BUF_SIZE: usize = 512; 891 892 impl<W: std::io::Write> std::io::Write for WriteTap<W> { 893 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 894 assert!(buf.len() <= BUF_SIZE); 895 self.0.write(buf) 896 } 897 fn flush(&mut self) -> std::io::Result<()> { 898 self.0.flush() 899 } 900 } 901 902 let encoded = make_decoded(); 903 let mut encoder = Encoder::new(BitOrder::Msb, 8); 904 905 let mut output = vec![]; 906 let mut istream = encoder.into_stream(WriteTap(&mut output)); 907 istream.set_buffer_size(512); 908 istream.encode(&encoded[..]).status.unwrap(); 909 910 match istream.buffer { 911 Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), 912 Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), 913 None => panic!("Decoded without buffer??"), 914 } 915 } 916 917 #[test] 918 #[cfg(feature = "std")] reset()919 fn reset() { 920 let encoded = make_decoded(); 921 let mut encoder = Encoder::new(BitOrder::Msb, 8); 922 let mut reference = None; 923 924 for _ in 0..2 { 925 let mut output = vec![]; 926 let mut buffer = [0; 512]; 927 let mut istream = encoder.into_stream(&mut output); 928 istream.set_buffer(&mut buffer[..]); 929 istream.encode_all(&encoded[..]).status.unwrap(); 930 931 encoder.reset(); 932 if let Some(reference) = &reference { 933 assert_eq!(output, *reference); 934 } else { 935 reference = Some(output); 936 } 937 } 938 } 939 } 940