1 use std::cmp; 2 use std::io; 3 4 use super::symbol; 5 use super::BlockType; 6 use bit; 7 use finish::{Complete, Finish}; 8 use lz77; 9 10 /// The default size of a DEFLATE block. 11 pub const DEFAULT_BLOCK_SIZE: usize = 1024 * 1024; 12 13 const MAX_NON_COMPRESSED_BLOCK_SIZE: usize = 0xFFFF; 14 15 /// Options for a DEFLATE encoder. 16 #[derive(Debug, Clone, PartialEq, Eq, Hash)] 17 pub struct EncodeOptions<E = lz77::DefaultLz77Encoder> { 18 block_size: usize, 19 dynamic_huffman: bool, 20 lz77: Option<E>, 21 } 22 impl Default for EncodeOptions<lz77::DefaultLz77Encoder> { default() -> Self23 fn default() -> Self { 24 Self::new() 25 } 26 } 27 impl EncodeOptions<lz77::DefaultLz77Encoder> { 28 /// Makes a default instance. 29 /// 30 /// # Examples 31 /// ``` 32 /// use libflate::deflate::{Encoder, EncodeOptions}; 33 /// 34 /// let options = EncodeOptions::new(); 35 /// let encoder = Encoder::with_options(Vec::new(), options); 36 /// ``` new() -> Self37 pub fn new() -> Self { 38 EncodeOptions { 39 block_size: DEFAULT_BLOCK_SIZE, 40 dynamic_huffman: true, 41 lz77: Some(lz77::DefaultLz77Encoder::new()), 42 } 43 } 44 } 45 impl<E> EncodeOptions<E> 46 where 47 E: lz77::Lz77Encode, 48 { 49 /// Specifies the LZ77 encoder used to compress input data. 50 /// 51 /// # Example 52 /// ``` 53 /// use libflate::lz77::DefaultLz77Encoder; 54 /// use libflate::deflate::{Encoder, EncodeOptions}; 55 /// 56 /// let options = EncodeOptions::with_lz77(DefaultLz77Encoder::new()); 57 /// let encoder = Encoder::with_options(Vec::new(), options); 58 /// ``` with_lz77(lz77: E) -> Self59 pub fn with_lz77(lz77: E) -> Self { 60 EncodeOptions { 61 block_size: DEFAULT_BLOCK_SIZE, 62 dynamic_huffman: true, 63 lz77: Some(lz77), 64 } 65 } 66 67 /// Disables LZ77 compression. 68 /// 69 /// # Example 70 /// ``` 71 /// use libflate::lz77::DefaultLz77Encoder; 72 /// use libflate::deflate::{Encoder, EncodeOptions}; 73 /// 74 /// let options = EncodeOptions::new().no_compression(); 75 /// let encoder = Encoder::with_options(Vec::new(), options); 76 /// ``` no_compression(mut self) -> Self77 pub fn no_compression(mut self) -> Self { 78 self.lz77 = None; 79 self 80 } 81 82 /// Specifies the hint of the size of a DEFLATE block. 83 /// 84 /// The default value is `DEFAULT_BLOCK_SIZE`. 85 /// 86 /// # Example 87 /// ``` 88 /// use libflate::deflate::{Encoder, EncodeOptions}; 89 /// 90 /// let options = EncodeOptions::new().block_size(512 * 1024); 91 /// let encoder = Encoder::with_options(Vec::new(), options); 92 /// ``` block_size(mut self, size: usize) -> Self93 pub fn block_size(mut self, size: usize) -> Self { 94 self.block_size = size; 95 self 96 } 97 98 /// Specifies to compress with fixed huffman codes. 99 /// 100 /// # Example 101 /// ``` 102 /// use libflate::deflate::{Encoder, EncodeOptions}; 103 /// 104 /// let options = EncodeOptions::new().fixed_huffman_codes(); 105 /// let encoder = Encoder::with_options(Vec::new(), options); 106 /// ``` fixed_huffman_codes(mut self) -> Self107 pub fn fixed_huffman_codes(mut self) -> Self { 108 self.dynamic_huffman = false; 109 self 110 } 111 get_block_type(&self) -> BlockType112 fn get_block_type(&self) -> BlockType { 113 if self.lz77.is_none() { 114 BlockType::Raw 115 } else if self.dynamic_huffman { 116 BlockType::Dynamic 117 } else { 118 BlockType::Fixed 119 } 120 } get_block_size(&self) -> usize121 fn get_block_size(&self) -> usize { 122 if self.lz77.is_none() { 123 cmp::min(self.block_size, MAX_NON_COMPRESSED_BLOCK_SIZE) 124 } else { 125 self.block_size 126 } 127 } 128 } 129 130 /// DEFLATE encoder. 131 #[derive(Debug)] 132 pub struct Encoder<W, E = lz77::DefaultLz77Encoder> { 133 writer: bit::BitWriter<W>, 134 block: Block<E>, 135 } 136 impl<W> Encoder<W, lz77::DefaultLz77Encoder> 137 where 138 W: io::Write, 139 { 140 /// Makes a new encoder instance. 141 /// 142 /// Encoded DEFLATE stream is written to `inner`. 143 /// 144 /// # Examples 145 /// ``` 146 /// use std::io::Write; 147 /// use libflate::deflate::Encoder; 148 /// 149 /// let mut encoder = Encoder::new(Vec::new()); 150 /// encoder.write_all(b"Hello World!").unwrap(); 151 /// 152 /// assert_eq!(encoder.finish().into_result().unwrap(), 153 /// [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237, 154 /// 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]); 155 /// ``` new(inner: W) -> Self156 pub fn new(inner: W) -> Self { 157 Self::with_options(inner, EncodeOptions::default()) 158 } 159 } 160 impl<W, E> Encoder<W, E> 161 where 162 W: io::Write, 163 E: lz77::Lz77Encode, 164 { 165 /// Makes a new encoder instance with specified options. 166 /// 167 /// Encoded DEFLATE stream is written to `inner`. 168 /// 169 /// # Examples 170 /// ``` 171 /// use std::io::Write; 172 /// use libflate::deflate::{Encoder, EncodeOptions}; 173 /// 174 /// let options = EncodeOptions::new().no_compression(); 175 /// let mut encoder = Encoder::with_options(Vec::new(), options); 176 /// encoder.write_all(b"Hello World!").unwrap(); 177 /// 178 /// assert_eq!(encoder.finish().into_result().unwrap(), 179 /// [1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111, 180 /// 114, 108, 100, 33]); 181 /// ``` with_options(inner: W, options: EncodeOptions<E>) -> Self182 pub fn with_options(inner: W, options: EncodeOptions<E>) -> Self { 183 Encoder { 184 writer: bit::BitWriter::new(inner), 185 block: Block::new(options), 186 } 187 } 188 189 /// Flushes internal buffer and returns the inner stream. 190 /// 191 /// # Examples 192 /// ``` 193 /// use std::io::Write; 194 /// use libflate::deflate::Encoder; 195 /// 196 /// let mut encoder = Encoder::new(Vec::new()); 197 /// encoder.write_all(b"Hello World!").unwrap(); 198 /// 199 /// assert_eq!(encoder.finish().into_result().unwrap(), 200 /// [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237, 201 /// 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]); 202 /// ``` finish(mut self) -> Finish<W, io::Error>203 pub fn finish(mut self) -> Finish<W, io::Error> { 204 match self.block.finish(&mut self.writer) { 205 Ok(_) => Finish::new(self.writer.into_inner(), None), 206 Err(e) => Finish::new(self.writer.into_inner(), Some(e)), 207 } 208 } 209 210 /// Returns the immutable reference to the inner stream. as_inner_ref(&self) -> &W211 pub fn as_inner_ref(&self) -> &W { 212 self.writer.as_inner_ref() 213 } 214 215 /// Returns the mutable reference to the inner stream. as_inner_mut(&mut self) -> &mut W216 pub fn as_inner_mut(&mut self) -> &mut W { 217 self.writer.as_inner_mut() 218 } 219 220 /// Unwraps the `Encoder`, returning the inner stream. into_inner(self) -> W221 pub fn into_inner(self) -> W { 222 self.writer.into_inner() 223 } 224 } 225 impl<W, E> io::Write for Encoder<W, E> 226 where 227 W: io::Write, 228 E: lz77::Lz77Encode, 229 { write(&mut self, buf: &[u8]) -> io::Result<usize>230 fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 231 self.block.write(&mut self.writer, buf)?; 232 Ok(buf.len()) 233 } flush(&mut self) -> io::Result<()>234 fn flush(&mut self) -> io::Result<()> { 235 self.writer.as_inner_mut().flush() 236 } 237 } 238 impl<W, E> Complete for Encoder<W, E> 239 where 240 W: io::Write, 241 E: lz77::Lz77Encode, 242 { complete(self) -> io::Result<()>243 fn complete(self) -> io::Result<()> { 244 self.finish().into_result().map(|_| ()) 245 } 246 } 247 248 #[derive(Debug)] 249 struct Block<E> { 250 block_type: BlockType, 251 block_size: usize, 252 block_buf: BlockBuf<E>, 253 } 254 impl<E> Block<E> 255 where 256 E: lz77::Lz77Encode, 257 { new(options: EncodeOptions<E>) -> Self258 fn new(options: EncodeOptions<E>) -> Self { 259 Block { 260 block_type: options.get_block_type(), 261 block_size: options.get_block_size(), 262 block_buf: BlockBuf::new(options.lz77, options.dynamic_huffman), 263 } 264 } write<W>(&mut self, writer: &mut bit::BitWriter<W>, buf: &[u8]) -> io::Result<()> where W: io::Write,265 fn write<W>(&mut self, writer: &mut bit::BitWriter<W>, buf: &[u8]) -> io::Result<()> 266 where 267 W: io::Write, 268 { 269 self.block_buf.append(buf); 270 while self.block_buf.len() >= self.block_size { 271 writer.write_bit(false)?; 272 writer.write_bits(2, self.block_type as u16)?; 273 self.block_buf.flush(writer)?; 274 } 275 Ok(()) 276 } finish<W>(mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,277 fn finish<W>(mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> 278 where 279 W: io::Write, 280 { 281 writer.write_bit(true)?; 282 writer.write_bits(2, self.block_type as u16)?; 283 self.block_buf.flush(writer)?; 284 writer.flush()?; 285 Ok(()) 286 } 287 } 288 289 #[derive(Debug)] 290 enum BlockBuf<E> { 291 Raw(RawBuf), 292 Fixed(CompressBuf<symbol::FixedHuffmanCodec, E>), 293 Dynamic(CompressBuf<symbol::DynamicHuffmanCodec, E>), 294 } 295 impl<E> BlockBuf<E> 296 where 297 E: lz77::Lz77Encode, 298 { new(lz77: Option<E>, dynamic: bool) -> Self299 fn new(lz77: Option<E>, dynamic: bool) -> Self { 300 if let Some(lz77) = lz77 { 301 if dynamic { 302 BlockBuf::Dynamic(CompressBuf::new(symbol::DynamicHuffmanCodec, lz77)) 303 } else { 304 BlockBuf::Fixed(CompressBuf::new(symbol::FixedHuffmanCodec, lz77)) 305 } 306 } else { 307 BlockBuf::Raw(RawBuf::new()) 308 } 309 } append(&mut self, buf: &[u8])310 fn append(&mut self, buf: &[u8]) { 311 match *self { 312 BlockBuf::Raw(ref mut b) => b.append(buf), 313 BlockBuf::Fixed(ref mut b) => b.append(buf), 314 BlockBuf::Dynamic(ref mut b) => b.append(buf), 315 } 316 } len(&self) -> usize317 fn len(&self) -> usize { 318 match *self { 319 BlockBuf::Raw(ref b) => b.len(), 320 BlockBuf::Fixed(ref b) => b.len(), 321 BlockBuf::Dynamic(ref b) => b.len(), 322 } 323 } flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,324 fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> 325 where 326 W: io::Write, 327 { 328 match *self { 329 BlockBuf::Raw(ref mut b) => b.flush(writer), 330 BlockBuf::Fixed(ref mut b) => b.flush(writer), 331 BlockBuf::Dynamic(ref mut b) => b.flush(writer), 332 } 333 } 334 } 335 336 #[derive(Debug)] 337 struct RawBuf { 338 buf: Vec<u8>, 339 } 340 impl RawBuf { new() -> Self341 fn new() -> Self { 342 RawBuf { buf: Vec::new() } 343 } append(&mut self, buf: &[u8])344 fn append(&mut self, buf: &[u8]) { 345 self.buf.extend_from_slice(buf); 346 } len(&self) -> usize347 fn len(&self) -> usize { 348 self.buf.len() 349 } flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,350 fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> 351 where 352 W: io::Write, 353 { 354 let size = cmp::min(self.buf.len(), MAX_NON_COMPRESSED_BLOCK_SIZE); 355 writer.flush()?; 356 writer 357 .as_inner_mut() 358 .write_all(&(size as u16).to_le_bytes())?; 359 writer 360 .as_inner_mut() 361 .write_all(&(!size as u16).to_le_bytes())?; 362 writer.as_inner_mut().write_all(&self.buf[..size])?; 363 self.buf.drain(0..size); 364 Ok(()) 365 } 366 } 367 368 #[derive(Debug)] 369 struct CompressBuf<H, E> { 370 huffman: H, 371 lz77: E, 372 buf: Vec<symbol::Symbol>, 373 original_size: usize, 374 } 375 impl<H, E> CompressBuf<H, E> 376 where 377 H: symbol::HuffmanCodec, 378 E: lz77::Lz77Encode, 379 { new(huffman: H, lz77: E) -> Self380 fn new(huffman: H, lz77: E) -> Self { 381 CompressBuf { 382 huffman, 383 lz77, 384 buf: Vec::new(), 385 original_size: 0, 386 } 387 } append(&mut self, buf: &[u8])388 fn append(&mut self, buf: &[u8]) { 389 self.original_size += buf.len(); 390 self.lz77.encode(buf, &mut self.buf); 391 } len(&self) -> usize392 fn len(&self) -> usize { 393 self.original_size 394 } flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,395 fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> 396 where 397 W: io::Write, 398 { 399 self.lz77.flush(&mut self.buf); 400 self.buf.push(symbol::Symbol::EndOfBlock); 401 let symbol_encoder = self.huffman.build(&self.buf)?; 402 self.huffman.save(writer, &symbol_encoder)?; 403 for s in self.buf.drain(..) { 404 symbol_encoder.encode(writer, &s)?; 405 } 406 self.original_size = 0; 407 Ok(()) 408 } 409 } 410 411 impl lz77::Sink for Vec<symbol::Symbol> { consume(&mut self, code: lz77::Code)412 fn consume(&mut self, code: lz77::Code) { 413 let symbol = match code { 414 lz77::Code::Literal(b) => symbol::Symbol::Literal(b), 415 lz77::Code::Pointer { 416 length, 417 backward_distance, 418 } => symbol::Symbol::Share { 419 length, 420 distance: backward_distance, 421 }, 422 }; 423 self.push(symbol); 424 } 425 } 426