1 use std::cmp;
2 use std::io;
3 
4 use super::symbol;
5 use super::BlockType;
6 use bit;
7 use finish::{Complete, Finish};
8 use lz77;
9 
/// The default size of a DEFLATE block (1 MiB).
pub const DEFAULT_BLOCK_SIZE: usize = 1 << 20;

/// Upper bound on the payload of a single non-compressed block (65535 bytes);
/// the block length is written out as a 16-bit value.
const MAX_NON_COMPRESSED_BLOCK_SIZE: usize = (1 << 16) - 1;
14 
/// Options for a DEFLATE encoder.
///
/// The type parameter `E` is the LZ77 encoder type carried by these options.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct EncodeOptions<E = lz77::DefaultLz77Encoder> {
    // Block size requested by the caller; `get_block_size` caps it for non-compressed blocks.
    block_size: usize,
    // `true` selects dynamic huffman codes, `false` fixed ones.
    // Only consulted when `lz77` is `Some`.
    dynamic_huffman: bool,
    // LZ77 encoder to compress with; `None` selects non-compressed (raw) blocks.
    lz77: Option<E>,
}
22 impl Default for EncodeOptions<lz77::DefaultLz77Encoder> {
default() -> Self23     fn default() -> Self {
24         Self::new()
25     }
26 }
27 impl EncodeOptions<lz77::DefaultLz77Encoder> {
28     /// Makes a default instance.
29     ///
30     /// # Examples
31     /// ```
32     /// use libflate::deflate::{Encoder, EncodeOptions};
33     ///
34     /// let options = EncodeOptions::new();
35     /// let encoder = Encoder::with_options(Vec::new(), options);
36     /// ```
new() -> Self37     pub fn new() -> Self {
38         EncodeOptions {
39             block_size: DEFAULT_BLOCK_SIZE,
40             dynamic_huffman: true,
41             lz77: Some(lz77::DefaultLz77Encoder::new()),
42         }
43     }
44 }
45 impl<E> EncodeOptions<E>
46 where
47     E: lz77::Lz77Encode,
48 {
49     /// Specifies the LZ77 encoder used to compress input data.
50     ///
51     /// # Example
52     /// ```
53     /// use libflate::lz77::DefaultLz77Encoder;
54     /// use libflate::deflate::{Encoder, EncodeOptions};
55     ///
56     /// let options = EncodeOptions::with_lz77(DefaultLz77Encoder::new());
57     /// let encoder = Encoder::with_options(Vec::new(), options);
58     /// ```
with_lz77(lz77: E) -> Self59     pub fn with_lz77(lz77: E) -> Self {
60         EncodeOptions {
61             block_size: DEFAULT_BLOCK_SIZE,
62             dynamic_huffman: true,
63             lz77: Some(lz77),
64         }
65     }
66 
67     /// Disables LZ77 compression.
68     ///
69     /// # Example
70     /// ```
71     /// use libflate::lz77::DefaultLz77Encoder;
72     /// use libflate::deflate::{Encoder, EncodeOptions};
73     ///
74     /// let options = EncodeOptions::new().no_compression();
75     /// let encoder = Encoder::with_options(Vec::new(), options);
76     /// ```
no_compression(mut self) -> Self77     pub fn no_compression(mut self) -> Self {
78         self.lz77 = None;
79         self
80     }
81 
82     /// Specifies the hint of the size of a DEFLATE block.
83     ///
84     /// The default value is `DEFAULT_BLOCK_SIZE`.
85     ///
86     /// # Example
87     /// ```
88     /// use libflate::deflate::{Encoder, EncodeOptions};
89     ///
90     /// let options = EncodeOptions::new().block_size(512 * 1024);
91     /// let encoder = Encoder::with_options(Vec::new(), options);
92     /// ```
block_size(mut self, size: usize) -> Self93     pub fn block_size(mut self, size: usize) -> Self {
94         self.block_size = size;
95         self
96     }
97 
98     /// Specifies to compress with fixed huffman codes.
99     ///
100     /// # Example
101     /// ```
102     /// use libflate::deflate::{Encoder, EncodeOptions};
103     ///
104     /// let options = EncodeOptions::new().fixed_huffman_codes();
105     /// let encoder = Encoder::with_options(Vec::new(), options);
106     /// ```
fixed_huffman_codes(mut self) -> Self107     pub fn fixed_huffman_codes(mut self) -> Self {
108         self.dynamic_huffman = false;
109         self
110     }
111 
get_block_type(&self) -> BlockType112     fn get_block_type(&self) -> BlockType {
113         if self.lz77.is_none() {
114             BlockType::Raw
115         } else if self.dynamic_huffman {
116             BlockType::Dynamic
117         } else {
118             BlockType::Fixed
119         }
120     }
get_block_size(&self) -> usize121     fn get_block_size(&self) -> usize {
122         if self.lz77.is_none() {
123             cmp::min(self.block_size, MAX_NON_COMPRESSED_BLOCK_SIZE)
124         } else {
125             self.block_size
126         }
127     }
128 }
129 
/// DEFLATE encoder.
///
/// Written bytes are collected in `block` and emitted to the wrapped stream
/// as DEFLATE blocks through `writer`.
#[derive(Debug)]
pub struct Encoder<W, E = lz77::DefaultLz77Encoder> {
    // Bit-level writer wrapping the output stream.
    writer: bit::BitWriter<W>,
    // Type, size threshold and pending data of the block being assembled.
    block: Block<E>,
}
136 impl<W> Encoder<W, lz77::DefaultLz77Encoder>
137 where
138     W: io::Write,
139 {
140     /// Makes a new encoder instance.
141     ///
142     /// Encoded DEFLATE stream is written to `inner`.
143     ///
144     /// # Examples
145     /// ```
146     /// use std::io::Write;
147     /// use libflate::deflate::Encoder;
148     ///
149     /// let mut encoder = Encoder::new(Vec::new());
150     /// encoder.write_all(b"Hello World!").unwrap();
151     ///
152     /// assert_eq!(encoder.finish().into_result().unwrap(),
153     ///            [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237,
154     ///            147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]);
155     /// ```
new(inner: W) -> Self156     pub fn new(inner: W) -> Self {
157         Self::with_options(inner, EncodeOptions::default())
158     }
159 }
160 impl<W, E> Encoder<W, E>
161 where
162     W: io::Write,
163     E: lz77::Lz77Encode,
164 {
165     /// Makes a new encoder instance with specified options.
166     ///
167     /// Encoded DEFLATE stream is written to `inner`.
168     ///
169     /// # Examples
170     /// ```
171     /// use std::io::Write;
172     /// use libflate::deflate::{Encoder, EncodeOptions};
173     ///
174     /// let options = EncodeOptions::new().no_compression();
175     /// let mut encoder = Encoder::with_options(Vec::new(), options);
176     /// encoder.write_all(b"Hello World!").unwrap();
177     ///
178     /// assert_eq!(encoder.finish().into_result().unwrap(),
179     ///            [1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111,
180     ///             114, 108, 100, 33]);
181     /// ```
with_options(inner: W, options: EncodeOptions<E>) -> Self182     pub fn with_options(inner: W, options: EncodeOptions<E>) -> Self {
183         Encoder {
184             writer: bit::BitWriter::new(inner),
185             block: Block::new(options),
186         }
187     }
188 
189     /// Flushes internal buffer and returns the inner stream.
190     ///
191     /// # Examples
192     /// ```
193     /// use std::io::Write;
194     /// use libflate::deflate::Encoder;
195     ///
196     /// let mut encoder = Encoder::new(Vec::new());
197     /// encoder.write_all(b"Hello World!").unwrap();
198     ///
199     /// assert_eq!(encoder.finish().into_result().unwrap(),
200     ///            [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237,
201     ///            147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]);
202     /// ```
finish(mut self) -> Finish<W, io::Error>203     pub fn finish(mut self) -> Finish<W, io::Error> {
204         match self.block.finish(&mut self.writer) {
205             Ok(_) => Finish::new(self.writer.into_inner(), None),
206             Err(e) => Finish::new(self.writer.into_inner(), Some(e)),
207         }
208     }
209 
210     /// Returns the immutable reference to the inner stream.
as_inner_ref(&self) -> &W211     pub fn as_inner_ref(&self) -> &W {
212         self.writer.as_inner_ref()
213     }
214 
215     /// Returns the mutable reference to the inner stream.
as_inner_mut(&mut self) -> &mut W216     pub fn as_inner_mut(&mut self) -> &mut W {
217         self.writer.as_inner_mut()
218     }
219 
220     /// Unwraps the `Encoder`, returning the inner stream.
into_inner(self) -> W221     pub fn into_inner(self) -> W {
222         self.writer.into_inner()
223     }
224 }
225 impl<W, E> io::Write for Encoder<W, E>
226 where
227     W: io::Write,
228     E: lz77::Lz77Encode,
229 {
write(&mut self, buf: &[u8]) -> io::Result<usize>230     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
231         self.block.write(&mut self.writer, buf)?;
232         Ok(buf.len())
233     }
flush(&mut self) -> io::Result<()>234     fn flush(&mut self) -> io::Result<()> {
235         self.writer.as_inner_mut().flush()
236     }
237 }
238 impl<W, E> Complete for Encoder<W, E>
239 where
240     W: io::Write,
241     E: lz77::Lz77Encode,
242 {
complete(self) -> io::Result<()>243     fn complete(self) -> io::Result<()> {
244         self.finish().into_result().map(|_| ())
245     }
246 }
247 
/// State of the DEFLATE block that is currently being assembled.
#[derive(Debug)]
struct Block<E> {
    // Block type written into the header of every emitted block.
    block_type: BlockType,
    // Number of buffered input bytes at which `write` emits a block.
    block_size: usize,
    // Pending data, stored in the form matching the block type.
    block_buf: BlockBuf<E>,
}
254 impl<E> Block<E>
255 where
256     E: lz77::Lz77Encode,
257 {
new(options: EncodeOptions<E>) -> Self258     fn new(options: EncodeOptions<E>) -> Self {
259         Block {
260             block_type: options.get_block_type(),
261             block_size: options.get_block_size(),
262             block_buf: BlockBuf::new(options.lz77, options.dynamic_huffman),
263         }
264     }
write<W>(&mut self, writer: &mut bit::BitWriter<W>, buf: &[u8]) -> io::Result<()> where W: io::Write,265     fn write<W>(&mut self, writer: &mut bit::BitWriter<W>, buf: &[u8]) -> io::Result<()>
266     where
267         W: io::Write,
268     {
269         self.block_buf.append(buf);
270         while self.block_buf.len() >= self.block_size {
271             writer.write_bit(false)?;
272             writer.write_bits(2, self.block_type as u16)?;
273             self.block_buf.flush(writer)?;
274         }
275         Ok(())
276     }
finish<W>(mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,277     fn finish<W>(mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()>
278     where
279         W: io::Write,
280     {
281         writer.write_bit(true)?;
282         writer.write_bits(2, self.block_type as u16)?;
283         self.block_buf.flush(writer)?;
284         writer.flush()?;
285         Ok(())
286     }
287 }
288 
/// Pending block data, with one variant per block type.
#[derive(Debug)]
enum BlockBuf<E> {
    // Bytes kept as-is for a non-compressed block.
    Raw(RawBuf),
    // Symbols to be encoded with the fixed huffman codec.
    Fixed(CompressBuf<symbol::FixedHuffmanCodec, E>),
    // Symbols to be encoded with the dynamic huffman codec.
    Dynamic(CompressBuf<symbol::DynamicHuffmanCodec, E>),
}
295 impl<E> BlockBuf<E>
296 where
297     E: lz77::Lz77Encode,
298 {
new(lz77: Option<E>, dynamic: bool) -> Self299     fn new(lz77: Option<E>, dynamic: bool) -> Self {
300         if let Some(lz77) = lz77 {
301             if dynamic {
302                 BlockBuf::Dynamic(CompressBuf::new(symbol::DynamicHuffmanCodec, lz77))
303             } else {
304                 BlockBuf::Fixed(CompressBuf::new(symbol::FixedHuffmanCodec, lz77))
305             }
306         } else {
307             BlockBuf::Raw(RawBuf::new())
308         }
309     }
append(&mut self, buf: &[u8])310     fn append(&mut self, buf: &[u8]) {
311         match *self {
312             BlockBuf::Raw(ref mut b) => b.append(buf),
313             BlockBuf::Fixed(ref mut b) => b.append(buf),
314             BlockBuf::Dynamic(ref mut b) => b.append(buf),
315         }
316     }
len(&self) -> usize317     fn len(&self) -> usize {
318         match *self {
319             BlockBuf::Raw(ref b) => b.len(),
320             BlockBuf::Fixed(ref b) => b.len(),
321             BlockBuf::Dynamic(ref b) => b.len(),
322         }
323     }
flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,324     fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()>
325     where
326         W: io::Write,
327     {
328         match *self {
329             BlockBuf::Raw(ref mut b) => b.flush(writer),
330             BlockBuf::Fixed(ref mut b) => b.flush(writer),
331             BlockBuf::Dynamic(ref mut b) => b.flush(writer),
332         }
333     }
334 }
335 
/// Byte buffer backing non-compressed blocks.
#[derive(Debug)]
struct RawBuf {
    // Input bytes that have not been written out yet.
    buf: Vec<u8>,
}
340 impl RawBuf {
new() -> Self341     fn new() -> Self {
342         RawBuf { buf: Vec::new() }
343     }
append(&mut self, buf: &[u8])344     fn append(&mut self, buf: &[u8]) {
345         self.buf.extend_from_slice(buf);
346     }
len(&self) -> usize347     fn len(&self) -> usize {
348         self.buf.len()
349     }
flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,350     fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()>
351     where
352         W: io::Write,
353     {
354         let size = cmp::min(self.buf.len(), MAX_NON_COMPRESSED_BLOCK_SIZE);
355         writer.flush()?;
356         writer
357             .as_inner_mut()
358             .write_all(&(size as u16).to_le_bytes())?;
359         writer
360             .as_inner_mut()
361             .write_all(&(!size as u16).to_le_bytes())?;
362         writer.as_inner_mut().write_all(&self.buf[..size])?;
363         self.buf.drain(0..size);
364         Ok(())
365     }
366 }
367 
/// Symbol buffer backing compressed blocks.
#[derive(Debug)]
struct CompressBuf<H, E> {
    // Codec used to build and save the symbol encoder when the block is flushed.
    huffman: H,
    // LZ77 encoder that turns appended bytes into symbols.
    lz77: E,
    // Symbols collected for the current block.
    buf: Vec<symbol::Symbol>,
    // Number of input bytes appended since the last flush.
    original_size: usize,
}
375 impl<H, E> CompressBuf<H, E>
376 where
377     H: symbol::HuffmanCodec,
378     E: lz77::Lz77Encode,
379 {
new(huffman: H, lz77: E) -> Self380     fn new(huffman: H, lz77: E) -> Self {
381         CompressBuf {
382             huffman,
383             lz77,
384             buf: Vec::new(),
385             original_size: 0,
386         }
387     }
append(&mut self, buf: &[u8])388     fn append(&mut self, buf: &[u8]) {
389         self.original_size += buf.len();
390         self.lz77.encode(buf, &mut self.buf);
391     }
len(&self) -> usize392     fn len(&self) -> usize {
393         self.original_size
394     }
flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()> where W: io::Write,395     fn flush<W>(&mut self, writer: &mut bit::BitWriter<W>) -> io::Result<()>
396     where
397         W: io::Write,
398     {
399         self.lz77.flush(&mut self.buf);
400         self.buf.push(symbol::Symbol::EndOfBlock);
401         let symbol_encoder = self.huffman.build(&self.buf)?;
402         self.huffman.save(writer, &symbol_encoder)?;
403         for s in self.buf.drain(..) {
404             symbol_encoder.encode(writer, &s)?;
405         }
406         self.original_size = 0;
407         Ok(())
408     }
409 }
410 
411 impl lz77::Sink for Vec<symbol::Symbol> {
consume(&mut self, code: lz77::Code)412     fn consume(&mut self, code: lz77::Code) {
413         let symbol = match code {
414             lz77::Code::Literal(b) => symbol::Symbol::Literal(b),
415             lz77::Code::Pointer {
416                 length,
417                 backward_distance,
418             } => symbol::Symbol::Share {
419                 length,
420                 distance: backward_distance,
421             },
422         };
423         self.push(symbol);
424     }
425 }
426