1 use std::cmp;
2 use std::collections::HashMap;
3 
4 use super::Code;
5 use super::Lz77Encode;
6 use super::Sink;
7 
8 /// A [`Lz77Encode`] implementation used by default.
9 #[derive(Debug)]
10 pub struct DefaultLz77Encoder {
11     window_size: u16,
12     max_length: u16,
13     buf: Vec<u8>,
14 }
15 
16 impl DefaultLz77Encoder {
17     /// Makes a new encoder instance.
18     ///
19     /// # Examples
20     /// ```
21     /// use libflate::deflate;
22     /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
23     ///
24     /// let lz77 = DefaultLz77Encoder::new();
25     /// assert_eq!(lz77.window_size(), lz77::MAX_WINDOW_SIZE);
26     ///
27     /// let options = deflate::EncodeOptions::with_lz77(lz77);
28     /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
29     /// ```
new() -> Self30     pub fn new() -> Self {
31         DefaultLz77EncoderBuilder::new().build()
32     }
33 
34     /// Makes a new encoder instance with specified window size.
35     ///
36     /// Larger window size is prefered to raise compression ratio,
37     /// but it may require more working memory to encode and decode data.
38     ///
39     /// # Examples
40     /// ```
41     /// use libflate::deflate;
42     /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
43     ///
44     /// let lz77 = DefaultLz77Encoder::with_window_size(1024);
45     /// assert_eq!(lz77.window_size(), 1024);
46     ///
47     /// let options = deflate::EncodeOptions::with_lz77(lz77);
48     /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
49     /// ```
with_window_size(size: u16) -> Self50     pub fn with_window_size(size: u16) -> Self {
51         DefaultLz77EncoderBuilder::new()
52             .window_size(cmp::min(size, super::MAX_WINDOW_SIZE))
53             .build()
54     }
55 }
56 
57 impl Default for DefaultLz77Encoder {
default() -> Self58     fn default() -> Self {
59         Self::new()
60     }
61 }
62 
63 impl Lz77Encode for DefaultLz77Encoder {
encode<S>(&mut self, buf: &[u8], sink: S) where S: Sink,64     fn encode<S>(&mut self, buf: &[u8], sink: S)
65     where
66         S: Sink,
67     {
68         self.buf.extend_from_slice(buf);
69         if self.buf.len() >= self.window_size as usize * 8 {
70             self.flush(sink);
71         }
72     }
flush<S>(&mut self, mut sink: S) where S: Sink,73     fn flush<S>(&mut self, mut sink: S)
74     where
75         S: Sink,
76     {
77         let mut prefix_table = PrefixTable::new(self.buf.len());
78         let mut i = 0;
79         let end = cmp::max(3, self.buf.len()) - 3;
80         while i < end {
81             let key = prefix(&self.buf[i..]);
82             let matched = prefix_table.insert(key, i as u32);
83             if let Some(j) = matched.map(|j| j as usize) {
84                 let distance = i - j;
85                 if distance <= self.window_size as usize {
86                     let length = 3 + longest_common_prefix(
87                         &self.buf,
88                         i + 3,
89                         j + 3,
90                         self.max_length as usize,
91                     );
92                     sink.consume(Code::Pointer {
93                         length,
94                         backward_distance: distance as u16,
95                     });
96                     for k in (i..).take(length as usize).skip(1) {
97                         if k >= end {
98                             break;
99                         }
100                         prefix_table.insert(prefix(&self.buf[k..]), k as u32);
101                     }
102                     i += length as usize;
103                     continue;
104                 }
105             }
106             sink.consume(Code::Literal(self.buf[i]));
107             i += 1;
108         }
109         for b in &self.buf[i..] {
110             sink.consume(Code::Literal(*b));
111         }
112         self.buf.clear();
113     }
window_size(&self) -> u16114     fn window_size(&self) -> u16 {
115         self.window_size
116     }
117 }
118 
119 #[inline]
prefix(input_buf: &[u8]) -> [u8; 3]120 fn prefix(input_buf: &[u8]) -> [u8; 3] {
121     let buf: &[u8] = &input_buf[..3]; // perform bounds check once
122     [buf[0], buf[1], buf[2]]
123 }
124 
125 #[inline]
longest_common_prefix(buf: &[u8], i: usize, j: usize, max: usize) -> u16126 fn longest_common_prefix(buf: &[u8], i: usize, j: usize, max: usize) -> u16 {
127     buf[i..]
128         .iter()
129         .take(max - 3)
130         .zip(&buf[j..])
131         .take_while(|&(x, y)| x == y)
132         .count() as u16
133 }
134 
135 #[derive(Debug)]
136 enum PrefixTable {
137     Small(HashMap<[u8; 3], u32>),
138     Large(LargePrefixTable),
139 }
140 impl PrefixTable {
new(bytes: usize) -> Self141     fn new(bytes: usize) -> Self {
142         if bytes < super::MAX_WINDOW_SIZE as usize {
143             PrefixTable::Small(HashMap::new())
144         } else {
145             PrefixTable::Large(LargePrefixTable::new())
146         }
147     }
148 
149     #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>150     fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
151         match *self {
152             PrefixTable::Small(ref mut x) => x.insert(prefix, position),
153             PrefixTable::Large(ref mut x) => x.insert(prefix, position),
154         }
155     }
156 }
157 
158 #[derive(Debug)]
159 struct LargePrefixTable {
160     table: Vec<Vec<(u8, u32)>>,
161 }
162 impl LargePrefixTable {
new() -> Self163     fn new() -> Self {
164         LargePrefixTable {
165             table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
166         }
167     }
168 
169     #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>170     fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
171         let p0 = prefix[0] as usize;
172         let p1 = prefix[1] as usize;
173         let p2 = prefix[2];
174 
175         let i = (p0 << 8) + p1;
176         let positions = &mut self.table[i];
177         for &mut (key, ref mut value) in positions.iter_mut() {
178             if key == p2 {
179                 let old = *value;
180                 *value = position;
181                 return Some(old);
182             }
183         }
184         positions.push((p2, position));
185         None
186     }
187 }
188 
189 /// Type for constructing instances of [`DefaultLz77Encoder`].
190 ///
191 /// # Examples
192 /// ```
193 /// use libflate_lz77::{
194 ///     DefaultLz77EncoderBuilder,
195 ///     MAX_LENGTH,
196 ///     MAX_WINDOW_SIZE,
197 /// };
198 ///
199 /// // Produce an encoder explicitly with the default window size and max copy length
200 /// let _encoder = DefaultLz77EncoderBuilder::new()
201 ///     .window_size(MAX_WINDOW_SIZE)
202 ///     .max_length(MAX_LENGTH)
203 ///     .build();
204 /// ```
205 #[derive(Debug)]
206 pub struct DefaultLz77EncoderBuilder {
207     window_size: u16,
208     max_length: u16,
209 }
210 
211 impl DefaultLz77EncoderBuilder {
212     /// Create a builder with the default parameters for the encoder.
new() -> Self213     pub fn new() -> Self {
214         DefaultLz77EncoderBuilder {
215             window_size: super::MAX_WINDOW_SIZE,
216             max_length: super::MAX_LENGTH,
217         }
218     }
219 
220     /// Set the size of the sliding search window used during compression.
221     ///
222     /// Larger values require more memory. The standard window size may be
223     /// unsuitable for a particular Sink; for example, if the encoding used
224     /// cannot express pointer distances past a certain size, you would want the
225     /// window size to be no greater than the Sink's limit.
window_size(self, window_size: u16) -> Self226     pub fn window_size(self, window_size: u16) -> Self {
227         DefaultLz77EncoderBuilder {
228             window_size: cmp::min(window_size, super::MAX_WINDOW_SIZE),
229             ..self
230         }
231     }
232 
233     /// Set the maximum length of a pointer command this encoder will emit.
234     ///
235     /// Some uses of LZ77 may not be able to encode pointers of the standard
236     /// maximum length of 258 bytes. In this case, you may set your own maximum
237     /// which can be encoded by the Sink.
max_length(self, max_length: u16) -> Self238     pub fn max_length(self, max_length: u16) -> Self {
239         DefaultLz77EncoderBuilder {
240             max_length: cmp::min(max_length, super::MAX_LENGTH),
241             ..self
242         }
243     }
244 
245     /// Build the encoder with the builder state's parameters.
build(self) -> DefaultLz77Encoder246     pub fn build(self) -> DefaultLz77Encoder {
247         DefaultLz77Encoder {
248             window_size: self.window_size,
249             max_length: self.max_length,
250             buf: Vec::new(),
251         }
252     }
253 }
254 
255 impl Default for DefaultLz77EncoderBuilder {
default() -> Self256     fn default() -> Self {
257         Self::new()
258     }
259 }
260