1 use std::cmp;
2 use std::collections::HashMap;
3 
4 use super::Code;
5 use super::Lz77Encode;
6 use super::Sink;
7 
8 /// A `Lz77Encode` implementation used by default.
9 #[derive(Debug)]
10 pub struct DefaultLz77Encoder {
11     window_size: u16,
12     buf: Vec<u8>,
13 }
14 impl DefaultLz77Encoder {
15     /// Makes a new encoder instance.
16     ///
17     /// # Examples
18     /// ```
19     /// use libflate::deflate;
20     /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
21     ///
22     /// let lz77 = DefaultLz77Encoder::new();
23     /// assert_eq!(lz77.window_size(), lz77::MAX_WINDOW_SIZE);
24     ///
25     /// let options = deflate::EncodeOptions::with_lz77(lz77);
26     /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
27     /// ```
new() -> Self28     pub fn new() -> Self {
29         Self::with_window_size(super::MAX_WINDOW_SIZE)
30     }
31 
32     /// Makes a new encoder instance with specified window size.
33     ///
34     /// Larger window size is prefered to raise compression ratio,
35     /// but it may require more working memory to encode and decode data.
36     ///
37     /// # Examples
38     /// ```
39     /// use libflate::deflate;
40     /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
41     ///
42     /// let lz77 = DefaultLz77Encoder::with_window_size(1024);
43     /// assert_eq!(lz77.window_size(), 1024);
44     ///
45     /// let options = deflate::EncodeOptions::with_lz77(lz77);
46     /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
47     /// ```
with_window_size(size: u16) -> Self48     pub fn with_window_size(size: u16) -> Self {
49         DefaultLz77Encoder {
50             window_size: cmp::min(size, super::MAX_WINDOW_SIZE),
51             buf: Vec::new(),
52         }
53     }
54 }
55 impl Default for DefaultLz77Encoder {
default() -> Self56     fn default() -> Self {
57         Self::new()
58     }
59 }
60 impl Lz77Encode for DefaultLz77Encoder {
encode<S>(&mut self, buf: &[u8], sink: S) where S: Sink,61     fn encode<S>(&mut self, buf: &[u8], sink: S)
62     where
63         S: Sink,
64     {
65         self.buf.extend_from_slice(buf);
66         if self.buf.len() >= self.window_size as usize * 8 {
67             self.flush(sink);
68         }
69     }
flush<S>(&mut self, mut sink: S) where S: Sink,70     fn flush<S>(&mut self, mut sink: S)
71     where
72         S: Sink,
73     {
74         let mut prefix_table = PrefixTable::new(self.buf.len());
75         let mut i = 0;
76         let end = cmp::max(3, self.buf.len()) - 3;
77         while i < end {
78             let key = prefix(&self.buf[i..]);
79             let matched = prefix_table.insert(key, i as u32);
80             if let Some(j) = matched.map(|j| j as usize) {
81                 let distance = i - j;
82                 if distance <= self.window_size as usize {
83                     let length = 3 + longest_common_prefix(&self.buf, i + 3, j + 3);
84                     sink.consume(Code::Pointer {
85                         length,
86                         backward_distance: distance as u16,
87                     });
88                     for k in (i..).take(length as usize).skip(1) {
89                         if k >= end {
90                             break;
91                         }
92                         prefix_table.insert(prefix(&self.buf[k..]), k as u32);
93                     }
94                     i += length as usize;
95                     continue;
96                 }
97             }
98             sink.consume(Code::Literal(self.buf[i]));
99             i += 1;
100         }
101         for b in &self.buf[i..] {
102             sink.consume(Code::Literal(*b));
103         }
104         self.buf.clear();
105     }
window_size(&self) -> u16106     fn window_size(&self) -> u16 {
107         self.window_size
108     }
109 }
110 
111 #[inline]
prefix(input_buf: &[u8]) -> [u8; 3]112 fn prefix(input_buf: &[u8]) -> [u8; 3] {
113     let buf: &[u8] = &input_buf[..3]; // perform bounds check once
114     [buf[0], buf[1], buf[2]]
115 }
116 
117 #[inline]
longest_common_prefix(buf: &[u8], i: usize, j: usize) -> u16118 fn longest_common_prefix(buf: &[u8], i: usize, j: usize) -> u16 {
119     buf[i..]
120         .iter()
121         .take(super::MAX_LENGTH as usize - 3)
122         .zip(&buf[j..])
123         .take_while(|&(x, y)| x == y)
124         .count() as u16
125 }
126 
127 #[derive(Debug)]
128 enum PrefixTable {
129     Small(HashMap<[u8; 3], u32>),
130     Large(LargePrefixTable),
131 }
132 impl PrefixTable {
new(bytes: usize) -> Self133     fn new(bytes: usize) -> Self {
134         if bytes < super::MAX_WINDOW_SIZE as usize {
135             PrefixTable::Small(HashMap::new())
136         } else {
137             PrefixTable::Large(LargePrefixTable::new())
138         }
139     }
140 
141     #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>142     fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
143         match *self {
144             PrefixTable::Small(ref mut x) => x.insert(prefix, position),
145             PrefixTable::Large(ref mut x) => x.insert(prefix, position),
146         }
147     }
148 }
149 
150 #[derive(Debug)]
151 struct LargePrefixTable {
152     table: Vec<Vec<(u8, u32)>>,
153 }
154 impl LargePrefixTable {
new() -> Self155     fn new() -> Self {
156         LargePrefixTable {
157             table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
158         }
159     }
160 
161     #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>162     fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
163         let p0 = prefix[0] as usize;
164         let p1 = prefix[1] as usize;
165         let p2 = prefix[2];
166 
167         let i = (p0 << 8) + p1;
168         let positions = &mut self.table[i];
169         for &mut (key, ref mut value) in positions.iter_mut() {
170             if key == p2 {
171                 let old = *value;
172                 *value = position;
173                 return Some(old);
174             }
175         }
176         positions.push((p2, position));
177         None
178     }
179 }
180 
181 #[cfg(test)]
182 mod tests {
183     use super::*;
184     use deflate::symbol::Symbol;
185 
186     #[test]
187     // See: https://github.com/sile/libflate/issues/21
issue21()188     fn issue21() {
189         let mut enc = DefaultLz77Encoder::new();
190         let mut sink = Vec::new();
191         enc.encode(b"aaaaa", &mut sink);
192         enc.flush(&mut sink);
193         assert_eq!(
194             sink,
195             vec![
196                 Symbol::Literal(97),
197                 Symbol::Share {
198                     length: 4,
199                     distance: 1
200                 }
201             ]
202         );
203     }
204 }
205