1 use std::cmp;
2 use std::collections::HashMap;
3
4 use super::Code;
5 use super::Lz77Encode;
6 use super::Sink;
7
8 /// A `Lz77Encode` implementation used by default.
9 #[derive(Debug)]
10 pub struct DefaultLz77Encoder {
11 window_size: u16,
12 buf: Vec<u8>,
13 }
14 impl DefaultLz77Encoder {
15 /// Makes a new encoder instance.
16 ///
17 /// # Examples
18 /// ```
19 /// use libflate::deflate;
20 /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
21 ///
22 /// let lz77 = DefaultLz77Encoder::new();
23 /// assert_eq!(lz77.window_size(), lz77::MAX_WINDOW_SIZE);
24 ///
25 /// let options = deflate::EncodeOptions::with_lz77(lz77);
26 /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
27 /// ```
new() -> Self28 pub fn new() -> Self {
29 Self::with_window_size(super::MAX_WINDOW_SIZE)
30 }
31
32 /// Makes a new encoder instance with specified window size.
33 ///
34 /// Larger window size is prefered to raise compression ratio,
35 /// but it may require more working memory to encode and decode data.
36 ///
37 /// # Examples
38 /// ```
39 /// use libflate::deflate;
40 /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder};
41 ///
42 /// let lz77 = DefaultLz77Encoder::with_window_size(1024);
43 /// assert_eq!(lz77.window_size(), 1024);
44 ///
45 /// let options = deflate::EncodeOptions::with_lz77(lz77);
46 /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
47 /// ```
with_window_size(size: u16) -> Self48 pub fn with_window_size(size: u16) -> Self {
49 DefaultLz77Encoder {
50 window_size: cmp::min(size, super::MAX_WINDOW_SIZE),
51 buf: Vec::new(),
52 }
53 }
54 }
55 impl Default for DefaultLz77Encoder {
default() -> Self56 fn default() -> Self {
57 Self::new()
58 }
59 }
60 impl Lz77Encode for DefaultLz77Encoder {
encode<S>(&mut self, buf: &[u8], sink: S) where S: Sink,61 fn encode<S>(&mut self, buf: &[u8], sink: S)
62 where
63 S: Sink,
64 {
65 self.buf.extend_from_slice(buf);
66 if self.buf.len() >= self.window_size as usize * 8 {
67 self.flush(sink);
68 }
69 }
flush<S>(&mut self, mut sink: S) where S: Sink,70 fn flush<S>(&mut self, mut sink: S)
71 where
72 S: Sink,
73 {
74 let mut prefix_table = PrefixTable::new(self.buf.len());
75 let mut i = 0;
76 let end = cmp::max(3, self.buf.len()) - 3;
77 while i < end {
78 let key = prefix(&self.buf[i..]);
79 let matched = prefix_table.insert(key, i as u32);
80 if let Some(j) = matched.map(|j| j as usize) {
81 let distance = i - j;
82 if distance <= self.window_size as usize {
83 let length = 3 + longest_common_prefix(&self.buf, i + 3, j + 3);
84 sink.consume(Code::Pointer {
85 length,
86 backward_distance: distance as u16,
87 });
88 for k in (i..).take(length as usize).skip(1) {
89 if k >= end {
90 break;
91 }
92 prefix_table.insert(prefix(&self.buf[k..]), k as u32);
93 }
94 i += length as usize;
95 continue;
96 }
97 }
98 sink.consume(Code::Literal(self.buf[i]));
99 i += 1;
100 }
101 for b in &self.buf[i..] {
102 sink.consume(Code::Literal(*b));
103 }
104 self.buf.clear();
105 }
window_size(&self) -> u16106 fn window_size(&self) -> u16 {
107 self.window_size
108 }
109 }
110
111 #[inline]
prefix(input_buf: &[u8]) -> [u8; 3]112 fn prefix(input_buf: &[u8]) -> [u8; 3] {
113 let buf: &[u8] = &input_buf[..3]; // perform bounds check once
114 [buf[0], buf[1], buf[2]]
115 }
116
117 #[inline]
longest_common_prefix(buf: &[u8], i: usize, j: usize) -> u16118 fn longest_common_prefix(buf: &[u8], i: usize, j: usize) -> u16 {
119 buf[i..]
120 .iter()
121 .take(super::MAX_LENGTH as usize - 3)
122 .zip(&buf[j..])
123 .take_while(|&(x, y)| x == y)
124 .count() as u16
125 }
126
127 #[derive(Debug)]
128 enum PrefixTable {
129 Small(HashMap<[u8; 3], u32>),
130 Large(LargePrefixTable),
131 }
132 impl PrefixTable {
new(bytes: usize) -> Self133 fn new(bytes: usize) -> Self {
134 if bytes < super::MAX_WINDOW_SIZE as usize {
135 PrefixTable::Small(HashMap::new())
136 } else {
137 PrefixTable::Large(LargePrefixTable::new())
138 }
139 }
140
141 #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>142 fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
143 match *self {
144 PrefixTable::Small(ref mut x) => x.insert(prefix, position),
145 PrefixTable::Large(ref mut x) => x.insert(prefix, position),
146 }
147 }
148 }
149
150 #[derive(Debug)]
151 struct LargePrefixTable {
152 table: Vec<Vec<(u8, u32)>>,
153 }
154 impl LargePrefixTable {
new() -> Self155 fn new() -> Self {
156 LargePrefixTable {
157 table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
158 }
159 }
160
161 #[inline]
insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32>162 fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
163 let p0 = prefix[0] as usize;
164 let p1 = prefix[1] as usize;
165 let p2 = prefix[2];
166
167 let i = (p0 << 8) + p1;
168 let positions = &mut self.table[i];
169 for &mut (key, ref mut value) in positions.iter_mut() {
170 if key == p2 {
171 let old = *value;
172 *value = position;
173 return Some(old);
174 }
175 }
176 positions.push((p2, position));
177 None
178 }
179 }
180
181 #[cfg(test)]
182 mod tests {
183 use super::*;
184 use deflate::symbol::Symbol;
185
186 #[test]
187 // See: https://github.com/sile/libflate/issues/21
issue21()188 fn issue21() {
189 let mut enc = DefaultLz77Encoder::new();
190 let mut sink = Vec::new();
191 enc.encode(b"aaaaa", &mut sink);
192 enc.flush(&mut sink);
193 assert_eq!(
194 sink,
195 vec![
196 Symbol::Literal(97),
197 Symbol::Share {
198 length: 4,
199 distance: 1
200 }
201 ]
202 );
203 }
204 }
205