1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10
11 #[cfg(test)]
12 extern crate rand;
13
14 use std::io;
15
16 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
17 // It was translated into Rust as accurately as I could manage
18 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
19
20 /* zlib.h -- interface of the 'zlib' general purpose compression library
21 version 1.2.8, April 28th, 2013
22
23 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
24
25 This software is provided 'as-is', without any express or implied
26 warranty. In no event will the authors be held liable for any damages
27 arising from the use of this software.
28
29 Permission is granted to anyone to use this software for any purpose,
30 including commercial applications, and to alter it and redistribute it
31 freely, subject to the following restrictions:
32
33 1. The origin of this software must not be misrepresented; you must not
34 claim that you wrote the original software. If you use this software
35 in a product, an acknowledgment in the product documentation would be
36 appreciated but is not required.
37 2. Altered source versions must be plainly marked as such, and must not be
38 misrepresented as being the original software.
39 3. This notice may not be removed or altered from any source distribution.
40
41 Jean-loup Gailly Mark Adler
42 jloup@gzip.org madler@alumni.caltech.edu
43
44 */
45
46 // largest prime smaller than 65536
47 const BASE: u32 = 65521;
48
49 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
50 const NMAX: usize = 5552;
51
52 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])53 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
54 *adler += u32::from(buf[0]);
55 *sum2 += *adler;
56 }
57
58 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])59 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
60 do1(adler, sum2, &buf[0..1]);
61 do1(adler, sum2, &buf[1..2]);
62 }
63
64 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])65 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
66 do2(adler, sum2, &buf[0..2]);
67 do2(adler, sum2, &buf[2..4]);
68 }
69
70 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])71 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
72 do4(adler, sum2, &buf[0..4]);
73 do4(adler, sum2, &buf[4..8]);
74 }
75
76 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])77 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
78 do8(adler, sum2, &buf[0..8]);
79 do8(adler, sum2, &buf[8..16]);
80 }
81
82 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
83 ///
84 /// Calling remove() will update the hash to the value it would have if that
85 /// past byte had never been fed to the algorithm. This allows you to get the
86 /// hash of a rolling window very efficiently.
87 pub struct RollingAdler32 {
88 a: u32,
89 b: u32,
90 }
91
92 impl Default for RollingAdler32 {
default() -> RollingAdler3293 fn default() -> RollingAdler32 {
94 RollingAdler32::new()
95 }
96 }
97
98 impl RollingAdler32 {
99 /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler32100 pub fn new() -> RollingAdler32 {
101 Self::from_value(1)
102 }
103
104 /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler32105 pub fn from_value(adler32: u32) -> RollingAdler32 {
106 let a = adler32 & 0xFFFF;
107 let b = adler32 >> 16;
108 RollingAdler32 { a, b }
109 }
110
111 /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32112 pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113 let mut hash = RollingAdler32::new();
114 hash.update_buffer(buffer);
115 hash
116 }
117
118 /// Returns the current hash.
hash(&self) -> u32119 pub fn hash(&self) -> u32 {
120 (self.b << 16) | self.a
121 }
122
123 /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)124 pub fn remove(&mut self, size: usize, byte: u8) {
125 let byte = u32::from(byte);
126 self.a = (self.a + BASE - byte) % BASE;
127 self.b = ((self.b + BASE - 1)
128 .wrapping_add(BASE.wrapping_sub(size as u32)
129 .wrapping_mul(byte))) % BASE;
130 }
131
132 /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)133 pub fn update(&mut self, byte: u8) {
134 let byte = u32::from(byte);
135 self.a = (self.a + byte) % BASE;
136 self.b = (self.b + self.a) % BASE;
137 }
138
139 /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])140 pub fn update_buffer(&mut self, buffer: &[u8]) {
141 let len = buffer.len();
142
143 // in case user likes doing a byte at a time, keep it fast
144 if len == 1 {
145 self.update(buffer[0]);
146 return;
147 }
148
149 // in case short lengths are provided, keep it somewhat fast
150 if len < 16 {
151 for byte in buffer.iter().take(len) {
152 self.a += u32::from(*byte);
153 self.b += self.a;
154 }
155 if self.a >= BASE {
156 self.a -= BASE;
157 }
158 self.b %= BASE;
159 return;
160 }
161
162 let mut pos = 0;
163
164 // do length NMAX blocks -- requires just one modulo operation;
165 while pos + NMAX <= len {
166 let end = pos + NMAX;
167 while pos < end {
168 // 16 sums unrolled
169 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170 pos += 16;
171 }
172 self.a %= BASE;
173 self.b %= BASE;
174 }
175
176 // do remaining bytes (less than NMAX, still just one modulo)
177 if pos < len { // avoid modulos if none remaining
178 while len - pos >= 16 {
179 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
180 pos += 16;
181 }
182 while len - pos > 0 {
183 self.a += u32::from(buffer[pos]);
184 self.b += self.a;
185 pos += 1;
186 }
187 self.a %= BASE;
188 self.b %= BASE;
189 }
190 }
191 }
192
193 /// Consume a Read object and returns the Adler32 hash.
adler32<R: io::Read>(mut reader: R) -> io::Result<u32>194 pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
195 let mut hash = RollingAdler32::new();
196 let mut buffer = [0u8; NMAX];
197 let mut read = try!(reader.read(&mut buffer));
198 while read > 0 {
199 hash.update_buffer(&buffer[..read]);
200 read = try!(reader.read(&mut buffer));
201 }
202 Ok(hash.hash())
203 }
204
205 #[cfg(test)]
206 mod test {
207 use rand;
208 use rand::Rng;
209 use std::io;
210
211 use super::{BASE, adler32, RollingAdler32};
212
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>213 fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
214 let mut a: u32 = 1;
215 let mut b: u32 = 0;
216
217 for byte in reader.bytes() {
218 let byte = try!(byte) as u32;
219 a = (a + byte) % BASE;
220 b = (b + a) % BASE;
221 }
222
223 Ok((b << 16) | a)
224 }
225
226 #[test]
testvectors()227 fn testvectors() {
228 fn do_test(v: u32, bytes: &[u8]) {
229 let mut hash = RollingAdler32::new();
230 hash.update_buffer(&bytes);
231 assert_eq!(hash.hash(), v);
232
233 let r = io::Cursor::new(bytes);
234 assert_eq!(adler32(r).unwrap(), v);
235 }
236 do_test(0x00000001, b"");
237 do_test(0x00620062, b"a");
238 do_test(0x024d0127, b"abc");
239 do_test(0x29750586, b"message digest");
240 do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
241 do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
242 abcdefghijklmnopqrstuvwxyz\
243 0123456789");
244 do_test(0x97b61069, b"1234567890123456789012345678901234567890\
245 1234567890123456789012345678901234567890");
246 do_test(0xD6251498, &[255; 64000]);
247 }
248
249 #[test]
compare()250 fn compare() {
251 let mut rng = rand::thread_rng();
252 let mut data = vec![0u8; 5589];
253 for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
254 5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
255 rng.fill_bytes(&mut data[..size]);
256 let r1 = io::Cursor::new(&data[..size]);
257 let r2 = r1.clone();
258 if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
259 panic!("Comparison failed, size={}", size);
260 }
261 }
262 }
263
264 #[test]
rolling()265 fn rolling() {
266 assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
267
268 fn do_test(a: &[u8], b: &[u8]) {
269 let mut total = Vec::with_capacity(a.len() + b.len());
270 total.extend(a);
271 total.extend(b);
272 let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
273 for i in 0..(a.len()) {
274 h.remove(b.len(), a[i]);
275 h.update(total[b.len() + i]);
276 }
277 assert_eq!(h.hash(), adler32(b).unwrap());
278 }
279 do_test(b"a", b"b");
280 do_test(b"", b"this a test");
281 do_test(b"th", b"is a test");
282 do_test(b"this a ", b"test");
283 }
284
285 #[test]
long_window_remove()286 fn long_window_remove() {
287 let mut hash = RollingAdler32::new();
288 let w = 65536;
289 assert!(w as u32 > BASE);
290
291 let mut bytes = vec![0; w*3];
292 for (i, b) in bytes.iter_mut().enumerate() {
293 *b = i as u8;
294 }
295
296 for (i, b) in bytes.iter().enumerate() {
297 if i >= w {
298 hash.remove(w, bytes[i - w]);
299 }
300 hash.update(*b);
301 if i > 0 && i % w == 0 {
302 assert_eq!(hash.hash(), 0x433a8772);
303 }
304 }
305 assert_eq!(hash.hash(), 0xbbba8772);
306 }
307 }
308