1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10 
11 #[cfg(test)]
12 extern crate rand;
13 
14 use std::io;
15 
16 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
17 // It was translated into Rust as accurately as I could manage
18 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
19 
20 /* zlib.h -- interface of the 'zlib' general purpose compression library
21   version 1.2.8, April 28th, 2013
22 
23   Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
24 
25   This software is provided 'as-is', without any express or implied
26   warranty.  In no event will the authors be held liable for any damages
27   arising from the use of this software.
28 
29   Permission is granted to anyone to use this software for any purpose,
30   including commercial applications, and to alter it and redistribute it
31   freely, subject to the following restrictions:
32 
33   1. The origin of this software must not be misrepresented; you must not
34      claim that you wrote the original software. If you use this software
35      in a product, an acknowledgment in the product documentation would be
36      appreciated but is not required.
37   2. Altered source versions must be plainly marked as such, and must not be
38      misrepresented as being the original software.
39   3. This notice may not be removed or altered from any source distribution.
40 
41   Jean-loup Gailly        Mark Adler
42   jloup@gzip.org          madler@alumni.caltech.edu
43 
44 */
45 
46 // largest prime smaller than 65536
47 const BASE: u32 = 65521;
48 
49 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
50 const NMAX: usize = 5552;
51 
52 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])53 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
54     *adler += u32::from(buf[0]);
55     *sum2 += *adler;
56 }
57 
58 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])59 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
60     do1(adler, sum2, &buf[0..1]);
61     do1(adler, sum2, &buf[1..2]);
62 }
63 
64 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])65 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
66     do2(adler, sum2, &buf[0..2]);
67     do2(adler, sum2, &buf[2..4]);
68 }
69 
70 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])71 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
72     do4(adler, sum2, &buf[0..4]);
73     do4(adler, sum2, &buf[4..8]);
74 }
75 
76 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])77 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
78     do8(adler, sum2, &buf[0..8]);
79     do8(adler, sum2, &buf[8..16]);
80 }
81 
82 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
83 ///
84 /// Calling remove() will update the hash to the value it would have if that
85 /// past byte had never been fed to the algorithm. This allows you to get the
86 /// hash of a rolling window very efficiently.
87 pub struct RollingAdler32 {
88     a: u32,
89     b: u32,
90 }
91 
92 impl Default for RollingAdler32 {
default() -> RollingAdler3293     fn default() -> RollingAdler32 {
94         RollingAdler32::new()
95     }
96 }
97 
98 impl RollingAdler32 {
99     /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler32100     pub fn new() -> RollingAdler32 {
101         Self::from_value(1)
102     }
103 
104     /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler32105     pub fn from_value(adler32: u32) -> RollingAdler32 {
106         let a = adler32 & 0xFFFF;
107         let b = adler32 >> 16;
108         RollingAdler32 { a, b }
109     }
110 
111     /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32112     pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113         let mut hash = RollingAdler32::new();
114         hash.update_buffer(buffer);
115         hash
116     }
117 
118     /// Returns the current hash.
hash(&self) -> u32119     pub fn hash(&self) -> u32 {
120         (self.b << 16) | self.a
121     }
122 
123     /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)124     pub fn remove(&mut self, size: usize, byte: u8) {
125         let byte = u32::from(byte);
126         self.a = (self.a + BASE - byte) % BASE;
127         self.b = ((self.b + BASE - 1)
128                       .wrapping_add(BASE.wrapping_sub(size as u32)
129                                         .wrapping_mul(byte))) % BASE;
130     }
131 
132     /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)133     pub fn update(&mut self, byte: u8) {
134         let byte = u32::from(byte);
135         self.a = (self.a + byte) % BASE;
136         self.b = (self.b + self.a) % BASE;
137     }
138 
139     /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])140     pub fn update_buffer(&mut self, buffer: &[u8]) {
141         let len = buffer.len();
142 
143         // in case user likes doing a byte at a time, keep it fast
144         if len == 1 {
145             self.update(buffer[0]);
146             return;
147         }
148 
149         // in case short lengths are provided, keep it somewhat fast
150         if len < 16 {
151             for byte in buffer.iter().take(len) {
152                 self.a += u32::from(*byte);
153                 self.b += self.a;
154             }
155             if self.a >= BASE {
156                 self.a -= BASE;
157             }
158             self.b %= BASE;
159             return;
160         }
161 
162         let mut pos = 0;
163 
164         // do length NMAX blocks -- requires just one modulo operation;
165         while pos + NMAX <= len {
166             let end = pos + NMAX;
167             while pos < end {
168                 // 16 sums unrolled
169                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170                 pos += 16;
171             }
172             self.a %= BASE;
173             self.b %= BASE;
174         }
175 
176         // do remaining bytes (less than NMAX, still just one modulo)
177         if pos < len { // avoid modulos if none remaining
178             while len - pos >= 16 {
179                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
180                 pos += 16;
181             }
182             while len - pos > 0 {
183                 self.a += u32::from(buffer[pos]);
184                 self.b += self.a;
185                 pos += 1;
186             }
187             self.a %= BASE;
188             self.b %= BASE;
189         }
190     }
191 }
192 
193 /// Consume a Read object and returns the Adler32 hash.
adler32<R: io::Read>(mut reader: R) -> io::Result<u32>194 pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
195     let mut hash = RollingAdler32::new();
196     let mut buffer = [0u8; NMAX];
197     let mut read = try!(reader.read(&mut buffer));
198     while read > 0 {
199         hash.update_buffer(&buffer[..read]);
200         read = try!(reader.read(&mut buffer));
201     }
202     Ok(hash.hash())
203 }
204 
205 #[cfg(test)]
206 mod test {
207     use rand;
208     use rand::Rng;
209     use std::io;
210 
211     use super::{BASE, adler32, RollingAdler32};
212 
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>213     fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
214         let mut a: u32 = 1;
215         let mut b: u32 = 0;
216 
217         for byte in reader.bytes() {
218             let byte = try!(byte) as u32;
219             a = (a + byte) % BASE;
220             b = (b + a) % BASE;
221         }
222 
223         Ok((b << 16) | a)
224     }
225 
226     #[test]
testvectors()227     fn testvectors() {
228         fn do_test(v: u32, bytes: &[u8]) {
229             let mut hash = RollingAdler32::new();
230             hash.update_buffer(&bytes);
231             assert_eq!(hash.hash(), v);
232 
233             let r = io::Cursor::new(bytes);
234             assert_eq!(adler32(r).unwrap(), v);
235         }
236         do_test(0x00000001, b"");
237         do_test(0x00620062, b"a");
238         do_test(0x024d0127, b"abc");
239         do_test(0x29750586, b"message digest");
240         do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
241         do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
242                               abcdefghijklmnopqrstuvwxyz\
243                               0123456789");
244         do_test(0x97b61069, b"1234567890123456789012345678901234567890\
245                               1234567890123456789012345678901234567890");
246         do_test(0xD6251498, &[255; 64000]);
247     }
248 
249     #[test]
compare()250     fn compare() {
251         let mut rng = rand::thread_rng();
252         let mut data = vec![0u8; 5589];
253         for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
254                      5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
255             rng.fill_bytes(&mut data[..size]);
256             let r1 = io::Cursor::new(&data[..size]);
257             let r2 = r1.clone();
258             if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
259                 panic!("Comparison failed, size={}", size);
260             }
261         }
262     }
263 
264     #[test]
rolling()265     fn rolling() {
266         assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
267 
268         fn do_test(a: &[u8], b: &[u8]) {
269             let mut total = Vec::with_capacity(a.len() + b.len());
270             total.extend(a);
271             total.extend(b);
272             let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
273             for i in 0..(a.len()) {
274                 h.remove(b.len(), a[i]);
275                 h.update(total[b.len() + i]);
276             }
277             assert_eq!(h.hash(), adler32(b).unwrap());
278         }
279         do_test(b"a", b"b");
280         do_test(b"", b"this a test");
281         do_test(b"th", b"is a test");
282         do_test(b"this a ", b"test");
283     }
284 
285     #[test]
long_window_remove()286     fn long_window_remove() {
287         let mut hash = RollingAdler32::new();
288         let w = 65536;
289         assert!(w as u32 > BASE);
290 
291         let mut bytes = vec![0; w*3];
292         for (i, b) in bytes.iter_mut().enumerate() {
293             *b = i as u8;
294         }
295 
296         for (i, b) in bytes.iter().enumerate() {
297             if i >= w {
298                 hash.remove(w, bytes[i - w]);
299             }
300             hash.update(*b);
301             if i > 0 && i % w == 0 {
302                 assert_eq!(hash.hash(), 0x433a8772);
303             }
304         }
305         assert_eq!(hash.hash(), 0xbbba8772);
306     }
307 }
308