1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10 
11 #[cfg(test)]
12 extern crate rand;
13 
14 use std::io;
15 
16 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
17 // It was translated into Rust as accurately as I could manage
18 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
19 
20 /* zlib.h -- interface of the 'zlib' general purpose compression library
21   version 1.2.8, April 28th, 2013
22 
23   Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
24 
25   This software is provided 'as-is', without any express or implied
26   warranty.  In no event will the authors be held liable for any damages
27   arising from the use of this software.
28 
29   Permission is granted to anyone to use this software for any purpose,
30   including commercial applications, and to alter it and redistribute it
31   freely, subject to the following restrictions:
32 
33   1. The origin of this software must not be misrepresented; you must not
34      claim that you wrote the original software. If you use this software
35      in a product, an acknowledgment in the product documentation would be
36      appreciated but is not required.
37   2. Altered source versions must be plainly marked as such, and must not be
38      misrepresented as being the original software.
39   3. This notice may not be removed or altered from any source distribution.
40 
41   Jean-loup Gailly        Mark Adler
42   jloup@gzip.org          madler@alumni.caltech.edu
43 
44 */
45 
46 // largest prime smaller than 65536
47 const BASE: u32 = 65521;
48 
49 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
50 const NMAX: usize = 5552;
51 
52 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])53 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
54     *adler += buf[0] as u32;
55     *sum2 += *adler;
56 }
57 
58 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])59 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
60     do1(adler, sum2, &buf[0..1]);
61     do1(adler, sum2, &buf[1..2]);
62 }
63 
64 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])65 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
66     do2(adler, sum2, &buf[0..2]);
67     do2(adler, sum2, &buf[2..4]);
68 }
69 
70 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])71 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
72     do4(adler, sum2, &buf[0..4]);
73     do4(adler, sum2, &buf[4..8]);
74 }
75 
76 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])77 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
78     do8(adler, sum2, &buf[0..8]);
79     do8(adler, sum2, &buf[8..16]);
80 }
81 
82 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
83 ///
84 /// Calling remove() will update the hash to the value it would have if that
85 /// past byte had never been fed to the algorithm. This allows you to get the
86 /// hash of a rolling window very efficiently.
87 pub struct RollingAdler32 {
88     a: u32,
89     b: u32,
90 }
91 
92 impl RollingAdler32 {
93     /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler3294     pub fn new() -> RollingAdler32 {
95         Self::from_value(1)
96     }
97 
98     /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler3299     pub fn from_value(adler32: u32) -> RollingAdler32 {
100         let a = adler32 & 0xFFFF;
101         let b = adler32 >> 16;
102         RollingAdler32 { a: a, b: b }
103     }
104 
105     /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32106     pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
107         let mut hash = RollingAdler32::new();
108         hash.update_buffer(buffer);
109         hash
110     }
111 
112     /// Returns the current hash.
hash(&self) -> u32113     pub fn hash(&self) -> u32 {
114         (self.b << 16) | self.a
115     }
116 
117     /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)118     pub fn remove(&mut self, size: usize, byte: u8) {
119         let byte = byte as u32;
120         self.a = (self.a + BASE - byte) % BASE;
121         self.b = ((self.b + BASE - 1)
122                       .wrapping_add(BASE.wrapping_sub(size as u32)
123                                         .wrapping_mul(byte))) % BASE;
124     }
125 
126     /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)127     pub fn update(&mut self, byte: u8) {
128         let byte = byte as u32;
129         self.a = (self.a + byte) % BASE;
130         self.b = (self.b + self.a) % BASE;
131     }
132 
133     /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])134     pub fn update_buffer(&mut self, buffer: &[u8]) {
135         let len = buffer.len();
136 
137         // in case user likes doing a byte at a time, keep it fast
138         if len == 1 {
139             self.update(buffer[0]);
140             return;
141         }
142 
143         // in case short lengths are provided, keep it somewhat fast
144         if len < 16 {
145             for pos in 0..len {
146                 self.a += buffer[pos] as u32;
147                 self.b += self.a;
148             }
149             if self.a >= BASE {
150                 self.a -= BASE;
151             }
152             self.b %= BASE;
153             return;
154         }
155 
156         let mut pos = 0;
157 
158         // do length NMAX blocks -- requires just one modulo operation;
159         while pos + NMAX <= len {
160             let end = pos + NMAX;
161             while pos < end {
162                 // 16 sums unrolled
163                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
164                 pos += 16;
165             }
166             self.a %= BASE;
167             self.b %= BASE;
168         }
169 
170         // do remaining bytes (less than NMAX, still just one modulo)
171         if pos < len { // avoid modulos if none remaining
172             while len - pos >= 16 {
173                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
174                 pos += 16;
175             }
176             while len - pos > 0 {
177                 self.a += buffer[pos] as u32;
178                 self.b += self.a;
179                 pos += 1;
180             }
181             self.a %= BASE;
182             self.b %= BASE;
183         }
184     }
185 }
186 
187 /// Consume a Read object and returns the Adler32 hash.
adler32<R: io::Read>(mut reader: R) -> io::Result<u32>188 pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
189     let mut hash = RollingAdler32::new();
190     let mut buffer = [0u8; NMAX];
191     let mut read = try!(reader.read(&mut buffer));
192     while read > 0 {
193         hash.update_buffer(&buffer[..read]);
194         read = try!(reader.read(&mut buffer));
195     }
196     Ok(hash.hash())
197 }
198 
199 #[cfg(test)]
200 mod test {
201     use rand;
202     use rand::Rng;
203     use std::io;
204 
205     use super::{BASE, adler32, RollingAdler32};
206 
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>207     fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
208         let mut a: u32 = 1;
209         let mut b: u32 = 0;
210 
211         for byte in reader.bytes() {
212             let byte = try!(byte) as u32;
213             a = (a + byte) % BASE;
214             b = (b + a) % BASE;
215         }
216 
217         Ok((b << 16) | a)
218     }
219 
220     #[test]
testvectors()221     fn testvectors() {
222         fn do_test(v: u32, bytes: &[u8]) {
223             let mut hash = RollingAdler32::new();
224             hash.update_buffer(&bytes);
225             assert_eq!(hash.hash(), v);
226 
227             let r = io::Cursor::new(bytes);
228             assert_eq!(adler32(r).unwrap(), v);
229         }
230         do_test(0x00000001, b"");
231         do_test(0x00620062, b"a");
232         do_test(0x024d0127, b"abc");
233         do_test(0x29750586, b"message digest");
234         do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
235         do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
236                               abcdefghijklmnopqrstuvwxyz\
237                               0123456789");
238         do_test(0x97b61069, b"1234567890123456789012345678901234567890\
239                               1234567890123456789012345678901234567890");
240         do_test(0xD6251498, &[255; 64000]);
241     }
242 
243     #[test]
compare()244     fn compare() {
245         let mut rng = rand::thread_rng();
246         let mut data = vec![0u8; 5589];
247         for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
248                      5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
249             rng.fill_bytes(&mut data[..size]);
250             let r1 = io::Cursor::new(&data[..size]);
251             let r2 = r1.clone();
252             if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
253                 panic!("Comparison failed, size={}", size);
254             }
255         }
256     }
257 
258     #[test]
rolling()259     fn rolling() {
260         assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
261 
262         fn do_test(a: &[u8], b: &[u8]) {
263             let mut total = Vec::with_capacity(a.len() + b.len());
264             total.extend(a);
265             total.extend(b);
266             let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
267             for i in 0..(a.len()) {
268                 h.remove(b.len(), a[i]);
269                 h.update(total[b.len() + i]);
270             }
271             assert_eq!(h.hash(), adler32(b).unwrap());
272         }
273         do_test(b"a", b"b");
274         do_test(b"", b"this a test");
275         do_test(b"th", b"is a test");
276         do_test(b"this a ", b"test");
277     }
278 
279     #[test]
long_window_remove()280     fn long_window_remove() {
281         let mut hash = RollingAdler32::new();
282         let w = 65536;
283         assert!(w as u32 > BASE);
284 
285         let mut bytes = vec![0; w*3];
286         for (i, b) in bytes.iter_mut().enumerate() {
287             *b = i as u8;
288         }
289 
290         for (i, b) in bytes.iter().enumerate() {
291             if i >= w {
292                 hash.remove(w, bytes[i - w]);
293             }
294             hash.update(*b);
295             if i > 0 && i % w == 0 {
296                 assert_eq!(hash.hash(), 0x433a8772);
297             }
298         }
299         assert_eq!(hash.hash(), 0xbbba8772);
300     }
301 }
302