1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10
11 #[cfg(test)]
12 extern crate rand;
13
14 use std::io;
15
16 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
17 // It was translated into Rust as accurately as I could manage
18 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
19
20 /* zlib.h -- interface of the 'zlib' general purpose compression library
21 version 1.2.8, April 28th, 2013
22
23 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
24
25 This software is provided 'as-is', without any express or implied
26 warranty. In no event will the authors be held liable for any damages
27 arising from the use of this software.
28
29 Permission is granted to anyone to use this software for any purpose,
30 including commercial applications, and to alter it and redistribute it
31 freely, subject to the following restrictions:
32
33 1. The origin of this software must not be misrepresented; you must not
34 claim that you wrote the original software. If you use this software
35 in a product, an acknowledgment in the product documentation would be
36 appreciated but is not required.
37 2. Altered source versions must be plainly marked as such, and must not be
38 misrepresented as being the original software.
39 3. This notice may not be removed or altered from any source distribution.
40
41 Jean-loup Gailly Mark Adler
42 jloup@gzip.org madler@alumni.caltech.edu
43
44 */
45
46 // largest prime smaller than 65536
47 const BASE: u32 = 65521;
48
49 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
50 const NMAX: usize = 5552;
51
52 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])53 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
54 *adler += buf[0] as u32;
55 *sum2 += *adler;
56 }
57
58 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])59 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
60 do1(adler, sum2, &buf[0..1]);
61 do1(adler, sum2, &buf[1..2]);
62 }
63
64 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])65 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
66 do2(adler, sum2, &buf[0..2]);
67 do2(adler, sum2, &buf[2..4]);
68 }
69
70 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])71 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
72 do4(adler, sum2, &buf[0..4]);
73 do4(adler, sum2, &buf[4..8]);
74 }
75
76 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])77 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
78 do8(adler, sum2, &buf[0..8]);
79 do8(adler, sum2, &buf[8..16]);
80 }
81
82 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
83 ///
84 /// Calling remove() will update the hash to the value it would have if that
85 /// past byte had never been fed to the algorithm. This allows you to get the
86 /// hash of a rolling window very efficiently.
87 pub struct RollingAdler32 {
88 a: u32,
89 b: u32,
90 }
91
92 impl RollingAdler32 {
93 /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler3294 pub fn new() -> RollingAdler32 {
95 Self::from_value(1)
96 }
97
98 /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler3299 pub fn from_value(adler32: u32) -> RollingAdler32 {
100 let a = adler32 & 0xFFFF;
101 let b = adler32 >> 16;
102 RollingAdler32 { a: a, b: b }
103 }
104
105 /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32106 pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
107 let mut hash = RollingAdler32::new();
108 hash.update_buffer(buffer);
109 hash
110 }
111
112 /// Returns the current hash.
hash(&self) -> u32113 pub fn hash(&self) -> u32 {
114 (self.b << 16) | self.a
115 }
116
117 /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)118 pub fn remove(&mut self, size: usize, byte: u8) {
119 let byte = byte as u32;
120 self.a = (self.a + BASE - byte) % BASE;
121 self.b = ((self.b + BASE - 1)
122 .wrapping_add(BASE.wrapping_sub(size as u32)
123 .wrapping_mul(byte))) % BASE;
124 }
125
126 /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)127 pub fn update(&mut self, byte: u8) {
128 let byte = byte as u32;
129 self.a = (self.a + byte) % BASE;
130 self.b = (self.b + self.a) % BASE;
131 }
132
133 /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])134 pub fn update_buffer(&mut self, buffer: &[u8]) {
135 let len = buffer.len();
136
137 // in case user likes doing a byte at a time, keep it fast
138 if len == 1 {
139 self.update(buffer[0]);
140 return;
141 }
142
143 // in case short lengths are provided, keep it somewhat fast
144 if len < 16 {
145 for pos in 0..len {
146 self.a += buffer[pos] as u32;
147 self.b += self.a;
148 }
149 if self.a >= BASE {
150 self.a -= BASE;
151 }
152 self.b %= BASE;
153 return;
154 }
155
156 let mut pos = 0;
157
158 // do length NMAX blocks -- requires just one modulo operation;
159 while pos + NMAX <= len {
160 let end = pos + NMAX;
161 while pos < end {
162 // 16 sums unrolled
163 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
164 pos += 16;
165 }
166 self.a %= BASE;
167 self.b %= BASE;
168 }
169
170 // do remaining bytes (less than NMAX, still just one modulo)
171 if pos < len { // avoid modulos if none remaining
172 while len - pos >= 16 {
173 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
174 pos += 16;
175 }
176 while len - pos > 0 {
177 self.a += buffer[pos] as u32;
178 self.b += self.a;
179 pos += 1;
180 }
181 self.a %= BASE;
182 self.b %= BASE;
183 }
184 }
185 }
186
187 /// Consume a Read object and returns the Adler32 hash.
adler32<R: io::Read>(mut reader: R) -> io::Result<u32>188 pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
189 let mut hash = RollingAdler32::new();
190 let mut buffer = [0u8; NMAX];
191 let mut read = try!(reader.read(&mut buffer));
192 while read > 0 {
193 hash.update_buffer(&buffer[..read]);
194 read = try!(reader.read(&mut buffer));
195 }
196 Ok(hash.hash())
197 }
198
199 #[cfg(test)]
200 mod test {
201 use rand;
202 use rand::Rng;
203 use std::io;
204
205 use super::{BASE, adler32, RollingAdler32};
206
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>207 fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
208 let mut a: u32 = 1;
209 let mut b: u32 = 0;
210
211 for byte in reader.bytes() {
212 let byte = try!(byte) as u32;
213 a = (a + byte) % BASE;
214 b = (b + a) % BASE;
215 }
216
217 Ok((b << 16) | a)
218 }
219
220 #[test]
testvectors()221 fn testvectors() {
222 fn do_test(v: u32, bytes: &[u8]) {
223 let mut hash = RollingAdler32::new();
224 hash.update_buffer(&bytes);
225 assert_eq!(hash.hash(), v);
226
227 let r = io::Cursor::new(bytes);
228 assert_eq!(adler32(r).unwrap(), v);
229 }
230 do_test(0x00000001, b"");
231 do_test(0x00620062, b"a");
232 do_test(0x024d0127, b"abc");
233 do_test(0x29750586, b"message digest");
234 do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
235 do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
236 abcdefghijklmnopqrstuvwxyz\
237 0123456789");
238 do_test(0x97b61069, b"1234567890123456789012345678901234567890\
239 1234567890123456789012345678901234567890");
240 do_test(0xD6251498, &[255; 64000]);
241 }
242
243 #[test]
compare()244 fn compare() {
245 let mut rng = rand::thread_rng();
246 let mut data = vec![0u8; 5589];
247 for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
248 5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
249 rng.fill_bytes(&mut data[..size]);
250 let r1 = io::Cursor::new(&data[..size]);
251 let r2 = r1.clone();
252 if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
253 panic!("Comparison failed, size={}", size);
254 }
255 }
256 }
257
258 #[test]
rolling()259 fn rolling() {
260 assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
261
262 fn do_test(a: &[u8], b: &[u8]) {
263 let mut total = Vec::with_capacity(a.len() + b.len());
264 total.extend(a);
265 total.extend(b);
266 let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
267 for i in 0..(a.len()) {
268 h.remove(b.len(), a[i]);
269 h.update(total[b.len() + i]);
270 }
271 assert_eq!(h.hash(), adler32(b).unwrap());
272 }
273 do_test(b"a", b"b");
274 do_test(b"", b"this a test");
275 do_test(b"th", b"is a test");
276 do_test(b"this a ", b"test");
277 }
278
279 #[test]
long_window_remove()280 fn long_window_remove() {
281 let mut hash = RollingAdler32::new();
282 let w = 65536;
283 assert!(w as u32 > BASE);
284
285 let mut bytes = vec![0; w*3];
286 for (i, b) in bytes.iter_mut().enumerate() {
287 *b = i as u8;
288 }
289
290 for (i, b) in bytes.iter().enumerate() {
291 if i >= w {
292 hash.remove(w, bytes[i - w]);
293 }
294 hash.update(*b);
295 if i > 0 && i % w == 0 {
296 assert_eq!(hash.hash(), 0x433a8772);
297 }
298 }
299 assert_eq!(hash.hash(), 0xbbba8772);
300 }
301 }
302