1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10 
11 #![forbid(unsafe_code)]
12 #![cfg_attr(not(feature = "std"), no_std)]
13 
14 
15 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
16 // It was translated into Rust as accurately as I could manage
17 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
18 
19 /* zlib.h -- interface of the 'zlib' general purpose compression library
20   version 1.2.8, April 28th, 2013
21 
22   Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
23 
24   This software is provided 'as-is', without any express or implied
25   warranty.  In no event will the authors be held liable for any damages
26   arising from the use of this software.
27 
28   Permission is granted to anyone to use this software for any purpose,
29   including commercial applications, and to alter it and redistribute it
30   freely, subject to the following restrictions:
31 
32   1. The origin of this software must not be misrepresented; you must not
33      claim that you wrote the original software. If you use this software
34      in a product, an acknowledgment in the product documentation would be
35      appreciated but is not required.
36   2. Altered source versions must be plainly marked as such, and must not be
37      misrepresented as being the original software.
38   3. This notice may not be removed or altered from any source distribution.
39 
40   Jean-loup Gailly        Mark Adler
41   jloup@gzip.org          madler@alumni.caltech.edu
42 
43 */
44 
45 // largest prime smaller than 65536
46 const BASE: u32 = 65521;
47 
48 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
49 const NMAX: usize = 5552;
50 
51 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])52 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
53     *adler += u32::from(buf[0]);
54     *sum2 += *adler;
55 }
56 
57 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])58 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
59     do1(adler, sum2, &buf[0..1]);
60     do1(adler, sum2, &buf[1..2]);
61 }
62 
63 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])64 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
65     do2(adler, sum2, &buf[0..2]);
66     do2(adler, sum2, &buf[2..4]);
67 }
68 
69 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])70 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
71     do4(adler, sum2, &buf[0..4]);
72     do4(adler, sum2, &buf[4..8]);
73 }
74 
75 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])76 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
77     do8(adler, sum2, &buf[0..8]);
78     do8(adler, sum2, &buf[8..16]);
79 }
80 
81 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
82 ///
83 /// Calling remove() will update the hash to the value it would have if that
84 /// past byte had never been fed to the algorithm. This allows you to get the
85 /// hash of a rolling window very efficiently.
86 #[derive(Clone)]
87 pub struct RollingAdler32 {
88     a: u32,
89     b: u32,
90 }
91 
92 impl Default for RollingAdler32 {
default() -> RollingAdler3293     fn default() -> RollingAdler32 {
94         RollingAdler32::new()
95     }
96 }
97 
98 impl RollingAdler32 {
99     /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler32100     pub fn new() -> RollingAdler32 {
101         Self::from_value(1)
102     }
103 
104     /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler32105     pub fn from_value(adler32: u32) -> RollingAdler32 {
106         let a = adler32 & 0xFFFF;
107         let b = adler32 >> 16;
108         RollingAdler32 { a, b }
109     }
110 
111     /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32112     pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113         let mut hash = RollingAdler32::new();
114         hash.update_buffer(buffer);
115         hash
116     }
117 
118     /// Returns the current hash.
hash(&self) -> u32119     pub fn hash(&self) -> u32 {
120         (self.b << 16) | self.a
121     }
122 
123     /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)124     pub fn remove(&mut self, size: usize, byte: u8) {
125         let byte = u32::from(byte);
126         self.a = (self.a + BASE - byte) % BASE;
127         self.b = ((self.b + BASE - 1)
128             .wrapping_add(BASE.wrapping_sub(size as u32).wrapping_mul(byte)))
129             % BASE;
130     }
131 
132     /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)133     pub fn update(&mut self, byte: u8) {
134         let byte = u32::from(byte);
135         self.a = (self.a + byte) % BASE;
136         self.b = (self.b + self.a) % BASE;
137     }
138 
139     /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])140     pub fn update_buffer(&mut self, buffer: &[u8]) {
141         let len = buffer.len();
142 
143         // in case user likes doing a byte at a time, keep it fast
144         if len == 1 {
145             self.update(buffer[0]);
146             return;
147         }
148 
149         // in case short lengths are provided, keep it somewhat fast
150         if len < 16 {
151             for byte in buffer.iter().take(len) {
152                 self.a += u32::from(*byte);
153                 self.b += self.a;
154             }
155             if self.a >= BASE {
156                 self.a -= BASE;
157             }
158             self.b %= BASE;
159             return;
160         }
161 
162         let mut pos = 0;
163 
164         // do length NMAX blocks -- requires just one modulo operation;
165         while pos + NMAX <= len {
166             let end = pos + NMAX;
167             while pos < end {
168                 // 16 sums unrolled
169                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170                 pos += 16;
171             }
172             self.a %= BASE;
173             self.b %= BASE;
174         }
175 
176         // do remaining bytes (less than NMAX, still just one modulo)
177         if pos < len {
178             // avoid modulos if none remaining
179             while len - pos >= 16 {
180                 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
181                 pos += 16;
182             }
183             while len - pos > 0 {
184                 self.a += u32::from(buffer[pos]);
185                 self.b += self.a;
186                 pos += 1;
187             }
188             self.a %= BASE;
189             self.b %= BASE;
190         }
191     }
192 }
193 
194 /// Consume a Read object and returns the Adler32 hash.
195 #[cfg(feature = "std")]
adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32>196 pub fn adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32> {
197     let mut hash = RollingAdler32::new();
198     let mut buffer = [0u8; NMAX];
199     let mut read = reader.read(&mut buffer)?;
200     while read > 0 {
201         hash.update_buffer(&buffer[..read]);
202         read = reader.read(&mut buffer)?;
203     }
204     Ok(hash.hash())
205 }
206 
207 #[cfg(test)]
208 mod test {
209     use rand::Rng;
210     use std::io;
211     #[cfg(target_arch = "wasm32")]
212     use wasm_bindgen_test::wasm_bindgen_test;
213 
214     use super::{adler32, RollingAdler32, BASE};
215 
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>216     fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
217         let mut a: u32 = 1;
218         let mut b: u32 = 0;
219 
220         for byte in reader.bytes() {
221             let byte = byte? as u32;
222             a = (a + byte) % BASE;
223             b = (b + a) % BASE;
224         }
225 
226         Ok((b << 16) | a)
227     }
228 
229     #[test]
230     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
testvectors()231     fn testvectors() {
232         fn do_test(v: u32, bytes: &[u8]) {
233             let mut hash = RollingAdler32::new();
234             hash.update_buffer(&bytes);
235             assert_eq!(hash.hash(), v);
236 
237             let r = io::Cursor::new(bytes);
238             assert_eq!(adler32(r).unwrap(), v);
239         }
240         do_test(0x00000001, b"");
241         do_test(0x00620062, b"a");
242         do_test(0x024d0127, b"abc");
243         do_test(0x29750586, b"message digest");
244         do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
245         do_test(
246             0x8adb150c,
247             b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
248                               abcdefghijklmnopqrstuvwxyz\
249                               0123456789",
250         );
251         do_test(
252             0x97b61069,
253             b"1234567890123456789012345678901234567890\
254                               1234567890123456789012345678901234567890",
255         );
256         do_test(0xD6251498, &[255; 64000]);
257     }
258 
259     #[test]
260     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
compare()261     fn compare() {
262         let mut rng = rand::thread_rng();
263         let mut data = vec![0u8; 5589];
264         for size in [
265             0, 1, 3, 4, 5, 31, 32, 33, 67, 5550, 5552, 5553, 5568, 5584, 5589,
266         ]
267         .iter()
268         .cloned()
269         {
270             rng.fill(&mut data[..size]);
271             let r1 = io::Cursor::new(&data[..size]);
272             let r2 = r1.clone();
273             if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
274                 panic!("Comparison failed, size={}", size);
275             }
276         }
277     }
278 
279     #[test]
280     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
rolling()281     fn rolling() {
282         assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
283 
284         fn do_test(a: &[u8], b: &[u8]) {
285             let mut total = Vec::with_capacity(a.len() + b.len());
286             total.extend(a);
287             total.extend(b);
288             let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
289             for i in 0..(a.len()) {
290                 h.remove(b.len(), a[i]);
291                 h.update(total[b.len() + i]);
292             }
293             assert_eq!(h.hash(), adler32(b).unwrap());
294         }
295         do_test(b"a", b"b");
296         do_test(b"", b"this a test");
297         do_test(b"th", b"is a test");
298         do_test(b"this a ", b"test");
299     }
300 
301     #[test]
302     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
long_window_remove()303     fn long_window_remove() {
304         let mut hash = RollingAdler32::new();
305         let w = 65536;
306         assert!(w as u32 > BASE);
307 
308         let mut bytes = vec![0; w * 3];
309         for (i, b) in bytes.iter_mut().enumerate() {
310             *b = i as u8;
311         }
312 
313         for (i, b) in bytes.iter().enumerate() {
314             if i >= w {
315                 hash.remove(w, bytes[i - w]);
316             }
317             hash.update(*b);
318             if i > 0 && i % w == 0 {
319                 assert_eq!(hash.hash(), 0x433a8772);
320             }
321         }
322         assert_eq!(hash.hash(), 0xbbba8772);
323     }
324 }
325