1 //! A minimal implementation of Adler32 for Rust.
2 //!
3 //! This provides the simple method adler32(), that exhausts a Read and
4 //! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5 //! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6 //! fashion.
7 //!
8 //! The adler32 code has been translated (as accurately as I could manage) from
9 //! the zlib implementation.
10
11 #![forbid(unsafe_code)]
12 #![cfg_attr(not(feature = "std"), no_std)]
13
14
15 // adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
16 // It was translated into Rust as accurately as I could manage
17 // The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
18
19 /* zlib.h -- interface of the 'zlib' general purpose compression library
20 version 1.2.8, April 28th, 2013
21
22 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
23
24 This software is provided 'as-is', without any express or implied
25 warranty. In no event will the authors be held liable for any damages
26 arising from the use of this software.
27
28 Permission is granted to anyone to use this software for any purpose,
29 including commercial applications, and to alter it and redistribute it
30 freely, subject to the following restrictions:
31
32 1. The origin of this software must not be misrepresented; you must not
33 claim that you wrote the original software. If you use this software
34 in a product, an acknowledgment in the product documentation would be
35 appreciated but is not required.
36 2. Altered source versions must be plainly marked as such, and must not be
37 misrepresented as being the original software.
38 3. This notice may not be removed or altered from any source distribution.
39
40 Jean-loup Gailly Mark Adler
41 jloup@gzip.org madler@alumni.caltech.edu
42
43 */
44
45 // largest prime smaller than 65536
46 const BASE: u32 = 65521;
47
48 // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
49 const NMAX: usize = 5552;
50
51 #[inline(always)]
do1(adler: &mut u32, sum2: &mut u32, buf: &[u8])52 fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
53 *adler += u32::from(buf[0]);
54 *sum2 += *adler;
55 }
56
57 #[inline(always)]
do2(adler: &mut u32, sum2: &mut u32, buf: &[u8])58 fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
59 do1(adler, sum2, &buf[0..1]);
60 do1(adler, sum2, &buf[1..2]);
61 }
62
63 #[inline(always)]
do4(adler: &mut u32, sum2: &mut u32, buf: &[u8])64 fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
65 do2(adler, sum2, &buf[0..2]);
66 do2(adler, sum2, &buf[2..4]);
67 }
68
69 #[inline(always)]
do8(adler: &mut u32, sum2: &mut u32, buf: &[u8])70 fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
71 do4(adler, sum2, &buf[0..4]);
72 do4(adler, sum2, &buf[4..8]);
73 }
74
75 #[inline(always)]
do16(adler: &mut u32, sum2: &mut u32, buf: &[u8])76 fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
77 do8(adler, sum2, &buf[0..8]);
78 do8(adler, sum2, &buf[8..16]);
79 }
80
81 /// A rolling version of the Adler32 hash, which can 'forget' past bytes.
82 ///
83 /// Calling remove() will update the hash to the value it would have if that
84 /// past byte had never been fed to the algorithm. This allows you to get the
85 /// hash of a rolling window very efficiently.
86 #[derive(Clone)]
87 pub struct RollingAdler32 {
88 a: u32,
89 b: u32,
90 }
91
92 impl Default for RollingAdler32 {
default() -> RollingAdler3293 fn default() -> RollingAdler32 {
94 RollingAdler32::new()
95 }
96 }
97
98 impl RollingAdler32 {
99 /// Creates an empty Adler32 context (with hash 1).
new() -> RollingAdler32100 pub fn new() -> RollingAdler32 {
101 Self::from_value(1)
102 }
103
104 /// Creates an Adler32 context with the given initial value.
from_value(adler32: u32) -> RollingAdler32105 pub fn from_value(adler32: u32) -> RollingAdler32 {
106 let a = adler32 & 0xFFFF;
107 let b = adler32 >> 16;
108 RollingAdler32 { a, b }
109 }
110
111 /// Convenience function initializing a context from the hash of a buffer.
from_buffer(buffer: &[u8]) -> RollingAdler32112 pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113 let mut hash = RollingAdler32::new();
114 hash.update_buffer(buffer);
115 hash
116 }
117
118 /// Returns the current hash.
hash(&self) -> u32119 pub fn hash(&self) -> u32 {
120 (self.b << 16) | self.a
121 }
122
123 /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
remove(&mut self, size: usize, byte: u8)124 pub fn remove(&mut self, size: usize, byte: u8) {
125 let byte = u32::from(byte);
126 self.a = (self.a + BASE - byte) % BASE;
127 self.b = ((self.b + BASE - 1)
128 .wrapping_add(BASE.wrapping_sub(size as u32).wrapping_mul(byte)))
129 % BASE;
130 }
131
132 /// Feeds a new `byte` to the algorithm to update the hash.
update(&mut self, byte: u8)133 pub fn update(&mut self, byte: u8) {
134 let byte = u32::from(byte);
135 self.a = (self.a + byte) % BASE;
136 self.b = (self.b + self.a) % BASE;
137 }
138
139 /// Feeds a vector of bytes to the algorithm to update the hash.
update_buffer(&mut self, buffer: &[u8])140 pub fn update_buffer(&mut self, buffer: &[u8]) {
141 let len = buffer.len();
142
143 // in case user likes doing a byte at a time, keep it fast
144 if len == 1 {
145 self.update(buffer[0]);
146 return;
147 }
148
149 // in case short lengths are provided, keep it somewhat fast
150 if len < 16 {
151 for byte in buffer.iter().take(len) {
152 self.a += u32::from(*byte);
153 self.b += self.a;
154 }
155 if self.a >= BASE {
156 self.a -= BASE;
157 }
158 self.b %= BASE;
159 return;
160 }
161
162 let mut pos = 0;
163
164 // do length NMAX blocks -- requires just one modulo operation;
165 while pos + NMAX <= len {
166 let end = pos + NMAX;
167 while pos < end {
168 // 16 sums unrolled
169 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170 pos += 16;
171 }
172 self.a %= BASE;
173 self.b %= BASE;
174 }
175
176 // do remaining bytes (less than NMAX, still just one modulo)
177 if pos < len {
178 // avoid modulos if none remaining
179 while len - pos >= 16 {
180 do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
181 pos += 16;
182 }
183 while len - pos > 0 {
184 self.a += u32::from(buffer[pos]);
185 self.b += self.a;
186 pos += 1;
187 }
188 self.a %= BASE;
189 self.b %= BASE;
190 }
191 }
192 }
193
194 /// Consume a Read object and returns the Adler32 hash.
195 #[cfg(feature = "std")]
adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32>196 pub fn adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32> {
197 let mut hash = RollingAdler32::new();
198 let mut buffer = [0u8; NMAX];
199 let mut read = reader.read(&mut buffer)?;
200 while read > 0 {
201 hash.update_buffer(&buffer[..read]);
202 read = reader.read(&mut buffer)?;
203 }
204 Ok(hash.hash())
205 }
206
207 #[cfg(test)]
208 mod test {
209 use rand::Rng;
210 use std::io;
211 #[cfg(target_arch = "wasm32")]
212 use wasm_bindgen_test::wasm_bindgen_test;
213
214 use super::{adler32, RollingAdler32, BASE};
215
adler32_slow<R: io::Read>(reader: R) -> io::Result<u32>216 fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
217 let mut a: u32 = 1;
218 let mut b: u32 = 0;
219
220 for byte in reader.bytes() {
221 let byte = byte? as u32;
222 a = (a + byte) % BASE;
223 b = (b + a) % BASE;
224 }
225
226 Ok((b << 16) | a)
227 }
228
229 #[test]
230 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
testvectors()231 fn testvectors() {
232 fn do_test(v: u32, bytes: &[u8]) {
233 let mut hash = RollingAdler32::new();
234 hash.update_buffer(&bytes);
235 assert_eq!(hash.hash(), v);
236
237 let r = io::Cursor::new(bytes);
238 assert_eq!(adler32(r).unwrap(), v);
239 }
240 do_test(0x00000001, b"");
241 do_test(0x00620062, b"a");
242 do_test(0x024d0127, b"abc");
243 do_test(0x29750586, b"message digest");
244 do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
245 do_test(
246 0x8adb150c,
247 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
248 abcdefghijklmnopqrstuvwxyz\
249 0123456789",
250 );
251 do_test(
252 0x97b61069,
253 b"1234567890123456789012345678901234567890\
254 1234567890123456789012345678901234567890",
255 );
256 do_test(0xD6251498, &[255; 64000]);
257 }
258
259 #[test]
260 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
compare()261 fn compare() {
262 let mut rng = rand::thread_rng();
263 let mut data = vec![0u8; 5589];
264 for size in [
265 0, 1, 3, 4, 5, 31, 32, 33, 67, 5550, 5552, 5553, 5568, 5584, 5589,
266 ]
267 .iter()
268 .cloned()
269 {
270 rng.fill(&mut data[..size]);
271 let r1 = io::Cursor::new(&data[..size]);
272 let r2 = r1.clone();
273 if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
274 panic!("Comparison failed, size={}", size);
275 }
276 }
277 }
278
279 #[test]
280 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
rolling()281 fn rolling() {
282 assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
283
284 fn do_test(a: &[u8], b: &[u8]) {
285 let mut total = Vec::with_capacity(a.len() + b.len());
286 total.extend(a);
287 total.extend(b);
288 let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
289 for i in 0..(a.len()) {
290 h.remove(b.len(), a[i]);
291 h.update(total[b.len() + i]);
292 }
293 assert_eq!(h.hash(), adler32(b).unwrap());
294 }
295 do_test(b"a", b"b");
296 do_test(b"", b"this a test");
297 do_test(b"th", b"is a test");
298 do_test(b"this a ", b"test");
299 }
300
301 #[test]
302 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
long_window_remove()303 fn long_window_remove() {
304 let mut hash = RollingAdler32::new();
305 let w = 65536;
306 assert!(w as u32 > BASE);
307
308 let mut bytes = vec![0; w * 3];
309 for (i, b) in bytes.iter_mut().enumerate() {
310 *b = i as u8;
311 }
312
313 for (i, b) in bytes.iter().enumerate() {
314 if i >= w {
315 hash.remove(w, bytes[i - w]);
316 }
317 hash.update(*b);
318 if i > 0 && i % w == 0 {
319 assert_eq!(hash.hash(), 0x433a8772);
320 }
321 }
322 assert_eq!(hash.hash(), 0xbbba8772);
323 }
324 }
325