use std::arch::aarch64 as arch;

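/// CRC32 state backed by the AArch64 `crc` extension (the `crc32b`/`crc32d`
/// instructions, which implement the standard IEEE polynomial).
///
/// [`State::new`] only returns `Some` when runtime feature detection confirms
/// that the CPU supports the `crc` feature, so callers can fall back to a
/// software implementation otherwise.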
#[derive(Clone)]
pub struct State {
    state: u32,
}

impl State {
    pub fn new(state: u32) -> Option<Self> {
        if is_aarch64_feature_detected!("crc") {
            // SAFETY: The feature check above ensures that all
            //         required instructions are supported by the CPU.
            Some(Self { state })
        } else {
            None
        }
    }

    pub fn update(&mut self, buf: &[u8]) {
        // SAFETY: The `State::new` constructor ensures that all
        //         required instructions are supported by the CPU.
        self.state = unsafe { calculate(self.state, buf) }
    }

    pub fn finalize(self) -> u32 {
        self.state
    }

    pub fn reset(&mut self) {
        self.state = 0;
    }

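    /// Combines this CRC with `other`, the CRC32 of a subsequent buffer of
    /// `amount` bytes, producing the CRC32 of the concatenated input.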
    pub fn combine(&mut self, other: u32, amount: u64) {
        self.state = crate::combine::combine(self.state, other, amount);
    }
}

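/// Computes the CRC32 of `data`, continuing from the intermediate value `crc`,
/// using the AArch64 hardware CRC instructions.
///
/// The buffer is split with `align_to::<u64>`: the unaligned head and tail are
/// processed one byte at a time with `__crc32b`, while the aligned middle is
/// processed eight bytes at a time with `__crc32d`.
///
/// # Safety
///
/// The caller must ensure that the running CPU supports the `crc` target
/// feature, e.g. by checking `is_aarch64_feature_detected!("crc")` first.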
// target_feature is necessary to allow rustc to inline the crc32* wrappers
#[target_feature(enable = "crc")]
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
    let mut c32 = !crc;
    // Split the buffer into an unaligned head, a run of aligned u64 words,
    // and an unaligned tail. Any byte pattern is a valid u64, so the
    // reinterpretation performed by `align_to` is sound.
    let (pre_quad, quads, post_quad) = data.align_to::<u64>();

    // Head: fold in the leading bytes one at a time.
    c32 = pre_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    // Body: fold in the aligned words; unrolling by 8 increases performance
    // by a lot.
    let mut quad_iter = quads.chunks_exact(8);
    for chunk in &mut quad_iter {
        c32 = arch::__crc32d(c32, chunk[0]);
        c32 = arch::__crc32d(c32, chunk[1]);
        c32 = arch::__crc32d(c32, chunk[2]);
        c32 = arch::__crc32d(c32, chunk[3]);
        c32 = arch::__crc32d(c32, chunk[4]);
        c32 = arch::__crc32d(c32, chunk[5]);
        c32 = arch::__crc32d(c32, chunk[6]);
        c32 = arch::__crc32d(c32, chunk[7]);
    }
    // Words left over after the unrolled loop.
    c32 = quad_iter
        .remainder()
        .iter()
        .fold(c32, |acc, &q| arch::__crc32d(acc, q));

    // Tail: fold in the trailing bytes one at a time.
    c32 = post_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    !c32
}

#[cfg(test)]
mod test {
    use quickcheck::quickcheck;

    quickcheck! {
        fn check_against_baseline(init: u32, chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut baseline = super::super::super::baseline::State::new(init);
            let mut aarch64 = super::State::new(init).expect("not supported");
            for (chunk, mut offset) in chunks {
                // simulate random alignments by offsetting the slice by up to 15 bytes
                offset &= 0xF;
                if chunk.len() <= offset {
                    baseline.update(&chunk);
                    aarch64.update(&chunk);
                } else {
                    baseline.update(&chunk[offset..]);
                    aarch64.update(&chunk[offset..]);
                }
            }
            aarch64.finalize() == baseline.finalize()
        }
    }
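
    // Illustrative sanity check: feeding a buffer through two `update`
    // calls must give the same CRC as a single call over the whole buffer.
    #[test]
    fn split_updates_match_single_update() {
        let data = b"hello from the aarch64 crc32 backend";

        let mut whole = super::State::new(0).expect("not supported");
        whole.update(data);

        let mut split = super::State::new(0).expect("not supported");
        split.update(&data[..7]);
        split.update(&data[7..]);

        assert_eq!(split.finalize(), whole.finalize());
    }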
}