1 // Sadly, all of the crc crates in Rust appear to be insufficient for high
2 // performance algorithms. In fact, the implementation here is insufficient!
3 // The best implementation uses the CRC32 instruction from SSE 4.2, but using
4 // specialty instructions on stable Rust is an absolute nightmare at the
5 // moment. The only way I can think to do it is to write some Assembly in a
6 // separate source file and run an Assembler from build.rs. But we'd need to be
7 // careful to only do this on platforms that support the CRC32 instruction,
8 // which means we'd need to query CPUID. There are a couple Rust crates for
9 // that, but of course, they only work on unstable Rust so we'd need to hand
10 // roll that too.
11 //
12 // As a stopgap, we implement the fastest (slicing-by-16) algorithm described
13 // here: http://create.stephan-brumme.com/crc32/
14 // (Actually, slicing-by-16 doesn't seem to be measurably faster than
15 // slicing-by-8 on my i7-6900K, so use slicing-by-8.)
16 //
17 // For Snappy, we only care about CRC32C (32 bit, Castagnoli).
18 //
19 // ---AG
20 
21 // We have a bunch of different implementations of crc32 below that seem
22 // somewhat useful to leave around for easy benchmarking.
23 #![allow(dead_code)]
24 
25 use byteorder::{ByteOrder, LittleEndian as LE};
26 
27 const CASTAGNOLI_POLY: u32 = 0x82f63b78;
28 
29 lazy_static! {
30     static ref TABLE: [u32; 256] = make_table(CASTAGNOLI_POLY);
31     static ref TABLE16: [[u32; 256]; 16] = {
32         let mut tab = [[0; 256]; 16];
33         tab[0] = make_table(CASTAGNOLI_POLY);
34         for i in 0..256 {
35             let mut crc = tab[0][i];
36             for j in 1..16 {
37                 crc = (crc >> 8) ^ tab[0][crc as u8 as usize];
38                 tab[j][i] = crc;
39             }
40         }
41         tab
42     };
43 }
44 
45 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c(buf: &[u8]) -> u3246 pub fn crc32c(buf: &[u8]) -> u32 {
47     // I can't measure any difference between slice8 and slice16.
48     crc32c_slice8(buf)
49 }
50 
51 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c_slice16(mut buf: &[u8]) -> u3252 fn crc32c_slice16(mut buf: &[u8]) -> u32 {
53     let tab = &*TABLE;
54     let tab16 = &*TABLE16;
55     let mut crc: u32 = !0;
56     while buf.len() >= 16 {
57         crc ^= LE::read_u32(&buf[0..4]);
58         crc = tab16[0][buf[15] as usize]
59             ^ tab16[1][buf[14] as usize]
60             ^ tab16[2][buf[13] as usize]
61             ^ tab16[3][buf[12] as usize]
62             ^ tab16[4][buf[11] as usize]
63             ^ tab16[5][buf[10] as usize]
64             ^ tab16[6][buf[9] as usize]
65             ^ tab16[7][buf[8] as usize]
66             ^ tab16[8][buf[7] as usize]
67             ^ tab16[9][buf[6] as usize]
68             ^ tab16[10][buf[5] as usize]
69             ^ tab16[11][buf[4] as usize]
70             ^ tab16[12][(crc >> 24) as u8 as usize]
71             ^ tab16[13][(crc >> 16) as u8 as usize]
72             ^ tab16[14][(crc >> 8 ) as u8 as usize]
73             ^ tab16[15][(crc      ) as u8 as usize];
74         buf = &buf[16..];
75     }
76     for &b in buf {
77         crc = tab[((crc as u8) ^ b) as usize] ^ (crc >> 8);
78     }
79     !crc
80 }
81 
82 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c_slice8(mut buf: &[u8]) -> u3283 fn crc32c_slice8(mut buf: &[u8]) -> u32 {
84     let tab = &*TABLE;
85     let tab8 = &*TABLE16;
86     let mut crc: u32 = !0;
87     while buf.len() >= 8 {
88         crc ^= LE::read_u32(&buf[0..4]);
89         crc = tab8[0][buf[7] as usize]
90             ^ tab8[1][buf[6] as usize]
91             ^ tab8[2][buf[5] as usize]
92             ^ tab8[3][buf[4] as usize]
93             ^ tab8[4][(crc >> 24) as u8 as usize]
94             ^ tab8[5][(crc >> 16) as u8 as usize]
95             ^ tab8[6][(crc >> 8 ) as u8 as usize]
96             ^ tab8[7][(crc      ) as u8 as usize];
97         buf = &buf[8..];
98     }
99     for &b in buf {
100         crc = tab[((crc as u8) ^ b) as usize] ^ (crc >> 8);
101     }
102     !crc
103 }
104 
105 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c_slice4(mut buf: &[u8]) -> u32106 fn crc32c_slice4(mut buf: &[u8]) -> u32 {
107     let tab = &*TABLE;
108     let tab4 = &*TABLE16;
109     let mut crc: u32 = !0;
110     while buf.len() >= 4 {
111         crc ^= LE::read_u32(&buf[0..4]);
112         crc = tab4[0][(crc >> 24) as u8 as usize]
113             ^ tab4[1][(crc >> 16) as u8 as usize]
114             ^ tab4[2][(crc >> 8 ) as u8 as usize]
115             ^ tab4[3][(crc      ) as u8 as usize];
116         buf = &buf[4..];
117     }
118     for &b in buf {
119         crc = tab[((crc as u8) ^ b) as usize] ^ (crc >> 8);
120     }
121     !crc
122 }
123 
124 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c_multiple(buf: &[u8]) -> u32125 fn crc32c_multiple(buf: &[u8]) -> u32 {
126     let tab = &*TABLE;
127     let mut crc: u32 = !0;
128     for &b in buf {
129         crc = tab[((crc as u8) ^ b) as usize] ^ (crc >> 8);
130     }
131     !crc
132 }
133 
134 /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
crc32c_bitwise(buf: &[u8]) -> u32135 fn crc32c_bitwise(buf: &[u8]) -> u32 {
136     let mut crc: u32 = !0;
137     for &b in buf {
138         crc ^= b as u32;
139         for _ in 0..8 {
140             crc = (crc >> 1) ^ ((crc & 1) * CASTAGNOLI_POLY);
141         }
142     }
143     !crc
144 }
145 
make_table(poly: u32) -> [u32; 256]146 fn make_table(poly: u32) -> [u32; 256] {
147     let mut tab = [0; 256];
148     for i in 0u32..256u32 {
149         let mut crc = i;
150         for _ in 0..8 {
151             if crc & 1 == 1 {
152                 crc = (crc >> 1) ^ poly;
153             } else {
154                 crc >>= 1;
155             }
156         }
157         tab[i as usize] = crc;
158     }
159     tab
160 }
161