1 /* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
6 * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
7 * tables for updating the shift register in one step with three exclusive-ors
8 * instead of four steps with four exclusive-ors. This results in about a
9 * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
10 */
11
12 /* @(#) $Id$ */
13
14 /*
15 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
16 protection on the static variables used to control the first-use generation
17 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
18 first call get_crc_table() to initialize the tables before allowing more than
19 one thread to use crc32().
20
21 DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
22 */
23
24 #ifdef MAKECRCH
25 # include <stdio.h>
26 # ifndef DYNAMIC_CRC_TABLE
27 # define DYNAMIC_CRC_TABLE
28 # endif /* !DYNAMIC_CRC_TABLE */
29 #endif /* MAKECRCH */
30
31 #include "zutil.h" /* for STDC and FAR definitions */
32
33 #define local static
34
35 /* Definitions for doing the crc four data bytes at a time. */
36 #if !defined(NOBYFOUR) && defined(Z_U4)
37 # define BYFOUR
38 #endif
39 #ifdef BYFOUR
40 local unsigned long crc32_little OF((unsigned long,
41 const unsigned char FAR *, unsigned));
42 local unsigned long crc32_big OF((unsigned long,
43 const unsigned char FAR *, unsigned));
44 # define TBLS 8
45 #else
46 # define TBLS 1
47 #endif /* BYFOUR */
48
49 /* Local functions for crc concatenation */
50 local unsigned long gf2_matrix_times OF((unsigned long *mat,
51 unsigned long vec));
52 local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
53 local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
54
55
56 #ifdef DYNAMIC_CRC_TABLE
57
58 local volatile int crc_table_empty = 1;
59 local z_crc_t FAR crc_table[TBLS][256];
60 local void make_crc_table OF((void));
61 #ifdef MAKECRCH
62 local void write_table OF((FILE *, const z_crc_t FAR *));
63 #endif /* MAKECRCH */
64 /*
65 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
66 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
67
68 Polynomials over GF(2) are represented in binary, one bit per coefficient,
69 with the lowest powers in the most significant bit. Then adding polynomials
70 is just exclusive-or, and multiplying a polynomial by x is a right shift by
71 one. If we call the above polynomial p, and represent a byte as the
72 polynomial q, also with the lowest power in the most significant bit (so the
73 byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
74 where a mod b means the remainder after dividing a by b.
75
76 This calculation is done using the shift-register method of multiplying and
77 taking the remainder. The register is initialized to zero, and for each
78 incoming bit, x^32 is added mod p to the register if the bit is a one (where
79 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
80 x (which is shifting right by one and adding x^32 mod p if the bit shifted
81 out is a one). We start with the highest power (least significant bit) of
82 q and repeat for all eight bits of q.
83
84 The first table is simply the CRC of all possible eight bit values. This is
85 all the information needed to generate CRCs on data a byte at a time for all
86 combinations of CRC register values and incoming bytes. The remaining tables
87 allow for word-at-a-time CRC calculation for both big-endian and little-
88 endian machines, where a word is four bytes.
89 */
make_crc_table()90 local void make_crc_table()
91 {
92 z_crc_t c;
93 int n, k;
94 z_crc_t poly; /* polynomial exclusive-or pattern */
95 /* terms of polynomial defining this crc (except x^32): */
96 static volatile int first = 1; /* flag to limit concurrent making */
97 static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
98
99 /* See if another task is already doing this (not thread-safe, but better
100 than nothing -- significantly reduces duration of vulnerability in
101 case the advice about DYNAMIC_CRC_TABLE is ignored) */
102 if (first) {
103 first = 0;
104
105 /* make exclusive-or pattern from polynomial (0xedb88320UL) */
106 poly = 0;
107 for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
108 poly |= (z_crc_t)1 << (31 - p[n]);
109
110 /* generate a crc for every 8-bit value */
111 for (n = 0; n < 256; n++) {
112 c = (z_crc_t)n;
113 for (k = 0; k < 8; k++)
114 c = c & 1 ? poly ^ (c >> 1) : c >> 1;
115 crc_table[0][n] = c;
116 }
117
118 #ifdef BYFOUR
119 /* generate crc for each value followed by one, two, and three zeros,
120 and then the byte reversal of those as well as the first table */
121 for (n = 0; n < 256; n++) {
122 c = crc_table[0][n];
123 crc_table[4][n] = ZSWAP32(c);
124 for (k = 1; k < 4; k++) {
125 c = crc_table[0][c & 0xff] ^ (c >> 8);
126 crc_table[k][n] = c;
127 crc_table[k + 4][n] = ZSWAP32(c);
128 }
129 }
130 #endif /* BYFOUR */
131
132 crc_table_empty = 0;
133 }
134 else { /* not first */
135 /* wait for the other guy to finish (not efficient, but rare) */
136 while (crc_table_empty)
137 ;
138 }
139
140 #ifdef MAKECRCH
141 /* write out CRC tables to crc32.h */
142 {
143 FILE *out;
144
145 out = fopen("crc32.h", "w");
146 if (out == NULL) return;
147 fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
148 fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
149 fprintf(out, "local const z_crc_t FAR ");
150 fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
151 write_table(out, crc_table[0]);
152 # ifdef BYFOUR
153 fprintf(out, "#ifdef BYFOUR\n");
154 for (k = 1; k < 8; k++) {
155 fprintf(out, " },\n {\n");
156 write_table(out, crc_table[k]);
157 }
158 fprintf(out, "#endif\n");
159 # endif /* BYFOUR */
160 fprintf(out, " }\n};\n");
161 fclose(out);
162 }
163 #endif /* MAKECRCH */
164 }
165
166 #ifdef MAKECRCH
write_table(out,table)167 local void write_table(out, table)
168 FILE *out;
169 const z_crc_t FAR *table;
170 {
171 int n;
172
173 for (n = 0; n < 256; n++)
174 fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ",
175 (unsigned long)(table[n]),
176 n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
177 }
178 #endif /* MAKECRCH */
179
180 #else /* !DYNAMIC_CRC_TABLE */
181 /* ========================================================================
182 * Tables of CRC-32s of all single-byte values, made by make_crc_table().
183 */
184 #include "crc32.h"
185 #endif /* DYNAMIC_CRC_TABLE */
186
187 /* =========================================================================
188 * This function can be used by asm versions of crc32()
189 */
get_crc_table()190 const z_crc_t FAR * ZEXPORT get_crc_table()
191 {
192 #ifdef DYNAMIC_CRC_TABLE
193 if (crc_table_empty)
194 make_crc_table();
195 #endif /* DYNAMIC_CRC_TABLE */
196 return (const z_crc_t FAR *)crc_table;
197 }
198
199 /* ========================================================================= */
200 #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
201 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
202 #define DO4 DO1; DO1; DO1; DO1
203
204 /* ========================================================================= */
crc32(crc,buf,len)205 unsigned long ZEXPORT crc32(crc, buf, len)
206 unsigned long crc;
207 const unsigned char FAR *buf;
208 uInt len;
209 {
210 if (buf == Z_NULL) return 0UL;
211
212 #ifdef DYNAMIC_CRC_TABLE
213 if (crc_table_empty)
214 make_crc_table();
215 #endif /* DYNAMIC_CRC_TABLE */
216
217 #ifdef BYFOUR
218 if (sizeof(void *) == sizeof(ptrdiff_t)) {
219 z_crc_t endian;
220
221 endian = 1;
222 if (*((unsigned char *)(&endian)))
223 return crc32_little(crc, buf, len);
224 else
225 return crc32_big(crc, buf, len);
226 }
227 #endif /* BYFOUR */
228 crc = crc ^ 0xffffffffUL;
229
230 #ifdef CRC32_UNROLL_LESS
231 while (len >= 4) {
232 DO4;
233 len -= 4;
234 }
235 #else
236 while (len >= 8) {
237 DO8;
238 len -= 8;
239 }
240 #endif
241
242 if (len) do {
243 DO1;
244 } while (--len);
245 return crc ^ 0xffffffffUL;
246 }
247
248 #ifdef BYFOUR
249
250 /* ========================================================================= */
251 #define DOLIT4 c ^= *buf4++; \
252 c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
253 crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
254 #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
255
256 /* ========================================================================= */
crc32_little(crc,buf,len)257 local unsigned long crc32_little(crc, buf, len)
258 unsigned long crc;
259 const unsigned char FAR *buf;
260 unsigned len;
261 {
262 register z_crc_t c;
263 register const z_crc_t FAR *buf4;
264
265 c = (z_crc_t)crc;
266 c = ~c;
267 while (len && ((ptrdiff_t)buf & 3)) {
268 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
269 len--;
270 }
271
272 buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
273
274 #ifndef CRC32_UNROLL_LESS
275 while (len >= 32) {
276 DOLIT32;
277 len -= 32;
278 }
279 #endif
280
281 while (len >= 4) {
282 DOLIT4;
283 len -= 4;
284 }
285 buf = (const unsigned char FAR *)buf4;
286
287 if (len) do {
288 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
289 } while (--len);
290 c = ~c;
291 return (unsigned long)c;
292 }
293
294 /* ========================================================================= */
295 #define DOBIG4 c ^= *++buf4; \
296 c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
297 crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
298 #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
299
300 /* ========================================================================= */
crc32_big(crc,buf,len)301 local unsigned long crc32_big(crc, buf, len)
302 unsigned long crc;
303 const unsigned char FAR *buf;
304 unsigned len;
305 {
306 register z_crc_t c;
307 register const z_crc_t FAR *buf4;
308
309 c = ZSWAP32((z_crc_t)crc);
310 c = ~c;
311 while (len && ((ptrdiff_t)buf & 3)) {
312 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
313 len--;
314 }
315
316 buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
317 buf4--;
318 while (len >= 32) {
319 DOBIG32;
320 len -= 32;
321 }
322 while (len >= 4) {
323 DOBIG4;
324 len -= 4;
325 }
326 buf4++;
327 buf = (const unsigned char FAR *)buf4;
328
329 if (len) do {
330 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
331 } while (--len);
332 c = ~c;
333 return (unsigned long)(ZSWAP32(c));
334 }
335
336 #endif /* BYFOUR */
337
#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */

/* =========================================================================
 * Multiply the GF(2) matrix mat by the bit vector vec.
 *
 *   mat -- GF2_DIM rows; row n is exclusive-ored into the result when bit n
 *          of vec is set
 *   vec -- bit vector; only the low GF2_DIM bits are used
 *
 * Returns the exclusive-or of the selected rows.
 */
local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec)
{
    unsigned long sum = 0;
    int n;

    /* Stop after GF2_DIM rows: where unsigned long is wider than GF2_DIM
       bits, a vec with higher bits set must not index past the end of mat. */
    for (n = 0; vec != 0 && n < GF2_DIM; n++) {
        if (vec & 1)
            sum ^= mat[n];
        vec >>= 1;
    }
    return sum;
}
356
357 /* ========================================================================= */
gf2_matrix_square(square,mat)358 local void gf2_matrix_square(square, mat)
359 unsigned long *square;
360 unsigned long *mat;
361 {
362 int n;
363
364 for (n = 0; n < GF2_DIM; n++)
365 square[n] = gf2_matrix_times(mat, mat[n]);
366 }
367
368 /* ========================================================================= */
crc32_combine_(crc1,crc2,len2)369 local uLong crc32_combine_(crc1, crc2, len2)
370 uLong crc1;
371 uLong crc2;
372 z_off64_t len2;
373 {
374 int n;
375 unsigned long row;
376 unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
377 unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
378
379 /* degenerate case (also disallow negative lengths) */
380 if (len2 <= 0)
381 return crc1;
382
383 /* put operator for one zero bit in odd */
384 odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
385 row = 1;
386 for (n = 1; n < GF2_DIM; n++) {
387 odd[n] = row;
388 row <<= 1;
389 }
390
391 /* put operator for two zero bits in even */
392 gf2_matrix_square(even, odd);
393
394 /* put operator for four zero bits in odd */
395 gf2_matrix_square(odd, even);
396
397 /* apply len2 zeros to crc1 (first square will put the operator for one
398 zero byte, eight zero bits, in even) */
399 do {
400 /* apply zeros operator for this bit of len2 */
401 gf2_matrix_square(even, odd);
402 if (len2 & 1)
403 crc1 = gf2_matrix_times(even, crc1);
404 len2 >>= 1;
405
406 /* if no more bits set, then done */
407 if (len2 == 0)
408 break;
409
410 /* another iteration of the loop with odd and even swapped */
411 gf2_matrix_square(odd, even);
412 if (len2 & 1)
413 crc1 = gf2_matrix_times(odd, crc1);
414 len2 >>= 1;
415
416 /* if no more bits set, then done */
417 } while (len2 != 0);
418
419 /* return combined crc */
420 crc1 ^= crc2;
421 return crc1;
422 }
423
424 /* ========================================================================= */
crc32_combine(crc1,crc2,len2)425 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
426 uLong crc1;
427 uLong crc2;
428 z_off_t len2;
429 {
430 return crc32_combine_(crc1, crc2, len2);
431 }
432
crc32_combine64(crc1,crc2,len2)433 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
434 uLong crc1;
435 uLong crc2;
436 z_off64_t len2;
437 {
438 return crc32_combine_(crc1, crc2, len2);
439 }
440
441 #include "deflate.h"
442
443 #ifdef HAVE_PCLMULQDQ
444 #include "x86.h"
445 extern void ZLIB_INTERNAL crc_fold_init(deflate_state *z_const s);
446 extern void ZLIB_INTERNAL crc_fold_copy(deflate_state *z_const s,
447 unsigned char *dst, z_const unsigned char *src, long len);
448 extern unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *z_const s);
449 #endif
450
crc_reset(deflate_state * const s)451 ZLIB_INTERNAL void crc_reset(deflate_state *const s)
452 {
453 #ifdef HAVE_PCLMULQDQ
454 if (x86_cpu_has_pclmulqdq) {
455 crc_fold_init(s);
456 return;
457 }
458 #endif
459 s->strm->adler = crc32(0L, Z_NULL, 0);
460 }
461
crc_finalize(deflate_state * const s)462 ZLIB_INTERNAL void crc_finalize(deflate_state *const s)
463 {
464 #ifdef HAVE_PCLMULQDQ
465 if (x86_cpu_has_pclmulqdq)
466 s->strm->adler = crc_fold_512to32(s);
467 #endif
468 }
469
copy_with_crc(z_streamp strm,Bytef * dst,long size)470 ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size)
471 {
472 #ifdef HAVE_PCLMULQDQ
473 if (x86_cpu_has_pclmulqdq) {
474 crc_fold_copy(strm->state, dst, strm->next_in, size);
475 return;
476 }
477 #endif
478 zmemcpy(dst, strm->next_in, size);
479 strm->adler = crc32(strm->adler, dst, size);
480 }
481
482