1 /*-------------------------------------------------------------------------
2  *
3  * pg_crc32c_sse42.c
4  *	  Compute CRC-32C checksum using Intel SSE 4.2 instructions.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/port/pg_crc32c_sse42.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "c.h"
16 
17 #include "port/pg_crc32c.h"
18 
19 #include <nmmintrin.h>
20 
pg_attribute_no_sanitize_alignment()21 pg_attribute_no_sanitize_alignment()
22 pg_crc32c
23 pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
24 {
25 	const unsigned char *p = data;
26 	const unsigned char *pend = p + len;
27 
28 	/*
29 	 * Process eight bytes of data at a time.
30 	 *
31 	 * NB: We do unaligned accesses here. The Intel architecture allows that,
32 	 * and performance testing didn't show any performance gain from aligning
33 	 * the begin address.
34 	 */
35 #ifdef __x86_64__
36 	while (p + 8 <= pend)
37 	{
38 		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
39 		p += 8;
40 	}
41 
42 	/* Process remaining full four bytes if any */
43 	if (p + 4 <= pend)
44 	{
45 		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
46 		p += 4;
47 	}
48 #else
49 
50 	/*
51 	 * Process four bytes at a time. (The eight byte instruction is not
52 	 * available on the 32-bit x86 architecture).
53 	 */
54 	while (p + 4 <= pend)
55 	{
56 		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
57 		p += 4;
58 	}
59 #endif							/* __x86_64__ */
60 
61 	/* Process any remaining bytes one at a time. */
62 	while (p < pend)
63 	{
64 		crc = _mm_crc32_u8(crc, *p);
65 		p++;
66 	}
67 
68 	return crc;
69 }
70