1 /* $Id: crc32_sse.cpp 597189 2019-11-18 20:14:52Z gotvyans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergiy Gotvyanskyy
27 *
28 * File Description:
29 * CPU specific implementations of CRC32
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34
35 #include <util/impl/ct_crc32.hpp>
36
37 #if defined(NCBI_SSE) && NCBI_SSE >= 42
38 #include <immintrin.h>
39 #endif
40
41 #if 1
42 static
43 constexpr
44 auto g_crc32_table = compile_time_bits::ct_crc32<compile_time_bits::platform_poly>::MakeCRC32Table{}();
45 #else
46 struct cont_t
47 {
48 uint32_t m_data[256];
49 };
50 static constexpr cont_t g_crc32_table {{
51 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
52 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
53 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
54 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
55 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
56 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
57 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
58 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
59 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
60 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
61 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
62 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
63 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
64 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
65 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
66 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
67 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
68 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
69 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
70 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
71 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
72 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
73 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
74 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
75 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
76 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
77 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
78 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
79 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
80 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
81 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
82 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
83 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
84 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
85 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
86 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
87 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
88 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
89 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
90 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
91 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
92 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
93 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
94 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
95 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
96 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
97 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
98 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
99 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
100 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
101 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
102 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
103 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
104 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
105 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
106 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
107 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
108 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
109 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
110 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
111 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
112 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
113 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
114 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
115 }};
116 #endif
117
118 static
119 // this should generate non branching lower case functionality
convert_lower_case(int c)120 int convert_lower_case(int c) noexcept
121 {
122 int offset = 0;
123 if (c >= int('A'))
124 offset = 'a' - 'A';
125 if (c > int('Z'))
126 offset = 0;
127 return c + offset;
128 }
129
130 struct tabled_crc32
131 {
132 using type = uint32_t;
133
updatetabled_crc32134 static type update(type crc, uint8_t b) noexcept
135 {
136 crc = g_crc32_table.m_data[(crc ^ b) & 0xFFL] ^ (crc >> 8);
137
138 return crc;
139 }
updatetabled_crc32140 static type update(type crc, type d32) noexcept
141 {
142 size_t len = 4;
143 while (len--) {
144 uint8_t b = d32;
145 d32 = d32 >> 8;
146 crc = update(crc, b);
147 }
148 return crc;
149 }
150 };
151
152 #if defined(NCBI_SSE) && NCBI_SSE >= 42
153 struct sse42_crc32
154 {
155 using type = uint32_t;
156
updatesse42_crc32157 static type update(type crc, type d32) noexcept
158 {
159 return _mm_crc32_u32(crc, d32);
160 }
updatesse42_crc32161 static type update(type crc, uint8_t d8) noexcept
162 {
163 return _mm_crc32_u8(crc, d8);
164 }
updatesse42_crc32165 static type update(type crc, const uint8_t *buf, size_t len) noexcept
166 {
167 #ifdef SLOW_UNALIGNED_MEMORY_ACCESS
168 size_t aligned = (uintptr_t(buf) & 2);
169 while (aligned-- && len--) {
170 uint8_t b = *buf++;
171 crc = _mm_crc32_u8(crc, b);
172 }
173 #endif
174
175 while (len > 3) {
176 uint32_t d32 = *(const uint32_t*)buf;
177 buf += 4;
178 len -= 4;
179 crc = _mm_crc32_u32(crc, d32);
180 }
181
182 while (len--) {
183 uint8_t b = *buf++;
184 crc = _mm_crc32_u8(crc, b);
185 }
186 return crc;
187 }
188 };
189
190 template<ncbi::NStr::ECase case_sensitive>
sse42(const char * s,size_t size)191 uint32_t ct::SaltedCRC32<case_sensitive>::sse42(const char* s, size_t size) noexcept
192 {
193 uint32_t len = (uint32_t)size;
194 uint32_t hash = sse42_crc32::update(0, len);
195 while (len--)
196 {
197 int c = static_cast<uint8_t>(*s++);
198 if (case_sensitive == ncbi::NStr::eNocase)
199 c = convert_lower_case(c);
200 hash = sse42_crc32::update(hash, static_cast<uint8_t>(c));
201 }
202 return hash;
203 }
204
205 #endif
206
207 template<ncbi::NStr::ECase case_sensitive>
general(const char * s,size_t size)208 uint32_t ct::SaltedCRC32<case_sensitive>::general(const char* s, size_t size) noexcept
209 {
210 uint32_t len = (uint32_t)size;
211 uint32_t hash = tabled_crc32::update(0, len);
212 while (len--)
213 {
214 int c = static_cast<uint8_t>(*s++);
215 if (case_sensitive == ncbi::NStr::eNocase)
216 c = convert_lower_case(c);
217 hash = tabled_crc32::update(hash, static_cast<uint8_t>(c));
218 }
219 return hash;
220 }
221
222 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::rt(const char* s, size_t size) noexcept;
223 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::rt(const char* s, size_t size) noexcept;
224 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::general(const char* s, size_t size) noexcept;
225 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::general(const char* s, size_t size) noexcept;
226 #if defined(NCBI_SSE) && NCBI_SSE >= 42
227 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::sse42(const char* s, size_t size) noexcept;
228 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::sse42(const char* s, size_t size) noexcept;
229 #endif
230