1 /*  $Id: crc32_sse.cpp 597189 2019-11-18 20:14:52Z gotvyans $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Sergiy Gotvyanskyy
27  *
28  * File Description:
29  *   CPU specific implementations of CRC32
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <util/impl/ct_crc32.hpp>
36 
37 #if defined(NCBI_SSE)  &&  NCBI_SSE >= 42
38     #include <immintrin.h>
39 #endif
40 
41 #if 1
42 static
43 constexpr
44 auto g_crc32_table = compile_time_bits::ct_crc32<compile_time_bits::platform_poly>::MakeCRC32Table{}();
45 #else
46 struct cont_t
47 {
48 	uint32_t m_data[256];
49 };
50 static constexpr cont_t g_crc32_table {{
51     0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
52     0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
53     0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
54     0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
55     0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
56     0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
57     0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
58     0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
59     0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
60     0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
61     0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
62     0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
63     0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
64     0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
65     0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
66     0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
67     0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
68     0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
69     0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
70     0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
71     0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
72     0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
73     0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
74     0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
75     0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
76     0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
77     0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
78     0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
79     0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
80     0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
81     0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
82     0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
83     0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
84     0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
85     0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
86     0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
87     0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
88     0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
89     0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
90     0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
91     0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
92     0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
93     0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
94     0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
95     0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
96     0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
97     0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
98     0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
99     0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
100     0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
101     0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
102     0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
103     0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
104     0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
105     0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
106     0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
107     0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
108     0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
109     0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
110     0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
111     0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
112     0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
113     0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
114     0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
115   }};
116 #endif
117 
118 static
119 // this should generate non branching lower case functionality
convert_lower_case(int c)120 int convert_lower_case(int c) noexcept
121 {
122     int offset = 0;
123     if (c >= int('A'))
124         offset = 'a' - 'A';
125     if (c > int('Z'))
126         offset = 0;
127     return c + offset;
128 }
129 
130 struct tabled_crc32
131 {
132     using type = uint32_t;
133 
updatetabled_crc32134     static type update(type crc, uint8_t b) noexcept
135     {
136         crc = g_crc32_table.m_data[(crc ^ b) & 0xFFL] ^ (crc >> 8);
137 
138         return crc;
139     }
updatetabled_crc32140     static type update(type crc, type d32) noexcept
141     {
142         size_t len = 4;
143         while (len--) {
144             uint8_t b = d32;
145             d32 = d32 >> 8;
146             crc = update(crc, b);
147         }
148         return crc;
149     }
150 };
151 
152 #if defined(NCBI_SSE)  &&  NCBI_SSE >= 42
153 struct sse42_crc32
154 {
155     using type = uint32_t;
156 
updatesse42_crc32157     static type update(type crc, type d32) noexcept
158     {
159         return _mm_crc32_u32(crc, d32);
160     }
updatesse42_crc32161     static type update(type crc, uint8_t d8) noexcept
162     {
163         return _mm_crc32_u8(crc, d8);
164     }
updatesse42_crc32165     static type update(type crc, const uint8_t *buf, size_t len) noexcept
166     {
167 #ifdef SLOW_UNALIGNED_MEMORY_ACCESS
168         size_t aligned = (uintptr_t(buf) & 2);
169         while (aligned-- && len--) {
170             uint8_t b = *buf++;
171             crc = _mm_crc32_u8(crc, b);
172         }
173 #endif
174 
175         while (len > 3) {
176             uint32_t d32 = *(const uint32_t*)buf;
177             buf += 4;
178             len -= 4;
179             crc = _mm_crc32_u32(crc, d32);
180         }
181 
182         while (len--) {
183             uint8_t b = *buf++;
184             crc = _mm_crc32_u8(crc, b);
185         }
186         return crc;
187     }
188 };
189 
190 template<ncbi::NStr::ECase case_sensitive>
sse42(const char * s,size_t size)191 uint32_t ct::SaltedCRC32<case_sensitive>::sse42(const char* s, size_t size) noexcept
192 {
193     uint32_t len = (uint32_t)size;
194     uint32_t hash = sse42_crc32::update(0, len);
195     while (len--)
196     {
197         int c = static_cast<uint8_t>(*s++);
198         if (case_sensitive == ncbi::NStr::eNocase)
199             c = convert_lower_case(c);
200         hash = sse42_crc32::update(hash, static_cast<uint8_t>(c));
201     }
202     return hash;
203 }
204 
205 #endif
206 
207 template<ncbi::NStr::ECase case_sensitive>
general(const char * s,size_t size)208 uint32_t ct::SaltedCRC32<case_sensitive>::general(const char* s, size_t size) noexcept
209 {
210     uint32_t len = (uint32_t)size;
211     uint32_t hash = tabled_crc32::update(0, len);
212     while (len--)
213     {
214         int c = static_cast<uint8_t>(*s++);
215         if (case_sensitive == ncbi::NStr::eNocase)
216             c = convert_lower_case(c);
217         hash = tabled_crc32::update(hash, static_cast<uint8_t>(c));
218     }
219     return hash;
220 }
221 
222 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::rt(const char* s, size_t size) noexcept;
223 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::rt(const char* s, size_t size) noexcept;
224 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::general(const char* s, size_t size) noexcept;
225 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::general(const char* s, size_t size) noexcept;
226 #if defined(NCBI_SSE)  &&  NCBI_SSE >= 42
227 template uint32_t ct::SaltedCRC32<ncbi::NStr::eNocase>::sse42(const char* s, size_t size) noexcept;
228 template uint32_t ct::SaltedCRC32<ncbi::NStr::eCase>::sse42(const char* s, size_t size) noexcept;
229 #endif
230