1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <array>
18 #include <utility>
19 
20 #include <folly/Bits.h>
21 #include <folly/hash/detail/ChecksumDetail.h>
22 
23 namespace folly {
24 
25 // Standard galois-field multiply.  The only modification is that a,
26 // b, m, and p are all bit-reflected.
27 //
28 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
gf_multiply_sw_1(size_t i,uint32_t p,uint32_t a,uint32_t b,uint32_t m)29 static constexpr uint32_t gf_multiply_sw_1(
30     size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
31   // clang-format off
32   return i == 32 ? p : gf_multiply_sw_1(
33       /* i = */ i + 1,
34       /* p = */ p ^ (-((b >> 31) & 1) & a),
35       /* a = */ (a >> 1) ^ (-(a & 1) & m),
36       /* b = */ b << 1,
37       /* m = */ m);
38   // clang-format on
39 }
gf_multiply_sw(uint32_t a,uint32_t b,uint32_t m)40 static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
41   return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
42 }
43 
gf_square_sw(uint32_t a,uint32_t m)44 static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
45   return gf_multiply_sw(a, a, m);
46 }
47 
48 namespace {
49 
50 template <size_t i, uint32_t m>
51 struct gf_powers_memo {
52   static constexpr uint32_t value =
53       gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
54 };
55 template <uint32_t m>
56 struct gf_powers_memo<0, m> {
57   static constexpr uint32_t value = m;
58 };
59 
60 template <uint32_t m>
61 struct gf_powers_make {
62   template <size_t... i>
operator ()folly::__anon042b30030111::gf_powers_make63   constexpr auto operator()(std::index_sequence<i...>) const {
64     return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
65   }
66 };
67 
68 } // namespace
69 
70 #if FOLLY_SSE_PREREQ(4, 2)
71 
72 // Reduction taken from
73 // https://www.nicst.de/crc.pdf
74 //
75 // This is an intrinsics-based implementation of listing 3.
gf_multiply_crc32c_hw(uint64_t crc1,uint64_t crc2,uint32_t)76 static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
77   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
78   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
79   const auto count = _mm_set_epi64x(0, 1);
80   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
81   const auto res1 = _mm_sll_epi64(res0, count);
82 
83   // Use hardware crc32c to do reduction from 64 -> 32 bytes
84   const auto res2 = _mm_cvtsi128_si64(res1);
85   const auto res3 = _mm_crc32_u32(0, res2);
86   const auto res4 = _mm_extract_epi32(res1, 1);
87   return res3 ^ res4;
88 }
89 
gf_multiply_crc32_hw(uint64_t crc1,uint64_t crc2,uint32_t)90 static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
91   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
92   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
93   const auto count = _mm_set_epi64x(0, 1);
94   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
95   const auto res1 = _mm_sll_epi64(res0, count);
96 
97   // Do barrett reduction of 64 -> 32 bytes
98   const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
99   const auto barrett_reduction_constants =
100       _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
101   const auto res2 = _mm_clmulepi64_si128(
102       _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
103   const auto res3 = _mm_clmulepi64_si128(
104       _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
105   return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
106 }
107 
108 #else
109 
gf_multiply_crc32c_hw(uint64_t,uint64_t,uint32_t)110 static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
111   return 0;
112 }
gf_multiply_crc32_hw(uint64_t,uint64_t,uint32_t)113 static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
114   return 0;
115 }
116 
117 #endif
118 
119 static constexpr uint32_t crc32c_m = 0x82f63b78;
120 static constexpr uint32_t crc32_m = 0xedb88320;
121 
122 /*
123  * Pre-calculated powers tables for crc32c and crc32.
124  */
125 static constexpr std::array<uint32_t, 62> const crc32c_powers =
126     gf_powers_make<crc32c_m>{}(std::make_index_sequence<62>{});
127 static constexpr std::array<uint32_t, 62> const crc32_powers =
128     gf_powers_make<crc32_m>{}(std::make_index_sequence<62>{});
129 
130 template <typename F>
crc32_append_zeroes(F mult,uint32_t crc,size_t len,uint32_t polynomial,std::array<uint32_t,62> const & powers_array)131 static uint32_t crc32_append_zeroes(
132     F mult,
133     uint32_t crc,
134     size_t len,
135     uint32_t polynomial,
136     std::array<uint32_t, 62> const& powers_array) {
137   auto powers = powers_array.data();
138 
139   // Append by multiplying by consecutive powers of two of the zeroes
140   // array
141   len >>= 2;
142 
143   while (len) {
144     // Advance directly to next bit set.
145     auto r = findFirstSet(len) - 1;
146     len >>= r;
147     powers += r;
148 
149     crc = mult(crc, *powers, polynomial);
150 
151     len >>= 1;
152     powers++;
153   }
154 
155   return crc;
156 }
157 
158 namespace detail {
159 
crc32_combine_sw(uint32_t crc1,uint32_t crc2,size_t crc2len)160 uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
161   return crc2 ^
162       crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
163 }
164 
crc32_combine_hw(uint32_t crc1,uint32_t crc2,size_t crc2len)165 uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
166   return crc2 ^
167       crc32_append_zeroes(
168              gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
169 }
170 
crc32c_combine_sw(uint32_t crc1,uint32_t crc2,size_t crc2len)171 uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
172   return crc2 ^
173       crc32_append_zeroes(
174              gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
175 }
176 
crc32c_combine_hw(uint32_t crc1,uint32_t crc2,size_t crc2len)177 uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
178   return crc2 ^
179       crc32_append_zeroes(
180              gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
181 }
182 
183 } // namespace detail
184 
185 } // namespace folly
186