1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/experimental/crypto/LtHash.h>
18 
19 #include <folly/CpuId.h>
20 
21 #ifdef __SSE2__
22 #include <emmintrin.h>
23 #endif
24 #ifdef __AVX2__
25 #include <immintrin.h>
26 #endif
27 
28 #include <folly/Memory.h>
29 
30 namespace folly {
31 namespace crypto {
32 namespace detail {
33 
allocateCacheAlignedIOBuf(size_t size)34 folly::IOBuf allocateCacheAlignedIOBuf(size_t size) {
35   void* ptr = folly::aligned_malloc(size, kCacheLineSize);
36   if (ptr == nullptr) {
37     throw std::bad_alloc();
38   }
39   return folly::IOBuf(
40       folly::IOBuf::TAKE_OWNERSHIP,
41       ptr,
42       static_cast<uint64_t>(size), // capacity
43       0ULL, // initial size
44       [](void* addr, void* /* userData*/) { folly::aligned_free(addr); });
45 }
46 
allocateCacheAlignedIOBufUnique(size_t size)47 std::unique_ptr<folly::IOBuf> allocateCacheAlignedIOBufUnique(size_t size) {
48   return std::make_unique<folly::IOBuf>(allocateCacheAlignedIOBuf(size));
49 }
50 
isCacheAlignedAddress(const void * addr)51 bool isCacheAlignedAddress(const void* addr) {
52   auto addrValue = reinterpret_cast<size_t>(addr);
53   return (addrValue & (kCacheLineSize - 1)) == 0;
54 }
55 
56 // static
57 template <>
isAvailable()58 bool MathOperation<MathEngine::SIMPLE>::isAvailable() {
59   return true;
60 }
61 
62 // static
63 template <>
isAvailable()64 bool MathOperation<MathEngine::SSE2>::isAvailable() {
65   static const bool kIsAvailable =
66       CpuId().sse2() && MathOperation<MathEngine::SSE2>::isImplemented();
67   return kIsAvailable;
68 }
69 
70 // static
71 template <>
isAvailable()72 bool MathOperation<MathEngine::AVX2>::isAvailable() {
73   static const bool kIsAvailable =
74       CpuId().avx2() && MathOperation<MathEngine::AVX2>::isImplemented();
75   return kIsAvailable;
76 }
77 
78 // static
79 template <>
isAvailable()80 bool MathOperation<MathEngine::AUTO>::isAvailable() {
81   return true;
82 }
83 
84 // static
85 template <>
isImplemented()86 bool MathOperation<MathEngine::AUTO>::isImplemented() {
87   return true;
88 }
89 
90 // static
91 template <>
add(uint64_t dataMask,size_t bitsPerElement,folly::ByteRange b1,folly::ByteRange b2,folly::MutableByteRange out)92 void MathOperation<MathEngine::AUTO>::add(
93     uint64_t dataMask,
94     size_t bitsPerElement,
95     folly::ByteRange b1,
96     folly::ByteRange b2,
97     folly::MutableByteRange out) {
98   // Note: implementation is a function pointer that is initialized to point
99   // at the fastest available implementation the first time this function is
100   // called.
101   static auto implementation = []() {
102     if (MathOperation<MathEngine::AVX2>::isAvailable()) {
103       LOG(INFO) << "Selected AVX2 MathEngine for add() operation";
104       return MathOperation<MathEngine::AVX2>::add;
105     } else if (MathOperation<MathEngine::SSE2>::isAvailable()) {
106       LOG(INFO) << "Selected SSE2 MathEngine for add() operation";
107       return MathOperation<MathEngine::SSE2>::add;
108     } else {
109       LOG(INFO) << "Selected SIMPLE MathEngine for add() operation";
110       return MathOperation<MathEngine::SIMPLE>::add;
111     }
112   }();
113   implementation(dataMask, bitsPerElement, b1, b2, out);
114 }
115 
116 // static
117 template <>
sub(uint64_t dataMask,size_t bitsPerElement,folly::ByteRange b1,folly::ByteRange b2,folly::MutableByteRange out)118 void MathOperation<MathEngine::AUTO>::sub(
119     uint64_t dataMask,
120     size_t bitsPerElement,
121     folly::ByteRange b1,
122     folly::ByteRange b2,
123     folly::MutableByteRange out) {
124   // Note: implementation is a function pointer that is initialized to point
125   // at the fastest available implementation the first time this function is
126   // called.
127   static auto implementation = []() {
128     if (MathOperation<MathEngine::AVX2>::isAvailable()) {
129       LOG(INFO) << "Selected AVX2 MathEngine for sub() operation";
130       return MathOperation<MathEngine::AVX2>::sub;
131     } else if (MathOperation<MathEngine::SSE2>::isAvailable()) {
132       LOG(INFO) << "Selected SSE2 MathEngine for sub() operation";
133       return MathOperation<MathEngine::SSE2>::sub;
134     } else {
135       LOG(INFO) << "Selected SIMPLE MathEngine for sub() operation";
136       return MathOperation<MathEngine::SIMPLE>::sub;
137     }
138   }();
139   implementation(dataMask, bitsPerElement, b1, b2, out);
140 }
141 
142 // static
143 template <>
clearPaddingBits(uint64_t dataMask,folly::MutableByteRange buf)144 void MathOperation<MathEngine::AUTO>::clearPaddingBits(
145     uint64_t dataMask, folly::MutableByteRange buf) {
146   // Note: implementation is a function pointer that is initialized to point
147   // at the fastest available implementation the first time this function is
148   // called.
149   static auto implementation = []() {
150     if (MathOperation<MathEngine::AVX2>::isAvailable()) {
151       LOG(INFO) << "Selected AVX2 MathEngine for clearPaddingBits() operation";
152       return MathOperation<MathEngine::AVX2>::clearPaddingBits;
153     } else if (MathOperation<MathEngine::SSE2>::isAvailable()) {
154       LOG(INFO) << "Selected SSE2 MathEngine for clearPaddingBits() operation";
155       return MathOperation<MathEngine::SSE2>::clearPaddingBits;
156     } else {
157       LOG(INFO)
158           << "Selected SIMPLE MathEngine for clearPaddingBits() operation";
159       return MathOperation<MathEngine::SIMPLE>::clearPaddingBits;
160     }
161   }();
162   implementation(dataMask, buf);
163 }
164 
165 // static
166 template <>
checkPaddingBits(uint64_t dataMask,folly::ByteRange buf)167 bool MathOperation<MathEngine::AUTO>::checkPaddingBits(
168     uint64_t dataMask, folly::ByteRange buf) {
169   // Note: implementation is a function pointer that is initialized to point
170   // at the fastest available implementation the first time this function is
171   // called.
172   static auto implementation = []() {
173     if (MathOperation<MathEngine::AVX2>::isAvailable()) {
174       LOG(INFO) << "Selected AVX2 MathEngine for checkPaddingBits() operation";
175       return MathOperation<MathEngine::AVX2>::checkPaddingBits;
176     } else if (MathOperation<MathEngine::SSE2>::isAvailable()) {
177       LOG(INFO) << "Selected SSE2 MathEngine for checkPaddingBits() operation";
178       return MathOperation<MathEngine::SSE2>::checkPaddingBits;
179     } else {
180       LOG(INFO)
181           << "Selected SIMPLE MathEngine for checkPaddingBits() operation";
182       return MathOperation<MathEngine::SIMPLE>::checkPaddingBits;
183     }
184   }();
185   return implementation(dataMask, buf);
186 }
187 
// Explicit instantiation definition of MathOperation for the AUTO engine.
// The AUTO members defined above in this file are explicit specializations.
// NOTE(review): presumably this exists so that any remaining members of the
// AUTO specialization are emitted in this translation unit - confirm against
// the declaration in LtHash.h.
template struct MathOperation<MathEngine::AUTO>;
189 
190 } // namespace detail
191 } // namespace crypto
192 } // namespace folly
193