1 /*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License").
5 * You may not use this file except in compliance with the License.
6 * A copy of the License is located at
7 *
8 * http://aws.amazon.com/apache2.0
9 *
10 * or in the "license" file accompanying this file. This file is distributed
11 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12 * express or implied. See the License for the specific language governing
13 * permissions and limitations under the License.
14 */
15
16 #include "s2n_pq.h"
17
/* Run-time switches for the optimized PQ implementations. Each one starts out
 * off and is only changed by the s2n_try_enable_*() and s2n_disable_*()
 * functions in this file. */

/* SIKE p434 Round-3 assembly */
static bool sikep434r3_asm_enabled = false;

/* BIKE Round-3: one switch per supported optimization level */
static bool bike_r3_avx2_enabled = false;
static bool bike_r3_avx512_enabled = false;
static bool bike_r3_pclmul_enabled = false;
static bool bike_r3_vpclmul_enabled = false;

/* Kyber-512 Round-3 AVX2 + BMI2 implementation */
static bool kyber512r3_avx2_bmi2_enabled = false;
27
28 #if defined(S2N_CPUID_AVAILABLE)
29 /* https://en.wikipedia.org/wiki/CPUID */
30 #include <cpuid.h>
31
/* CPUID leaf and sub-leaf numbers passed to s2n_get_cpuid_count() */
#define PROCESSOR_INFO_AND_FEATURES    1
#define EXTENDED_FEATURES_LEAF         7
#define EXTENDED_FEATURES_SUBLEAF_ZERO 0

/* Older gcc and clang releases ship a cpuid.h that lacks bit_ADX, bit_BMI2
 * and __get_cpuid_count(), so provide the two masks when they are missing. */
#ifndef bit_ADX
#define bit_ADX (1 << 19)
#endif

#ifndef bit_BMI2
#define bit_BMI2 (1 << 8)
#endif

/* Feature masks used by the BIKE checks; the prefix names the CPUID output
 * register that each mask is tested against. */
#define EBX_BIT_AVX2    (1 << 5)
#define EBX_BIT_AVX512  (1 << 16)
#define ECX_BIT_VPCLMUL (1 << 10)
#define ECX_BIT_PCLMUL  (1 << 1)
52
/*
 * Runs CPUID for the given leaf and sub-leaf and stores the four resulting
 * register values through eax, ebx, ecx and edx.
 *
 * Returns false, without writing any output, when an output pointer is NULL
 * or when __get_cpuid_max() reports that the requested leaf is not available.
 * Returns true once all four outputs have been written.
 */
bool s2n_get_cpuid_count(uint32_t leaf, uint32_t sub_leaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
    /* __cpuid_count() stores through all four pointers unconditionally, so
     * reject a missing output here instead of dereferencing it. */
    if (!eax || !ebx || !ecx || !edx) {
        return false;
    }

    /* 0x80000000 probes for extended cpuid info: masking the leaf down to its
     * top bit selects the range whose highest supported leaf is returned. */
    uint32_t max_level = __get_cpuid_max(leaf & 0x80000000, 0);

    if (max_level == 0 || max_level < leaf) {
        return false;
    }

    __cpuid_count(leaf, sub_leaf, *eax, *ebx, *ecx, *edx);
    return true;
}
64
65 /* https://en.wikipedia.org/wiki/Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2) */
s2n_cpu_supports_bmi2()66 bool s2n_cpu_supports_bmi2() {
67 uint32_t eax, ebx, ecx, edx;
68 if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
69 return false;
70 }
71
72 return (ebx & bit_BMI2);
73 }
74
75 /* https://en.wikipedia.org/wiki/Intel_ADX */
s2n_cpu_supports_adx()76 bool s2n_cpu_supports_adx() {
77 uint32_t eax, ebx, ecx, edx;
78 if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
79 return false;
80 }
81
82 return (ebx & bit_ADX);
83 }
84
s2n_cpu_supports_avx2()85 bool s2n_cpu_supports_avx2() {
86 uint32_t eax, ebx, ecx, edx;
87 if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
88 return false;
89 }
90
91 return (ebx & EBX_BIT_AVX2);
92 }
93
/*
 * Reports whether the CPU can run the sikep434r3 assembly that was built.
 *
 * - Without S2N_SIKE_P434_R3_ASM the assembly was not compiled in: false.
 * - The assembly always requires BMI2.
 * - If it was compiled with ADX support (S2N_ADX), ADX is also required at
 *   run time.
 */
bool s2n_cpu_supports_sikep434r3_asm(void) {
#if !defined(S2N_SIKE_P434_R3_ASM)
    /* sikep434r3 assembly was not supported at compile time */
    return false;
#elif defined(S2N_ADX)
    return s2n_cpu_supports_bmi2() && s2n_cpu_supports_adx();
#else
    return s2n_cpu_supports_bmi2();
#endif
}
108
/*
 * Reports whether the BIKE Round-3 AVX2 code can be used: it must have been
 * compiled in (S2N_BIKE_R3_AVX2) and the CPU must report AVX2.
 */
bool s2n_cpu_supports_bike_r3_avx2(void) {
#if defined(S2N_BIKE_R3_AVX2)
    /* Same CPUID query and EBX_BIT_AVX2 test as the generic AVX2 check, so
     * reuse it instead of repeating the query here. */
    return s2n_cpu_supports_avx2();
#else
    return false;
#endif
}
120
/*
 * Reports whether the BIKE Round-3 AVX512 code can be used: it must have been
 * compiled in (S2N_BIKE_R3_AVX512) and EBX_BIT_AVX512 must be set in EBX of
 * the extended-features CPUID leaf. Returns false when that leaf cannot be
 * queried.
 */
bool s2n_cpu_supports_bike_r3_avx512(void) {
#if defined(S2N_BIKE_R3_AVX512)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (ebx & EBX_BIT_AVX512) != 0;
#else
    return false;
#endif
}
132
/*
 * Reports whether the BIKE Round-3 PCLMUL code can be used: it must have been
 * compiled in (S2N_BIKE_R3_PCLMUL) and ECX_BIT_PCLMUL must be set in ECX of
 * the PROCESSOR_INFO_AND_FEATURES CPUID leaf. Returns false when that leaf
 * cannot be queried.
 */
bool s2n_cpu_supports_bike_r3_pclmul(void) {
#if defined(S2N_BIKE_R3_PCLMUL)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(PROCESSOR_INFO_AND_FEATURES, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (ecx & ECX_BIT_PCLMUL) != 0;
#else
    return false;
#endif
}
144
/*
 * Reports whether ECX_BIT_VPCLMUL is set in ECX of the extended-features
 * CPUID leaf. Returns false when that leaf cannot be queried, or when the
 * build guard below is not defined.
 *
 * NOTE(review): the compile-time guard is S2N_BIKE_R3_AVX512, not a
 * VPCLMUL-specific macro. This may be deliberate (the VPCLMUL path is only
 * enabled together with AVX512) or a copy-paste from the AVX512 check;
 * confirm against the build definitions.
 */
bool s2n_cpu_supports_bike_r3_vpclmul(void) {
#if defined(S2N_BIKE_R3_AVX512)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (ecx & ECX_BIT_VPCLMUL) != 0;
#else
    return false;
#endif
}
156
/*
 * Reports whether the Kyber-512 Round-3 AVX2/BMI2 code can be used: it must
 * have been compiled in (S2N_KYBER512R3_AVX2_BMI2) and the CPU must report
 * both BMI2 and AVX2.
 */
bool s2n_cpu_supports_kyber512r3_avx2_bmi2(void) {
#if defined(S2N_KYBER512R3_AVX2_BMI2)
    return s2n_cpu_supports_bmi2() && s2n_cpu_supports_avx2();
#else
    return false;
#endif
}
164
165 #else /* defined(S2N_CPUID_AVAILABLE) */
166
/* If CPUID is not available, the necessary run-time checks cannot be
 * performed, so every s2n_cpu_supports_*() fallback reports no support. */

/* Fallback without CPUID: the sikep434r3 assembly is never reported usable. */
bool s2n_cpu_supports_sikep434r3_asm(void) {
    return false;
}
171
/* Fallback without CPUID: the BIKE Round-3 AVX2 code is never reported usable. */
bool s2n_cpu_supports_bike_r3_avx2(void) {
    return false;
}
175
/* Fallback without CPUID: the BIKE Round-3 AVX512 code is never reported usable. */
bool s2n_cpu_supports_bike_r3_avx512(void) {
    return false;
}
179
/* Fallback without CPUID: the BIKE Round-3 PCLMUL code is never reported usable. */
bool s2n_cpu_supports_bike_r3_pclmul(void) {
    return false;
}
183
/* Fallback without CPUID: the BIKE Round-3 VPCLMUL code is never reported usable. */
bool s2n_cpu_supports_bike_r3_vpclmul(void) {
    return false;
}
187
/* Fallback without CPUID: the Kyber-512 Round-3 AVX2/BMI2 code is never reported usable. */
bool s2n_cpu_supports_kyber512r3_avx2_bmi2(void) {
    return false;
}
191
192 #endif /* defined(S2N_CPUID_AVAILABLE) */
193
s2n_sikep434r3_asm_is_enabled()194 bool s2n_sikep434r3_asm_is_enabled() {
195 return sikep434r3_asm_enabled;
196 }
197
s2n_bike_r3_is_avx2_enabled()198 bool s2n_bike_r3_is_avx2_enabled() {
199 return bike_r3_avx2_enabled;
200 }
201
s2n_bike_r3_is_avx512_enabled()202 bool s2n_bike_r3_is_avx512_enabled() {
203 return bike_r3_avx512_enabled;
204 }
205
s2n_bike_r3_is_pclmul_enabled()206 bool s2n_bike_r3_is_pclmul_enabled() {
207 return bike_r3_pclmul_enabled;
208 }
209
s2n_bike_r3_is_vpclmul_enabled()210 bool s2n_bike_r3_is_vpclmul_enabled() {
211 return bike_r3_vpclmul_enabled;
212 }
213
s2n_kyber512r3_is_avx2_bmi2_enabled()214 bool s2n_kyber512r3_is_avx2_bmi2_enabled() {
215 return kyber512r3_avx2_bmi2_enabled;
216 }
217
/*
 * Reports whether PQ support is enabled.
 *
 * A build with S2N_NO_PQ defined always answers false. Otherwise the answer
 * is the negation of s2n_is_in_fips_mode().
 */
bool s2n_pq_is_enabled(void) {
#if defined(S2N_NO_PQ)
    return false;
#else
    return !s2n_is_in_fips_mode();
#endif
}
225
s2n_disable_sikep434r3_asm()226 S2N_RESULT s2n_disable_sikep434r3_asm() {
227 sikep434r3_asm_enabled = false;
228 return S2N_RESULT_OK;
229 }
230
s2n_disable_bike_r3_opt_all()231 S2N_RESULT s2n_disable_bike_r3_opt_all() {
232 bike_r3_avx2_enabled = false;
233 bike_r3_avx512_enabled = false;
234 bike_r3_pclmul_enabled = false;
235 bike_r3_vpclmul_enabled = false;
236 return S2N_RESULT_OK;
237 }
238
s2n_disable_kyber512r3_opt_avx2_bmi2()239 S2N_RESULT s2n_disable_kyber512r3_opt_avx2_bmi2() {
240 kyber512r3_avx2_bmi2_enabled = false;
241 return S2N_RESULT_OK;
242 }
243
s2n_try_enable_bike_r3_opt_pclmul()244 S2N_RESULT s2n_try_enable_bike_r3_opt_pclmul() {
245 if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_pclmul()) {
246 bike_r3_pclmul_enabled = true;
247 }
248 return S2N_RESULT_OK;
249 }
250
s2n_try_enable_bike_r3_opt_avx2()251 S2N_RESULT s2n_try_enable_bike_r3_opt_avx2() {
252 /* When AVX2 is available, PCLMUL is too by default. */
253 RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_pclmul(), S2N_ERR_SAFETY);
254 if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_avx2()) {
255 bike_r3_avx2_enabled = true;
256 }
257 return S2N_RESULT_OK;
258 }
259
s2n_try_enable_bike_r3_opt_avx512()260 S2N_RESULT s2n_try_enable_bike_r3_opt_avx512() {
261 /* When AVX512 is available, AVX2 is too by default. */
262 RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_avx2(), S2N_ERR_SAFETY);
263 if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_avx512()) {
264 bike_r3_avx512_enabled = true;
265 }
266 return S2N_RESULT_OK;
267 }
268
s2n_try_enable_bike_r3_opt_vpclmul()269 S2N_RESULT s2n_try_enable_bike_r3_opt_vpclmul() {
270 RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_avx512(), S2N_ERR_SAFETY);
271 /* Only Enable VPCLMUL if AVX512 is also supported. This is to because the BIKE R3 VPCLMUL requires 512-bit version
272 * of VPCLMUL, and not the 256-bit version that is available on AMD Zen 3 processors. */
273 if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_vpclmul() && s2n_bike_r3_is_avx512_enabled()) {
274 bike_r3_vpclmul_enabled = true;
275 }
276 return S2N_RESULT_OK;
277 }
278
s2n_try_enable_sikep434r3_asm()279 S2N_RESULT s2n_try_enable_sikep434r3_asm() {
280 if (s2n_pq_is_enabled() && s2n_cpu_supports_sikep434r3_asm()) {
281 sikep434r3_asm_enabled = true;
282 }
283 return S2N_RESULT_OK;
284 }
285
s2n_try_enable_kyber512r3_opt_avx2_bmi2()286 S2N_RESULT s2n_try_enable_kyber512r3_opt_avx2_bmi2() {
287 if (s2n_pq_is_enabled() && s2n_cpu_supports_kyber512r3_avx2_bmi2()) {
288 kyber512r3_avx2_bmi2_enabled = true;
289 }
290 return S2N_RESULT_OK;
291 }
292
s2n_bike_r3_x86_64_opt_init()293 S2N_RESULT s2n_bike_r3_x86_64_opt_init()
294 {
295 /* try_enable_vpclmul function recursively tries to enable
296 * all the optimizations (avx2, avx512, pclmul, vpclmul),
297 * so it's sufficient to call only this function. */
298 RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_vpclmul(), S2N_ERR_SAFETY);
299 return S2N_RESULT_OK;
300 }
301
s2n_pq_init()302 S2N_RESULT s2n_pq_init() {
303 RESULT_ENSURE_OK(s2n_try_enable_sikep434r3_asm(), S2N_ERR_SAFETY);
304 RESULT_ENSURE_OK(s2n_bike_r3_x86_64_opt_init(), S2N_ERR_SAFETY);
305 RESULT_ENSURE_OK(s2n_try_enable_kyber512r3_opt_avx2_bmi2(), S2N_ERR_SAFETY);
306
307 return S2N_RESULT_OK;
308 }
309