/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
15 
16 #include "s2n_pq.h"
17 
/* Run-time switch for the sikep434r3 assembly implementation. It starts out
 * disabled and is only changed by the enable/disable routines in this file. */
static bool sikep434r3_asm_enabled = false;

/* BIKE Round-3 code supports several levels of optimization. Each level has
 * its own run-time switch; all of them start out disabled. */
static bool bike_r3_avx2_enabled    = false;
static bool bike_r3_avx512_enabled  = false;
static bool bike_r3_pclmul_enabled  = false;
static bool bike_r3_vpclmul_enabled = false;

/* Run-time switch for the kyber512r3 AVX2/BMI2 implementation; starts out
 * disabled. */
static bool kyber512r3_avx2_bmi2_enabled = false;
27 
28 #if defined(S2N_CPUID_AVAILABLE)
29 /* https://en.wikipedia.org/wiki/CPUID */
30 #include <cpuid.h>
31 
/* CPUID leaf and sub-leaf numbers used by the feature checks in this file. */
#define PROCESSOR_INFO_AND_FEATURES    1
#define EXTENDED_FEATURES_LEAF         7
#define EXTENDED_FEATURES_SUBLEAF_ZERO 0

/* The cpuid.h header included with older versions of gcc and
 * clang doesn't include definitions for bit_ADX, bit_BMI2, or
 * __get_cpuid_count(). Provide the two bit masks ourselves when
 * the header did not. Both are tested against EBX of the
 * extended-features leaf. */
#if !defined(bit_ADX)
    #define bit_ADX (1 << 19)
#endif

#if !defined(bit_BMI2)
    #define bit_BMI2 (1 << 8)
#endif

/* BIKE related CPU features. The EBX_* masks and ECX_BIT_VPCLMUL are tested
 * against the extended-features leaf; ECX_BIT_PCLMUL is tested against the
 * processor-info-and-features leaf. */
#define EBX_BIT_AVX2    (1 << 5)
#define EBX_BIT_AVX512  (1 << 16)
#define ECX_BIT_VPCLMUL (1 << 10)
#define ECX_BIT_PCLMUL  (1 << 1)
52 
/* Executes CPUID for the given leaf / sub-leaf and stores the four result
 * registers through eax, ebx, ecx and edx.
 *
 * Returns true once the outputs have been filled in. Returns false, leaving
 * the outputs untouched, when any output pointer is null or when
 * __get_cpuid_max() reports that the requested leaf is out of range. */
bool s2n_get_cpuid_count(uint32_t leaf, uint32_t sub_leaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
    /* __cpuid_count() writes through all four pointers unconditionally, so
     * refuse to continue if any of them is missing. */
    if (!eax || !ebx || !ecx || !edx) {
        return false;
    }

    /* 0x80000000 probes for extended cpuid info: masking the leaf with it
     * selects which range (basic or extended) the maximum is looked up for. */
    uint32_t max_level = __get_cpuid_max(leaf & 0x80000000, 0);

    if (max_level == 0 || max_level < leaf) {
        return false;
    }

    __cpuid_count(leaf, sub_leaf, *eax, *ebx, *ecx, *edx);
    return true;
}
64 
65 /* https://en.wikipedia.org/wiki/Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2) */
s2n_cpu_supports_bmi2()66 bool s2n_cpu_supports_bmi2() {
67     uint32_t eax, ebx, ecx, edx;
68     if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
69         return false;
70     }
71 
72     return (ebx & bit_BMI2);
73 }
74 
75 /* https://en.wikipedia.org/wiki/Intel_ADX */
s2n_cpu_supports_adx()76 bool s2n_cpu_supports_adx() {
77     uint32_t eax, ebx, ecx, edx;
78     if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
79         return false;
80     }
81 
82     return (ebx & bit_ADX);
83 }
84 
s2n_cpu_supports_avx2()85 bool s2n_cpu_supports_avx2() {
86     uint32_t eax, ebx, ecx, edx;
87     if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
88         return false;
89     }
90 
91     return (ebx & EBX_BIT_AVX2);
92 }
93 
/* Reports whether the sikep434r3 assembly implementation may be used on the
 * current processor. The answer combines what was compiled in
 * (S2N_SIKE_P434_R3_ASM, S2N_ADX) with what CPUID reports at run time. */
bool s2n_cpu_supports_sikep434r3_asm(void) {
#if !defined(S2N_SIKE_P434_R3_ASM)
    /* sikep434r3 assembly was not supported at compile time */
    return false;
#elif defined(S2N_ADX)
    /* The sikep434r3 assembly code always requires BMI2. The assembly was
     * compiled with support for ADX, so we also require ADX at runtime. */
    return s2n_cpu_supports_bmi2() && s2n_cpu_supports_adx();
#else
    /* The sikep434r3 assembly code always requires BMI2. */
    return s2n_cpu_supports_bmi2();
#endif
}
108 
/* Reports whether the BIKE Round-3 AVX2 code path may be used: it must have
 * been compiled in (S2N_BIKE_R3_AVX2) and the extended-features CPUID leaf
 * must report the AVX2 bit in EBX. */
bool s2n_cpu_supports_bike_r3_avx2(void) {
#if defined(S2N_BIKE_R3_AVX2)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return ((ebx & EBX_BIT_AVX2) != 0);
#else
    /* BIKE Round-3 AVX2 code was not supported at compile time */
    return false;
#endif
}
120 
/* Reports whether the BIKE Round-3 AVX512 code path may be used: it must have
 * been compiled in (S2N_BIKE_R3_AVX512) and the extended-features CPUID leaf
 * must report the EBX_BIT_AVX512 bit in EBX.
 *
 * NOTE(review): only the CPUID feature bit is inspected here; whether
 * operating-system support for the wider register state is verified
 * elsewhere is not visible from this file -- confirm. */
bool s2n_cpu_supports_bike_r3_avx512(void) {
#if defined(S2N_BIKE_R3_AVX512)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return ((ebx & EBX_BIT_AVX512) != 0);
#else
    /* BIKE Round-3 AVX512 code was not supported at compile time */
    return false;
#endif
}
132 
/* Reports whether the BIKE Round-3 PCLMUL code path may be used: it must have
 * been compiled in (S2N_BIKE_R3_PCLMUL) and the processor-info-and-features
 * CPUID leaf must report the PCLMUL bit in ECX. */
bool s2n_cpu_supports_bike_r3_pclmul(void) {
#if defined(S2N_BIKE_R3_PCLMUL)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    /* Unlike the other BIKE checks this one reads leaf 1, not leaf 7; the
     * sub-leaf argument is simply passed as zero. */
    if (!s2n_get_cpuid_count(PROCESSOR_INFO_AND_FEATURES, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return ((ecx & ECX_BIT_PCLMUL) != 0);
#else
    /* BIKE Round-3 PCLMUL code was not supported at compile time */
    return false;
#endif
}
144 
/* Reports whether the processor advertises VPCLMUL for the BIKE Round-3 code:
 * the extended-features CPUID leaf must report the VPCLMUL bit in ECX.
 *
 * NOTE(review): this check is compiled in under S2N_BIKE_R3_AVX512 rather
 * than a VPCLMUL-specific macro -- presumably because the VPCLMUL code is
 * only built together with the AVX512 code; confirm against the build. */
bool s2n_cpu_supports_bike_r3_vpclmul(void) {
#if defined(S2N_BIKE_R3_AVX512)
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return ((ecx & ECX_BIT_VPCLMUL) != 0);
#else
    /* The guarding optimization level was not supported at compile time */
    return false;
#endif
}
156 
/* Reports whether the kyber512r3 AVX2/BMI2 code path may be used: it must
 * have been compiled in (S2N_KYBER512R3_AVX2_BMI2) and the processor must
 * report both BMI2 and AVX2. */
bool s2n_cpu_supports_kyber512r3_avx2_bmi2(void) {
#if defined(S2N_KYBER512R3_AVX2_BMI2)
    return s2n_cpu_supports_bmi2() && s2n_cpu_supports_avx2();
#else
    /* kyber512r3 AVX2/BMI2 code was not supported at compile time */
    return false;
#endif
}
164 
165 #else /* defined(S2N_CPUID_AVAILABLE) */
166 
/* If CPUID is not available, we cannot perform necessary run-time checks,
 * so the sikep434r3 assembly is always reported as unsupported. */
bool s2n_cpu_supports_sikep434r3_asm(void) {
    return false;
}
171 
/* Fallback used when CPUID is not available: the BIKE Round-3 AVX2 code path
 * is always reported as unsupported. */
bool s2n_cpu_supports_bike_r3_avx2(void) {
    return false;
}
175 
/* Fallback used when CPUID is not available: the BIKE Round-3 AVX512 code
 * path is always reported as unsupported. */
bool s2n_cpu_supports_bike_r3_avx512(void) {
    return false;
}
179 
/* Fallback used when CPUID is not available: the BIKE Round-3 PCLMUL code
 * path is always reported as unsupported. */
bool s2n_cpu_supports_bike_r3_pclmul(void) {
    return false;
}
183 
/* Fallback used when CPUID is not available: the BIKE Round-3 VPCLMUL code
 * path is always reported as unsupported. */
bool s2n_cpu_supports_bike_r3_vpclmul(void) {
    return false;
}
187 
/* Fallback used when CPUID is not available: the kyber512r3 AVX2/BMI2 code
 * path is always reported as unsupported. */
bool s2n_cpu_supports_kyber512r3_avx2_bmi2(void) {
    return false;
}
191 
192 #endif /* defined(S2N_CPUID_AVAILABLE) */
193 
s2n_sikep434r3_asm_is_enabled()194 bool s2n_sikep434r3_asm_is_enabled() {
195     return sikep434r3_asm_enabled;
196 }
197 
s2n_bike_r3_is_avx2_enabled()198 bool s2n_bike_r3_is_avx2_enabled() {
199     return bike_r3_avx2_enabled;
200 }
201 
s2n_bike_r3_is_avx512_enabled()202 bool s2n_bike_r3_is_avx512_enabled() {
203     return bike_r3_avx512_enabled;
204 }
205 
s2n_bike_r3_is_pclmul_enabled()206 bool s2n_bike_r3_is_pclmul_enabled() {
207     return bike_r3_pclmul_enabled;
208 }
209 
s2n_bike_r3_is_vpclmul_enabled()210 bool s2n_bike_r3_is_vpclmul_enabled() {
211     return bike_r3_vpclmul_enabled;
212 }
213 
s2n_kyber512r3_is_avx2_bmi2_enabled()214 bool s2n_kyber512r3_is_avx2_bmi2_enabled() {
215     return kyber512r3_avx2_bmi2_enabled;
216 }
217 
/* Reports whether post-quantum support is usable: never when the library was
 * built with S2N_NO_PQ, and otherwise exactly when s2n_is_in_fips_mode()
 * returns false. */
bool s2n_pq_is_enabled(void) {
#if defined(S2N_NO_PQ)
    return false;
#else
    return !s2n_is_in_fips_mode();
#endif
}
225 
s2n_disable_sikep434r3_asm()226 S2N_RESULT s2n_disable_sikep434r3_asm() {
227     sikep434r3_asm_enabled = false;
228     return S2N_RESULT_OK;
229 }
230 
s2n_disable_bike_r3_opt_all()231 S2N_RESULT s2n_disable_bike_r3_opt_all() {
232     bike_r3_avx2_enabled    = false;
233     bike_r3_avx512_enabled  = false;
234     bike_r3_pclmul_enabled  = false;
235     bike_r3_vpclmul_enabled = false;
236     return S2N_RESULT_OK;
237 }
238 
s2n_disable_kyber512r3_opt_avx2_bmi2()239 S2N_RESULT s2n_disable_kyber512r3_opt_avx2_bmi2() {
240     kyber512r3_avx2_bmi2_enabled = false;
241     return S2N_RESULT_OK;
242 }
243 
s2n_try_enable_bike_r3_opt_pclmul()244 S2N_RESULT s2n_try_enable_bike_r3_opt_pclmul() {
245     if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_pclmul()) {
246         bike_r3_pclmul_enabled = true;
247     }
248     return S2N_RESULT_OK;
249 }
250 
s2n_try_enable_bike_r3_opt_avx2()251 S2N_RESULT s2n_try_enable_bike_r3_opt_avx2() {
252     /* When AVX2 is available, PCLMUL is too by default. */
253     RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_pclmul(), S2N_ERR_SAFETY);
254     if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_avx2()) {
255         bike_r3_avx2_enabled = true;
256     }
257     return S2N_RESULT_OK;
258 }
259 
s2n_try_enable_bike_r3_opt_avx512()260 S2N_RESULT s2n_try_enable_bike_r3_opt_avx512() {
261     /* When AVX512 is available, AVX2 is too by default. */
262     RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_avx2(), S2N_ERR_SAFETY);
263     if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_avx512()) {
264         bike_r3_avx512_enabled = true;
265     }
266     return S2N_RESULT_OK;
267 }
268 
s2n_try_enable_bike_r3_opt_vpclmul()269 S2N_RESULT s2n_try_enable_bike_r3_opt_vpclmul() {
270     RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_avx512(), S2N_ERR_SAFETY);
271     /* Only Enable VPCLMUL if AVX512 is also supported. This is to because the BIKE R3 VPCLMUL requires 512-bit version
272      * of VPCLMUL, and not the 256-bit version that is available on AMD Zen 3 processors. */
273     if (s2n_pq_is_enabled() && s2n_cpu_supports_bike_r3_vpclmul() && s2n_bike_r3_is_avx512_enabled()) {
274         bike_r3_vpclmul_enabled = true;
275     }
276     return S2N_RESULT_OK;
277 }
278 
s2n_try_enable_sikep434r3_asm()279 S2N_RESULT s2n_try_enable_sikep434r3_asm() {
280     if (s2n_pq_is_enabled() && s2n_cpu_supports_sikep434r3_asm()) {
281         sikep434r3_asm_enabled = true;
282     }
283     return S2N_RESULT_OK;
284 }
285 
s2n_try_enable_kyber512r3_opt_avx2_bmi2()286 S2N_RESULT s2n_try_enable_kyber512r3_opt_avx2_bmi2() {
287     if (s2n_pq_is_enabled() && s2n_cpu_supports_kyber512r3_avx2_bmi2()) {
288         kyber512r3_avx2_bmi2_enabled = true;
289     }
290     return S2N_RESULT_OK;
291 }
292 
s2n_bike_r3_x86_64_opt_init()293 S2N_RESULT s2n_bike_r3_x86_64_opt_init()
294 {
295     /* try_enable_vpclmul function recursively tries to enable
296      * all the optimizations (avx2, avx512, pclmul, vpclmul),
297      * so it's sufficient to call only this function. */
298     RESULT_ENSURE_OK(s2n_try_enable_bike_r3_opt_vpclmul(), S2N_ERR_SAFETY);
299     return S2N_RESULT_OK;
300 }
301 
s2n_pq_init()302 S2N_RESULT s2n_pq_init() {
303     RESULT_ENSURE_OK(s2n_try_enable_sikep434r3_asm(), S2N_ERR_SAFETY);
304     RESULT_ENSURE_OK(s2n_bike_r3_x86_64_opt_init(), S2N_ERR_SAFETY);
305     RESULT_ENSURE_OK(s2n_try_enable_kyber512r3_opt_avx2_bmi2(), S2N_ERR_SAFETY);
306 
307     return S2N_RESULT_OK;
308 }
309