1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
25  */
26 
27 #ifndef _LIBSPL_SYS_SIMD_H
28 #define	_LIBSPL_SYS_SIMD_H
29 
30 #include <sys/isa_defs.h>
31 #include <sys/types.h>
32 
/* including <sys/auxv.h> clashes with AT_UID and others */
#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
#if defined(__FreeBSD__)
#define	AT_HWCAP	25
#define	AT_HWCAP2	26
extern int elf_aux_info(int aux, void *buf, int buflen);
/*
 * Emulate the Linux getauxval(3) interface on top of FreeBSD's
 * elf_aux_info(3).  Returns the requested auxiliary-vector value,
 * or 0 when the entry is missing or cannot be read.
 */
static inline unsigned long getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
		return (0UL);

	return (val);
}
#elif defined(__linux__)
#define	AT_HWCAP	16
#define	AT_HWCAP2	26
extern unsigned long getauxval(unsigned long type);
#endif /* __linux__ */
#endif /* arm || aarch64 || powerpc */
54 
#if defined(__x86)
#include <cpuid.h>

/*
 * In user-space no explicit FPU state management is needed, so the
 * kfpu_*() hooks reduce to constants / no-ops.
 */
#define	kfpu_allowed()		1
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)
63 
64 /*
65  * CPUID feature tests for user-space.
66  *
67  * x86 registers used implicitly by CPUID
68  */
69 typedef enum cpuid_regs {
70 	EAX = 0,
71 	EBX,
72 	ECX,
73 	EDX,
74 	CPUID_REG_CNT = 4
75 } cpuid_regs_t;
76 
77 /*
78  * List of instruction sets identified by CPUID
79  */
80 typedef enum cpuid_inst_sets {
81 	SSE = 0,
82 	SSE2,
83 	SSE3,
84 	SSSE3,
85 	SSE4_1,
86 	SSE4_2,
87 	OSXSAVE,
88 	AVX,
89 	AVX2,
90 	BMI1,
91 	BMI2,
92 	AVX512F,
93 	AVX512CD,
94 	AVX512DQ,
95 	AVX512BW,
96 	AVX512IFMA,
97 	AVX512VBMI,
98 	AVX512PF,
99 	AVX512ER,
100 	AVX512VL,
101 	AES,
102 	PCLMULQDQ,
103 	MOVBE,
104 	SHA_NI
105 } cpuid_inst_sets_t;
106 
107 /*
108  * Instruction set descriptor.
109  */
110 typedef struct cpuid_feature_desc {
111 	uint32_t leaf;		/* CPUID leaf */
112 	uint32_t subleaf;	/* CPUID sub-leaf */
113 	uint32_t flag;		/* bit mask of the feature */
114 	cpuid_regs_t reg;	/* which CPUID return register to test */
115 } cpuid_feature_desc_t;
116 
/*
 * Feature bit masks.  Most AVX-512 sub-features are tested together
 * with the AVX512F foundation bit, since they share leaf-7 EBX.
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
#define	_AVX512VBMI_BIT		(1U << 1) /* leaf-7 ECX; AVX512F is in EBX */
#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
#define	_AES_BIT		(1U << 25)
#define	_PCLMULQDQ_BIT		(1U << 1)
#define	_MOVBE_BIT		(1U << 22)
#define	_SHA_NI_BIT		(1U << 29)
130 
131 /*
132  * Descriptions of supported instruction sets
133  */
134 static const cpuid_feature_desc_t cpuid_features[] = {
135 	[SSE]		= {1U, 0U,	1U << 25,	EDX	},
136 	[SSE2]		= {1U, 0U,	1U << 26,	EDX	},
137 	[SSE3]		= {1U, 0U,	1U << 0,	ECX	},
138 	[SSSE3]		= {1U, 0U,	1U << 9,	ECX	},
139 	[SSE4_1]	= {1U, 0U,	1U << 19,	ECX	},
140 	[SSE4_2]	= {1U, 0U,	1U << 20,	ECX	},
141 	[OSXSAVE]	= {1U, 0U,	1U << 27,	ECX	},
142 	[AVX]		= {1U, 0U,	1U << 28,	ECX	},
143 	[AVX2]		= {7U, 0U,	1U << 5,	EBX	},
144 	[BMI1]		= {7U, 0U,	1U << 3,	EBX	},
145 	[BMI2]		= {7U, 0U,	1U << 8,	EBX	},
146 	[AVX512F]	= {7U, 0U, _AVX512F_BIT,	EBX	},
147 	[AVX512CD]	= {7U, 0U, _AVX512CD_BIT,	EBX	},
148 	[AVX512DQ]	= {7U, 0U, _AVX512DQ_BIT,	EBX	},
149 	[AVX512BW]	= {7U, 0U, _AVX512BW_BIT,	EBX	},
150 	[AVX512IFMA]	= {7U, 0U, _AVX512IFMA_BIT,	EBX	},
151 	[AVX512VBMI]	= {7U, 0U, _AVX512VBMI_BIT,	ECX	},
152 	[AVX512PF]	= {7U, 0U, _AVX512PF_BIT,	EBX	},
153 	[AVX512ER]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
154 	[AVX512VL]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
155 	[AES]		= {1U, 0U, _AES_BIT,		ECX	},
156 	[PCLMULQDQ]	= {1U, 0U, _PCLMULQDQ_BIT,	ECX	},
157 	[MOVBE]		= {1U, 0U, _MOVBE_BIT,		ECX	},
158 	[SHA_NI]	= {7U, 0U, _SHA_NI_BIT,		EBX	},
159 };
160 
161 /*
162  * Check if OS supports AVX and AVX2 by checking XCR0
163  * Only call this function if CPUID indicates that AVX feature is
164  * supported by the CPU, otherwise it might be an illegal instruction.
165  */
166 static inline uint64_t
167 xgetbv(uint32_t index)
168 {
169 	uint32_t eax, edx;
170 	/* xgetbv - instruction byte code */
171 	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
172 	    : "=a" (eax), "=d" (edx)
173 	    : "c" (index));
174 
175 	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
176 }
177 
178 /*
179  * Check if CPU supports a feature
180  */
181 static inline boolean_t
182 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
183 {
184 	uint32_t r[CPUID_REG_CNT];
185 
186 	if (__get_cpuid_max(0, NULL) >= desc->leaf) {
187 		/*
188 		 * __cpuid_count is needed to properly check
189 		 * for AVX2. It is a macro, so return parameters
190 		 * are passed by value.
191 		 */
192 		__cpuid_count(desc->leaf, desc->subleaf,
193 		    r[EAX], r[EBX], r[ECX], r[EDX]);
194 		return ((r[desc->reg] & desc->flag) == desc->flag);
195 	}
196 	return (B_FALSE);
197 }
198 
/*
 * CPUID_FEATURE_CHECK(name, id) expands to a __cpuid_has_<name>()
 * predicate that tests the cpuid_features[id] descriptor via
 * __cpuid_check_feature().
 */
#define	CPUID_FEATURE_CHECK(name, id)				\
static inline boolean_t						\
__cpuid_has_ ## name(void)					\
{								\
	return (__cpuid_check_feature(&cpuid_features[id]));	\
}

/*
 * Define functions for user-space CPUID features testing
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
CPUID_FEATURE_CHECK(shani, SHA_NI);
233 
234 /*
235  * Detect register set support
236  */
237 static inline boolean_t
238 __simd_state_enabled(const uint64_t state)
239 {
240 	boolean_t has_osxsave;
241 	uint64_t xcr0;
242 
243 	has_osxsave = __cpuid_has_osxsave();
244 	if (!has_osxsave)
245 		return (B_FALSE);
246 
247 	xcr0 = xgetbv(0);
248 	return ((xcr0 & state) == state);
249 }
250 
/*
 * XCR0 state-component masks (see xgetbv(0) above): bit 1 covers SSE
 * (XMM) state and bit 2 AVX (YMM) state; bits 5-7 (0xE0) cover the
 * AVX-512 opmask and ZMM state components.
 */
#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)

#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)
256 
257 /*
258  * Check if SSE instruction set is available
259  */
260 static inline boolean_t
261 zfs_sse_available(void)
262 {
263 	return (__cpuid_has_sse());
264 }
265 
266 /*
267  * Check if SSE2 instruction set is available
268  */
269 static inline boolean_t
270 zfs_sse2_available(void)
271 {
272 	return (__cpuid_has_sse2());
273 }
274 
275 /*
276  * Check if SSE3 instruction set is available
277  */
278 static inline boolean_t
279 zfs_sse3_available(void)
280 {
281 	return (__cpuid_has_sse3());
282 }
283 
284 /*
285  * Check if SSSE3 instruction set is available
286  */
287 static inline boolean_t
288 zfs_ssse3_available(void)
289 {
290 	return (__cpuid_has_ssse3());
291 }
292 
293 /*
294  * Check if SSE4.1 instruction set is available
295  */
296 static inline boolean_t
297 zfs_sse4_1_available(void)
298 {
299 	return (__cpuid_has_sse4_1());
300 }
301 
302 /*
303  * Check if SSE4.2 instruction set is available
304  */
305 static inline boolean_t
306 zfs_sse4_2_available(void)
307 {
308 	return (__cpuid_has_sse4_2());
309 }
310 
311 /*
312  * Check if AVX instruction set is available
313  */
314 static inline boolean_t
315 zfs_avx_available(void)
316 {
317 	return (__cpuid_has_avx() && __ymm_enabled());
318 }
319 
320 /*
321  * Check if AVX2 instruction set is available
322  */
323 static inline boolean_t
324 zfs_avx2_available(void)
325 {
326 	return (__cpuid_has_avx2() && __ymm_enabled());
327 }
328 
329 /*
330  * Check if BMI1 instruction set is available
331  */
332 static inline boolean_t
333 zfs_bmi1_available(void)
334 {
335 	return (__cpuid_has_bmi1());
336 }
337 
338 /*
339  * Check if BMI2 instruction set is available
340  */
341 static inline boolean_t
342 zfs_bmi2_available(void)
343 {
344 	return (__cpuid_has_bmi2());
345 }
346 
347 /*
348  * Check if AES instruction set is available
349  */
350 static inline boolean_t
351 zfs_aes_available(void)
352 {
353 	return (__cpuid_has_aes());
354 }
355 
356 /*
357  * Check if PCLMULQDQ instruction set is available
358  */
359 static inline boolean_t
360 zfs_pclmulqdq_available(void)
361 {
362 	return (__cpuid_has_pclmulqdq());
363 }
364 
365 /*
366  * Check if MOVBE instruction is available
367  */
368 static inline boolean_t
369 zfs_movbe_available(void)
370 {
371 	return (__cpuid_has_movbe());
372 }
373 
374 /*
375  * Check if SHA_NI instruction is available
376  */
377 static inline boolean_t
378 zfs_shani_available(void)
379 {
380 	return (__cpuid_has_shani());
381 }
382 
383 /*
384  * AVX-512 family of instruction sets:
385  *
386  * AVX512F	Foundation
387  * AVX512CD	Conflict Detection Instructions
388  * AVX512ER	Exponential and Reciprocal Instructions
389  * AVX512PF	Prefetch Instructions
390  *
391  * AVX512BW	Byte and Word Instructions
392  * AVX512DQ	Double-word and Quadword Instructions
393  * AVX512VL	Vector Length Extensions
394  *
395  * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
396  * AVX512VBMI	Vector Byte Manipulation Instructions
397  */
398 
399 /*
400  * Check if AVX512F instruction set is available
401  */
402 static inline boolean_t
403 zfs_avx512f_available(void)
404 {
405 	return (__cpuid_has_avx512f() && __zmm_enabled());
406 }
407 
408 /*
409  * Check if AVX512CD instruction set is available
410  */
411 static inline boolean_t
412 zfs_avx512cd_available(void)
413 {
414 	return (__cpuid_has_avx512cd() && __zmm_enabled());
415 }
416 
417 /*
418  * Check if AVX512ER instruction set is available
419  */
420 static inline boolean_t
421 zfs_avx512er_available(void)
422 {
423 	return (__cpuid_has_avx512er() && __zmm_enabled());
424 }
425 
426 /*
427  * Check if AVX512PF instruction set is available
428  */
429 static inline boolean_t
430 zfs_avx512pf_available(void)
431 {
432 	return (__cpuid_has_avx512pf() && __zmm_enabled());
433 }
434 
435 /*
436  * Check if AVX512BW instruction set is available
437  */
438 static inline boolean_t
439 zfs_avx512bw_available(void)
440 {
441 	return (__cpuid_has_avx512bw() && __zmm_enabled());
442 }
443 
444 /*
445  * Check if AVX512DQ instruction set is available
446  */
447 static inline boolean_t
448 zfs_avx512dq_available(void)
449 {
450 	return (__cpuid_has_avx512dq() && __zmm_enabled());
451 }
452 
453 /*
454  * Check if AVX512VL instruction set is available
455  */
456 static inline boolean_t
457 zfs_avx512vl_available(void)
458 {
459 	return (__cpuid_has_avx512vl() && __zmm_enabled());
460 }
461 
462 /*
463  * Check if AVX512IFMA instruction set is available
464  */
465 static inline boolean_t
466 zfs_avx512ifma_available(void)
467 {
468 	return (__cpuid_has_avx512ifma() && __zmm_enabled());
469 }
470 
471 /*
472  * Check if AVX512VBMI instruction set is available
473  */
474 static inline boolean_t
475 zfs_avx512vbmi_available(void)
476 {
477 	return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
478 	    __zmm_enabled());
479 }
480 
#elif defined(__arm__)

/* User-space: no kernel FPU state handling is required */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

/* Capability bits reported via the ELF auxiliary vector */
#define	HWCAP_NEON		0x00001000
#define	HWCAP2_SHA2		0x00000008
490 
491 /*
492  * Check if NEON is available
493  */
494 static inline boolean_t
495 zfs_neon_available(void)
496 {
497 	unsigned long hwcap = getauxval(AT_HWCAP);
498 	return (hwcap & HWCAP_NEON);
499 }
500 
501 /*
502  * Check if SHA2 is available
503  */
504 static inline boolean_t
505 zfs_sha256_available(void)
506 {
507 	unsigned long hwcap = getauxval(AT_HWCAP);
508 	return (hwcap & HWCAP2_SHA2);
509 }
510 
#elif defined(__aarch64__)

/* User-space: no kernel FPU state handling is required */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

/* AT_HWCAP capability bits from the ELF auxiliary vector */
#define	HWCAP_FP		0x00000001
#define	HWCAP_SHA2		0x00000040
#define	HWCAP_SHA512		0x00200000
521 
522 /*
523  * Check if NEON is available
524  */
525 static inline boolean_t
526 zfs_neon_available(void)
527 {
528 	unsigned long hwcap = getauxval(AT_HWCAP);
529 	return (hwcap & HWCAP_FP);
530 }
531 
532 /*
533  * Check if SHA2 is available
534  */
535 static inline boolean_t
536 zfs_sha256_available(void)
537 {
538 	unsigned long hwcap = getauxval(AT_HWCAP);
539 	return (hwcap & HWCAP_SHA2);
540 }
541 
542 /*
543  * Check if SHA512 is available
544  */
545 static inline boolean_t
546 zfs_sha512_available(void)
547 {
548 	unsigned long hwcap = getauxval(AT_HWCAP);
549 	return (hwcap & HWCAP_SHA512);
550 }
551 
#elif defined(__powerpc__)

/* User-space: no kernel FPU state handling is required */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

/* Capability bits from the AT_HWCAP / AT_HWCAP2 auxv entries */
#define	PPC_FEATURE_HAS_ALTIVEC	0x10000000
#define	PPC_FEATURE_HAS_VSX	0x00000080
#define	PPC_FEATURE2_ARCH_2_07	0x80000000
562 
563 static inline boolean_t
564 zfs_altivec_available(void)
565 {
566 	unsigned long hwcap = getauxval(AT_HWCAP);
567 	return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
568 }
569 
570 static inline boolean_t
571 zfs_vsx_available(void)
572 {
573 	unsigned long hwcap = getauxval(AT_HWCAP);
574 	return (hwcap & PPC_FEATURE_HAS_VSX);
575 }
576 
577 static inline boolean_t
578 zfs_isa207_available(void)
579 {
580 	unsigned long hwcap = getauxval(AT_HWCAP);
581 	unsigned long hwcap2 = getauxval(AT_HWCAP2);
582 	return ((hwcap & PPC_FEATURE_HAS_VSX) &&
583 	    (hwcap2 & PPC_FEATURE2_ARCH_2_07));
584 }
585 
#else

/* Unknown architecture: advertise no user-space SIMD support */
#define	kfpu_allowed()		0
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#endif
594 
595 #endif /* _LIBSPL_SYS_SIMD_H */
596