/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
 */

/*
 * USER API:
 *
 * Kernel fpu methods:
 *	kfpu_allowed()
 *	kfpu_begin()
 *	kfpu_end()
 *	kfpu_init()
 *	kfpu_fini()
 *
 * SIMD support:
 *
 * The following functions should be called to determine whether a CPU
 * feature is supported. All functions are usable in kernel and user space.
 * If a SIMD algorithm uses more than one instruction set, all relevant
 * feature test functions should be called.
 *
 * Supported features:
 *	zfs_sse_available()
 *	zfs_sse2_available()
 *	zfs_sse3_available()
 *	zfs_ssse3_available()
 *	zfs_sse4_1_available()
 *	zfs_sse4_2_available()
 *
 *	zfs_avx_available()
 *	zfs_avx2_available()
 *
 *	zfs_bmi1_available()
 *	zfs_bmi2_available()
 *
 *	zfs_avx512f_available()
 *	zfs_avx512cd_available()
 *	zfs_avx512er_available()
 *	zfs_avx512pf_available()
 *	zfs_avx512bw_available()
 *	zfs_avx512dq_available()
 *	zfs_avx512vl_available()
 *	zfs_avx512ifma_available()
 *	zfs_avx512vbmi_available()
 *
 * NOTE(AVX-512VL):	If using AVX-512 instructions with 128-bit registers,
 *			also add zfs_avx512vl_available() to the feature check.
 */
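/*
 * A typical usage sketch (fletcher_4_avx2() and fletcher_4_scalar() are
 * hypothetical consumers, not part of this header): test the feature,
 * wrap the SIMD code in kfpu_begin()/kfpu_end(), and keep a scalar
 * fallback for when the FPU may not be used.
 *
 *	if (kfpu_allowed() && zfs_avx2_available()) {
 *		kfpu_begin();
 *		fletcher_4_avx2(buf, size);
 *		kfpu_end();
 *	} else {
 *		fletcher_4_scalar(buf, size);
 *	}
 */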

#ifndef _LINUX_SIMD_X86_H
#define	_LINUX_SIMD_X86_H

/* only for __x86 */
#if defined(__x86)

#include <sys/types.h>
#include <asm/cpufeature.h>

/*
 * Disable the WARN_ON_FPU() macro to prevent additional dependencies
 * when providing the kfpu_* functions.  Relevant warnings are included
 * as appropriate and are unconditionally enabled.
 */
#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
#undef CONFIG_X86_DEBUG_FPU
#endif

#if defined(HAVE_KERNEL_FPU_API_HEADER)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#else
#include <asm/i387.h>
#include <asm/xcr.h>
#endif

/*
 * The following cases are for kernels which export either the
 * kernel_fpu_* or __kernel_fpu_* functions.
 */
#if defined(KERNEL_EXPORTS_X86_FPU)

#define	kfpu_allowed()		1
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
#define	kfpu_begin()		\
{				\
	preempt_disable();	\
	__kernel_fpu_begin();	\
}
#define	kfpu_end()		\
{				\
	__kernel_fpu_end();	\
	preempt_enable();	\
}

#elif defined(HAVE_KERNEL_FPU)
#define	kfpu_begin()		kernel_fpu_begin()
#define	kfpu_end()		kernel_fpu_end()

#else
/*
 * This case is unreachable.  When KERNEL_EXPORTS_X86_FPU is defined then
 * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
 */
#error "Unreachable kernel configuration"
#endif

#else /* defined(KERNEL_EXPORTS_X86_FPU) */

/*
 * When the kernel_fpu_* symbols are unavailable, provide our own
 * versions which allow the FPU to be used safely.
 */
#if defined(HAVE_KERNEL_FPU_INTERNAL)

#include <linux/mm.h>

extern union fpregs_state **zfs_kfpu_fpregs;

/*
 * Free the per-cpu FPU state storage allocated by kfpu_init().
 */
static inline void
kfpu_fini(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (zfs_kfpu_fpregs[cpu] != NULL) {
			free_pages((unsigned long)zfs_kfpu_fpregs[cpu],
			    get_order(sizeof (union fpregs_state)));
		}
	}

	kfree(zfs_kfpu_fpregs);
}

static inline int
kfpu_init(void)
{
	zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
	    sizeof (union fpregs_state *), GFP_KERNEL);
	if (zfs_kfpu_fpregs == NULL)
		return (-ENOMEM);

	/*
	 * The fxsave and xsave operations require 16-/64-byte alignment of
	 * the target memory. Since kmalloc() provides no alignment
	 * guarantee, use alloc_pages_node() instead.
	 */
	unsigned int order = get_order(sizeof (union fpregs_state));
	int cpu;

	for_each_possible_cpu(cpu) {
		struct page *page = alloc_pages_node(cpu_to_node(cpu),
		    GFP_KERNEL | __GFP_ZERO, order);
		if (page == NULL) {
			kfpu_fini();
			return (-ENOMEM);
		}

		zfs_kfpu_fpregs[cpu] = page_address(page);
	}

	return (0);
}

#define	kfpu_allowed()		1
#define	ex_handler_fprestore	ex_handler_default

/*
 * FPU save and restore instructions.
 */
#define	__asm			__asm__ __volatile__
#define	kfpu_fxsave(addr)	__asm("fxsave %0" : "=m" (*(addr)))
#define	kfpu_fxsaveq(addr)	__asm("fxsaveq %0" : "=m" (*(addr)))
#define	kfpu_fnsave(addr)	__asm("fnsave %0; fwait" : "=m" (*(addr)))
#define	kfpu_fxrstor(addr)	__asm("fxrstor %0" : : "m" (*(addr)))
#define	kfpu_fxrstorq(addr)	__asm("fxrstorq %0" : : "m" (*(addr)))
#define	kfpu_frstor(addr)	__asm("frstor %0" : : "m" (*(addr)))
#define	kfpu_fxsr_clean(rval)	__asm("fnclex; emms; fildl %P[addr]" \
				    : : [addr] "m" (rval));

static inline void
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
{
	uint32_t low, hi;
	int err;

	low = mask;
	hi = mask >> 32;
	XSTATE_XSAVE(addr, low, hi, err);
	WARN_ON_ONCE(err);
}

static inline void
kfpu_save_fxsr(struct fxregs_state *addr)
{
	if (IS_ENABLED(CONFIG_X86_32))
		kfpu_fxsave(addr);
	else
		kfpu_fxsaveq(addr);
}

static inline void
kfpu_save_fsave(struct fregs_state *addr)
{
	kfpu_fnsave(addr);
}

static inline void
kfpu_begin(void)
{
	/*
	 * Preemption and interrupts must be disabled for the critical
	 * region where the FPU state is being modified.
	 */
	preempt_disable();
	local_irq_disable();

	/*
	 * The current FPU registers need to be preserved by kfpu_begin()
	 * and restored by kfpu_end().  They are stored in a dedicated
	 * per-cpu variable, not in the task struct; this allows any user
	 * FPU state to be correctly preserved and restored.
	 */
	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

	if (static_cpu_has(X86_FEATURE_XSAVE)) {
		kfpu_save_xsave(&state->xsave, ~0);
	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
		kfpu_save_fxsr(&state->fxsave);
	} else {
		kfpu_save_fsave(&state->fsave);
	}
}

static inline void
kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	XSTATE_XRESTORE(addr, low, hi);
}

static inline void
kfpu_restore_fxsr(struct fxregs_state *addr)
{
	/*
	 * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
	 * restores the x87 FOP, FIP, and FDP registers when an exception
	 * is pending.  Clean the x87 state to force the restore.
	 */
	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
		kfpu_fxsr_clean(addr);

	if (IS_ENABLED(CONFIG_X86_32)) {
		kfpu_fxrstor(addr);
	} else {
		kfpu_fxrstorq(addr);
	}
}

static inline void
kfpu_restore_fsave(struct fregs_state *addr)
{
	kfpu_frstor(addr);
}

static inline void
kfpu_end(void)
{
	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

	if (static_cpu_has(X86_FEATURE_XSAVE)) {
		kfpu_restore_xsave(&state->xsave, ~0);
	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
		kfpu_restore_fxsr(&state->fxsave);
	} else {
		kfpu_restore_fsave(&state->fsave);
	}

	local_irq_enable();
	preempt_enable();
}

#else

/*
 * FPU support is unavailable.
 */
#define	kfpu_allowed()		0
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
#endif /* defined(KERNEL_EXPORTS_X86_FPU) */

/*
 * The Linux kernel provides an interface for CPU feature testing.
 */

/*
 * Detect register set support
 */
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
	boolean_t has_osxsave;
	uint64_t xcr0;

#if defined(X86_FEATURE_OSXSAVE)
	has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
#else
	has_osxsave = B_FALSE;
#endif
	if (!has_osxsave)
		return (B_FALSE);

	xcr0 = xgetbv(0);
	return ((xcr0 & state) == state);
}

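/*
 * XCR0 component bits tested below: bit 1 (0x2) covers SSE (XMM) state,
 * bit 2 (0x4) covers AVX (YMM) state, and bits 5-7 (0xE0) cover the
 * AVX-512 opmask, ZMM_Hi256, and Hi16_ZMM state.
 */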
#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)

#define	__ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)

/*
 * Check if SSE instruction set is available
 */
static inline boolean_t
zfs_sse_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_XMM));
}

/*
 * Check if SSE2 instruction set is available
 */
static inline boolean_t
zfs_sse2_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_XMM2));
}

/*
 * Check if SSE3 instruction set is available
 */
static inline boolean_t
zfs_sse3_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_XMM3));
}

/*
 * Check if SSSE3 instruction set is available
 */
static inline boolean_t
zfs_ssse3_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_SSSE3));
}

/*
 * Check if SSE4.1 instruction set is available
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
}

/*
 * Check if SSE4.2 instruction set is available
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
	return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
}

/*
 * Check if AVX instruction set is available
 */
static inline boolean_t
zfs_avx_available(void)
{
	return (boot_cpu_has(X86_FEATURE_AVX) && __ymm_enabled());
}

/*
 * Check if AVX2 instruction set is available
 */
static inline boolean_t
zfs_avx2_available(void)
{
	return (boot_cpu_has(X86_FEATURE_AVX2) && __ymm_enabled());
}

/*
 * Check if BMI1 instruction set is available
 */
static inline boolean_t
zfs_bmi1_available(void)
{
#if defined(X86_FEATURE_BMI1)
	return (!!boot_cpu_has(X86_FEATURE_BMI1));
#else
	return (B_FALSE);
#endif
}

/*
 * Check if BMI2 instruction set is available
 */
static inline boolean_t
zfs_bmi2_available(void)
{
#if defined(X86_FEATURE_BMI2)
	return (!!boot_cpu_has(X86_FEATURE_BMI2));
#else
	return (B_FALSE);
#endif
}

/*
 * Check if AES instruction set is available
 */
static inline boolean_t
zfs_aes_available(void)
{
#if defined(X86_FEATURE_AES)
	return (!!boot_cpu_has(X86_FEATURE_AES));
#else
	return (B_FALSE);
#endif
}

/*
 * Check if PCLMULQDQ instruction set is available
 */
static inline boolean_t
zfs_pclmulqdq_available(void)
{
#if defined(X86_FEATURE_PCLMULQDQ)
	return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
#else
	return (B_FALSE);
#endif
}

/*
 * Check if MOVBE instruction is available
 */
static inline boolean_t
zfs_movbe_available(void)
{
#if defined(X86_FEATURE_MOVBE)
	return (!!boot_cpu_has(X86_FEATURE_MOVBE));
#else
	return (B_FALSE);
#endif
}

/*
 * AVX-512 family of instruction sets:
 *
 * AVX512F	Foundation
 * AVX512CD	Conflict Detection Instructions
 * AVX512ER	Exponential and Reciprocal Instructions
 * AVX512PF	Prefetch Instructions
 *
 * AVX512BW	Byte and Word Instructions
 * AVX512DQ	Double-word and Quadword Instructions
 * AVX512VL	Vector Length Extensions
 *
 * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
 * AVX512VBMI	Vector Byte Manipulation Instructions
 */
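
/*
 * For example, code that uses AVX-512BW instructions on 128-bit (XMM)
 * registers should, per the AVX-512VL note above, test both features
 * (do_avx512bw_xmm() is a hypothetical consumer, not part of this header):
 *
 *	if (zfs_avx512bw_available() && zfs_avx512vl_available())
 *		do_avx512bw_xmm();
 */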

/*
 * Check if AVX512F instruction set is available
 */
static inline boolean_t
zfs_avx512f_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512F)
	has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512CD instruction set is available
 */
static inline boolean_t
zfs_avx512cd_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512CD)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512CD);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512ER instruction set is available
 */
static inline boolean_t
zfs_avx512er_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512ER)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512ER);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512PF instruction set is available
 */
static inline boolean_t
zfs_avx512pf_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512PF)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512PF);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512BW instruction set is available
 */
static inline boolean_t
zfs_avx512bw_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512BW)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512BW);
#endif

	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512DQ instruction set is available
 */
static inline boolean_t
zfs_avx512dq_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512DQ)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512DQ);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512VL instruction set is available
 */
static inline boolean_t
zfs_avx512vl_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512VL)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VL);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512IFMA instruction set is available
 */
static inline boolean_t
zfs_avx512ifma_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512IFMA)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512IFMA);
#endif
	return (has_avx512 && __zmm_enabled());
}

/*
 * Check if AVX512VBMI instruction set is available
 */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(X86_FEATURE_AVX512VBMI)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VBMI);
#endif
	return (has_avx512 && __zmm_enabled());
}

#endif /* defined(__x86) */

#endif /* _LINUX_SIMD_X86_H */