1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>. 23 */ 24 25 /* 26 * USER API: 27 * 28 * Kernel fpu methods: 29 * kfpu_allowed() 30 * kfpu_begin() 31 * kfpu_end() 32 * kfpu_init() 33 * kfpu_fini() 34 * 35 * SIMD support: 36 * 37 * Following functions should be called to determine whether CPU feature 38 * is supported. All functions are usable in kernel and user space. 39 * If a SIMD algorithm is using more than one instruction set 40 * all relevant feature test functions should be called. 
 *
 * Supported features:
 *	zfs_sse_available()
 *	zfs_sse2_available()
 *	zfs_sse3_available()
 *	zfs_ssse3_available()
 *	zfs_sse4_1_available()
 *	zfs_sse4_2_available()
 *
 *	zfs_avx_available()
 *	zfs_avx2_available()
 *
 *	zfs_bmi1_available()
 *	zfs_bmi2_available()
 *
 *	zfs_avx512f_available()
 *	zfs_avx512cd_available()
 *	zfs_avx512er_available()
 *	zfs_avx512pf_available()
 *	zfs_avx512bw_available()
 *	zfs_avx512dq_available()
 *	zfs_avx512vl_available()
 *	zfs_avx512ifma_available()
 *	zfs_avx512vbmi_available()
 *
 * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers
 * also add zfs_avx512vl_available() to feature check.
 */

#ifndef _LINUX_SIMD_X86_H
#define _LINUX_SIMD_X86_H

/* only for __x86 */
#if defined(__x86)

#include <sys/types.h>
#include <asm/cpufeature.h>

/*
 * Disable the WARN_ON_FPU() macro to prevent additional dependencies
 * when providing the kfpu_* functions. Relevant warnings are included
 * as appropriate and are unconditionally enabled.
 */
#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
#undef CONFIG_X86_DEBUG_FPU
#endif

/*
 * Newer kernels moved the FPU interfaces under asm/fpu/; older kernels
 * provide asm/i387.h and asm/xcr.h.  The configure check sets
 * HAVE_KERNEL_FPU_API_HEADER accordingly.
 */
#if defined(HAVE_KERNEL_FPU_API_HEADER)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#else
#include <asm/i387.h>
#include <asm/xcr.h>
#endif

/*
 * The following cases are for kernels which export either the
 * kernel_fpu_* or __kernel_fpu_* functions.
 */
#if defined(KERNEL_EXPORTS_X86_FPU)

/* The kernel manages FPU context for us; no local state is required. */
#define kfpu_allowed() 1
#define kfpu_init() 0
#define kfpu_fini() ((void) 0)

#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
/*
 * __kernel_fpu_begin()/__kernel_fpu_end() do not disable preemption
 * themselves, so it must be done explicitly around them.
 */
#define kfpu_begin() \
{ \
	preempt_disable(); \
	__kernel_fpu_begin(); \
}
#define kfpu_end() \
{ \
	__kernel_fpu_end(); \
	preempt_enable(); \
}

#elif defined(HAVE_KERNEL_FPU)
#define kfpu_begin() kernel_fpu_begin()
#define kfpu_end() kernel_fpu_end()

#else
/*
 * This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then
 * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
 */
#error "Unreachable kernel configuration"
#endif

#else /* defined(KERNEL_EXPORTS_X86_FPU) */

/*
 * When the kernel_fpu_* symbols are unavailable then provide our own
 * versions which allow the FPU to be safely used.
 */
#if defined(HAVE_KERNEL_FPU_INTERNAL)

#include <linux/mm.h>

/* Per-cpu FPU save areas; allocated by kfpu_init(), freed by kfpu_fini(). */
extern union fpregs_state **zfs_kfpu_fpregs;

/*
 * Free the per-cpu FPU state buffers and the pointer array itself.
 * Safe to call on a partially initialized array (NULL slots are skipped),
 * which is relied upon by the kfpu_init() error path.
 * (The previous comment here said "Initialize ..."; this is the teardown.)
 */
static inline void
kfpu_fini(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (zfs_kfpu_fpregs[cpu] != NULL) {
			free_pages((unsigned long)zfs_kfpu_fpregs[cpu],
			    get_order(sizeof (union fpregs_state)));
		}
	}

	kfree(zfs_kfpu_fpregs);
}

/*
 * Allocate a page-aligned FPU save area for every possible cpu.
 * Returns 0 on success or -ENOMEM, releasing any partial allocations.
 */
static inline int
kfpu_init(void)
{
	zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
	    sizeof (union fpregs_state *), GFP_KERNEL);
	if (zfs_kfpu_fpregs == NULL)
		return (-ENOMEM);

	/*
	 * The fxsave and xsave operations require 16-/64-byte alignment of
	 * the target memory. Since kmalloc() provides no alignment
	 * guarantee instead use alloc_pages_node().
	 */
	unsigned int order = get_order(sizeof (union fpregs_state));
	int cpu;

	for_each_possible_cpu(cpu) {
		struct page *page = alloc_pages_node(cpu_to_node(cpu),
		    GFP_KERNEL | __GFP_ZERO, order);
		if (page == NULL) {
			/* Release everything allocated so far. */
			kfpu_fini();
			return (-ENOMEM);
		}

		zfs_kfpu_fpregs[cpu] = page_address(page);
	}

	return (0);
}

#define kfpu_allowed() 1
#define ex_handler_fprestore ex_handler_default

/*
 * FPU save and restore instructions.
 */
#define __asm __asm__ __volatile__
#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
    : : [addr] "m" (rval));

/*
 * Save extended register state with xsave; the 64-bit component mask is
 * split into edx:eax as the instruction requires.
 */
static inline void
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
{
	uint32_t low, hi;
	int err;

	low = mask;
	hi = mask >> 32;
	XSTATE_XSAVE(addr, low, hi, err);
	WARN_ON_ONCE(err);
}

/* Save x87/SSE state with fxsave (fxsaveq on 64-bit kernels). */
static inline void
kfpu_save_fxsr(struct fxregs_state *addr)
{
	if (IS_ENABLED(CONFIG_X86_32))
		kfpu_fxsave(addr);
	else
		kfpu_fxsaveq(addr);
}

/* Legacy x87-only save path for CPUs without fxsr support. */
static inline void
kfpu_save_fsave(struct fregs_state *addr)
{
	kfpu_fnsave(addr);
}

static inline void
kfpu_begin(void)
{
	/*
	 * Preemption and interrupts must be disabled for the critical
	 * region where the FPU state is being modified.
	 */
	preempt_disable();
	local_irq_disable();

	/*
	 * The current FPU registers need to be preserved by kfpu_begin()
	 * and restored by kfpu_end().
	 * They are stored in a dedicated
	 * per-cpu variable, not in the task struct, this allows any user
	 * FPU state to be correctly preserved and restored.
	 */
	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

	/* Prefer xsave, then fxsave, then the legacy fsave path. */
	if (static_cpu_has(X86_FEATURE_XSAVE)) {
		kfpu_save_xsave(&state->xsave, ~0);
	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
		kfpu_save_fxsr(&state->fxsave);
	} else {
		kfpu_save_fsave(&state->fsave);
	}
}

/* Restore extended register state saved by kfpu_save_xsave(). */
static inline void
kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	XSTATE_XRESTORE(addr, low, hi);
}

static inline void
kfpu_restore_fxsr(struct fxregs_state *addr)
{
	/*
	 * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
	 * restores the _x87 FOP, FIP, and FDP registers when an exception
	 * is pending. Clean the _x87 state to force the restore.
	 */
	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
		kfpu_fxsr_clean(addr);

	if (IS_ENABLED(CONFIG_X86_32)) {
		kfpu_fxrstor(addr);
	} else {
		kfpu_fxrstorq(addr);
	}
}

/* Legacy x87-only restore path for CPUs without fxsr support. */
static inline void
kfpu_restore_fsave(struct fregs_state *addr)
{
	kfpu_frstor(addr);
}

/*
 * Restore the FPU registers saved by kfpu_begin() and re-enable
 * interrupts and preemption (reverse order of kfpu_begin()).
 */
static inline void
kfpu_end(void)
{
	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

	if (static_cpu_has(X86_FEATURE_XSAVE)) {
		kfpu_restore_xsave(&state->xsave, ~0);
	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
		kfpu_restore_fxsr(&state->fxsave);
	} else {
		kfpu_restore_fsave(&state->fsave);
	}

	local_irq_enable();
	preempt_enable();
}

#else

/*
 * FPU support is unavailable.
315 */ 316 #define kfpu_allowed() 0 317 #define kfpu_begin() do {} while (0) 318 #define kfpu_end() do {} while (0) 319 #define kfpu_init() 0 320 #define kfpu_fini() ((void) 0) 321 322 #endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ 323 #endif /* defined(KERNEL_EXPORTS_X86_FPU) */ 324 325 /* 326 * Linux kernel provides an interface for CPU feature testing. 327 */ 328 329 /* 330 * Detect register set support 331 */ 332 static inline boolean_t 333 __simd_state_enabled(const uint64_t state) 334 { 335 boolean_t has_osxsave; 336 uint64_t xcr0; 337 338 #if defined(X86_FEATURE_OSXSAVE) 339 has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE); 340 #else 341 has_osxsave = B_FALSE; 342 #endif 343 if (!has_osxsave) 344 return (B_FALSE); 345 346 xcr0 = xgetbv(0); 347 return ((xcr0 & state) == state); 348 } 349 350 #define _XSTATE_SSE_AVX (0x2 | 0x4) 351 #define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX) 352 353 #define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX) 354 #define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512) 355 356 /* 357 * Check if SSE instruction set is available 358 */ 359 static inline boolean_t 360 zfs_sse_available(void) 361 { 362 return (!!boot_cpu_has(X86_FEATURE_XMM)); 363 } 364 365 /* 366 * Check if SSE2 instruction set is available 367 */ 368 static inline boolean_t 369 zfs_sse2_available(void) 370 { 371 return (!!boot_cpu_has(X86_FEATURE_XMM2)); 372 } 373 374 /* 375 * Check if SSE3 instruction set is available 376 */ 377 static inline boolean_t 378 zfs_sse3_available(void) 379 { 380 return (!!boot_cpu_has(X86_FEATURE_XMM3)); 381 } 382 383 /* 384 * Check if SSSE3 instruction set is available 385 */ 386 static inline boolean_t 387 zfs_ssse3_available(void) 388 { 389 return (!!boot_cpu_has(X86_FEATURE_SSSE3)); 390 } 391 392 /* 393 * Check if SSE4.1 instruction set is available 394 */ 395 static inline boolean_t 396 zfs_sse4_1_available(void) 397 { 398 return (!!boot_cpu_has(X86_FEATURE_XMM4_1)); 399 } 400 401 /* 402 * Check if SSE4.2 instruction 
set is available 403 */ 404 static inline boolean_t 405 zfs_sse4_2_available(void) 406 { 407 return (!!boot_cpu_has(X86_FEATURE_XMM4_2)); 408 } 409 410 /* 411 * Check if AVX instruction set is available 412 */ 413 static inline boolean_t 414 zfs_avx_available(void) 415 { 416 return (boot_cpu_has(X86_FEATURE_AVX) && __ymm_enabled()); 417 } 418 419 /* 420 * Check if AVX2 instruction set is available 421 */ 422 static inline boolean_t 423 zfs_avx2_available(void) 424 { 425 return (boot_cpu_has(X86_FEATURE_AVX2) && __ymm_enabled()); 426 } 427 428 /* 429 * Check if BMI1 instruction set is available 430 */ 431 static inline boolean_t 432 zfs_bmi1_available(void) 433 { 434 #if defined(X86_FEATURE_BMI1) 435 return (!!boot_cpu_has(X86_FEATURE_BMI1)); 436 #else 437 return (B_FALSE); 438 #endif 439 } 440 441 /* 442 * Check if BMI2 instruction set is available 443 */ 444 static inline boolean_t 445 zfs_bmi2_available(void) 446 { 447 #if defined(X86_FEATURE_BMI2) 448 return (!!boot_cpu_has(X86_FEATURE_BMI2)); 449 #else 450 return (B_FALSE); 451 #endif 452 } 453 454 /* 455 * Check if AES instruction set is available 456 */ 457 static inline boolean_t 458 zfs_aes_available(void) 459 { 460 #if defined(X86_FEATURE_AES) 461 return (!!boot_cpu_has(X86_FEATURE_AES)); 462 #else 463 return (B_FALSE); 464 #endif 465 } 466 467 /* 468 * Check if PCLMULQDQ instruction set is available 469 */ 470 static inline boolean_t 471 zfs_pclmulqdq_available(void) 472 { 473 #if defined(X86_FEATURE_PCLMULQDQ) 474 return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ)); 475 #else 476 return (B_FALSE); 477 #endif 478 } 479 480 /* 481 * Check if MOVBE instruction is available 482 */ 483 static inline boolean_t 484 zfs_movbe_available(void) 485 { 486 #if defined(X86_FEATURE_MOVBE) 487 return (!!boot_cpu_has(X86_FEATURE_MOVBE)); 488 #else 489 return (B_FALSE); 490 #endif 491 } 492 493 /* 494 * AVX-512 family of instruction sets: 495 * 496 * AVX512F Foundation 497 * AVX512CD Conflict Detection Instructions 498 * 
AVX512ER Exponential and Reciprocal Instructions 499 * AVX512PF Prefetch Instructions 500 * 501 * AVX512BW Byte and Word Instructions 502 * AVX512DQ Double-word and Quadword Instructions 503 * AVX512VL Vector Length Extensions 504 * 505 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4) 506 * AVX512VBMI Vector Byte Manipulation Instructions 507 */ 508 509 /* 510 * Check if AVX512F instruction set is available 511 */ 512 static inline boolean_t 513 zfs_avx512f_available(void) 514 { 515 boolean_t has_avx512 = B_FALSE; 516 517 #if defined(X86_FEATURE_AVX512F) 518 has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F); 519 #endif 520 return (has_avx512 && __zmm_enabled()); 521 } 522 523 /* 524 * Check if AVX512CD instruction set is available 525 */ 526 static inline boolean_t 527 zfs_avx512cd_available(void) 528 { 529 boolean_t has_avx512 = B_FALSE; 530 531 #if defined(X86_FEATURE_AVX512CD) 532 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 533 boot_cpu_has(X86_FEATURE_AVX512CD); 534 #endif 535 return (has_avx512 && __zmm_enabled()); 536 } 537 538 /* 539 * Check if AVX512ER instruction set is available 540 */ 541 static inline boolean_t 542 zfs_avx512er_available(void) 543 { 544 boolean_t has_avx512 = B_FALSE; 545 546 #if defined(X86_FEATURE_AVX512ER) 547 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 548 boot_cpu_has(X86_FEATURE_AVX512ER); 549 #endif 550 return (has_avx512 && __zmm_enabled()); 551 } 552 553 /* 554 * Check if AVX512PF instruction set is available 555 */ 556 static inline boolean_t 557 zfs_avx512pf_available(void) 558 { 559 boolean_t has_avx512 = B_FALSE; 560 561 #if defined(X86_FEATURE_AVX512PF) 562 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 563 boot_cpu_has(X86_FEATURE_AVX512PF); 564 #endif 565 return (has_avx512 && __zmm_enabled()); 566 } 567 568 /* 569 * Check if AVX512BW instruction set is available 570 */ 571 static inline boolean_t 572 zfs_avx512bw_available(void) 573 { 574 boolean_t has_avx512 = B_FALSE; 575 576 #if 
defined(X86_FEATURE_AVX512BW) 577 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 578 boot_cpu_has(X86_FEATURE_AVX512BW); 579 #endif 580 581 return (has_avx512 && __zmm_enabled()); 582 } 583 584 /* 585 * Check if AVX512DQ instruction set is available 586 */ 587 static inline boolean_t 588 zfs_avx512dq_available(void) 589 { 590 boolean_t has_avx512 = B_FALSE; 591 592 #if defined(X86_FEATURE_AVX512DQ) 593 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 594 boot_cpu_has(X86_FEATURE_AVX512DQ); 595 #endif 596 return (has_avx512 && __zmm_enabled()); 597 } 598 599 /* 600 * Check if AVX512VL instruction set is available 601 */ 602 static inline boolean_t 603 zfs_avx512vl_available(void) 604 { 605 boolean_t has_avx512 = B_FALSE; 606 607 #if defined(X86_FEATURE_AVX512VL) 608 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 609 boot_cpu_has(X86_FEATURE_AVX512VL); 610 #endif 611 return (has_avx512 && __zmm_enabled()); 612 } 613 614 /* 615 * Check if AVX512IFMA instruction set is available 616 */ 617 static inline boolean_t 618 zfs_avx512ifma_available(void) 619 { 620 boolean_t has_avx512 = B_FALSE; 621 622 #if defined(X86_FEATURE_AVX512IFMA) 623 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 624 boot_cpu_has(X86_FEATURE_AVX512IFMA); 625 #endif 626 return (has_avx512 && __zmm_enabled()); 627 } 628 629 /* 630 * Check if AVX512VBMI instruction set is available 631 */ 632 static inline boolean_t 633 zfs_avx512vbmi_available(void) 634 { 635 boolean_t has_avx512 = B_FALSE; 636 637 #if defined(X86_FEATURE_AVX512VBMI) 638 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 639 boot_cpu_has(X86_FEATURE_AVX512VBMI); 640 #endif 641 return (has_avx512 && __zmm_enabled()); 642 } 643 644 #endif /* defined(__x86) */ 645 646 #endif /* _LINUX_SIMD_X86_H */ 647