1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. 24 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> 25 */ 26 27 #ifndef _LIBSPL_SYS_SIMD_H 28 #define _LIBSPL_SYS_SIMD_H 29 30 #include <sys/isa_defs.h> 31 #include <sys/types.h> 32 33 /* including <sys/auxv.h> clashes with AT_UID and others */ 34 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) 35 #if defined(__FreeBSD__) 36 #define AT_HWCAP 25 37 #define AT_HWCAP2 26 38 extern int elf_aux_info(int aux, void *buf, int buflen); 39 static inline unsigned long getauxval(unsigned long key) 40 { 41 unsigned long val = 0UL; 42 43 if (elf_aux_info((int)key, &val, sizeof (val)) != 0) 44 return (0UL); 45 46 return (val); 47 } 48 #elif defined(__linux__) 49 #define AT_HWCAP 16 50 #define AT_HWCAP2 26 51 extern unsigned long getauxval(unsigned long type); 52 #endif /* __linux__ */ 53 #endif /* arm || aarch64 || powerpc */ 54 55 #if defined(__x86) 56 #include <cpuid.h> 57 58 #define kfpu_allowed() 1 59 #define kfpu_begin() do {} while (0) 60 #define kfpu_end() do {} while (0) 61 #define kfpu_init() 0 62 #define kfpu_fini() ((void) 0) 63 64 /* 65 * CPUID feature tests for user-space. 66 * 67 * x86 registers used implicitly by CPUID 68 */ 69 typedef enum cpuid_regs { 70 EAX = 0, 71 EBX, 72 ECX, 73 EDX, 74 CPUID_REG_CNT = 4 75 } cpuid_regs_t; 76 77 /* 78 * List of instruction sets identified by CPUID 79 */ 80 typedef enum cpuid_inst_sets { 81 SSE = 0, 82 SSE2, 83 SSE3, 84 SSSE3, 85 SSE4_1, 86 SSE4_2, 87 OSXSAVE, 88 AVX, 89 AVX2, 90 BMI1, 91 BMI2, 92 AVX512F, 93 AVX512CD, 94 AVX512DQ, 95 AVX512BW, 96 AVX512IFMA, 97 AVX512VBMI, 98 AVX512PF, 99 AVX512ER, 100 AVX512VL, 101 AES, 102 PCLMULQDQ, 103 MOVBE, 104 SHA_NI 105 } cpuid_inst_sets_t; 106 107 /* 108 * Instruction set descriptor. 109 */ 110 typedef struct cpuid_feature_desc { 111 uint32_t leaf; /* CPUID leaf */ 112 uint32_t subleaf; /* CPUID sub-leaf */ 113 uint32_t flag; /* bit mask of the feature */ 114 cpuid_regs_t reg; /* which CPUID return register to test */ 115 } cpuid_feature_desc_t; 116 117 #define _AVX512F_BIT (1U << 16) 118 #define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28)) 119 #define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17)) 120 #define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30)) 121 #define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21)) 122 #define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */ 123 #define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26)) 124 #define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27)) 125 #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ 126 #define _AES_BIT (1U << 25) 127 #define _PCLMULQDQ_BIT (1U << 1) 128 #define _MOVBE_BIT (1U << 22) 129 #define _SHA_NI_BIT (1U << 29) 130 131 /* 132 * Descriptions of supported instruction sets 133 */ 134 static const cpuid_feature_desc_t cpuid_features[] = { 135 [SSE] = {1U, 0U, 1U << 25, EDX }, 136 [SSE2] = {1U, 0U, 1U << 26, EDX }, 137 [SSE3] = {1U, 0U, 1U << 0, ECX }, 138 [SSSE3] = {1U, 0U, 1U << 9, ECX }, 139 [SSE4_1] = {1U, 0U, 1U << 19, ECX }, 140 [SSE4_2] = {1U, 0U, 1U << 20, ECX }, 141 [OSXSAVE] = {1U, 0U, 1U << 27, ECX }, 142 [AVX] = {1U, 0U, 1U << 28, ECX }, 143 [AVX2] = {7U, 0U, 1U << 5, EBX }, 144 [BMI1] = {7U, 0U, 1U << 3, EBX }, 145 [BMI2] = {7U, 0U, 1U << 8, EBX }, 146 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX }, 147 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX }, 148 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX }, 149 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX }, 150 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX }, 151 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX }, 152 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX }, 153 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX }, 154 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, 155 [AES] = {1U, 0U, _AES_BIT, ECX }, 156 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, 157 [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, 158 [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX }, 159 }; 160 161 /* 162 * Check if OS supports AVX and AVX2 by checking XCR0 163 * Only call this function if CPUID indicates that AVX feature is 164 * supported by the CPU, otherwise it might be an illegal instruction. 165 */ 166 static inline uint64_t 167 xgetbv(uint32_t index) 168 { 169 uint32_t eax, edx; 170 /* xgetbv - instruction byte code */ 171 __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" 172 : "=a" (eax), "=d" (edx) 173 : "c" (index)); 174 175 return ((((uint64_t)edx)<<32) | (uint64_t)eax); 176 } 177 178 /* 179 * Check if CPU supports a feature 180 */ 181 static inline boolean_t 182 __cpuid_check_feature(const cpuid_feature_desc_t *desc) 183 { 184 uint32_t r[CPUID_REG_CNT]; 185 186 if (__get_cpuid_max(0, NULL) >= desc->leaf) { 187 /* 188 * __cpuid_count is needed to properly check 189 * for AVX2. It is a macro, so return parameters 190 * are passed by value. 191 */ 192 __cpuid_count(desc->leaf, desc->subleaf, 193 r[EAX], r[EBX], r[ECX], r[EDX]); 194 return ((r[desc->reg] & desc->flag) == desc->flag); 195 } 196 return (B_FALSE); 197 } 198 199 #define CPUID_FEATURE_CHECK(name, id) \ 200 static inline boolean_t \ 201 __cpuid_has_ ## name(void) \ 202 { \ 203 return (__cpuid_check_feature(&cpuid_features[id])); \ 204 } 205 206 /* 207 * Define functions for user-space CPUID features testing 208 */ 209 CPUID_FEATURE_CHECK(sse, SSE); 210 CPUID_FEATURE_CHECK(sse2, SSE2); 211 CPUID_FEATURE_CHECK(sse3, SSE3); 212 CPUID_FEATURE_CHECK(ssse3, SSSE3); 213 CPUID_FEATURE_CHECK(sse4_1, SSE4_1); 214 CPUID_FEATURE_CHECK(sse4_2, SSE4_2); 215 CPUID_FEATURE_CHECK(avx, AVX); 216 CPUID_FEATURE_CHECK(avx2, AVX2); 217 CPUID_FEATURE_CHECK(osxsave, OSXSAVE); 218 CPUID_FEATURE_CHECK(bmi1, BMI1); 219 CPUID_FEATURE_CHECK(bmi2, BMI2); 220 CPUID_FEATURE_CHECK(avx512f, AVX512F); 221 CPUID_FEATURE_CHECK(avx512cd, AVX512CD); 222 CPUID_FEATURE_CHECK(avx512dq, AVX512DQ); 223 CPUID_FEATURE_CHECK(avx512bw, AVX512BW); 224 CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA); 225 CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI); 226 CPUID_FEATURE_CHECK(avx512pf, AVX512PF); 227 CPUID_FEATURE_CHECK(avx512er, AVX512ER); 228 CPUID_FEATURE_CHECK(avx512vl, AVX512VL); 229 CPUID_FEATURE_CHECK(aes, AES); 230 CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); 231 CPUID_FEATURE_CHECK(movbe, MOVBE); 232 CPUID_FEATURE_CHECK(shani, SHA_NI); 233 234 /* 235 * Detect register set support 236 */ 237 static inline boolean_t 238 __simd_state_enabled(const uint64_t state) 239 { 240 boolean_t has_osxsave; 241 uint64_t xcr0; 242 243 has_osxsave = __cpuid_has_osxsave(); 244 if (!has_osxsave) 245 return (B_FALSE); 246 247 xcr0 = xgetbv(0); 248 return ((xcr0 & state) == state); 249 } 250 251 #define _XSTATE_SSE_AVX (0x2 | 0x4) 252 #define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX) 253 254 #define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX) 255 #define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512) 256 257 /* 258 * Check if SSE instruction set is available 259 */ 260 static inline boolean_t 261 zfs_sse_available(void) 262 { 263 return (__cpuid_has_sse()); 264 } 265 266 /* 267 * Check if SSE2 instruction set is available 268 */ 269 static inline boolean_t 270 zfs_sse2_available(void) 271 { 272 return (__cpuid_has_sse2()); 273 } 274 275 /* 276 * Check if SSE3 instruction set is available 277 */ 278 static inline boolean_t 279 zfs_sse3_available(void) 280 { 281 return (__cpuid_has_sse3()); 282 } 283 284 /* 285 * Check if SSSE3 instruction set is available 286 */ 287 static inline boolean_t 288 zfs_ssse3_available(void) 289 { 290 return (__cpuid_has_ssse3()); 291 } 292 293 /* 294 * Check if SSE4.1 instruction set is available 295 */ 296 static inline boolean_t 297 zfs_sse4_1_available(void) 298 { 299 return (__cpuid_has_sse4_1()); 300 } 301 302 /* 303 * Check if SSE4.2 instruction set is available 304 */ 305 static inline boolean_t 306 zfs_sse4_2_available(void) 307 { 308 return (__cpuid_has_sse4_2()); 309 } 310 311 /* 312 * Check if AVX instruction set is available 313 */ 314 static inline boolean_t 315 zfs_avx_available(void) 316 { 317 return (__cpuid_has_avx() && __ymm_enabled()); 318 } 319 320 /* 321 * Check if AVX2 instruction set is available 322 */ 323 static inline boolean_t 324 zfs_avx2_available(void) 325 { 326 return (__cpuid_has_avx2() && __ymm_enabled()); 327 } 328 329 /* 330 * Check if BMI1 instruction set is available 331 */ 332 static inline boolean_t 333 zfs_bmi1_available(void) 334 { 335 return (__cpuid_has_bmi1()); 336 } 337 338 /* 339 * Check if BMI2 instruction set is available 340 */ 341 static inline boolean_t 342 zfs_bmi2_available(void) 343 { 344 return (__cpuid_has_bmi2()); 345 } 346 347 /* 348 * Check if AES instruction set is available 349 */ 350 static inline boolean_t 351 zfs_aes_available(void) 352 { 353 return (__cpuid_has_aes()); 354 } 355 356 /* 357 * Check if PCLMULQDQ instruction set is available 358 */ 359 static inline boolean_t 360 zfs_pclmulqdq_available(void) 361 { 362 return (__cpuid_has_pclmulqdq()); 363 } 364 365 /* 366 * Check if MOVBE instruction is available 367 */ 368 static inline boolean_t 369 zfs_movbe_available(void) 370 { 371 return (__cpuid_has_movbe()); 372 } 373 374 /* 375 * Check if SHA_NI instruction is available 376 */ 377 static inline boolean_t 378 zfs_shani_available(void) 379 { 380 return (__cpuid_has_shani()); 381 } 382 383 /* 384 * AVX-512 family of instruction sets: 385 * 386 * AVX512F Foundation 387 * AVX512CD Conflict Detection Instructions 388 * AVX512ER Exponential and Reciprocal Instructions 389 * AVX512PF Prefetch Instructions 390 * 391 * AVX512BW Byte and Word Instructions 392 * AVX512DQ Double-word and Quadword Instructions 393 * AVX512VL Vector Length Extensions 394 * 395 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4) 396 * AVX512VBMI Vector Byte Manipulation Instructions 397 */ 398 399 /* 400 * Check if AVX512F instruction set is available 401 */ 402 static inline boolean_t 403 zfs_avx512f_available(void) 404 { 405 return (__cpuid_has_avx512f() && __zmm_enabled()); 406 } 407 408 /* 409 * Check if AVX512CD instruction set is available 410 */ 411 static inline boolean_t 412 zfs_avx512cd_available(void) 413 { 414 return (__cpuid_has_avx512cd() && __zmm_enabled()); 415 } 416 417 /* 418 * Check if AVX512ER instruction set is available 419 */ 420 static inline boolean_t 421 zfs_avx512er_available(void) 422 { 423 return (__cpuid_has_avx512er() && __zmm_enabled()); 424 } 425 426 /* 427 * Check if AVX512PF instruction set is available 428 */ 429 static inline boolean_t 430 zfs_avx512pf_available(void) 431 { 432 return (__cpuid_has_avx512pf() && __zmm_enabled()); 433 } 434 435 /* 436 * Check if AVX512BW instruction set is available 437 */ 438 static inline boolean_t 439 zfs_avx512bw_available(void) 440 { 441 return (__cpuid_has_avx512bw() && __zmm_enabled()); 442 } 443 444 /* 445 * Check if AVX512DQ instruction set is available 446 */ 447 static inline boolean_t 448 zfs_avx512dq_available(void) 449 { 450 return (__cpuid_has_avx512dq() && __zmm_enabled()); 451 } 452 453 /* 454 * Check if AVX512VL instruction set is available 455 */ 456 static inline boolean_t 457 zfs_avx512vl_available(void) 458 { 459 return (__cpuid_has_avx512vl() && __zmm_enabled()); 460 } 461 462 /* 463 * Check if AVX512IFMA instruction set is available 464 */ 465 static inline boolean_t 466 zfs_avx512ifma_available(void) 467 { 468 return (__cpuid_has_avx512ifma() && __zmm_enabled()); 469 } 470 471 /* 472 * Check if AVX512VBMI instruction set is available 473 */ 474 static inline boolean_t 475 zfs_avx512vbmi_available(void) 476 { 477 return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() && 478 __zmm_enabled()); 479 } 480 481 #elif defined(__arm__) 482 483 #define kfpu_allowed() 1 484 #define kfpu_initialize(tsk) do {} while (0) 485 #define kfpu_begin() do {} while (0) 486 #define kfpu_end() do {} while (0) 487 488 #define HWCAP_NEON 0x00001000 489 #define HWCAP2_SHA2 0x00000008 490 491 /* 492 * Check if NEON is available 493 */ 494 static inline boolean_t 495 zfs_neon_available(void) 496 { 497 unsigned long hwcap = getauxval(AT_HWCAP); 498 return (hwcap & HWCAP_NEON); 499 } 500 501 /* 502 * Check if SHA2 is available 503 */ 504 static inline boolean_t 505 zfs_sha256_available(void) 506 { 507 unsigned long hwcap = getauxval(AT_HWCAP); 508 return (hwcap & HWCAP2_SHA2); 509 } 510 511 #elif defined(__aarch64__) 512 513 #define kfpu_allowed() 1 514 #define kfpu_initialize(tsk) do {} while (0) 515 #define kfpu_begin() do {} while (0) 516 #define kfpu_end() do {} while (0) 517 518 #define HWCAP_FP 0x00000001 519 #define HWCAP_SHA2 0x00000040 520 #define HWCAP_SHA512 0x00200000 521 522 /* 523 * Check if NEON is available 524 */ 525 static inline boolean_t 526 zfs_neon_available(void) 527 { 528 unsigned long hwcap = getauxval(AT_HWCAP); 529 return (hwcap & HWCAP_FP); 530 } 531 532 /* 533 * Check if SHA2 is available 534 */ 535 static inline boolean_t 536 zfs_sha256_available(void) 537 { 538 unsigned long hwcap = getauxval(AT_HWCAP); 539 return (hwcap & HWCAP_SHA2); 540 } 541 542 /* 543 * Check if SHA512 is available 544 */ 545 static inline boolean_t 546 zfs_sha512_available(void) 547 { 548 unsigned long hwcap = getauxval(AT_HWCAP); 549 return (hwcap & HWCAP_SHA512); 550 } 551 552 #elif defined(__powerpc__) 553 554 #define kfpu_allowed() 1 555 #define kfpu_initialize(tsk) do {} while (0) 556 #define kfpu_begin() do {} while (0) 557 #define kfpu_end() do {} while (0) 558 559 #define PPC_FEATURE_HAS_ALTIVEC 0x10000000 560 #define PPC_FEATURE_HAS_VSX 0x00000080 561 #define PPC_FEATURE2_ARCH_2_07 0x80000000 562 563 static inline boolean_t 564 zfs_altivec_available(void) 565 { 566 unsigned long hwcap = getauxval(AT_HWCAP); 567 return (hwcap & PPC_FEATURE_HAS_ALTIVEC); 568 } 569 570 static inline boolean_t 571 zfs_vsx_available(void) 572 { 573 unsigned long hwcap = getauxval(AT_HWCAP); 574 return (hwcap & PPC_FEATURE_HAS_VSX); 575 } 576 577 static inline boolean_t 578 zfs_isa207_available(void) 579 { 580 unsigned long hwcap = getauxval(AT_HWCAP); 581 unsigned long hwcap2 = getauxval(AT_HWCAP2); 582 return ((hwcap & PPC_FEATURE_HAS_VSX) && 583 (hwcap2 & PPC_FEATURE2_ARCH_2_07)); 584 } 585 586 #else 587 588 #define kfpu_allowed() 0 589 #define kfpu_initialize(tsk) do {} while (0) 590 #define kfpu_begin() do {} while (0) 591 #define kfpu_end() do {} while (0) 592 593 #endif 594 595 #endif /* _LIBSPL_SYS_SIMD_H */ 596