/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 */

#ifndef _LIBSPL_SYS_SIMD_H
#define	_LIBSPL_SYS_SIMD_H

#include <sys/isa_defs.h>
#include <sys/types.h>

#if defined(__x86)
#include <cpuid.h>

/*
 * In user-space the kernel FPU save/restore hooks are unnecessary:
 * the FPU/SIMD state is always available, so these are no-ops.
 */
#define	kfpu_allowed()		1
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

/*
 * CPUID feature tests for user-space.
 *
 * x86 registers used implicitly by CPUID; the values double as indices
 * into the result array filled in by __cpuid_count().
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4
} cpuid_regs_t;

/*
 * List of instruction sets identified by CPUID; each value indexes the
 * cpuid_features[] descriptor table defined below.
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE
} cpuid_inst_sets_t;

/*
 * Instruction set descriptor.
86 */ 87 typedef struct cpuid_feature_desc { 88 uint32_t leaf; /* CPUID leaf */ 89 uint32_t subleaf; /* CPUID sub-leaf */ 90 uint32_t flag; /* bit mask of the feature */ 91 cpuid_regs_t reg; /* which CPUID return register to test */ 92 } cpuid_feature_desc_t; 93 94 #define _AVX512F_BIT (1U << 16) 95 #define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28)) 96 #define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17)) 97 #define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30)) 98 #define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21)) 99 #define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */ 100 #define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26)) 101 #define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27)) 102 #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ 103 #define _AES_BIT (1U << 25) 104 #define _PCLMULQDQ_BIT (1U << 1) 105 #define _MOVBE_BIT (1U << 22) 106 107 /* 108 * Descriptions of supported instruction sets 109 */ 110 static const cpuid_feature_desc_t cpuid_features[] = { 111 [SSE] = {1U, 0U, 1U << 25, EDX }, 112 [SSE2] = {1U, 0U, 1U << 26, EDX }, 113 [SSE3] = {1U, 0U, 1U << 0, ECX }, 114 [SSSE3] = {1U, 0U, 1U << 9, ECX }, 115 [SSE4_1] = {1U, 0U, 1U << 19, ECX }, 116 [SSE4_2] = {1U, 0U, 1U << 20, ECX }, 117 [OSXSAVE] = {1U, 0U, 1U << 27, ECX }, 118 [AVX] = {1U, 0U, 1U << 28, ECX }, 119 [AVX2] = {7U, 0U, 1U << 5, EBX }, 120 [BMI1] = {7U, 0U, 1U << 3, EBX }, 121 [BMI2] = {7U, 0U, 1U << 8, EBX }, 122 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX }, 123 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX }, 124 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX }, 125 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX }, 126 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX }, 127 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX }, 128 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX }, 129 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX }, 130 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, 131 [AES] = {1U, 0U, _AES_BIT, ECX }, 132 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, 133 [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, 134 
}; 135 136 /* 137 * Check if OS supports AVX and AVX2 by checking XCR0 138 * Only call this function if CPUID indicates that AVX feature is 139 * supported by the CPU, otherwise it might be an illegal instruction. 140 */ 141 static inline uint64_t 142 xgetbv(uint32_t index) 143 { 144 uint32_t eax, edx; 145 /* xgetbv - instruction byte code */ 146 __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" 147 : "=a" (eax), "=d" (edx) 148 : "c" (index)); 149 150 return ((((uint64_t)edx)<<32) | (uint64_t)eax); 151 } 152 153 /* 154 * Check if CPU supports a feature 155 */ 156 static inline boolean_t 157 __cpuid_check_feature(const cpuid_feature_desc_t *desc) 158 { 159 uint32_t r[CPUID_REG_CNT]; 160 161 if (__get_cpuid_max(0, NULL) >= desc->leaf) { 162 /* 163 * __cpuid_count is needed to properly check 164 * for AVX2. It is a macro, so return parameters 165 * are passed by value. 166 */ 167 __cpuid_count(desc->leaf, desc->subleaf, 168 r[EAX], r[EBX], r[ECX], r[EDX]); 169 return ((r[desc->reg] & desc->flag) == desc->flag); 170 } 171 return (B_FALSE); 172 } 173 174 #define CPUID_FEATURE_CHECK(name, id) \ 175 static inline boolean_t \ 176 __cpuid_has_ ## name(void) \ 177 { \ 178 return (__cpuid_check_feature(&cpuid_features[id])); \ 179 } 180 181 /* 182 * Define functions for user-space CPUID features testing 183 */ 184 CPUID_FEATURE_CHECK(sse, SSE); 185 CPUID_FEATURE_CHECK(sse2, SSE2); 186 CPUID_FEATURE_CHECK(sse3, SSE3); 187 CPUID_FEATURE_CHECK(ssse3, SSSE3); 188 CPUID_FEATURE_CHECK(sse4_1, SSE4_1); 189 CPUID_FEATURE_CHECK(sse4_2, SSE4_2); 190 CPUID_FEATURE_CHECK(avx, AVX); 191 CPUID_FEATURE_CHECK(avx2, AVX2); 192 CPUID_FEATURE_CHECK(osxsave, OSXSAVE); 193 CPUID_FEATURE_CHECK(bmi1, BMI1); 194 CPUID_FEATURE_CHECK(bmi2, BMI2); 195 CPUID_FEATURE_CHECK(avx512f, AVX512F); 196 CPUID_FEATURE_CHECK(avx512cd, AVX512CD); 197 CPUID_FEATURE_CHECK(avx512dq, AVX512DQ); 198 CPUID_FEATURE_CHECK(avx512bw, AVX512BW); 199 CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA); 200 
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);

/*
 * Detect register set support: returns B_TRUE when every XCR0 bit in
 * 'state' is enabled by the OS.  Requires OSXSAVE before executing
 * xgetbv, which would otherwise be an illegal instruction.
 */
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
	boolean_t has_osxsave;
	uint64_t xcr0;

	has_osxsave = __cpuid_has_osxsave();
	if (!has_osxsave)
		return (B_FALSE);

	xcr0 = xgetbv(0);
	return ((xcr0 & state) == state);
}

/* XCR0 state-component masks: SSE (XMM) | AVX (YMM), plus AVX-512 (ZMM) */
#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)

#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)

/*
 * Check if SSE instruction set is available
 */
static inline boolean_t
zfs_sse_available(void)
{
	return (__cpuid_has_sse());
}

/*
 * Check if SSE2 instruction set is available
 */
static inline boolean_t
zfs_sse2_available(void)
{
	return (__cpuid_has_sse2());
}

/*
 * Check if SSE3 instruction set is available
 */
static inline boolean_t
zfs_sse3_available(void)
{
	return (__cpuid_has_sse3());
}

/*
 * Check if SSSE3 instruction set is available
 */
static inline boolean_t
zfs_ssse3_available(void)
{
	return (__cpuid_has_ssse3());
}

/*
 * Check if SSE4.1 instruction set is available
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
	return (__cpuid_has_sse4_1());
}

/*
 * Check if SSE4.2 instruction set is available
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
	return (__cpuid_has_sse4_2());
}

/*
 * Check if AVX instruction set is available; also requires the OS to
 * have enabled YMM state saving.
 */
static inline boolean_t
zfs_avx_available(void)
{
	return (__cpuid_has_avx() && __ymm_enabled());
}

/*
 * Check if AVX2 instruction set is available
 */
static inline boolean_t
zfs_avx2_available(void)
{
	return (__cpuid_has_avx2() && __ymm_enabled());
}

/*
 * Check if BMI1 instruction set is available
 */
static inline boolean_t
zfs_bmi1_available(void)
{
	return (__cpuid_has_bmi1());
}

/*
 * Check if BMI2 instruction set is available
 */
static inline boolean_t
zfs_bmi2_available(void)
{
	return (__cpuid_has_bmi2());
}

/*
 * Check if AES instruction set is available
 */
static inline boolean_t
zfs_aes_available(void)
{
	return (__cpuid_has_aes());
}

/*
 * Check if PCLMULQDQ instruction set is available
 */
static inline boolean_t
zfs_pclmulqdq_available(void)
{
	return (__cpuid_has_pclmulqdq());
}

/*
 * Check if MOVBE instruction is available
 */
static inline boolean_t
zfs_movbe_available(void)
{
	return (__cpuid_has_movbe());
}

/*
 * AVX-512 family of instruction sets:
 *
 * AVX512F	Foundation
 * AVX512CD	Conflict Detection Instructions
 * AVX512ER	Exponential and Reciprocal Instructions
 * AVX512PF	Prefetch Instructions
 *
 * AVX512BW	Byte and Word Instructions
 * AVX512DQ	Double-word and Quadword Instructions
 * AVX512VL	Vector Length Extensions
 *
 * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
 * AVX512VBMI	Vector Byte Manipulation Instructions
 */

/*
 * Check if AVX512F instruction set is available; all AVX-512 checks
 * also require the OS to have enabled ZMM state saving.
 */
static inline boolean_t
zfs_avx512f_available(void)
{
	return (__cpuid_has_avx512f() && __zmm_enabled());
}

/*
 * Check if AVX512CD instruction set is available
 */
static inline boolean_t
zfs_avx512cd_available(void)
{
	return (__cpuid_has_avx512cd() && __zmm_enabled());
}

/*
 * Check if AVX512ER instruction set is available
 */
static inline boolean_t
zfs_avx512er_available(void)
{
	return (__cpuid_has_avx512er() && __zmm_enabled());
}

/*
 * Check if AVX512PF instruction set is available
 */
static inline boolean_t
zfs_avx512pf_available(void)
{
	return (__cpuid_has_avx512pf() && __zmm_enabled());
}

/*
 * Check if AVX512BW instruction set is available
 */
static inline boolean_t
zfs_avx512bw_available(void)
{
	return (__cpuid_has_avx512bw() && __zmm_enabled());
}

/*
 * Check if AVX512DQ instruction set is available
 */
static inline boolean_t
zfs_avx512dq_available(void)
{
	return (__cpuid_has_avx512dq() && __zmm_enabled());
}

/*
 * Check if AVX512VL instruction set is available
 */
static inline boolean_t
zfs_avx512vl_available(void)
{
	return (__cpuid_has_avx512vl() && __zmm_enabled());
}

/*
 * Check if AVX512IFMA instruction set is available
 */
static inline boolean_t
zfs_avx512ifma_available(void)
{
	return (__cpuid_has_avx512ifma() && __zmm_enabled());
}

/*
 * Check if AVX512VBMI instruction set is available.  VBMI's CPUID bit
 * is on a different register (ECX) that does not fold in the AVX512F
 * bit, so AVX512F is tested explicitly here.
 */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
	return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
	    __zmm_enabled());
}

#elif defined(__aarch64__)

/* aarch64 user-space: FPU/SIMD always usable, hooks are no-ops */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#elif defined(__powerpc__)

/* including <sys/auxv.h> clashes with AT_UID and others */
#if defined(__FreeBSD__)
#define	AT_HWCAP	25	/* CPU feature flags. */
#define	AT_HWCAP2	26	/* CPU feature flags 2. */
extern int elf_aux_info(int aux, void *buf, int buflen);
/*
 * getauxval() shim for FreeBSD built on elf_aux_info(); returns 0 when
 * the requested auxiliary vector entry is unavailable.
 */
static inline unsigned long
getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
		return (0UL);

	return (val);
}
#elif defined(__linux__)
#define	AT_HWCAP	16	/* CPU feature flags. */
#define	AT_HWCAP2	26	/* CPU feature flags 2. */
extern unsigned long getauxval(unsigned long type);
#endif

/* powerpc user-space: FPU/SIMD always usable, hooks are no-ops */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#define	PPC_FEATURE_HAS_ALTIVEC	0x10000000
static inline boolean_t
zfs_altivec_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	/* NOTE(review): returns the raw masked bit, not B_TRUE/B_FALSE;
	 * callers appear to treat it as truthy only — confirm. */
	return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
}

#define	PPC_FEATURE_HAS_VSX	0x00000080
static inline boolean_t
zfs_vsx_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	return (hwcap & PPC_FEATURE_HAS_VSX);
}

#define	PPC_FEATURE2_ARCH_2_07	0x80000000
static inline boolean_t
zfs_isa207_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	return ((hwcap & PPC_FEATURE_HAS_VSX) &&
	    (hwcap2 & PPC_FEATURE2_ARCH_2_07));
}

#else

/* Unknown architecture: report no kernel-FPU/SIMD support */
#define	kfpu_allowed()		0
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#endif

#endif /* _LIBSPL_SYS_SIMD_H */