// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef HIGHWAY_HWY_BASE_H_
#define HIGHWAY_HWY_BASE_H_

// For SIMD module implementations and their callers, target-independent.

#include <stddef.h>
#include <stdint.h>

#include <atomic>
#include <cfloat>

// Add to #if conditions to prevent IDE from graying out code.
// HWY_IDE is 1 when any of the IDE/code-model parser macros below is defined,
// otherwise 0. It is always defined, so it can be used directly in #if.
#if (defined __CDT_PARSER__) || (defined __INTELLISENSE__) || \
    (defined Q_CREATOR_RUN) || (defined(__CLANGD__))
#define HWY_IDE 1
#else
#define HWY_IDE 0
#endif

//------------------------------------------------------------------------------
// Detect compiler using predefined macros
//
// Each HWY_COMPILER_* macro is always defined: nonzero (a version number) if
// that compiler was detected, otherwise 0.

// clang-cl defines _MSC_VER but doesn't behave like MSVC in other aspects like
// used in HWY_DIAGNOSTICS(). We include a check that we are not clang for that
// purpose.
// Value: _MSC_VER.
#if defined(_MSC_VER) && !defined(__clang__)
#define HWY_COMPILER_MSVC _MSC_VER
#else
#define HWY_COMPILER_MSVC 0
#endif

// Value: __INTEL_COMPILER.
#ifdef __INTEL_COMPILER
#define HWY_COMPILER_ICC __INTEL_COMPILER
#else
#define HWY_COMPILER_ICC 0
#endif

// Value: major * 100 + minor, e.g. 903 for 9.3.
#ifdef __GNUC__
#define HWY_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
#else
#define HWY_COMPILER_GCC 0
#endif

// Clang can masquerade as MSVC/GCC, in which case both are set.
// HWY_COMPILER_CLANG is major * 100 + minor of the Clang version, or 0 if the
// compiler is not Clang.
#ifdef __clang__
#ifdef __APPLE__
// Apple LLVM version is unrelated to the actual Clang version, which we need
// for enabling workarounds. Use the presence of warning flags to deduce it.
// Adapted from https://github.com/simd-everywhere/simde/ simde-detect-clang.h.
//
// The probes are ordered from newest to oldest so that the first match
// determines the reported version; do not reorder them.
#if __has_warning("-Wformat-insufficient-args")
#define HWY_COMPILER_CLANG 1200
#elif __has_warning("-Wimplicit-const-int-float-conversion")
#define HWY_COMPILER_CLANG 1100
#elif __has_warning("-Wmisleading-indentation")
#define HWY_COMPILER_CLANG 1000
#elif defined(__FILE_NAME__)
#define HWY_COMPILER_CLANG 900
#elif __has_warning("-Wextra-semi-stmt") || \
    __has_builtin(__builtin_rotateleft32)
#define HWY_COMPILER_CLANG 800
#elif __has_warning("-Wc++98-compat-extra-semi")
#define HWY_COMPILER_CLANG 700
#else  // Anything older than 7.0 is not recommended for Highway.
#define HWY_COMPILER_CLANG 600
#endif  // __has_warning chain
#else   // Non-Apple: normal version
#define HWY_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
#endif
#else  // Not clang
#define HWY_COMPILER_CLANG 0
#endif

// More than one may be nonzero, but we want at least one.
#if !HWY_COMPILER_MSVC && !HWY_COMPILER_ICC && !HWY_COMPILER_GCC && \
    !HWY_COMPILER_CLANG
#error "Unsupported compiler"
#endif

//------------------------------------------------------------------------------
// Compiler-specific definitions

// HWY_STR(macro) yields the expansion of `macro` as a string literal. The
// extra HWY_STR_IMPL level ensures the argument is macro-expanded before the
// stringizing operator is applied.
#define HWY_STR_IMPL(macro) #macro
#define HWY_STR(macro) HWY_STR_IMPL(macro)

#if HWY_COMPILER_MSVC

#include <intrin.h>

// MSVC spellings. HWY_FLATTEN and HWY_MAYBE_UNUSED expand to nothing here, and
// HWY_LIKELY/HWY_UNLIKELY pass the expression through unchanged.
#define HWY_RESTRICT __restrict
#define HWY_INLINE __forceinline
#define HWY_NOINLINE __declspec(noinline)
#define HWY_FLATTEN
#define HWY_NORETURN __declspec(noreturn)
#define HWY_LIKELY(expr) (expr)
#define HWY_UNLIKELY(expr) (expr)
#define HWY_PRAGMA(tokens) __pragma(tokens)
#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
// Accepts both an MSVC-style and a GCC-style argument; only `msc` is used in
// this branch.
#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
#define HWY_MAYBE_UNUSED
#define HWY_HAS_ASSUME_ALIGNED 0
#if (_MSC_VER >= 1700)
#define HWY_MUST_USE_RESULT _Check_return_
#else
#define HWY_MUST_USE_RESULT
#endif

#else

// GCC/Clang-style spellings, used for every compiler other than MSVC.
#define HWY_RESTRICT __restrict__
#define HWY_INLINE inline __attribute__((always_inline))
#define HWY_NOINLINE __attribute__((noinline))
#define HWY_FLATTEN __attribute__((flatten))
#define HWY_NORETURN __attribute__((noreturn))
// `!!` normalizes the expression to 0/1 before passing it to the builtin.
#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#define HWY_PRAGMA(tokens) _Pragma(#tokens)
#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
// Accepts both an MSVC-style and a GCC-style argument; only `gcc` is used in
// this branch.
#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
// Encountered "attribute list cannot appear here" when using the C++17
// [[maybe_unused]], so only use the old style attribute for now.
#define HWY_MAYBE_UNUSED __attribute__((unused))
#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))

#endif  // !HWY_COMPILER_MSVC

//------------------------------------------------------------------------------
// Builtin/attributes

// Wrappers that evaluate to 0 when the compiler does not provide
// __has_builtin / __has_attribute, so they are always safe to use in #if.
#ifdef __has_builtin
#define HWY_HAS_BUILTIN(name) __has_builtin(name)
#else
#define HWY_HAS_BUILTIN(name) 0
#endif

#ifdef __has_attribute
#define HWY_HAS_ATTRIBUTE(name) __has_attribute(name)
#else
#define HWY_HAS_ATTRIBUTE(name) 0
#endif

// Enables error-checking of format strings.
// idx_fmt and idx_arg are forwarded unchanged to the __format__ attribute
// with the __printf__ archetype. Expands to nothing if the attribute is
// unavailable.
#if HWY_HAS_ATTRIBUTE(__format__)
#define HWY_FORMAT(idx_fmt, idx_arg) \
  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
#else
#define HWY_FORMAT(idx_fmt, idx_arg)
#endif

// Returns a void* pointer which the compiler then assumes is N-byte aligned.
// Example: float* HWY_RESTRICT aligned = (float*)HWY_ASSUME_ALIGNED(in, 32);
//
// The assignment semantics are required by GCC/Clang. ICC provides an in-place
// __assume_aligned, whereas MSVC's __assume appears unsuitable.
//
// If the builtin is unavailable, this evaluates to `ptr` unchanged (no
// alignment hint, and the type of `ptr` is preserved rather than void*).
#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
#else
#define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
#endif

// Clang and GCC require attributes on each function into which SIMD intrinsics
// are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
// automatic annotation via pragmas.
// HWY_PUSH_ATTRIBUTES(targets_str) begins a region in which the given target
// string is applied via pragma; HWY_POP_ATTRIBUTES ends it. Both expand to
// nothing for compilers that are neither Clang nor GCC.
#if HWY_COMPILER_CLANG
#define HWY_PUSH_ATTRIBUTES(targets_str)                                \
  HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
                                  apply_to = function))
#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
#elif HWY_COMPILER_GCC
#define HWY_PUSH_ATTRIBUTES(targets_str) \
  HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
#else
#define HWY_PUSH_ATTRIBUTES(targets_str)
#define HWY_POP_ATTRIBUTES
#endif

//------------------------------------------------------------------------------
// Detect architecture using predefined macros
//
// Each HWY_ARCH_* macro is always defined to either 1 or 0.

#if defined(__i386__) || defined(_M_IX86)
#define HWY_ARCH_X86_32 1
#else
#define HWY_ARCH_X86_32 0
#endif

#if defined(__x86_64__) || defined(_M_X64)
#define HWY_ARCH_X86_64 1
#else
#define HWY_ARCH_X86_64 0
#endif

#if HWY_ARCH_X86_32 && HWY_ARCH_X86_64
#error "Cannot have both x86-32 and x86-64"
#endif

// Set for either x86 variant.
#if HWY_ARCH_X86_32 || HWY_ARCH_X86_64
#define HWY_ARCH_X86 1
#else
#define HWY_ARCH_X86 0
#endif

#if defined(__powerpc64__) || defined(_M_PPC)
#define HWY_ARCH_PPC 1
#else
#define HWY_ARCH_PPC 0
#endif

#if defined(__ARM_ARCH_ISA_A64) || defined(__aarch64__) || defined(_M_ARM64)
#define HWY_ARCH_ARM_A64 1
#else
#define HWY_ARCH_ARM_A64 0
#endif

#if defined(__arm__) || defined(_M_ARM)
#define HWY_ARCH_ARM_V7 1
#else
#define HWY_ARCH_ARM_V7 0
#endif

#if HWY_ARCH_ARM_A64 && HWY_ARCH_ARM_V7
#error "Cannot have both A64 and V7"
#endif

// Set for either ARM variant.
#if HWY_ARCH_ARM_A64 || HWY_ARCH_ARM_V7
#define HWY_ARCH_ARM 1
#else
#define HWY_ARCH_ARM 0
#endif

#if defined(__EMSCRIPTEN__) || defined(__wasm__) || defined(__WASM__)
#define HWY_ARCH_WASM 1
#else
#define HWY_ARCH_WASM 0
#endif

#ifdef __riscv
#define HWY_ARCH_RVV 1
#else
#define HWY_ARCH_RVV 0
#endif

// It is an error to detect multiple architectures at the same time, but OK to
// detect none of the above.
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_WASM + \
     HWY_ARCH_RVV) > 1
#error "Must not detect more than one architecture"
#endif

//------------------------------------------------------------------------------
// Macros

// Prefix for function definitions: internal linkage plus the inline, flatten
// and maybe-unused annotations defined for the current compiler.
#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED

// Token pasting. The extra HWY_CONCAT_IMPL level ensures both arguments are
// macro-expanded before `##` is applied.
#define HWY_CONCAT_IMPL(a, b) a##b
#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)

// NOTE: each argument appears twice in the expansion, so arguments with side
// effects are evaluated more than once.
#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))

// Compile-time fence to prevent undesirable code reordering. On Clang x86, the
// typical asm volatile("" : : : "memory") has no effect, whereas atomic fence
// does, without generating code.
// On non-x86 targets, HWY_FENCE currently expands to nothing.
#if HWY_ARCH_X86
#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
#else
// TODO(janwas): investigate alternatives. On ARM, the above generates barriers.
#define HWY_FENCE
#endif

// 4 instances of a given literal value, useful as input to LoadDup128.
#define HWY_REP4(literal) literal, literal, literal, literal

// Calls ::hwy::Abort with the current file and line, followed by the format
// string and any additional arguments. `##__VA_ARGS__` drops the preceding
// comma when no extra arguments are given (a compiler extension).
#define HWY_ABORT(format, ...) \
  ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)

// Always enabled.
291 #define HWY_ASSERT(condition) \ 292 do { \ 293 if (!(condition)) { \ 294 HWY_ABORT("Assert %s", #condition); \ 295 } \ 296 } while (0) 297 298 // Only for "debug" builds 299 #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || \ 300 defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) 301 #define HWY_DASSERT(condition) HWY_ASSERT(condition) 302 #else 303 #define HWY_DASSERT(condition) \ 304 do { \ 305 } while (0) 306 #endif 307 308 309 namespace hwy { 310 311 //------------------------------------------------------------------------------ 312 // Alignment 313 314 // Not guaranteed to be an upper bound, but the alignment established by 315 // aligned_allocator is HWY_MAX(HWY_ALIGNMENT, kMaxVectorSize). 316 #if HWY_ARCH_X86 317 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64; // AVX-512 318 #define HWY_ALIGN_MAX alignas(64) 319 #elif HWY_ARCH_RVV 320 // Not actually an upper bound on the size, but this value prevents crossing a 321 // 4K boundary (relevant on Andes). 322 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096; 323 #define HWY_ALIGN_MAX alignas(8) // only elements need be aligned 324 #else 325 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16; 326 #define HWY_ALIGN_MAX alignas(16) 327 #endif 328 329 //------------------------------------------------------------------------------ 330 // Lane types 331 332 // Match [u]int##_t naming scheme so rvv-inl.h macros can obtain the type name 333 // by concatenating base type and bits. 334 335 // RVV already has a builtin type and the GCC intrinsics require it. 336 #if HWY_ARCH_RVV && HWY_COMPILER_GCC 337 #define HWY_NATIVE_FLOAT16 1 338 #else 339 #define HWY_NATIVE_FLOAT16 0 340 #endif 341 342 #if HWY_NATIVE_FLOAT16 343 using float16_t = __fp16; 344 // Clang does not allow __fp16 arguments, but scalar.h requires LaneType 345 // arguments, so use a wrapper. 346 // TODO(janwas): replace with _Float16 when that is supported? 
347 #else 348 #pragma pack(push, 1) 349 struct float16_t { 350 uint16_t bits; 351 }; 352 #pragma pack(pop) 353 #endif 354 355 using float32_t = float; 356 using float64_t = double; 357 358 //------------------------------------------------------------------------------ 359 // Controlling overload resolution (SFINAE) 360 361 template <bool Condition, class T> 362 struct EnableIfT {}; 363 template <class T> 364 struct EnableIfT<true, T> { 365 using type = T; 366 }; 367 368 template <bool Condition, class T = void> 369 using EnableIf = typename EnableIfT<Condition, T>::type; 370 371 // Insert into template/function arguments to enable this overload only for 372 // vectors of AT MOST this many bits. 373 // 374 // Note that enabling for exactly 128 bits is unnecessary because a function can 375 // simply be overloaded with Vec128<T> and Full128<T> descriptor. Enabling for 376 // other sizes (e.g. 64 bit) can be achieved with Simd<T, 8 / sizeof(T)>. 377 #define HWY_IF_LE128(T, N) hwy::EnableIf<N * sizeof(T) <= 16>* = nullptr 378 #define HWY_IF_LE64(T, N) hwy::EnableIf<N * sizeof(T) <= 8>* = nullptr 379 #define HWY_IF_LE32(T, N) hwy::EnableIf<N * sizeof(T) <= 4>* = nullptr 380 381 #define HWY_IF_UNSIGNED(T) hwy::EnableIf<!IsSigned<T>()>* = nullptr 382 #define HWY_IF_SIGNED(T) \ 383 hwy::EnableIf<IsSigned<T>() && !IsFloat<T>()>* = nullptr 384 #define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr 385 #define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr 386 387 #define HWY_IF_LANE_SIZE(T, bytes) \ 388 hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr 389 #define HWY_IF_NOT_LANE_SIZE(T, bytes) \ 390 hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr 391 392 // Empty struct used as a size tag type. 
template <size_t N>
struct SizeTag {};

//------------------------------------------------------------------------------
// Type traits

// True if T can represent a fractional value: 1.25 only remains distinct from
// 1 after conversion to T if T is a floating-point type.
template <class T>
constexpr bool IsFloat() {
  return static_cast<T>(1) != static_cast<T>(1.25);
}

// True if converting -1 to T yields a value below zero. Note that this is
// also true for floating-point types.
template <class T>
constexpr bool IsSigned() {
  return static_cast<T>(-1) < static_cast<T>(0);
}

// Largest/smallest representable integer values.
template <class T>
constexpr T LimitsMax() {
  static_assert(!IsFloat<T>(), "Only for integer types");
  // Unsigned: all bits set. Signed: all bits except the sign bit.
  return !IsSigned<T>()
             ? static_cast<T>(~0ull)
             : static_cast<T>((1ULL << (sizeof(T) * 8 - 1)) - 1);
}
template <class T>
constexpr T LimitsMin() {
  static_assert(!IsFloat<T>(), "Only for integer types");
  // Unsigned: zero. Signed: one below the negated maximum.
  return !IsSigned<T>() ? static_cast<T>(0)
                        : static_cast<T>(-1) - LimitsMax<T>();
}

// Largest/smallest representable value (integer or float). This naming avoids
// confusion with numeric_limits<float>::min() (the smallest positive value).
template <class T>
constexpr T LowestValue() {
  return LimitsMin<T>();
}
template <>
constexpr float LowestValue<float>() {
  return -FLT_MAX;
}
template <>
constexpr double LowestValue<double>() {
  return -DBL_MAX;
}

template <class T>
constexpr T HighestValue() {
  return LimitsMax<T>();
}
template <>
constexpr float HighestValue<float>() {
  return FLT_MAX;
}
template <>
constexpr double HighestValue<double>() {
  return DBL_MAX;
}

// Returns bitmask of the exponent field in IEEE binary32/64.
// Only the uint32_t and uint64_t specializations may be instantiated; the
// primary template fails to compile.
template <class T>
constexpr T ExponentMask() {
  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
  return 0;
}
template <>
constexpr uint32_t ExponentMask<uint32_t>() {
  return 0x7F800000;
}
template <>
constexpr uint64_t ExponentMask<uint64_t>() {
  return 0x7FF0000000000000ULL;
}

// Returns 1 << mantissa_bits as a floating-point number.
All integers whose 466 // absolute value are less than this can be represented exactly. 467 template <typename T> 468 constexpr T MantissaEnd() { 469 static_assert(sizeof(T) == 0, "Only instantiate the specializations"); 470 return 0; 471 } 472 template <> 473 constexpr float MantissaEnd<float>() { 474 return 8388608.0f; // 1 << 23 475 } 476 template <> 477 constexpr double MantissaEnd<double>() { 478 // floating point literal with p52 requires C++17. 479 return 4503599627370496.0; // 1 << 52 480 } 481 482 //------------------------------------------------------------------------------ 483 // Type relations 484 485 namespace detail { 486 487 template <typename T> 488 struct Relations; 489 template <> 490 struct Relations<uint8_t> { 491 using Unsigned = uint8_t; 492 using Signed = int8_t; 493 using Wide = uint16_t; 494 }; 495 template <> 496 struct Relations<int8_t> { 497 using Unsigned = uint8_t; 498 using Signed = int8_t; 499 using Wide = int16_t; 500 }; 501 template <> 502 struct Relations<uint16_t> { 503 using Unsigned = uint16_t; 504 using Signed = int16_t; 505 using Wide = uint32_t; 506 using Narrow = uint8_t; 507 }; 508 template <> 509 struct Relations<int16_t> { 510 using Unsigned = uint16_t; 511 using Signed = int16_t; 512 using Wide = int32_t; 513 using Narrow = int8_t; 514 }; 515 template <> 516 struct Relations<uint32_t> { 517 using Unsigned = uint32_t; 518 using Signed = int32_t; 519 using Float = float; 520 using Wide = uint64_t; 521 using Narrow = uint16_t; 522 }; 523 template <> 524 struct Relations<int32_t> { 525 using Unsigned = uint32_t; 526 using Signed = int32_t; 527 using Float = float; 528 using Wide = int64_t; 529 using Narrow = int16_t; 530 }; 531 template <> 532 struct Relations<uint64_t> { 533 using Unsigned = uint64_t; 534 using Signed = int64_t; 535 using Float = double; 536 using Narrow = uint32_t; 537 }; 538 template <> 539 struct Relations<int64_t> { 540 using Unsigned = uint64_t; 541 using Signed = int64_t; 542 using Float = double; 
543 using Narrow = int32_t; 544 }; 545 template <> 546 struct Relations<float16_t> { 547 using Unsigned = uint16_t; 548 using Signed = int16_t; 549 using Float = float16_t; 550 using Wide = float; 551 }; 552 template <> 553 struct Relations<float> { 554 using Unsigned = uint32_t; 555 using Signed = int32_t; 556 using Float = float; 557 using Wide = double; 558 }; 559 template <> 560 struct Relations<double> { 561 using Unsigned = uint64_t; 562 using Signed = int64_t; 563 using Float = double; 564 using Narrow = float; 565 }; 566 567 } // namespace detail 568 569 // Aliases for types of a different category, but the same size. 570 template <typename T> 571 using MakeUnsigned = typename detail::Relations<T>::Unsigned; 572 template <typename T> 573 using MakeSigned = typename detail::Relations<T>::Signed; 574 template <typename T> 575 using MakeFloat = typename detail::Relations<T>::Float; 576 577 // Aliases for types of the same category, but different size. 578 template <typename T> 579 using MakeWide = typename detail::Relations<T>::Wide; 580 template <typename T> 581 using MakeNarrow = typename detail::Relations<T>::Narrow; 582 583 //------------------------------------------------------------------------------ 584 // Helper functions 585 586 template <typename T1, typename T2> 587 constexpr inline T1 DivCeil(T1 a, T2 b) { 588 return (a + b - 1) / b; 589 } 590 591 // Works for any `align`; if a power of two, compiler emits ADD+AND. 592 constexpr inline size_t RoundUpTo(size_t what, size_t align) { 593 return DivCeil(what, align) * align; 594 } 595 596 // Undefined results for x == 0. 
597 HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) { 598 #if HWY_COMPILER_MSVC 599 unsigned long index; // NOLINT 600 _BitScanForward(&index, x); 601 return index; 602 #else // HWY_COMPILER_MSVC 603 return static_cast<size_t>(__builtin_ctz(x)); 604 #endif // HWY_COMPILER_MSVC 605 } 606 607 HWY_API size_t PopCount(uint64_t x) { 608 #if HWY_COMPILER_CLANG || HWY_COMPILER_GCC 609 return static_cast<size_t>(__builtin_popcountll(x)); 610 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 611 return _mm_popcnt_u64(x); 612 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 613 return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32)); 614 #else 615 x -= ((x >> 1) & 0x55555555U); 616 x = (((x >> 2) & 0x33333333U) + (x & 0x33333333U)); 617 x = (((x >> 4) + x) & 0x0F0F0F0FU); 618 x += (x >> 8); 619 x += (x >> 16); 620 x += (x >> 32); 621 x = x & 0x0000007FU; 622 return (unsigned int)x; 623 #endif 624 } 625 626 // The source/destination must not overlap/alias. 627 template <size_t kBytes, typename From, typename To> 628 HWY_API void CopyBytes(const From* from, To* to) { 629 #if HWY_COMPILER_MSVC 630 const uint8_t* HWY_RESTRICT from_bytes = 631 reinterpret_cast<const uint8_t*>(from); 632 uint8_t* HWY_RESTRICT to_bytes = reinterpret_cast<uint8_t*>(to); 633 for (size_t i = 0; i < kBytes; ++i) { 634 to_bytes[i] = from_bytes[i]; 635 } 636 #else 637 // Avoids horrible codegen on Clang (series of PINSRB) 638 __builtin_memcpy(to, from, kBytes); 639 #endif 640 } 641 642 HWY_NORETURN void HWY_FORMAT(3, 4) 643 Abort(const char* file, int line, const char* format, ...); 644 645 } // namespace hwy 646 647 #endif // HIGHWAY_HWY_BASE_H_ 648