1 /* Copyright (C) 2014-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 
26 #endif 27 28 #ifndef _AVX512VLDQINTRIN_H_INCLUDED 29 #define _AVX512VLDQINTRIN_H_INCLUDED 30 31 #if !defined(__AVX512VL__) || !defined(__AVX512DQ__) 32 #pragma GCC push_options 33 #pragma GCC target("avx512vl,avx512dq") 34 #define __DISABLE_AVX512VLDQ__ 35 #endif /* __AVX512VLDQ__ */ 36 37 extern __inline __m256i 38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39 _mm256_cvttpd_epi64 (__m256d __A) 40 { 41 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 42 (__v4di) 43 _mm256_setzero_si256 (), 44 (__mmask8) -1); 45 } 46 47 extern __inline __m256i 48 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 49 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 50 { 51 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 52 (__v4di) __W, 53 (__mmask8) __U); 54 } 55 56 extern __inline __m256i 57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 58 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) 59 { 60 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 61 (__v4di) 62 _mm256_setzero_si256 (), 63 (__mmask8) __U); 64 } 65 66 extern __inline __m128i 67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 68 _mm_cvttpd_epi64 (__m128d __A) 69 { 70 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 71 (__v2di) 72 _mm_setzero_si128 (), 73 (__mmask8) -1); 74 } 75 76 extern __inline __m128i 77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 78 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) 79 { 80 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 81 (__v2di) __W, 82 (__mmask8) __U); 83 } 84 85 extern __inline __m128i 86 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 87 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) 88 { 89 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 90 (__v2di) 91 _mm_setzero_si128 (), 92 (__mmask8) __U); 93 } 94 95 extern 
__inline __m256i 96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 97 _mm256_cvttpd_epu64 (__m256d __A) 98 { 99 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 100 (__v4di) 101 _mm256_setzero_si256 (), 102 (__mmask8) -1); 103 } 104 105 extern __inline __m256i 106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 107 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 108 { 109 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 110 (__v4di) __W, 111 (__mmask8) __U); 112 } 113 114 extern __inline __m256i 115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 116 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) 117 { 118 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 119 (__v4di) 120 _mm256_setzero_si256 (), 121 (__mmask8) __U); 122 } 123 124 extern __inline __m128i 125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 126 _mm_cvttpd_epu64 (__m128d __A) 127 { 128 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 129 (__v2di) 130 _mm_setzero_si128 (), 131 (__mmask8) -1); 132 } 133 134 extern __inline __m128i 135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 137 { 138 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 139 (__v2di) __W, 140 (__mmask8) __U); 141 } 142 143 extern __inline __m128i 144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 145 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) 146 { 147 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 148 (__v2di) 149 _mm_setzero_si128 (), 150 (__mmask8) __U); 151 } 152 153 extern __inline __m256i 154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 155 _mm256_cvtpd_epi64 (__m256d __A) 156 { 157 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 158 (__v4di) 159 _mm256_setzero_si256 (), 
160 (__mmask8) -1); 161 } 162 163 extern __inline __m256i 164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 165 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 166 { 167 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 168 (__v4di) __W, 169 (__mmask8) __U); 170 } 171 172 extern __inline __m256i 173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 174 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) 175 { 176 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 177 (__v4di) 178 _mm256_setzero_si256 (), 179 (__mmask8) __U); 180 } 181 182 extern __inline __m128i 183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 184 _mm_cvtpd_epi64 (__m128d __A) 185 { 186 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 187 (__v2di) 188 _mm_setzero_si128 (), 189 (__mmask8) -1); 190 } 191 192 extern __inline __m128i 193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 194 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) 195 { 196 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 197 (__v2di) __W, 198 (__mmask8) __U); 199 } 200 201 extern __inline __m128i 202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 203 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) 204 { 205 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 206 (__v2di) 207 _mm_setzero_si128 (), 208 (__mmask8) __U); 209 } 210 211 extern __inline __m256i 212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 213 _mm256_cvtpd_epu64 (__m256d __A) 214 { 215 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 216 (__v4di) 217 _mm256_setzero_si256 (), 218 (__mmask8) -1); 219 } 220 221 extern __inline __m256i 222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 223 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 224 { 225 return (__m256i) 
__builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 226 (__v4di) __W, 227 (__mmask8) __U); 228 } 229 230 extern __inline __m256i 231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 232 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) 233 { 234 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 235 (__v4di) 236 _mm256_setzero_si256 (), 237 (__mmask8) __U); 238 } 239 240 extern __inline __m128i 241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 242 _mm_cvtpd_epu64 (__m128d __A) 243 { 244 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 245 (__v2di) 246 _mm_setzero_si128 (), 247 (__mmask8) -1); 248 } 249 250 extern __inline __m128i 251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 252 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 253 { 254 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 255 (__v2di) __W, 256 (__mmask8) __U); 257 } 258 259 extern __inline __m128i 260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) 262 { 263 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 264 (__v2di) 265 _mm_setzero_si128 (), 266 (__mmask8) __U); 267 } 268 269 extern __inline __m256i 270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 271 _mm256_cvttps_epi64 (__m128 __A) 272 { 273 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 274 (__v4di) 275 _mm256_setzero_si256 (), 276 (__mmask8) -1); 277 } 278 279 extern __inline __m256i 280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 281 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) 282 { 283 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 284 (__v4di) __W, 285 (__mmask8) __U); 286 } 287 288 extern __inline __m256i 289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 290 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 
__A) 291 { 292 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 293 (__v4di) 294 _mm256_setzero_si256 (), 295 (__mmask8) __U); 296 } 297 298 extern __inline __m128i 299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 300 _mm_cvttps_epi64 (__m128 __A) 301 { 302 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 303 (__v2di) 304 _mm_setzero_si128 (), 305 (__mmask8) -1); 306 } 307 308 extern __inline __m128i 309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 310 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 311 { 312 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 313 (__v2di) __W, 314 (__mmask8) __U); 315 } 316 317 extern __inline __m128i 318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 319 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) 320 { 321 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 322 (__v2di) 323 _mm_setzero_si128 (), 324 (__mmask8) __U); 325 } 326 327 extern __inline __m256i 328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 329 _mm256_cvttps_epu64 (__m128 __A) 330 { 331 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 332 (__v4di) 333 _mm256_setzero_si256 (), 334 (__mmask8) -1); 335 } 336 337 extern __inline __m256i 338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 339 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 340 { 341 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 342 (__v4di) __W, 343 (__mmask8) __U); 344 } 345 346 extern __inline __m256i 347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 348 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 349 { 350 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 351 (__v4di) 352 _mm256_setzero_si256 (), 353 (__mmask8) __U); 354 } 355 356 extern __inline __m128i 357 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 358 _mm_cvttps_epu64 (__m128 __A) 359 { 360 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 361 (__v2di) 362 _mm_setzero_si128 (), 363 (__mmask8) -1); 364 } 365 366 extern __inline __m128i 367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 368 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) 369 { 370 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 371 (__v2di) __W, 372 (__mmask8) __U); 373 } 374 375 extern __inline __m128i 376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 377 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 378 { 379 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 380 (__v2di) 381 _mm_setzero_si128 (), 382 (__mmask8) __U); 383 } 384 385 extern __inline __m256d 386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 387 _mm256_broadcast_f64x2 (__m128d __A) 388 { 389 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 390 __A, 391 (__v4df)_mm256_undefined_pd(), 392 (__mmask8) -1); 393 } 394 395 extern __inline __m256d 396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 397 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) 398 { 399 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 400 __A, 401 (__v4df) 402 __O, __M); 403 } 404 405 extern __inline __m256d 406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 407 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 408 { 409 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 410 __A, 411 (__v4df) 412 _mm256_setzero_ps (), 413 __M); 414 } 415 416 extern __inline __m256i 417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 418 _mm256_broadcast_i64x2 (__m128i __A) 419 { 420 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 421 __A, 422 (__v4di)_mm256_undefined_si256(), 423 (__mmask8) -1); 424 } 425 426 extern __inline 
__m256i 427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 428 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) 429 { 430 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 431 __A, 432 (__v4di) 433 __O, __M); 434 } 435 436 extern __inline __m256i 437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 438 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 439 { 440 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 441 __A, 442 (__v4di) 443 _mm256_setzero_si256 (), 444 __M); 445 } 446 447 extern __inline __m256 448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 449 _mm256_broadcast_f32x2 (__m128 __A) 450 { 451 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 452 (__v8sf)_mm256_undefined_ps(), 453 (__mmask8) -1); 454 } 455 456 extern __inline __m256 457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 458 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 459 { 460 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 461 (__v8sf) __O, 462 __M); 463 } 464 465 extern __inline __m256 466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 467 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 468 { 469 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 470 (__v8sf) 471 _mm256_setzero_ps (), 472 __M); 473 } 474 475 extern __inline __m256i 476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 477 _mm256_broadcast_i32x2 (__m128i __A) 478 { 479 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 480 __A, 481 (__v8si)_mm256_undefined_si256(), 482 (__mmask8) -1); 483 } 484 485 extern __inline __m256i 486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 487 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 488 { 489 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 
490 __A, 491 (__v8si) 492 __O, __M); 493 } 494 495 extern __inline __m256i 496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 497 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 498 { 499 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 500 __A, 501 (__v8si) 502 _mm256_setzero_si256 (), 503 __M); 504 } 505 506 extern __inline __m128i 507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 508 _mm_broadcast_i32x2 (__m128i __A) 509 { 510 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 511 __A, 512 (__v4si)_mm_undefined_si128(), 513 (__mmask8) -1); 514 } 515 516 extern __inline __m128i 517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 518 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 519 { 520 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 521 __A, 522 (__v4si) 523 __O, __M); 524 } 525 526 extern __inline __m128i 527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 528 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 529 { 530 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 531 __A, 532 (__v4si) 533 _mm_setzero_si128 (), 534 __M); 535 } 536 537 extern __inline __m256i 538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 539 _mm256_mullo_epi64 (__m256i __A, __m256i __B) 540 { 541 return (__m256i) ((__v4du) __A * (__v4du) __B); 542 } 543 544 extern __inline __m256i 545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 546 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 547 __m256i __B) 548 { 549 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 550 (__v4di) __B, 551 (__v4di) __W, 552 (__mmask8) __U); 553 } 554 555 extern __inline __m256i 556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 557 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 558 { 559 return (__m256i) 
__builtin_ia32_pmullq256_mask ((__v4di) __A, 560 (__v4di) __B, 561 (__v4di) 562 _mm256_setzero_si256 (), 563 (__mmask8) __U); 564 } 565 566 extern __inline __m128i 567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 568 _mm_mullo_epi64 (__m128i __A, __m128i __B) 569 { 570 return (__m128i) ((__v2du) __A * (__v2du) __B); 571 } 572 573 extern __inline __m128i 574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 575 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 576 __m128i __B) 577 { 578 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 579 (__v2di) __B, 580 (__v2di) __W, 581 (__mmask8) __U); 582 } 583 584 extern __inline __m128i 585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 586 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 587 { 588 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 589 (__v2di) __B, 590 (__v2di) 591 _mm_setzero_si128 (), 592 (__mmask8) __U); 593 } 594 595 extern __inline __m256d 596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 597 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, 598 __m256d __B) 599 { 600 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 601 (__v4df) __B, 602 (__v4df) __W, 603 (__mmask8) __U); 604 } 605 606 extern __inline __m256d 607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 608 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) 609 { 610 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 611 (__v4df) __B, 612 (__v4df) 613 _mm256_setzero_pd (), 614 (__mmask8) __U); 615 } 616 617 extern __inline __m128d 618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 619 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, 620 __m128d __B) 621 { 622 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 623 (__v2df) __B, 624 (__v2df) __W, 625 (__mmask8) __U); 626 } 627 628 extern __inline __m128d 
629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 630 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) 631 { 632 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 633 (__v2df) __B, 634 (__v2df) 635 _mm_setzero_pd (), 636 (__mmask8) __U); 637 } 638 639 extern __inline __m256 640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 641 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, 642 __m256 __B) 643 { 644 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 645 (__v8sf) __B, 646 (__v8sf) __W, 647 (__mmask8) __U); 648 } 649 650 extern __inline __m256 651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 652 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) 653 { 654 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 655 (__v8sf) __B, 656 (__v8sf) 657 _mm256_setzero_ps (), 658 (__mmask8) __U); 659 } 660 661 extern __inline __m128 662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 663 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 664 { 665 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 666 (__v4sf) __B, 667 (__v4sf) __W, 668 (__mmask8) __U); 669 } 670 671 extern __inline __m128 672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 673 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) 674 { 675 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 676 (__v4sf) __B, 677 (__v4sf) 678 _mm_setzero_ps (), 679 (__mmask8) __U); 680 } 681 682 extern __inline __m256i 683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 684 _mm256_cvtps_epi64 (__m128 __A) 685 { 686 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 687 (__v4di) 688 _mm256_setzero_si256 (), 689 (__mmask8) -1); 690 } 691 692 extern __inline __m256i 693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 694 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 
__U, __m128 __A) 695 { 696 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 697 (__v4di) __W, 698 (__mmask8) __U); 699 } 700 701 extern __inline __m256i 702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 703 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 704 { 705 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 706 (__v4di) 707 _mm256_setzero_si256 (), 708 (__mmask8) __U); 709 } 710 711 extern __inline __m128i 712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 713 _mm_cvtps_epi64 (__m128 __A) 714 { 715 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 716 (__v2di) 717 _mm_setzero_si128 (), 718 (__mmask8) -1); 719 } 720 721 extern __inline __m128i 722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 723 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 724 { 725 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 726 (__v2di) __W, 727 (__mmask8) __U); 728 } 729 730 extern __inline __m128i 731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 732 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 733 { 734 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 735 (__v2di) 736 _mm_setzero_si128 (), 737 (__mmask8) __U); 738 } 739 740 extern __inline __m256i 741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 742 _mm256_cvtps_epu64 (__m128 __A) 743 { 744 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 745 (__v4di) 746 _mm256_setzero_si256 (), 747 (__mmask8) -1); 748 } 749 750 extern __inline __m256i 751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 752 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 753 { 754 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 755 (__v4di) __W, 756 (__mmask8) __U); 757 } 758 759 extern __inline __m256i 760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 761 
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) 762 { 763 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 764 (__v4di) 765 _mm256_setzero_si256 (), 766 (__mmask8) __U); 767 } 768 769 extern __inline __m128i 770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 771 _mm_cvtps_epu64 (__m128 __A) 772 { 773 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 774 (__v2di) 775 _mm_setzero_si128 (), 776 (__mmask8) -1); 777 } 778 779 extern __inline __m128i 780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 781 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) 782 { 783 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 784 (__v2di) __W, 785 (__mmask8) __U); 786 } 787 788 extern __inline __m128i 789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 790 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) 791 { 792 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 793 (__v2di) 794 _mm_setzero_si128 (), 795 (__mmask8) __U); 796 } 797 798 extern __inline __m128 799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 800 _mm256_cvtepi64_ps (__m256i __A) 801 { 802 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 803 (__v4sf) 804 _mm_setzero_ps (), 805 (__mmask8) -1); 806 } 807 808 extern __inline __m128 809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 810 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) 811 { 812 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 813 (__v4sf) __W, 814 (__mmask8) __U); 815 } 816 817 extern __inline __m128 818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 819 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) 820 { 821 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 822 (__v4sf) 823 _mm_setzero_ps (), 824 (__mmask8) __U); 825 } 826 827 extern __inline __m128 828 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 829 _mm_cvtepi64_ps (__m128i __A) 830 { 831 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 832 (__v4sf) 833 _mm_setzero_ps (), 834 (__mmask8) -1); 835 } 836 837 extern __inline __m128 838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 839 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) 840 { 841 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 842 (__v4sf) __W, 843 (__mmask8) __U); 844 } 845 846 extern __inline __m128 847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 848 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) 849 { 850 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 851 (__v4sf) 852 _mm_setzero_ps (), 853 (__mmask8) __U); 854 } 855 856 extern __inline __m128 857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 858 _mm256_cvtepu64_ps (__m256i __A) 859 { 860 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 861 (__v4sf) 862 _mm_setzero_ps (), 863 (__mmask8) -1); 864 } 865 866 extern __inline __m128 867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 868 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) 869 { 870 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 871 (__v4sf) __W, 872 (__mmask8) __U); 873 } 874 875 extern __inline __m128 876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 877 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) 878 { 879 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 880 (__v4sf) 881 _mm_setzero_ps (), 882 (__mmask8) __U); 883 } 884 885 extern __inline __m128 886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 887 _mm_cvtepu64_ps (__m128i __A) 888 { 889 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 890 (__v4sf) 891 _mm_setzero_ps (), 892 (__mmask8) -1); 893 } 894 895 extern __inline __m128 896 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 897 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) 898 { 899 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 900 (__v4sf) __W, 901 (__mmask8) __U); 902 } 903 904 extern __inline __m128 905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 906 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) 907 { 908 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 909 (__v4sf) 910 _mm_setzero_ps (), 911 (__mmask8) __U); 912 } 913 914 extern __inline __m256d 915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 916 _mm256_cvtepi64_pd (__m256i __A) 917 { 918 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 919 (__v4df) 920 _mm256_setzero_pd (), 921 (__mmask8) -1); 922 } 923 924 extern __inline __m256d 925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 926 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) 927 { 928 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 929 (__v4df) __W, 930 (__mmask8) __U); 931 } 932 933 extern __inline __m256d 934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 935 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) 936 { 937 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 938 (__v4df) 939 _mm256_setzero_pd (), 940 (__mmask8) __U); 941 } 942 943 extern __inline __m128d 944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 945 _mm_cvtepi64_pd (__m128i __A) 946 { 947 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 948 (__v2df) 949 _mm_setzero_pd (), 950 (__mmask8) -1); 951 } 952 953 extern __inline __m128d 954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 955 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) 956 { 957 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 958 (__v2df) __W, 959 (__mmask8) __U); 960 } 961 962 extern __inline __m128d 963 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 964 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) 965 { 966 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 967 (__v2df) 968 _mm_setzero_pd (), 969 (__mmask8) __U); 970 } 971 972 extern __inline __m256d 973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 974 _mm256_cvtepu64_pd (__m256i __A) 975 { 976 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 977 (__v4df) 978 _mm256_setzero_pd (), 979 (__mmask8) -1); 980 } 981 982 extern __inline __m256d 983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 984 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) 985 { 986 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 987 (__v4df) __W, 988 (__mmask8) __U); 989 } 990 991 extern __inline __m256d 992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 993 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) 994 { 995 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 996 (__v4df) 997 _mm256_setzero_pd (), 998 (__mmask8) __U); 999 } 1000 1001 extern __inline __m256d 1002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1003 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, 1004 __m256d __B) 1005 { 1006 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 1007 (__v4df) __B, 1008 (__v4df) __W, 1009 (__mmask8) __U); 1010 } 1011 1012 extern __inline __m256d 1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1014 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) 1015 { 1016 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 1017 (__v4df) __B, 1018 (__v4df) 1019 _mm256_setzero_pd (), 1020 (__mmask8) __U); 1021 } 1022 1023 extern __inline __m128d 1024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1025 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1026 { 1027 return (__m128d) 
__builtin_ia32_andpd128_mask ((__v2df) __A, 1028 (__v2df) __B, 1029 (__v2df) __W, 1030 (__mmask8) __U); 1031 } 1032 1033 extern __inline __m128d 1034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1035 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) 1036 { 1037 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 1038 (__v2df) __B, 1039 (__v2df) 1040 _mm_setzero_pd (), 1041 (__mmask8) __U); 1042 } 1043 1044 extern __inline __m256 1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1046 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1047 { 1048 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 1049 (__v8sf) __B, 1050 (__v8sf) __W, 1051 (__mmask8) __U); 1052 } 1053 1054 extern __inline __m256 1055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1056 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) 1057 { 1058 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 1059 (__v8sf) __B, 1060 (__v8sf) 1061 _mm256_setzero_ps (), 1062 (__mmask8) __U); 1063 } 1064 1065 extern __inline __m128 1066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1067 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1068 { 1069 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 1070 (__v4sf) __B, 1071 (__v4sf) __W, 1072 (__mmask8) __U); 1073 } 1074 1075 extern __inline __m128 1076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1077 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) 1078 { 1079 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 1080 (__v4sf) __B, 1081 (__v4sf) 1082 _mm_setzero_ps (), 1083 (__mmask8) __U); 1084 } 1085 1086 extern __inline __m128d 1087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1088 _mm_cvtepu64_pd (__m128i __A) 1089 { 1090 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1091 (__v2df) 1092 _mm_setzero_pd (), 1093 
(__mmask8) -1); 1094 } 1095 1096 extern __inline __m128d 1097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1098 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) 1099 { 1100 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1101 (__v2df) __W, 1102 (__mmask8) __U); 1103 } 1104 1105 extern __inline __m128d 1106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1107 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) 1108 { 1109 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1110 (__v2df) 1111 _mm_setzero_pd (), 1112 (__mmask8) __U); 1113 } 1114 1115 extern __inline __m256d 1116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1117 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 1118 __m256d __B) 1119 { 1120 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 1121 (__v4df) __B, 1122 (__v4df) __W, 1123 (__mmask8) __U); 1124 } 1125 1126 extern __inline __m256d 1127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1128 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) 1129 { 1130 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 1131 (__v4df) __B, 1132 (__v4df) 1133 _mm256_setzero_pd (), 1134 (__mmask8) __U); 1135 } 1136 1137 extern __inline __m128d 1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1139 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1140 { 1141 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 1142 (__v2df) __B, 1143 (__v2df) __W, 1144 (__mmask8) __U); 1145 } 1146 1147 extern __inline __m128d 1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1149 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) 1150 { 1151 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 1152 (__v2df) __B, 1153 (__v2df) 1154 _mm_setzero_pd (), 1155 (__mmask8) __U); 1156 } 1157 1158 extern __inline __m256 1159 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 1160 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1161 { 1162 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 1163 (__v8sf) __B, 1164 (__v8sf) __W, 1165 (__mmask8) __U); 1166 } 1167 1168 extern __inline __m256 1169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1170 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) 1171 { 1172 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 1173 (__v8sf) __B, 1174 (__v8sf) 1175 _mm256_setzero_ps (), 1176 (__mmask8) __U); 1177 } 1178 1179 extern __inline __m128 1180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1181 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1182 { 1183 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 1184 (__v4sf) __B, 1185 (__v4sf) __W, 1186 (__mmask8) __U); 1187 } 1188 1189 extern __inline __m128 1190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1191 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) 1192 { 1193 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 1194 (__v4sf) __B, 1195 (__v4sf) 1196 _mm_setzero_ps (), 1197 (__mmask8) __U); 1198 } 1199 1200 extern __inline __m256d 1201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1202 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 1203 { 1204 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 1205 (__v4df) __B, 1206 (__v4df) __W, 1207 (__mmask8) __U); 1208 } 1209 1210 extern __inline __m256d 1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1212 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) 1213 { 1214 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 1215 (__v4df) __B, 1216 (__v4df) 1217 _mm256_setzero_pd (), 1218 (__mmask8) __U); 1219 } 1220 1221 extern __inline __m128d 1222 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 1223 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1224 { 1225 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 1226 (__v2df) __B, 1227 (__v2df) __W, 1228 (__mmask8) __U); 1229 } 1230 1231 extern __inline __m128d 1232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1233 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) 1234 { 1235 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 1236 (__v2df) __B, 1237 (__v2df) 1238 _mm_setzero_pd (), 1239 (__mmask8) __U); 1240 } 1241 1242 extern __inline __m256 1243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1244 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1245 { 1246 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 1247 (__v8sf) __B, 1248 (__v8sf) __W, 1249 (__mmask8) __U); 1250 } 1251 1252 extern __inline __m256 1253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1254 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) 1255 { 1256 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 1257 (__v8sf) __B, 1258 (__v8sf) 1259 _mm256_setzero_ps (), 1260 (__mmask8) __U); 1261 } 1262 1263 extern __inline __m128 1264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1265 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1266 { 1267 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 1268 (__v4sf) __B, 1269 (__v4sf) __W, 1270 (__mmask8) __U); 1271 } 1272 1273 extern __inline __m128 1274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1275 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) 1276 { 1277 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 1278 (__v4sf) __B, 1279 (__v4sf) 1280 _mm_setzero_ps (), 1281 (__mmask8) __U); 1282 } 1283 1284 extern __inline __m128i 1285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1286 _mm_movm_epi32 (__mmask8 __A) 1287 { 1288 
return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 1289 } 1290 1291 extern __inline __m256i 1292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1293 _mm256_movm_epi32 (__mmask8 __A) 1294 { 1295 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 1296 } 1297 1298 extern __inline __m128i 1299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1300 _mm_movm_epi64 (__mmask8 __A) 1301 { 1302 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 1303 } 1304 1305 extern __inline __m256i 1306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1307 _mm256_movm_epi64 (__mmask8 __A) 1308 { 1309 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 1310 } 1311 1312 extern __inline __mmask8 1313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1314 _mm_movepi32_mask (__m128i __A) 1315 { 1316 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 1317 } 1318 1319 extern __inline __mmask8 1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1321 _mm256_movepi32_mask (__m256i __A) 1322 { 1323 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 1324 } 1325 1326 extern __inline __mmask8 1327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1328 _mm_movepi64_mask (__m128i __A) 1329 { 1330 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 1331 } 1332 1333 extern __inline __mmask8 1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1335 _mm256_movepi64_mask (__m256i __A) 1336 { 1337 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 1338 } 1339 1340 #ifdef __OPTIMIZE__ 1341 extern __inline __m128d 1342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1343 _mm256_extractf64x2_pd (__m256d __A, const int __imm) 1344 { 1345 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1346 __imm, 1347 (__v2df) 1348 _mm_setzero_pd (), 1349 (__mmask8) -1); 1350 } 1351 1352 extern __inline __m128d 
1353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1354 _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A, 1355 const int __imm) 1356 { 1357 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1358 __imm, 1359 (__v2df) __W, 1360 (__mmask8) 1361 __U); 1362 } 1363 1364 extern __inline __m128d 1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1366 _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A, 1367 const int __imm) 1368 { 1369 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1370 __imm, 1371 (__v2df) 1372 _mm_setzero_pd (), 1373 (__mmask8) 1374 __U); 1375 } 1376 1377 extern __inline __m128i 1378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1379 _mm256_extracti64x2_epi64 (__m256i __A, const int __imm) 1380 { 1381 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1382 __imm, 1383 (__v2di) 1384 _mm_setzero_si128 (), 1385 (__mmask8) -1); 1386 } 1387 1388 extern __inline __m128i 1389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1390 _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A, 1391 const int __imm) 1392 { 1393 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1394 __imm, 1395 (__v2di) __W, 1396 (__mmask8) 1397 __U); 1398 } 1399 1400 extern __inline __m128i 1401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1402 _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A, 1403 const int __imm) 1404 { 1405 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1406 __imm, 1407 (__v2di) 1408 _mm_setzero_si128 (), 1409 (__mmask8) 1410 __U); 1411 } 1412 1413 extern __inline __m256d 1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1415 _mm256_reduce_pd (__m256d __A, int __B) 1416 { 1417 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1418 (__v4df) 1419 _mm256_setzero_pd (), 1420 
(__mmask8) -1); 1421 } 1422 1423 extern __inline __m256d 1424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1425 _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B) 1426 { 1427 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1428 (__v4df) __W, 1429 (__mmask8) __U); 1430 } 1431 1432 extern __inline __m256d 1433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1434 _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B) 1435 { 1436 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1437 (__v4df) 1438 _mm256_setzero_pd (), 1439 (__mmask8) __U); 1440 } 1441 1442 extern __inline __m128d 1443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1444 _mm_reduce_pd (__m128d __A, int __B) 1445 { 1446 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1447 (__v2df) 1448 _mm_setzero_pd (), 1449 (__mmask8) -1); 1450 } 1451 1452 extern __inline __m128d 1453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1454 _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B) 1455 { 1456 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1457 (__v2df) __W, 1458 (__mmask8) __U); 1459 } 1460 1461 extern __inline __m128d 1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1463 _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B) 1464 { 1465 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1466 (__v2df) 1467 _mm_setzero_pd (), 1468 (__mmask8) __U); 1469 } 1470 1471 extern __inline __m256 1472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1473 _mm256_reduce_ps (__m256 __A, int __B) 1474 { 1475 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1476 (__v8sf) 1477 _mm256_setzero_ps (), 1478 (__mmask8) -1); 1479 } 1480 1481 extern __inline __m256 1482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1483 
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B) 1484 { 1485 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1486 (__v8sf) __W, 1487 (__mmask8) __U); 1488 } 1489 1490 extern __inline __m256 1491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1492 _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B) 1493 { 1494 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1495 (__v8sf) 1496 _mm256_setzero_ps (), 1497 (__mmask8) __U); 1498 } 1499 1500 extern __inline __m128 1501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1502 _mm_reduce_ps (__m128 __A, int __B) 1503 { 1504 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1505 (__v4sf) 1506 _mm_setzero_ps (), 1507 (__mmask8) -1); 1508 } 1509 1510 extern __inline __m128 1511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1512 _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B) 1513 { 1514 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1515 (__v4sf) __W, 1516 (__mmask8) __U); 1517 } 1518 1519 extern __inline __m128 1520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1521 _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B) 1522 { 1523 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1524 (__v4sf) 1525 _mm_setzero_ps (), 1526 (__mmask8) __U); 1527 } 1528 1529 extern __inline __m256d 1530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1531 _mm256_range_pd (__m256d __A, __m256d __B, int __C) 1532 { 1533 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, 1534 (__v4df) __B, __C, 1535 (__v4df) 1536 _mm256_setzero_pd (), 1537 (__mmask8) -1); 1538 } 1539 1540 extern __inline __m256d 1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1542 _mm256_mask_range_pd (__m256d __W, __mmask8 __U, 1543 __m256d __A, __m256d __B, int __C) 1544 { 1545 return (__m256d) 
__builtin_ia32_rangepd256_mask ((__v4df) __A, 1546 (__v4df) __B, __C, 1547 (__v4df) __W, 1548 (__mmask8) __U); 1549 } 1550 1551 extern __inline __m256d 1552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1553 _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C) 1554 { 1555 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, 1556 (__v4df) __B, __C, 1557 (__v4df) 1558 _mm256_setzero_pd (), 1559 (__mmask8) __U); 1560 } 1561 1562 extern __inline __m128d 1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1564 _mm_range_pd (__m128d __A, __m128d __B, int __C) 1565 { 1566 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1567 (__v2df) __B, __C, 1568 (__v2df) 1569 _mm_setzero_pd (), 1570 (__mmask8) -1); 1571 } 1572 1573 extern __inline __m128d 1574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1575 _mm_mask_range_pd (__m128d __W, __mmask8 __U, 1576 __m128d __A, __m128d __B, int __C) 1577 { 1578 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1579 (__v2df) __B, __C, 1580 (__v2df) __W, 1581 (__mmask8) __U); 1582 } 1583 1584 extern __inline __m128d 1585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1586 _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1587 { 1588 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1589 (__v2df) __B, __C, 1590 (__v2df) 1591 _mm_setzero_pd (), 1592 (__mmask8) __U); 1593 } 1594 1595 extern __inline __m256 1596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1597 _mm256_range_ps (__m256 __A, __m256 __B, int __C) 1598 { 1599 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1600 (__v8sf) __B, __C, 1601 (__v8sf) 1602 _mm256_setzero_ps (), 1603 (__mmask8) -1); 1604 } 1605 1606 extern __inline __m256 1607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1608 _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, 
1609 int __C) 1610 { 1611 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1612 (__v8sf) __B, __C, 1613 (__v8sf) __W, 1614 (__mmask8) __U); 1615 } 1616 1617 extern __inline __m256 1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1619 _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C) 1620 { 1621 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1622 (__v8sf) __B, __C, 1623 (__v8sf) 1624 _mm256_setzero_ps (), 1625 (__mmask8) __U); 1626 } 1627 1628 extern __inline __m128 1629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1630 _mm_range_ps (__m128 __A, __m128 __B, int __C) 1631 { 1632 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1633 (__v4sf) __B, __C, 1634 (__v4sf) 1635 _mm_setzero_ps (), 1636 (__mmask8) -1); 1637 } 1638 1639 extern __inline __m128 1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1641 _mm_mask_range_ps (__m128 __W, __mmask8 __U, 1642 __m128 __A, __m128 __B, int __C) 1643 { 1644 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1645 (__v4sf) __B, __C, 1646 (__v4sf) __W, 1647 (__mmask8) __U); 1648 } 1649 1650 extern __inline __m128 1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1652 _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1653 { 1654 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1655 (__v4sf) __B, __C, 1656 (__v4sf) 1657 _mm_setzero_ps (), 1658 (__mmask8) __U); 1659 } 1660 1661 extern __inline __mmask8 1662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1663 _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A, 1664 const int __imm) 1665 { 1666 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A, 1667 __imm, __U); 1668 } 1669 1670 extern __inline __mmask8 1671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1672 _mm256_fpclass_pd_mask (__m256d __A, const int __imm) 1673 { 1674 return 
(__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A, 1675 __imm, 1676 (__mmask8) -1); 1677 } 1678 1679 extern __inline __mmask8 1680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1681 _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm) 1682 { 1683 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A, 1684 __imm, __U); 1685 } 1686 1687 extern __inline __mmask8 1688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1689 _mm256_fpclass_ps_mask (__m256 __A, const int __imm) 1690 { 1691 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A, 1692 __imm, 1693 (__mmask8) -1); 1694 } 1695 1696 extern __inline __mmask8 1697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1698 _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm) 1699 { 1700 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A, 1701 __imm, __U); 1702 } 1703 1704 extern __inline __mmask8 1705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1706 _mm_fpclass_pd_mask (__m128d __A, const int __imm) 1707 { 1708 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A, 1709 __imm, 1710 (__mmask8) -1); 1711 } 1712 1713 extern __inline __mmask8 1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1715 _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm) 1716 { 1717 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A, 1718 __imm, __U); 1719 } 1720 1721 extern __inline __mmask8 1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1723 _mm_fpclass_ps_mask (__m128 __A, const int __imm) 1724 { 1725 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A, 1726 __imm, 1727 (__mmask8) -1); 1728 } 1729 1730 extern __inline __m256i 1731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1732 _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm) 1733 { 1734 return 
(__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1735 (__v2di) __B, 1736 __imm, 1737 (__v4di) 1738 _mm256_setzero_si256 (), 1739 (__mmask8) -1); 1740 } 1741 1742 extern __inline __m256i 1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1744 _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A, 1745 __m128i __B, const int __imm) 1746 { 1747 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1748 (__v2di) __B, 1749 __imm, 1750 (__v4di) __W, 1751 (__mmask8) 1752 __U); 1753 } 1754 1755 extern __inline __m256i 1756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1757 _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B, 1758 const int __imm) 1759 { 1760 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1761 (__v2di) __B, 1762 __imm, 1763 (__v4di) 1764 _mm256_setzero_si256 (), 1765 (__mmask8) 1766 __U); 1767 } 1768 1769 extern __inline __m256d 1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1771 _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm) 1772 { 1773 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1774 (__v2df) __B, 1775 __imm, 1776 (__v4df) 1777 _mm256_setzero_pd (), 1778 (__mmask8) -1); 1779 } 1780 1781 extern __inline __m256d 1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1783 _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A, 1784 __m128d __B, const int __imm) 1785 { 1786 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1787 (__v2df) __B, 1788 __imm, 1789 (__v4df) __W, 1790 (__mmask8) 1791 __U); 1792 } 1793 1794 extern __inline __m256d 1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1796 _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, 1797 const int __imm) 1798 { 1799 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1800 (__v2df) __B, 1801 __imm, 1802 (__v4df) 1803 
_mm256_setzero_pd (), 1804 (__mmask8) 1805 __U); 1806 } 1807 1808 #else 1809 #define _mm256_insertf64x2(X, Y, C) \ 1810 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1811 (__v2df)(__m128d) (Y), (int) (C), \ 1812 (__v4df)(__m256d)_mm256_setzero_pd(), \ 1813 (__mmask8)-1)) 1814 1815 #define _mm256_mask_insertf64x2(W, U, X, Y, C) \ 1816 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1817 (__v2df)(__m128d) (Y), (int) (C), \ 1818 (__v4df)(__m256d)(W), \ 1819 (__mmask8)(U))) 1820 1821 #define _mm256_maskz_insertf64x2(U, X, Y, C) \ 1822 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1823 (__v2df)(__m128d) (Y), (int) (C), \ 1824 (__v4df)(__m256d)_mm256_setzero_pd(), \ 1825 (__mmask8)(U))) 1826 1827 #define _mm256_inserti64x2(X, Y, C) \ 1828 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1829 (__v2di)(__m128i) (Y), (int) (C), \ 1830 (__v4di)(__m256i)_mm256_setzero_si256 (), \ 1831 (__mmask8)-1)) 1832 1833 #define _mm256_mask_inserti64x2(W, U, X, Y, C) \ 1834 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1835 (__v2di)(__m128i) (Y), (int) (C), \ 1836 (__v4di)(__m256i)(W), \ 1837 (__mmask8)(U))) 1838 1839 #define _mm256_maskz_inserti64x2(U, X, Y, C) \ 1840 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1841 (__v2di)(__m128i) (Y), (int) (C), \ 1842 (__v4di)(__m256i)_mm256_setzero_si256 (), \ 1843 (__mmask8)(U))) 1844 1845 #define _mm256_extractf64x2_pd(X, C) \ 1846 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 1847 (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1)) 1848 1849 #define _mm256_mask_extractf64x2_pd(W, U, X, C) \ 1850 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 1851 (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U))) 1852 1853 #define _mm256_maskz_extractf64x2_pd(U, X, C) \ 1854 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 
1855 (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U))) 1856 1857 #define _mm256_extracti64x2_epi64(X, C) \ 1858 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1859 (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1)) 1860 1861 #define _mm256_mask_extracti64x2_epi64(W, U, X, C) \ 1862 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1863 (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U))) 1864 1865 #define _mm256_maskz_extracti64x2_epi64(U, X, C) \ 1866 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1867 (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) 1868 1869 #define _mm256_reduce_pd(A, B) \ 1870 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1871 (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1)) 1872 1873 #define _mm256_mask_reduce_pd(W, U, A, B) \ 1874 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1875 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U))) 1876 1877 #define _mm256_maskz_reduce_pd(U, A, B) \ 1878 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1879 (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U))) 1880 1881 #define _mm_reduce_pd(A, B) \ 1882 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1883 (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1)) 1884 1885 #define _mm_mask_reduce_pd(W, U, A, B) \ 1886 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1887 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U))) 1888 1889 #define _mm_maskz_reduce_pd(U, A, B) \ 1890 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1891 (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U))) 1892 1893 #define _mm256_reduce_ps(A, B) \ 1894 ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \ 1895 (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1)) 1896 1897 #define _mm256_mask_reduce_ps(W, U, A, B) \ 1898 ((__m256) 
__builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

/* Macro forms used when __OPTIMIZE__ is not defined.
   reduceps256, maskz_ (U) form.  */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
     (__v8sf) (__m256) (A), (int) (B), \
     (__v8sf) _mm256_setzero_ps (), (__mmask8) (U)))

/* reduceps128: unmasked, mask_ (W, U) and maskz_ (U) forms.  */
#define _mm_reduce_ps(A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (A), (int) (B), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) -1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (A), (int) (B), \
     (__v4sf) (__m128) (W), (__mmask8) (U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (A), (int) (B), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U)))

/* rangepd256: unmasked and maskz_ (U) forms.  */
#define _mm256_range_pd(A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
     (__v4df) (__m256d) (A), (__v4df) (__m256d) (B), (int) (C), \
     (__v4df) _mm256_setzero_pd (), (__mmask8) -1))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
     (__v4df) (__m256d) (A), (__v4df) (__m256d) (B), (int) (C), \
     (__v4df) _mm256_setzero_pd (), (__mmask8) (U)))

/* rangepd128: unmasked form.  */
#define _mm_range_pd(A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) _mm_setzero_pd (), (__mmask8) -1))

/* rangeps256: unmasked and mask_ (W, U) forms; the maskz_ form
   follows.  */
#define _mm256_range_ps(A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
     (__v8sf) (__m256) (A), (__v8sf) (__m256) (B), (int) (C), \
     (__v8sf) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
     (__v8sf) (__m256) (A), (__v8sf) (__m256) (B), (int) (C), \
     (__v8sf) (__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
1945 (__v8sf)_mm256_setzero_ps(), (__mmask8)(U))) 1946 1947 #define _mm_range_ps(A, B, C) \ 1948 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1949 (__v4sf)(__m128)(B), (int)(C), \ 1950 (__v4sf)_mm_setzero_ps(), (__mmask8)-1)) 1951 1952 #define _mm_mask_range_ps(W, U, A, B, C) \ 1953 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1954 (__v4sf)(__m128)(B), (int)(C), \ 1955 (__v4sf)(__m128)(W), (__mmask8)(U))) 1956 1957 #define _mm_maskz_range_ps(U, A, B, C) \ 1958 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1959 (__v4sf)(__m128)(B), (int)(C), \ 1960 (__v4sf)_mm_setzero_ps(), (__mmask8)(U))) 1961 1962 #define _mm256_mask_range_pd(W, U, A, B, C) \ 1963 ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \ 1964 (__v4df)(__m256d)(B), (int)(C), \ 1965 (__v4df)(__m256d)(W), (__mmask8)(U))) 1966 1967 #define _mm_mask_range_pd(W, U, A, B, C) \ 1968 ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \ 1969 (__v2df)(__m128d)(B), (int)(C), \ 1970 (__v2df)(__m128d)(W), (__mmask8)(U))) 1971 1972 #define _mm_maskz_range_pd(U, A, B, C) \ 1973 ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \ 1974 (__v2df)(__m128d)(B), (int)(C), \ 1975 (__v2df)_mm_setzero_pd(), (__mmask8)(U))) 1976 1977 #define _mm256_mask_fpclass_pd_mask(u, X, C) \ 1978 ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \ 1979 (int) (C),(__mmask8)(u))) 1980 1981 #define _mm256_mask_fpclass_ps_mask(u, X, C) \ 1982 ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \ 1983 (int) (C),(__mmask8)(u))) 1984 1985 #define _mm_mask_fpclass_pd_mask(u, X, C) \ 1986 ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \ 1987 (int) (C),(__mmask8)(u))) 1988 1989 #define _mm_mask_fpclass_ps_mask(u, X, C) \ 1990 ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \ 1991 (int) (C),(__mmask8)(u))) 1992 1993 #define _mm256_fpclass_pd_mask(X, C) \ 1994 ((__mmask8) 
__builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \ 1995 (int) (C),(__mmask8)-1)) 1996 1997 #define _mm256_fpclass_ps_mask(X, C) \ 1998 ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \ 1999 (int) (C),(__mmask8)-1)) 2000 2001 #define _mm_fpclass_pd_mask(X, C) \ 2002 ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \ 2003 (int) (C),(__mmask8)-1)) 2004 2005 #define _mm_fpclass_ps_mask(X, C) \ 2006 ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \ 2007 (int) (C),(__mmask8)-1)) 2008 2009 #endif 2010 2011 #ifdef __DISABLE_AVX512VLDQ__ 2012 #undef __DISABLE_AVX512VLDQ__ 2013 #pragma GCC pop_options 2014 #endif /* __DISABLE_AVX512VLDQ__ */ 2015 2016 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */ 2017