1 /* 2 * mmintrin.h 3 * 4 * This file is part of the ReactOS CRT package. 5 * 6 * Contributors: 7 * Timo Kreuzer (timo.kreuzer@reactos.org) 8 * 9 * THIS SOFTWARE IS NOT COPYRIGHTED 10 * 11 * This source code is offered for use in the public domain. You may 12 * use, modify or distribute it freely. 13 * 14 * This code is distributed in the hope that it will be useful but 15 * WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY 16 * DISCLAIMED. This includes but is not limited to warranties of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 18 * 19 */ 20 21 #pragma once 22 #ifndef _MMINTRIN_H_INCLUDED 23 #define _MMINTRIN_H_INCLUDED 24 25 #include <vcruntime.h> 26 27 #ifdef __cplusplus 28 extern "C" { 29 #endif 30 31 #ifdef _MSC_VER 32 #define DECLSPEC_INTRINTYPE __declspec(intrin_type) 33 #else 34 #define DECLSPEC_INTRINTYPE 35 #endif 36 37 #if defined(_MSC_VER) && !defined(__clang__) 38 39 typedef union DECLSPEC_INTRINTYPE _CRT_ALIGN(8) __m64 40 { 41 unsigned __int64 m64_u64; 42 float m64_f32[2]; 43 __int8 m64_i8[8]; 44 __int16 m64_i16[4]; 45 __int32 m64_i32[2]; 46 __int64 m64_i64; 47 unsigned __int8 m64_u8[8]; 48 unsigned __int16 m64_u16[4]; 49 unsigned __int32 m64_u32[2]; 50 } __m64; 51 52 #else /* _MSC_VER */ 53 54 typedef long long __v1di __attribute__((__vector_size__(8))); 55 typedef int __v2si __attribute__((__vector_size__(8))); 56 typedef short __v4hi __attribute__((__vector_size__(8))); 57 typedef char __v8qi __attribute__((__vector_size__(8))); 58 59 typedef float __m64 __attribute__((__vector_size__(8), __aligned__(16))); 60 61 #ifdef __clang__ 62 #define __INTRIN_INLINE_MMX __INTRIN_INLINE __attribute__((__target__("mmx"),__min_vector_width__(64))) 63 #else 64 #define __INTRIN_INLINE_MMX __INTRIN_INLINE __attribute__((__target__("mmx"))) 65 #endif 66 67 #endif /* _MSC_VER */ 68 69 #ifdef _M_IX86 70 71 void _m_empty(void); 72 __m64 _m_from_int(int i); 73 int _m_to_int(__m64 m); 74 __m64 _m_packsswb(__m64 a, __m64 b); 75 __m64 _m_packssdw(__m64 a, __m64 b); 76 __m64 _m_packuswb(__m64 a, __m64 b); 77 __m64 _m_punpckhbw(__m64 a, __m64 b); 78 __m64 _m_punpckhwd(__m64 a, __m64 b); 79 __m64 _m_punpckhdq(__m64 a, __m64 b); 80 __m64 _m_punpcklbw(__m64 a, __m64 b); 81 __m64 _m_punpcklwd(__m64 a, __m64 b); 82 __m64 _m_punpckldq(__m64 a, __m64 b); 83 __m64 _m_paddb(__m64 a, __m64 b); 84 __m64 _m_paddw(__m64 a, __m64 b); 85 __m64 _m_paddd(__m64 a, __m64 b); 86 __m64 _m_paddsb(__m64 a, __m64 b); 87 __m64 _m_paddsw(__m64 a, __m64 b); 88 __m64 _m_paddusb(__m64 a, __m64 b); 89 __m64 _m_paddusw(__m64 a, __m64 b); 90 __m64 _m_psubb(__m64 a, __m64 b); 91 __m64 _m_psubw(__m64 a, __m64 b); 92 __m64 _m_psubd(__m64 a, __m64 b); 93 __m64 _m_psubsb(__m64 a, __m64 b); 94 __m64 _m_psubsw(__m64 a, __m64 b); 95 __m64 _m_psubusb(__m64 a, __m64 b); 96 __m64 _m_psubusw(__m64 a, __m64 b); 97 __m64 _m_pmaddwd(__m64 a, __m64 b); 98 __m64 _m_pmulhw(__m64 a, __m64 b); 99 __m64 _m_pmullw(__m64 a, __m64 b); 100 __m64 _m_psllw(__m64 a, __m64 count); 101 __m64 _m_psllwi(__m64 a, int imm8); 102 __m64 _m_pslld(__m64 a, __m64 count); 103 __m64 _m_pslldi(__m64 a, int imm8); 104 __m64 _m_psllq(__m64 a, __m64 count); 105 __m64 _m_psllqi(__m64 a, int imm8); 106 __m64 _m_psraw(__m64 a, __m64 count); 107 __m64 _m_psrawi(__m64 a, int imm8); 108 __m64 _m_psrad(__m64 a, __m64 count); 109 __m64 _m_psradi(__m64 a, int imm8); 110 __m64 _m_psrlw(__m64 a, __m64 count); 111 __m64 _m_psrlwi(__m64 a, int imm8); 112 __m64 _m_psrld(__m64 a, __m64 count); 113 __m64 _m_psrldi(__m64 a, int imm8); 114 __m64 _m_psrlq(__m64 a, __m64 count); 115 __m64 _m_psrlqi(__m64 a, int imm8); 116 __m64 _m_pand(__m64 a, __m64 b); 117 __m64 _m_pandn(__m64 a, __m64 b); 118 __m64 _m_por(__m64 a, __m64 b); 119 __m64 _m_pxor(__m64 a, __m64 b); 120 __m64 _m_pcmpeqb(__m64 a, __m64 b); 121 __m64 _m_pcmpgtb(__m64 a, __m64 b); 122 __m64 _m_pcmpeqw(__m64 a, __m64 b); 123 __m64 _m_pcmpgtw(__m64 a, __m64 b); 124 __m64 _m_pcmpeqd(__m64 a, __m64 b); 125 __m64 _m_pcmpgtd(__m64 a, __m64 b); 126 __m64 _mm_setzero_si64(void); 127 __m64 _mm_set_pi32(int i1, int i0); 128 __m64 _mm_set_pi16(short s3, short s2, short s1, short s0); 129 __m64 _mm_set_pi8(char b7, char b6, char b5, char b4, 130 char b3, char b2, char b1, char b0); 131 __m64 _mm_setr_pi32(int i1, int i0); 132 __m64 _mm_setr_pi16(short s3, short s2, short s1, short s0); 133 __m64 _mm_setr_pi8(char b7, char b6, char b5, char b4, 134 char b3, char b2, char b1, char b0); 135 __m64 _mm_set1_pi32(int i); 136 __m64 _mm_set1_pi16(short s); 137 __m64 _mm_set1_pi8(char b); 138 139 /* Alternate names */ 140 #define _mm_empty _m_empty 141 #define _mm_cvtsi32_si64 _m_from_int 142 #define _mm_cvtsi64_si32 _m_to_int 143 #define _mm_packs_pi16 _m_packsswb 144 #define _mm_packs_pi32 _m_packssdw 145 #define _mm_packs_pu16 _m_packuswb 146 #define _mm_unpackhi_pi8 _m_punpckhbw 147 #define _mm_unpackhi_pi16 _m_punpckhwd 148 #define _mm_unpackhi_pi32 _m_punpckhdq 149 #define _mm_unpacklo_pi8 _m_punpcklbw 150 #define _mm_unpacklo_pi16 _m_punpcklwd 151 #define _mm_unpacklo_pi32 _m_punpckldq 152 #define _mm_add_pi8 _m_paddb 153 #define _mm_add_pi16 _m_paddw 154 #define _mm_add_pi32 _m_paddd 155 #define _mm_adds_pi8 _m_paddsb 156 #define _mm_adds_pi16 _m_paddsw 157 #define _mm_adds_pu8 _m_paddusb 158 #define _mm_adds_pu16 _m_paddusw 159 #define _mm_sub_pi8 _m_psubb 160 #define _mm_sub_pi16 _m_psubw 161 #define _mm_sub_pi32 _m_psubd 162 #define _mm_subs_pi8 _m_psubsb 163 #define _mm_subs_pi16 _m_psubsw 164 #define _mm_subs_pu8 _m_psubusb 165 #define _mm_subs_pu16 _m_psubusw 166 #define _mm_madd_pi16 _m_pmaddwd 167 #define _mm_mulhi_pi16 _m_pmulhw 168 #define _mm_mullo_pi16 _m_pmullw 169 #define _mm_sll_pi16 _m_psllw 170 #define _mm_slli_pi16 _m_psllwi 171 #define _mm_sll_pi32 _m_pslld 172 #define _mm_slli_pi32 _m_pslldi 173 #define _mm_sll_si64 _m_psllq 174 #define _mm_slli_si64 _m_psllqi 175 #define _mm_sra_pi16 _m_psraw 176 #define _mm_srai_pi16 _m_psrawi 177 #define _mm_sra_pi32 _m_psrad 178 #define _mm_srai_pi32 _m_psradi 179 #define _mm_srl_pi16 _m_psrlw 180 #define _mm_srli_pi16 _m_psrlwi 181 #define _mm_srl_pi32 _m_psrld 182 #define _mm_srli_pi32 _m_psrldi 183 #define _mm_srl_si64 _m_psrlq 184 #define _mm_srli_si64 _m_psrlqi 185 #define _mm_and_si64 _m_pand 186 #define _mm_andnot_si64 _m_pandn 187 #define _mm_or_si64 _m_por 188 #define _mm_xor_si64 _m_pxor 189 #define _mm_cmpeq_pi8 _m_pcmpeqb 190 #define _mm_cmpgt_pi8 _m_pcmpgtb 191 #define _mm_cmpeq_pi16 _m_pcmpeqw 192 #define _mm_cmpgt_pi16 _m_pcmpgtw 193 #define _mm_cmpeq_pi32 _m_pcmpeqd 194 #define _mm_cmpgt_pi32 _m_pcmpgtd 195 196 /* Use intrinsics on MSVC */ 197 #if defined(_MSC_VER) && !defined(__clang__) 198 #pragma intrinsic(_m_empty) 199 #pragma intrinsic(_m_from_int) 200 #pragma intrinsic(_m_to_int) 201 #pragma intrinsic(_m_packsswb) 202 #pragma intrinsic(_m_packssdw) 203 #pragma intrinsic(_m_packuswb) 204 #pragma intrinsic(_m_punpckhbw) 205 #pragma intrinsic(_m_punpckhwd) 206 #pragma intrinsic(_m_punpckhdq) 207 #pragma intrinsic(_m_punpcklbw) 208 #pragma intrinsic(_m_punpcklwd) 209 #pragma intrinsic(_m_punpckldq) 210 #pragma intrinsic(_m_paddb) 211 #pragma intrinsic(_m_paddw) 212 #pragma intrinsic(_m_paddd) 213 #pragma intrinsic(_m_paddsb) 214 #pragma intrinsic(_m_paddsw) 215 #pragma intrinsic(_m_paddusb) 216 #pragma intrinsic(_m_paddusw) 217 #pragma intrinsic(_m_psubb) 218 #pragma intrinsic(_m_psubw) 219 #pragma intrinsic(_m_psubd) 220 #pragma intrinsic(_m_psubsb) 221 #pragma intrinsic(_m_psubsw) 222 #pragma intrinsic(_m_psubusb) 223 #pragma intrinsic(_m_psubusw) 224 #pragma intrinsic(_m_pmaddwd) 225 #pragma intrinsic(_m_pmulhw) 226 #pragma intrinsic(_m_pmullw) 227 #pragma intrinsic(_m_psllw) 228 #pragma intrinsic(_m_psllwi) 229 #pragma intrinsic(_m_pslld) 230 #pragma intrinsic(_m_pslldi) 231 #pragma intrinsic(_m_psllq) 232 #pragma intrinsic(_m_psllqi) 233 #pragma intrinsic(_m_psraw) 234 #pragma intrinsic(_m_psrawi) 235 #pragma intrinsic(_m_psrad) 236 #pragma intrinsic(_m_psradi) 237 #pragma intrinsic(_m_psrlw) 238 #pragma intrinsic(_m_psrlwi) 239 #pragma intrinsic(_m_psrld) 240 #pragma intrinsic(_m_psrldi) 241 #pragma intrinsic(_m_psrlq) 242 #pragma intrinsic(_m_psrlqi) 243 #pragma intrinsic(_m_pand) 244 #pragma intrinsic(_m_pandn) 245 #pragma intrinsic(_m_por) 246 #pragma intrinsic(_m_pxor) 247 #pragma intrinsic(_m_pcmpeqb) 248 #pragma intrinsic(_m_pcmpgtb) 249 #pragma intrinsic(_m_pcmpeqw) 250 #pragma intrinsic(_m_pcmpgtw) 251 #pragma intrinsic(_m_pcmpeqd) 252 #pragma intrinsic(_m_pcmpgtd) 253 #pragma intrinsic(_mm_setzero_si64) 254 #pragma intrinsic(_mm_set_pi32) 255 #pragma intrinsic(_mm_set_pi16) 256 #pragma intrinsic(_mm_set_pi8) 257 #pragma intrinsic(_mm_setr_pi32) 258 #pragma intrinsic(_mm_setr_pi16) 259 #pragma intrinsic(_mm_setr_pi8) 260 #pragma intrinsic(_mm_set1_pi32) 261 #pragma intrinsic(_mm_set1_pi16) 262 #pragma intrinsic(_mm_set1_pi8) 263 264 /* Use inline functions on GCC/Clang */ 265 #else // GCC / Clang Clang-CL 266 267 /* 268 - GCC: https://github.com/gcc-mirror/gcc/blob/master/gcc/config/i386/mmintrin.h 269 - Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/mmintrin.h 270 */ 271 272 // _m_empty 273 __INTRIN_INLINE_MMX void _mm_empty(void) 274 { 275 __builtin_ia32_emms(); 276 } 277 278 // _m_from_int 279 __INTRIN_INLINE_MMX __m64 _mm_cvtsi32_si64(int i) 280 { 281 return (__m64)__builtin_ia32_vec_init_v2si(i, 0); 282 } 283 284 // _m_to_int 285 __INTRIN_INLINE_MMX int _mm_cvtsi64_si32(__m64 m) 286 { 287 return __builtin_ia32_vec_ext_v2si((__v2si)m, 0); 288 } 289 290 // _m_packsswb 291 __INTRIN_INLINE_MMX __m64 _mm_packs_pi16(__m64 a, __m64 b) 292 { 293 return (__m64)__builtin_ia32_packsswb((__v4hi)a, (__v4hi)b); 294 } 295 296 // _m_packssdw 297 __INTRIN_INLINE_MMX __m64 _mm_packs_pi32(__m64 a, __m64 b) 298 { 299 return (__m64)__builtin_ia32_packssdw((__v2si)a, (__v2si)b); 300 } 301 302 // _m_packuswb 303 __INTRIN_INLINE_MMX __m64 _mm_packs_pu16(__m64 a, __m64 b) 304 { 305 return (__m64)__builtin_ia32_packuswb((__v4hi)a, (__v4hi)b); 306 } 307 308 // _m_punpckhbw 309 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi8(__m64 a, __m64 b) 310 { 311 return (__m64)__builtin_ia32_punpckhbw((__v8qi)a, (__v8qi)b); 312 } 313 314 // _m_punpckhwd 315 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi16(__m64 a, __m64 b) 316 { 317 return (__m64)__builtin_ia32_punpckhwd((__v4hi)a, (__v4hi)b); 318 } 319 320 // _m_punpckhdq 321 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi32(__m64 a, __m64 b) 322 { 323 return (__m64)__builtin_ia32_punpckhdq((__v2si)a, (__v2si)b); 324 } 325 326 // _m_punpcklbw 327 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi8(__m64 a, __m64 b) 328 { 329 return (__m64)__builtin_ia32_punpcklbw((__v8qi)a, (__v8qi)b); 330 } 331 332 // _m_punpcklwd 333 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi16(__m64 a, __m64 b) 334 { 335 return (__m64)__builtin_ia32_punpcklwd((__v4hi)a, (__v4hi)b); 336 } 337 338 // _m_punpckldq 339 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi32(__m64 a, __m64 b) 340 { 341 return (__m64)__builtin_ia32_punpckldq((__v2si)a, (__v2si)b); 342 } 343 344 // _m_paddb 345 __INTRIN_INLINE_MMX __m64 _mm_add_pi8(__m64 a, __m64 b) 346 { 347 return (__m64)__builtin_ia32_paddb((__v8qi)a, (__v8qi)b); 348 } 349 350 // _m_paddw 351 __INTRIN_INLINE_MMX __m64 _mm_add_pi16(__m64 a, __m64 b) 352 { 353 return (__m64)__builtin_ia32_paddw((__v4hi)a, (__v4hi)b); 354 } 355 356 // _m_paddd 357 __INTRIN_INLINE_MMX __m64 _mm_add_pi32(__m64 a, __m64 b) 358 { 359 return (__m64)__builtin_ia32_paddd((__v2si)a, (__v2si)b); 360 } 361 362 // _m_paddsb 363 __INTRIN_INLINE_MMX __m64 _mm_adds_pi8(__m64 a, __m64 b) 364 { 365 return (__m64)__builtin_ia32_paddsb((__v8qi)a, (__v8qi)b); 366 } 367 368 // _m_paddsw 369 __INTRIN_INLINE_MMX __m64 _mm_adds_pi16(__m64 a, __m64 b) 370 { 371 return (__m64)__builtin_ia32_paddsw((__v4hi)a, (__v4hi)b); 372 } 373 374 // _m_paddusb 375 __INTRIN_INLINE_MMX __m64 _mm_adds_pu8(__m64 a, __m64 b) 376 { 377 return (__m64)__builtin_ia32_paddusb((__v8qi)a, (__v8qi)b); 378 } 379 380 // _m_paddusw 381 __INTRIN_INLINE_MMX __m64 _mm_adds_pu16(__m64 a, __m64 b) 382 { 383 return (__m64)__builtin_ia32_paddusw((__v4hi)a, (__v4hi)b); 384 } 385 386 // _m_psubb 387 __INTRIN_INLINE_MMX __m64 _mm_sub_pi8(__m64 a, __m64 b) 388 { 389 return (__m64)__builtin_ia32_psubb((__v8qi)a, (__v8qi)b); 390 } 391 392 // _m_psubw 393 __INTRIN_INLINE_MMX __m64 _mm_sub_pi16(__m64 a, __m64 b) 394 { 395 return (__m64)__builtin_ia32_psubw((__v4hi)a, (__v4hi)b); 396 } 397 398 // _m_psubd 399 __INTRIN_INLINE_MMX __m64 _mm_sub_pi32(__m64 a, __m64 b) 400 { 401 return (__m64)__builtin_ia32_psubd((__v2si)a, (__v2si)b); 402 } 403 404 // _m_psubsb 405 __INTRIN_INLINE_MMX __m64 _mm_subs_pi8(__m64 a, __m64 b) 406 { 407 return (__m64)__builtin_ia32_psubsb((__v8qi)a, (__v8qi)b); 408 } 409 410 // _m_psubsw 411 __INTRIN_INLINE_MMX __m64 _mm_subs_pi16(__m64 a, __m64 b) 412 { 413 return (__m64)__builtin_ia32_psubsw((__v4hi)a, (__v4hi)b); 414 } 415 416 // _m_psubusb 417 __INTRIN_INLINE_MMX __m64 _mm_subs_pu8(__m64 a, __m64 b) 418 { 419 return (__m64)__builtin_ia32_psubusb((__v8qi)a, (__v8qi)b); 420 } 421 422 // _m_psubusw 423 __INTRIN_INLINE_MMX __m64 _mm_subs_pu16(__m64 a, __m64 b) 424 { 425 return (__m64)__builtin_ia32_psubusw((__v4hi)a, (__v4hi)b); 426 } 427 428 // _m_pmaddwd 429 __INTRIN_INLINE_MMX __m64 _mm_madd_pi16(__m64 a, __m64 b) 430 { 431 return (__m64)__builtin_ia32_pmaddwd((__v4hi)a, (__v4hi)b); 432 } 433 434 // _m_pmulhw 435 __INTRIN_INLINE_MMX __m64 _mm_mulhi_pi16(__m64 a, __m64 b) 436 { 437 return (__m64)__builtin_ia32_pmulhw((__v4hi)a, (__v4hi)b); 438 } 439 440 // _m_pmullw 441 __INTRIN_INLINE_MMX __m64 _mm_mullo_pi16(__m64 a, __m64 b) 442 { 443 return (__m64)__builtin_ia32_pmullw((__v4hi)a, (__v4hi)b); 444 } 445 446 // _m_psllw 447 __INTRIN_INLINE_MMX __m64 _mm_sll_pi16(__m64 a, __m64 count) 448 { 449 return (__m64)__builtin_ia32_psllw((__v4hi)a, (__v4hi)count); 450 } 451 452 // _m_psllwi 453 __INTRIN_INLINE_MMX __m64 _mm_slli_pi16(__m64 a, int imm8) 454 { 455 return (__m64)__builtin_ia32_psllwi((__v4hi)a, imm8); 456 } 457 458 // _m_pslld 459 __INTRIN_INLINE_MMX __m64 _mm_sll_pi32(__m64 a, __m64 count) 460 { 461 return (__m64)__builtin_ia32_pslld((__v2si)a, (__v2si)count); 462 } 463 464 // _m_pslldi 465 __INTRIN_INLINE_MMX __m64 _mm_slli_pi32(__m64 a, int imm8) 466 { 467 return (__m64)__builtin_ia32_pslldi((__v2si)a, imm8); 468 } 469 470 // _m_psllq 471 __INTRIN_INLINE_MMX __m64 _mm_sll_si64(__m64 a, __m64 count) 472 { 473 return (__m64)__builtin_ia32_psllq((__v1di)a, (__v1di)count); 474 } 475 476 // _m_psllqi 477 __INTRIN_INLINE_MMX __m64 _mm_slli_si64(__m64 a, int imm8) 478 { 479 return (__m64)__builtin_ia32_psllqi((__v1di)a, imm8); 480 } 481 482 // _m_psraw 483 __INTRIN_INLINE_MMX __m64 _mm_sra_pi16(__m64 a, __m64 count) 484 { 485 return (__m64)__builtin_ia32_psraw((__v4hi)a, (__v4hi)count); 486 } 487 488 // _m_psrawi 489 __INTRIN_INLINE_MMX __m64 _mm_srai_pi16(__m64 a, int imm8) 490 { 491 return (__m64)__builtin_ia32_psrawi((__v4hi)a, imm8); 492 } 493 494 // _m_psrad 495 __INTRIN_INLINE_MMX __m64 _mm_sra_pi32(__m64 a, __m64 count) 496 { 497 return (__m64)__builtin_ia32_psrad((__v2si)a, (__v2si)count); 498 } 499 500 // _m_psradi 501 __INTRIN_INLINE_MMX __m64 _mm_srai_pi32(__m64 a, int imm8) 502 { 503 return (__m64)__builtin_ia32_psradi((__v2si)a, imm8); 504 } 505 506 // _m_psrlw 507 __INTRIN_INLINE_MMX __m64 _mm_srl_pi16(__m64 a, __m64 count) 508 { 509 return (__m64)__builtin_ia32_psrlw((__v4hi)a, (__v4hi)count); 510 } 511 512 // _m_psrlwi 513 __INTRIN_INLINE_MMX __m64 _mm_srli_pi16(__m64 a, int imm8) 514 { 515 return (__m64)__builtin_ia32_psrlwi((__v4hi)a, imm8); 516 } 517 518 // _m_psrld 519 __INTRIN_INLINE_MMX __m64 _mm_srl_pi32(__m64 a, __m64 count) 520 { 521 return (__m64)__builtin_ia32_psrld((__v2si)a, (__v2si)count); 522 } 523 524 // _m_psrldi 525 __INTRIN_INLINE_MMX __m64 _mm_srli_pi32(__m64 a, int imm8) 526 { 527 return (__m64)__builtin_ia32_psrldi((__v2si)a, imm8); 528 } 529 530 // _m_psrlq 531 __INTRIN_INLINE_MMX __m64 _mm_srl_si64(__m64 a, __m64 count) 532 { 533 return (__m64)__builtin_ia32_psrlq((__v1di)a, (__v1di)count); 534 } 535 536 // _m_psrlqi 537 __INTRIN_INLINE_MMX __m64 _mm_srli_si64(__m64 a, int imm8) 538 { 539 return (__m64)__builtin_ia32_psrlqi((__v1di)a, imm8); 540 } 541 542 // _m_pand 543 __INTRIN_INLINE_MMX __m64 _mm_and_si64(__m64 a, __m64 b) 544 { 545 return (__m64)__builtin_ia32_pand((__v2si)a, (__v2si)b); 546 } 547 548 // _m_pandn 549 __INTRIN_INLINE_MMX __m64 _mm_andnot_si64(__m64 a, __m64 b) 550 { 551 return (__m64)__builtin_ia32_pandn((__v2si)a, (__v2si)b); 552 } 553 554 // _m_por 555 __INTRIN_INLINE_MMX __m64 _mm_or_si64(__m64 a, __m64 b) 556 { 557 return (__m64)__builtin_ia32_por((__v2si)a, (__v2si)b); 558 } 559 560 // _m_pxor 561 __INTRIN_INLINE_MMX __m64 _mm_xor_si64(__m64 a, __m64 b) 562 { 563 return (__m64)__builtin_ia32_pxor((__v2si)a, (__v2si)b); 564 } 565 566 // _m_pcmpeqb 567 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi8(__m64 a, __m64 b) 568 { 569 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)a, (__v8qi)b); 570 } 571 572 // _m_pcmpgtb 573 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi8(__m64 a, __m64 b) 574 { 575 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)a, (__v8qi)b); 576 } 577 578 // _m_pcmpeqw 579 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi16(__m64 a, __m64 b) 580 { 581 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)a, (__v4hi)b); 582 } 583 584 // _m_pcmpgtw 585 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi16(__m64 a, __m64 b) 586 { 587 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)a, (__v4hi)b); 588 } 589 590 // _m_pcmpeqd 591 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi32(__m64 a, __m64 b) 592 { 593 return (__m64)__builtin_ia32_pcmpeqd((__v2si)a, (__v2si)b); 594 } 595 596 // _m_pcmpgtd 597 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi32(__m64 a, __m64 b) 598 { 599 return (__m64)__builtin_ia32_pcmpgtd((__v2si)a, (__v2si)b); 600 } 601 602 __INTRIN_INLINE_MMX __m64 _mm_setzero_si64(void) 603 { 604 return (__m64) { 0 }; 605 } 606 607 __INTRIN_INLINE_MMX __m64 _mm_set_pi32(int i1, int i0) 608 { 609 return (__m64)__builtin_ia32_vec_init_v2si(i0, i1); 610 } 611 612 __INTRIN_INLINE_MMX __m64 _mm_set_pi16(short s3, short s2, short s1, short s0) 613 { 614 return (__m64)__builtin_ia32_vec_init_v4hi(s0, s1, s2, s3); 615 } 616 617 __INTRIN_INLINE_MMX __m64 _mm_set_pi8(char b7, char b6, char b5, char b4, 618 char b3, char b2, char b1, char b0) 619 { 620 return (__m64)__builtin_ia32_vec_init_v8qi(b0, b1, b2, b3, b4, b5, b6, b7); 621 } 622 623 __INTRIN_INLINE_MMX __m64 _mm_setr_pi32(int i1, int i0) 624 { 625 return _mm_set_pi32(i0, i1); 626 } 627 628 __INTRIN_INLINE_MMX __m64 _mm_setr_pi16(short s3, short s2, short s1, short s0) 629 { 630 return _mm_set_pi16(s0, s1, s2, s3); 631 } 632 633 __INTRIN_INLINE_MMX __m64 _mm_setr_pi8(char b7, char b6, char b5, char b4, 634 char b3, char b2, char b1, char b0) 635 { 636 return _mm_set_pi8(b7, b6, b5, b4, b3, b2, b1, b0); 637 } 638 639 __INTRIN_INLINE_MMX __m64 _mm_set1_pi32(int i) 640 { 641 return _mm_set_pi32(i, i); 642 } 643 644 __INTRIN_INLINE_MMX __m64 _mm_set1_pi16(short s) 645 { 646 return _mm_set_pi16(s, s, s, s); 647 } 648 649 __INTRIN_INLINE_MMX __m64 _mm_set1_pi8(char b) 650 { 651 return _mm_set_pi8(b, b, b, b, b, b, b, b); 652 } 653 654 #endif /* __GNUC__ */ 655 656 #endif /* _M_IX86 */ 657 658 #ifdef __cplusplus 659 } 660 #endif 661 662 #endif /* _MMINTRIN_H_INCLUDED */ 663