; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi64:
; SSE: # %bb.0:
; SSE-NEXT: paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_pd:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu8:
; SSE: # %bb.0:
; SSE-NEXT: paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu16:
; SSE: # %bb.0:
; SSE-NEXT: paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_and_pd:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_and_si128:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_pd:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_si128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bslli_si128:
; SSE: # %bb.0:
; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bslli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bslli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bsrli_si128:
; SSE: # %bb.0:
; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bsrli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bsrli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X86-LABEL: test_mm_clflush:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: clflush (%eax) # encoding: [0x0f,0xae,0x38]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_clflush:
; X64: # %bb.0:
; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}

define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] 1392; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1393 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) 1394 %bc = bitcast <4 x i32> %res to <2 x i64> 1395 ret <2 x i64> %bc 1396} 1397declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone 1398 1399define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind { 1400; SSE-LABEL: test_mm_cvtpd_ps: 1401; SSE: # %bb.0: 1402; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0] 1403; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1404; 1405; AVX1-LABEL: test_mm_cvtpd_ps: 1406; AVX1: # %bb.0: 1407; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0] 1408; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1409; 1410; AVX512-LABEL: test_mm_cvtpd_ps: 1411; AVX512: # %bb.0: 1412; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] 1413; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1414 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) 1415 ret <4 x float> %res 1416} 1417declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone 1418 1419define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind { 1420; SSE-LABEL: test_mm_cvtps_epi32: 1421; SSE: # %bb.0: 1422; SSE-NEXT: cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0] 1423; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1424; 1425; AVX1-LABEL: test_mm_cvtps_epi32: 1426; AVX1: # %bb.0: 1427; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0] 1428; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1429; 1430; AVX512-LABEL: test_mm_cvtps_epi32: 1431; AVX512: # %bb.0: 1432; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0] 1433; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1434 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) 1435 %bc = bitcast <4 x i32> %res to <2 x i64> 1436 ret <2 x i64> %bc 1437} 1438declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone 1439 1440define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind { 1441; SSE-LABEL: test_mm_cvtps_pd: 1442; SSE: # %bb.0: 1443; SSE-NEXT: cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0] 1444; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1445; 1446; AVX1-LABEL: test_mm_cvtps_pd: 1447; AVX1: # %bb.0: 1448; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0] 1449; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1450; 1451; AVX512-LABEL: test_mm_cvtps_pd: 1452; AVX512: # %bb.0: 1453; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] 1454; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1455 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1> 1456 %res = fpext <2 x float> %ext to <2 x double> 1457 ret <2 x double> %res 1458} 1459 1460define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind { 1461; X86-SSE-LABEL: test_mm_cvtsd_f64: 1462; X86-SSE: # %bb.0: 1463; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 1464; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 1465; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 1466; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 1467; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24] 1468; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 1469; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 1470; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 1471; X86-SSE-NEXT: retl # 
encoding: [0xc3] 1472; 1473; X86-AVX1-LABEL: test_mm_cvtsd_f64: 1474; X86-AVX1: # %bb.0: 1475; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] 1476; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 1477; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 1478; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 1479; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24] 1480; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 1481; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 1482; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] 1483; X86-AVX1-NEXT: retl # encoding: [0xc3] 1484; 1485; X86-AVX512-LABEL: test_mm_cvtsd_f64: 1486; X86-AVX512: # %bb.0: 1487; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] 1488; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 1489; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 1490; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 1491; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24] 1492; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 1493; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 1494; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] 1495; X86-AVX512-NEXT: retl # encoding: [0xc3] 1496; 1497; X64-LABEL: test_mm_cvtsd_f64: 1498; X64: # %bb.0: 1499; X64-NEXT: retq # encoding: [0xc3] 1500 %res = extractelement <2 x double> %a0, i32 0 1501 ret double %res 1502} 1503 1504define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind { 1505; SSE-LABEL: test_mm_cvtsd_si32: 1506; SSE: # %bb.0: 1507; SSE-NEXT: cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0] 1508; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1509; 1510; AVX1-LABEL: test_mm_cvtsd_si32: 1511; AVX1: # %bb.0: 1512; AVX1-NEXT: vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0] 1513; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1514; 1515; AVX512-LABEL: test_mm_cvtsd_si32: 1516; AVX512: # %bb.0: 1517; AVX512-NEXT: vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0] 1518; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1519 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) 1520 ret i32 %res 1521} 1522declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone 1523 1524define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { 1525; SSE-LABEL: test_mm_cvtsd_ss: 1526; SSE: # %bb.0: 1527; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1] 1528; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1529; 1530; AVX1-LABEL: test_mm_cvtsd_ss: 1531; AVX1: # %bb.0: 1532; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1] 1533; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1534; 1535; AVX512-LABEL: test_mm_cvtsd_ss: 1536; AVX512: # %bb.0: 1537; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1] 1538; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1539 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) 1540 ret <4 x float> %res 1541} 1542declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone 1543 1544define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { 1545; X86-SSE-LABEL: test_mm_cvtsd_ss_load: 1546; X86-SSE: # %bb.0: 1547; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1548; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00] 1549; X86-SSE-NEXT: retl # encoding: [0xc3] 1550; 1551; 
X86-AVX1-LABEL: test_mm_cvtsd_ss_load: 1552; X86-AVX1: # %bb.0: 1553; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1554; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00] 1555; X86-AVX1-NEXT: retl # encoding: [0xc3] 1556; 1557; X86-AVX512-LABEL: test_mm_cvtsd_ss_load: 1558; X86-AVX512: # %bb.0: 1559; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1560; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00] 1561; X86-AVX512-NEXT: retl # encoding: [0xc3] 1562; 1563; X64-SSE-LABEL: test_mm_cvtsd_ss_load: 1564; X64-SSE: # %bb.0: 1565; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07] 1566; X64-SSE-NEXT: retq # encoding: [0xc3] 1567; 1568; X64-AVX1-LABEL: test_mm_cvtsd_ss_load: 1569; X64-AVX1: # %bb.0: 1570; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07] 1571; X64-AVX1-NEXT: retq # encoding: [0xc3] 1572; 1573; X64-AVX512-LABEL: test_mm_cvtsd_ss_load: 1574; X64-AVX512: # %bb.0: 1575; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] 1576; X64-AVX512-NEXT: retq # encoding: [0xc3] 1577 %a1 = load <2 x double>, <2 x double>* %p1 1578 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) 1579 ret <4 x float> %res 1580} 1581 1582define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { 1583; SSE-LABEL: test_mm_cvtsi128_si32: 1584; SSE: # %bb.0: 1585; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] 1586; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1587; 1588; AVX1-LABEL: test_mm_cvtsi128_si32: 1589; AVX1: # %bb.0: 1590; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] 1591; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1592; 1593; AVX512-LABEL: test_mm_cvtsi128_si32: 1594; AVX512: # %bb.0: 1595; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] 1596; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1597 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 1598 %res = extractelement <4 x i32> %arg0, i32 0 1599 ret i32 %res 1600} 1601 1602define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind { 1603; X86-SSE-LABEL: test_mm_cvtsi32_sd: 1604; X86-SSE: # %bb.0: 1605; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04] 1606; X86-SSE-NEXT: retl # encoding: [0xc3] 1607; 1608; X86-AVX1-LABEL: test_mm_cvtsi32_sd: 1609; X86-AVX1: # %bb.0: 1610; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] 1611; X86-AVX1-NEXT: retl # encoding: [0xc3] 1612; 1613; X86-AVX512-LABEL: test_mm_cvtsi32_sd: 1614; X86-AVX512: # %bb.0: 1615; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] 1616; X86-AVX512-NEXT: retl # encoding: [0xc3] 1617; 1618; X64-SSE-LABEL: test_mm_cvtsi32_sd: 1619; X64-SSE: # %bb.0: 1620; X64-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7] 1621; X64-SSE-NEXT: retq # encoding: [0xc3] 1622; 1623; X64-AVX1-LABEL: test_mm_cvtsi32_sd: 1624; X64-AVX1: # %bb.0: 1625; X64-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7] 1626; X64-AVX1-NEXT: retq # encoding: [0xc3] 1627; 1628; X64-AVX512-LABEL: test_mm_cvtsi32_sd: 1629; X64-AVX512: # %bb.0: 1630; X64-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] 1631; 
X64-AVX512-NEXT: retq # encoding: [0xc3] 1632 %cvt = sitofp i32 %a1 to double 1633 %res = insertelement <2 x double> %a0, double %cvt, i32 0 1634 ret <2 x double> %res 1635} 1636 1637define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind { 1638; X86-SSE-LABEL: test_mm_cvtsi32_si128: 1639; X86-SSE: # %bb.0: 1640; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 1641; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1642; X86-SSE-NEXT: retl # encoding: [0xc3] 1643; 1644; X86-AVX1-LABEL: test_mm_cvtsi32_si128: 1645; X86-AVX1: # %bb.0: 1646; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 1647; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1648; X86-AVX1-NEXT: retl # encoding: [0xc3] 1649; 1650; X86-AVX512-LABEL: test_mm_cvtsi32_si128: 1651; X86-AVX512: # %bb.0: 1652; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 1653; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1654; X86-AVX512-NEXT: retl # encoding: [0xc3] 1655; 1656; X64-SSE-LABEL: test_mm_cvtsi32_si128: 1657; X64-SSE: # %bb.0: 1658; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 1659; X64-SSE-NEXT: retq # encoding: [0xc3] 1660; 1661; X64-AVX1-LABEL: test_mm_cvtsi32_si128: 1662; X64-AVX1: # %bb.0: 1663; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 1664; X64-AVX1-NEXT: retq # encoding: [0xc3] 1665; 1666; X64-AVX512-LABEL: test_mm_cvtsi32_si128: 1667; X64-AVX512: # %bb.0: 1668; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 1669; X64-AVX512-NEXT: retq # encoding: [0xc3] 1670 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 1671 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1 1672 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2 1673 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3 1674 %res = bitcast <4 x i32> %res3 to <2 x i64> 1675 ret <2 x i64> %res 1676} 1677 1678define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind { 1679; SSE-LABEL: test_mm_cvtss_sd: 1680; SSE: # %bb.0: 1681; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1] 1682; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1683; 1684; AVX1-LABEL: test_mm_cvtss_sd: 1685; AVX1: # %bb.0: 1686; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1] 1687; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1688; 1689; AVX512-LABEL: test_mm_cvtss_sd: 1690; AVX512: # %bb.0: 1691; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1] 1692; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1693 %ext = extractelement <4 x float> %a1, i32 0 1694 %cvt = fpext float %ext to double 1695 %res = insertelement <2 x double> %a0, double %cvt, i32 0 1696 ret <2 x double> %res 1697} 1698 1699define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind { 1700; SSE-LABEL: test_mm_cvttpd_epi32: 1701; SSE: # %bb.0: 1702; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0] 1703; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1704; 1705; AVX1-LABEL: test_mm_cvttpd_epi32: 1706; AVX1: # %bb.0: 1707; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0] 1708; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1709; 1710; AVX512-LABEL: test_mm_cvttpd_epi32: 1711; AVX512: # %bb.0: 1712; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] 1713; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1714 %res = call <4 x 
i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 1715 %bc = bitcast <4 x i32> %res to <2 x i64> 1716 ret <2 x i64> %bc 1717} 1718declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone 1719 1720define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind { 1721; SSE-LABEL: test_mm_cvttps_epi32: 1722; SSE: # %bb.0: 1723; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0] 1724; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1725; 1726; AVX1-LABEL: test_mm_cvttps_epi32: 1727; AVX1: # %bb.0: 1728; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0] 1729; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1730; 1731; AVX512-LABEL: test_mm_cvttps_epi32: 1732; AVX512: # %bb.0: 1733; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] 1734; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1735 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) 1736 %bc = bitcast <4 x i32> %res to <2 x i64> 1737 ret <2 x i64> %bc 1738} 1739declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone 1740 1741define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind { 1742; SSE-LABEL: test_mm_cvttsd_si32: 1743; SSE: # %bb.0: 1744; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0] 1745; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1746; 1747; AVX1-LABEL: test_mm_cvttsd_si32: 1748; AVX1: # %bb.0: 1749; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0] 1750; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1751; 1752; AVX512-LABEL: test_mm_cvttsd_si32: 1753; AVX512: # %bb.0: 1754; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0] 1755; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1756 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) 1757 ret i32 %res 1758} 1759declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone 1760 1761define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 1762; SSE-LABEL: test_mm_div_pd: 1763; SSE: # %bb.0: 1764; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1] 1765; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1766; 1767; AVX1-LABEL: test_mm_div_pd: 1768; AVX1: # %bb.0: 1769; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1] 1770; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1771; 1772; AVX512-LABEL: test_mm_div_pd: 1773; AVX512: # %bb.0: 1774; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1] 1775; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1776 %res = fdiv <2 x double> %a0, %a1 1777 ret <2 x double> %res 1778} 1779 1780define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 1781; SSE-LABEL: test_mm_div_sd: 1782; SSE: # %bb.0: 1783; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1] 1784; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1785; 1786; AVX1-LABEL: test_mm_div_sd: 1787; AVX1: # %bb.0: 1788; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1] 1789; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1790; 1791; AVX512-LABEL: test_mm_div_sd: 1792; AVX512: # %bb.0: 1793; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1] 1794; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1795 %ext0 = extractelement <2 x double> %a0, i32 0 1796 %ext1 = extractelement <2 x double> %a1, i32 0 1797 %fdiv = fdiv double %ext0, %ext1 1798 %res = insertelement <2 x double> %a0, double %fdiv, i32 0 1799 ret <2 x double> 
%res 1800} 1801 1802define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind { 1803; SSE-LABEL: test_mm_extract_epi16: 1804; SSE: # %bb.0: 1805; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01] 1806; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0] 1807; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1808; 1809; AVX1-LABEL: test_mm_extract_epi16: 1810; AVX1: # %bb.0: 1811; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01] 1812; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0] 1813; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1814; 1815; AVX512-LABEL: test_mm_extract_epi16: 1816; AVX512: # %bb.0: 1817; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01] 1818; AVX512-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0] 1819; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1820 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 1821 %ext = extractelement <8 x i16> %arg0, i32 1 1822 %res = zext i16 %ext to i32 1823 ret i32 %res 1824} 1825 1826define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind { 1827; X86-SSE-LABEL: test_mm_insert_epi16: 1828; X86-SSE: # %bb.0: 1829; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 1830; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01] 1831; X86-SSE-NEXT: retl # encoding: [0xc3] 1832; 1833; X86-AVX1-LABEL: test_mm_insert_epi16: 1834; X86-AVX1: # %bb.0: 1835; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 1836; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 1837; X86-AVX1-NEXT: retl # encoding: [0xc3] 1838; 1839; X86-AVX512-LABEL: test_mm_insert_epi16: 1840; X86-AVX512: # %bb.0: 1841; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 1842; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 1843; X86-AVX512-NEXT: retl # encoding: [0xc3] 1844; 1845; X64-SSE-LABEL: test_mm_insert_epi16: 1846; X64-SSE: # %bb.0: 1847; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01] 1848; X64-SSE-NEXT: retq # encoding: [0xc3] 1849; 1850; X64-AVX1-LABEL: test_mm_insert_epi16: 1851; X64-AVX1: # %bb.0: 1852; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01] 1853; X64-AVX1-NEXT: retq # encoding: [0xc3] 1854; 1855; X64-AVX512-LABEL: test_mm_insert_epi16: 1856; X64-AVX512: # %bb.0: 1857; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] 1858; X64-AVX512-NEXT: retq # encoding: [0xc3] 1859 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 1860 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1 1861 %bc = bitcast <8 x i16> %res to <2 x i64> 1862 ret <2 x i64> %bc 1863} 1864 1865define void @test_mm_lfence() nounwind { 1866; CHECK-LABEL: test_mm_lfence: 1867; CHECK: # %bb.0: 1868; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8] 1869; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1870 call void @llvm.x86.sse2.lfence() 1871 ret void 1872} 1873declare void @llvm.x86.sse2.lfence() nounwind readnone 1874 1875define <2 x double> @test_mm_load_pd(double* %a0) nounwind { 1876; X86-SSE-LABEL: test_mm_load_pd: 1877; X86-SSE: # %bb.0: 1878; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1879; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1880; X86-SSE-NEXT: retl # encoding: [0xc3] 1881; 1882; 
X86-AVX1-LABEL: test_mm_load_pd: 1883; X86-AVX1: # %bb.0: 1884; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1885; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1886; X86-AVX1-NEXT: retl # encoding: [0xc3] 1887; 1888; X86-AVX512-LABEL: test_mm_load_pd: 1889; X86-AVX512: # %bb.0: 1890; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1891; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1892; X86-AVX512-NEXT: retl # encoding: [0xc3] 1893; 1894; X64-SSE-LABEL: test_mm_load_pd: 1895; X64-SSE: # %bb.0: 1896; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1897; X64-SSE-NEXT: retq # encoding: [0xc3] 1898; 1899; X64-AVX1-LABEL: test_mm_load_pd: 1900; X64-AVX1: # %bb.0: 1901; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1902; X64-AVX1-NEXT: retq # encoding: [0xc3] 1903; 1904; X64-AVX512-LABEL: test_mm_load_pd: 1905; X64-AVX512: # %bb.0: 1906; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1907; X64-AVX512-NEXT: retq # encoding: [0xc3] 1908 %arg0 = bitcast double* %a0 to <2 x double>* 1909 %res = load <2 x double>, <2 x double>* %arg0, align 16 1910 ret <2 x double> %res 1911} 1912 1913define <2 x double> @test_mm_load_sd(double* %a0) nounwind { 1914; X86-SSE-LABEL: test_mm_load_sd: 1915; X86-SSE: # %bb.0: 1916; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1917; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] 1918; X86-SSE-NEXT: # xmm0 = mem[0],zero 1919; X86-SSE-NEXT: retl # encoding: [0xc3] 1920; 1921; X86-AVX1-LABEL: test_mm_load_sd: 1922; X86-AVX1: # %bb.0: 1923; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1924; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00] 1925; X86-AVX1-NEXT: # xmm0 = mem[0],zero 1926; X86-AVX1-NEXT: retl # encoding: [0xc3] 1927; 1928; X86-AVX512-LABEL: test_mm_load_sd: 1929; X86-AVX512: # %bb.0: 1930; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1931; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] 1932; X86-AVX512-NEXT: # xmm0 = mem[0],zero 1933; X86-AVX512-NEXT: retl # encoding: [0xc3] 1934; 1935; X64-SSE-LABEL: test_mm_load_sd: 1936; X64-SSE: # %bb.0: 1937; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] 1938; X64-SSE-NEXT: # xmm0 = mem[0],zero 1939; X64-SSE-NEXT: retq # encoding: [0xc3] 1940; 1941; X64-AVX1-LABEL: test_mm_load_sd: 1942; X64-AVX1: # %bb.0: 1943; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07] 1944; X64-AVX1-NEXT: # xmm0 = mem[0],zero 1945; X64-AVX1-NEXT: retq # encoding: [0xc3] 1946; 1947; X64-AVX512-LABEL: test_mm_load_sd: 1948; X64-AVX512: # %bb.0: 1949; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] 1950; X64-AVX512-NEXT: # xmm0 = mem[0],zero 1951; X64-AVX512-NEXT: retq # encoding: [0xc3] 1952 %ld = load double, double* %a0, align 1 1953 %res0 = insertelement <2 x double> undef, double %ld, i32 0 1954 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 1955 ret <2 x double> %res1 1956} 1957 1958define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind { 1959; X86-SSE-LABEL: test_mm_load_si128: 1960; X86-SSE: # %bb.0: 1961; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1962; X86-SSE-NEXT: movaps (%eax), %xmm0 # 
encoding: [0x0f,0x28,0x00] 1963; X86-SSE-NEXT: retl # encoding: [0xc3] 1964; 1965; X86-AVX1-LABEL: test_mm_load_si128: 1966; X86-AVX1: # %bb.0: 1967; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1968; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1969; X86-AVX1-NEXT: retl # encoding: [0xc3] 1970; 1971; X86-AVX512-LABEL: test_mm_load_si128: 1972; X86-AVX512: # %bb.0: 1973; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1974; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1975; X86-AVX512-NEXT: retl # encoding: [0xc3] 1976; 1977; X64-SSE-LABEL: test_mm_load_si128: 1978; X64-SSE: # %bb.0: 1979; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1980; X64-SSE-NEXT: retq # encoding: [0xc3] 1981; 1982; X64-AVX1-LABEL: test_mm_load_si128: 1983; X64-AVX1: # %bb.0: 1984; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1985; X64-AVX1-NEXT: retq # encoding: [0xc3] 1986; 1987; X64-AVX512-LABEL: test_mm_load_si128: 1988; X64-AVX512: # %bb.0: 1989; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1990; X64-AVX512-NEXT: retq # encoding: [0xc3] 1991 %res = load <2 x i64>, <2 x i64>* %a0, align 16 1992 ret <2 x i64> %res 1993} 1994 1995define <2 x double> @test_mm_load1_pd(double* %a0) nounwind { 1996; X86-SSE-LABEL: test_mm_load1_pd: 1997; X86-SSE: # %bb.0: 1998; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1999; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] 2000; X86-SSE-NEXT: # xmm0 = mem[0],zero 2001; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 2002; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 2003; X86-SSE-NEXT: retl # encoding: [0xc3] 2004; 2005; X86-AVX1-LABEL: test_mm_load1_pd: 2006; X86-AVX1: # %bb.0: 2007; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2008; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00] 2009; X86-AVX1-NEXT: # xmm0 = mem[0,0] 2010; X86-AVX1-NEXT: retl # encoding: [0xc3] 2011; 2012; X86-AVX512-LABEL: test_mm_load1_pd: 2013; X86-AVX512: # %bb.0: 2014; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2015; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00] 2016; X86-AVX512-NEXT: # xmm0 = mem[0,0] 2017; X86-AVX512-NEXT: retl # encoding: [0xc3] 2018; 2019; X64-SSE-LABEL: test_mm_load1_pd: 2020; X64-SSE: # %bb.0: 2021; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] 2022; X64-SSE-NEXT: # xmm0 = mem[0],zero 2023; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 2024; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 2025; X64-SSE-NEXT: retq # encoding: [0xc3] 2026; 2027; X64-AVX1-LABEL: test_mm_load1_pd: 2028; X64-AVX1: # %bb.0: 2029; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07] 2030; X64-AVX1-NEXT: # xmm0 = mem[0,0] 2031; X64-AVX1-NEXT: retq # encoding: [0xc3] 2032; 2033; X64-AVX512-LABEL: test_mm_load1_pd: 2034; X64-AVX512: # %bb.0: 2035; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07] 2036; X64-AVX512-NEXT: # xmm0 = mem[0,0] 2037; X64-AVX512-NEXT: retq # encoding: [0xc3] 2038 %ld = load double, double* %a0, align 8 2039 %res0 = insertelement <2 x double> undef, double %ld, i32 0 2040 %res1 = insertelement <2 x double> %res0, double %ld, i32 1 2041 ret <2 x double> %res1 2042} 2043 
2044define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind { 2045; X86-SSE-LABEL: test_mm_loadh_pd: 2046; X86-SSE: # %bb.0: 2047; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2048; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00] 2049; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2050; X86-SSE-NEXT: retl # encoding: [0xc3] 2051; 2052; X86-AVX1-LABEL: test_mm_loadh_pd: 2053; X86-AVX1: # %bb.0: 2054; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2055; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00] 2056; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2057; X86-AVX1-NEXT: retl # encoding: [0xc3] 2058; 2059; X86-AVX512-LABEL: test_mm_loadh_pd: 2060; X86-AVX512: # %bb.0: 2061; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2062; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00] 2063; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2064; X86-AVX512-NEXT: retl # encoding: [0xc3] 2065; 2066; X64-SSE-LABEL: test_mm_loadh_pd: 2067; X64-SSE: # %bb.0: 2068; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07] 2069; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2070; X64-SSE-NEXT: retq # encoding: [0xc3] 2071; 2072; X64-AVX1-LABEL: test_mm_loadh_pd: 2073; X64-AVX1: # %bb.0: 2074; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07] 2075; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2076; X64-AVX1-NEXT: retq # encoding: [0xc3] 2077; 2078; X64-AVX512-LABEL: test_mm_loadh_pd: 2079; X64-AVX512: # %bb.0: 2080; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] 2081; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 2082; X64-AVX512-NEXT: retq # encoding: [0xc3] 2083 %ld = load double, double* %a1, align 8 2084 %res = insertelement <2 x double> %a0, double %ld, i32 1 2085 ret <2 x double> %res 2086} 2087 2088define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind { 2089; X86-SSE-LABEL: test_mm_loadl_epi64: 2090; X86-SSE: # %bb.0: 2091; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2092; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] 2093; X86-SSE-NEXT: # xmm0 = mem[0],zero 2094; X86-SSE-NEXT: retl # encoding: [0xc3] 2095; 2096; X86-AVX1-LABEL: test_mm_loadl_epi64: 2097; X86-AVX1: # %bb.0: 2098; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2099; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00] 2100; X86-AVX1-NEXT: # xmm0 = mem[0],zero 2101; X86-AVX1-NEXT: retl # encoding: [0xc3] 2102; 2103; X86-AVX512-LABEL: test_mm_loadl_epi64: 2104; X86-AVX512: # %bb.0: 2105; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2106; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] 2107; X86-AVX512-NEXT: # xmm0 = mem[0],zero 2108; X86-AVX512-NEXT: retl # encoding: [0xc3] 2109; 2110; X64-SSE-LABEL: test_mm_loadl_epi64: 2111; X64-SSE: # %bb.0: 2112; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] 2113; X64-SSE-NEXT: # xmm0 = mem[0],zero 2114; X64-SSE-NEXT: retq # encoding: [0xc3] 2115; 2116; X64-AVX1-LABEL: test_mm_loadl_epi64: 2117; X64-AVX1: # %bb.0: 2118; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07] 2119; X64-AVX1-NEXT: # xmm0 = mem[0],zero 2120; X64-AVX1-NEXT: retq # encoding: [0xc3] 2121; 2122; 
X64-AVX512-LABEL: test_mm_loadl_epi64: 2123; X64-AVX512: # %bb.0: 2124; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] 2125; X64-AVX512-NEXT: # xmm0 = mem[0],zero 2126; X64-AVX512-NEXT: retq # encoding: [0xc3] 2127 %bc = bitcast <2 x i64>* %a1 to i64* 2128 %ld = load i64, i64* %bc, align 1 2129 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0 2130 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1 2131 ret <2 x i64> %res1 2132} 2133 2134define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind { 2135; X86-SSE-LABEL: test_mm_loadl_pd: 2136; X86-SSE: # %bb.0: 2137; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2138; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00] 2139; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2140; X86-SSE-NEXT: retl # encoding: [0xc3] 2141; 2142; X86-AVX1-LABEL: test_mm_loadl_pd: 2143; X86-AVX1: # %bb.0: 2144; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2145; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00] 2146; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2147; X86-AVX1-NEXT: retl # encoding: [0xc3] 2148; 2149; X86-AVX512-LABEL: test_mm_loadl_pd: 2150; X86-AVX512: # %bb.0: 2151; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2152; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00] 2153; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2154; X86-AVX512-NEXT: retl # encoding: [0xc3] 2155; 2156; X64-SSE-LABEL: test_mm_loadl_pd: 2157; X64-SSE: # %bb.0: 2158; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07] 2159; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2160; X64-SSE-NEXT: retq # encoding: [0xc3] 2161; 2162; X64-AVX1-LABEL: test_mm_loadl_pd: 2163; X64-AVX1: # %bb.0: 2164; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07] 2165; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2166; X64-AVX1-NEXT: retq # encoding: [0xc3] 2167; 2168; X64-AVX512-LABEL: test_mm_loadl_pd: 2169; X64-AVX512: # %bb.0: 2170; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] 2171; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 2172; X64-AVX512-NEXT: retq # encoding: [0xc3] 2173 %ld = load double, double* %a1, align 8 2174 %res = insertelement <2 x double> %a0, double %ld, i32 0 2175 ret <2 x double> %res 2176} 2177 2178define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { 2179; X86-SSE-LABEL: test_mm_loadr_pd: 2180; X86-SSE: # %bb.0: 2181; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2182; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 2183; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] 2184; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] 2185; X86-SSE-NEXT: retl # encoding: [0xc3] 2186; 2187; X86-AVX1-LABEL: test_mm_loadr_pd: 2188; X86-AVX1: # %bb.0: 2189; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2190; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01] 2191; X86-AVX1-NEXT: # xmm0 = mem[1,0] 2192; X86-AVX1-NEXT: retl # encoding: [0xc3] 2193; 2194; X86-AVX512-LABEL: test_mm_loadr_pd: 2195; X86-AVX512: # %bb.0: 2196; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2197; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01] 2198; 
X86-AVX512-NEXT: # xmm0 = mem[1,0] 2199; X86-AVX512-NEXT: retl # encoding: [0xc3] 2200; 2201; X64-SSE-LABEL: test_mm_loadr_pd: 2202; X64-SSE: # %bb.0: 2203; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 2204; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] 2205; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] 2206; X64-SSE-NEXT: retq # encoding: [0xc3] 2207; 2208; X64-AVX1-LABEL: test_mm_loadr_pd: 2209; X64-AVX1: # %bb.0: 2210; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] 2211; X64-AVX1-NEXT: # xmm0 = mem[1,0] 2212; X64-AVX1-NEXT: retq # encoding: [0xc3] 2213; 2214; X64-AVX512-LABEL: test_mm_loadr_pd: 2215; X64-AVX512: # %bb.0: 2216; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] 2217; X64-AVX512-NEXT: # xmm0 = mem[1,0] 2218; X64-AVX512-NEXT: retq # encoding: [0xc3] 2219 %arg0 = bitcast double* %a0 to <2 x double>* 2220 %ld = load <2 x double>, <2 x double>* %arg0, align 16 2221 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0> 2222 ret <2 x double> %res 2223} 2224 2225define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind { 2226; X86-SSE-LABEL: test_mm_loadu_pd: 2227; X86-SSE: # %bb.0: 2228; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2229; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 2230; X86-SSE-NEXT: retl # encoding: [0xc3] 2231; 2232; X86-AVX1-LABEL: test_mm_loadu_pd: 2233; X86-AVX1: # %bb.0: 2234; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2235; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 2236; X86-AVX1-NEXT: retl # encoding: [0xc3] 2237; 2238; X86-AVX512-LABEL: test_mm_loadu_pd: 2239; X86-AVX512: # %bb.0: 2240; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2241; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 2242; X86-AVX512-NEXT: retl # encoding: [0xc3] 2243; 2244; X64-SSE-LABEL: test_mm_loadu_pd: 2245; X64-SSE: # %bb.0: 2246; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 2247; X64-SSE-NEXT: retq # encoding: [0xc3] 2248; 2249; X64-AVX1-LABEL: test_mm_loadu_pd: 2250; X64-AVX1: # %bb.0: 2251; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 2252; X64-AVX1-NEXT: retq # encoding: [0xc3] 2253; 2254; X64-AVX512-LABEL: test_mm_loadu_pd: 2255; X64-AVX512: # %bb.0: 2256; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 2257; X64-AVX512-NEXT: retq # encoding: [0xc3] 2258 %arg0 = bitcast double* %a0 to <2 x double>* 2259 %res = load <2 x double>, <2 x double>* %arg0, align 1 2260 ret <2 x double> %res 2261} 2262 2263define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind { 2264; X86-SSE-LABEL: test_mm_loadu_si128: 2265; X86-SSE: # %bb.0: 2266; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2267; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 2268; X86-SSE-NEXT: retl # encoding: [0xc3] 2269; 2270; X86-AVX1-LABEL: test_mm_loadu_si128: 2271; X86-AVX1: # %bb.0: 2272; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2273; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 2274; X86-AVX1-NEXT: retl # encoding: [0xc3] 2275; 2276; X86-AVX512-LABEL: test_mm_loadu_si128: 2277; X86-AVX512: # %bb.0: 2278; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 2279; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 2280; X86-AVX512-NEXT: retl # encoding: [0xc3] 2281; 2282; X64-SSE-LABEL: test_mm_loadu_si128: 2283; X64-SSE: # %bb.0: 2284; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 2285; X64-SSE-NEXT: retq # encoding: [0xc3] 2286; 2287; X64-AVX1-LABEL: test_mm_loadu_si128: 2288; X64-AVX1: # %bb.0: 2289; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 2290; X64-AVX1-NEXT: retq # encoding: [0xc3] 2291; 2292; X64-AVX512-LABEL: test_mm_loadu_si128: 2293; X64-AVX512: # %bb.0: 2294; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 2295; X64-AVX512-NEXT: retq # encoding: [0xc3] 2296 %res = load <2 x i64>, <2 x i64>* %a0, align 1 2297 ret <2 x i64> %res 2298} 2299 2300define <2 x i64> @test_mm_loadu_si64(i8* nocapture readonly %A) { 2301; X86-SSE-LABEL: test_mm_loadu_si64: 2302; X86-SSE: # %bb.0: # %entry 2303; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2304; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] 2305; X86-SSE-NEXT: # xmm0 = mem[0],zero 2306; X86-SSE-NEXT: retl # encoding: [0xc3] 2307; 2308; X86-AVX1-LABEL: test_mm_loadu_si64: 2309; X86-AVX1: # %bb.0: # %entry 2310; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2311; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00] 2312; X86-AVX1-NEXT: # xmm0 = mem[0],zero 2313; X86-AVX1-NEXT: retl # encoding: [0xc3] 2314; 2315; X86-AVX512-LABEL: test_mm_loadu_si64: 2316; X86-AVX512: # %bb.0: # %entry 2317; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2318; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] 2319; X86-AVX512-NEXT: # xmm0 = mem[0],zero 2320; X86-AVX512-NEXT: retl # encoding: [0xc3] 2321; 2322; X64-SSE-LABEL: test_mm_loadu_si64: 2323; X64-SSE: # %bb.0: # %entry 2324; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] 2325; X64-SSE-NEXT: # xmm0 = mem[0],zero 2326; X64-SSE-NEXT: retq # encoding: [0xc3] 2327; 2328; X64-AVX1-LABEL: test_mm_loadu_si64: 2329; X64-AVX1: # %bb.0: # %entry 2330; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07] 2331; X64-AVX1-NEXT: # xmm0 = mem[0],zero 2332; X64-AVX1-NEXT: retq # encoding: [0xc3] 2333; 2334; X64-AVX512-LABEL: test_mm_loadu_si64: 2335; X64-AVX512: # %bb.0: # %entry 2336; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] 2337; X64-AVX512-NEXT: # xmm0 = mem[0],zero 2338; X64-AVX512-NEXT: retq # encoding: [0xc3] 2339entry: 2340 %__v.i = bitcast i8* %A to i64* 2341 %0 = load i64, i64* %__v.i, align 1 2342 %vecinit1.i = insertelement <2 x i64> <i64 undef, i64 0>, i64 %0, i32 0 2343 ret <2 x i64> %vecinit1.i 2344} 2345 2346define <2 x i64> @test_mm_loadu_si32(i8* nocapture readonly %A) { 2347; X86-SSE-LABEL: test_mm_loadu_si32: 2348; X86-SSE: # %bb.0: # %entry 2349; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2350; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 2351; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2352; X86-SSE-NEXT: retl # encoding: [0xc3] 2353; 2354; X86-AVX1-LABEL: test_mm_loadu_si32: 2355; X86-AVX1: # %bb.0: # %entry 2356; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2357; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: 
[0xc5,0xfa,0x10,0x00] 2358; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2359; X86-AVX1-NEXT: retl # encoding: [0xc3] 2360; 2361; X86-AVX512-LABEL: test_mm_loadu_si32: 2362; X86-AVX512: # %bb.0: # %entry 2363; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2364; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] 2365; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2366; X86-AVX512-NEXT: retl # encoding: [0xc3] 2367; 2368; X64-SSE-LABEL: test_mm_loadu_si32: 2369; X64-SSE: # %bb.0: # %entry 2370; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 2371; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2372; X64-SSE-NEXT: retq # encoding: [0xc3] 2373; 2374; X64-AVX1-LABEL: test_mm_loadu_si32: 2375; X64-AVX1: # %bb.0: # %entry 2376; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] 2377; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2378; X64-AVX1-NEXT: retq # encoding: [0xc3] 2379; 2380; X64-AVX512-LABEL: test_mm_loadu_si32: 2381; X64-AVX512: # %bb.0: # %entry 2382; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] 2383; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2384; X64-AVX512-NEXT: retq # encoding: [0xc3] 2385entry: 2386 %__v.i = bitcast i8* %A to i32* 2387 %0 = load i32, i32* %__v.i, align 1 2388 %vecinit3.i = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0 2389 %1 = bitcast <4 x i32> %vecinit3.i to <2 x i64> 2390 ret <2 x i64> %1 2391} 2392 2393define <2 x i64> @test_mm_loadu_si16(i8* nocapture readonly %A) { 2394; X86-SSE-LABEL: test_mm_loadu_si16: 2395; X86-SSE: # %bb.0: # %entry 2396; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2397; X86-SSE-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00] 2398; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2399; X86-SSE-NEXT: retl # encoding: [0xc3] 2400; 2401; X86-AVX1-LABEL: test_mm_loadu_si16: 2402; X86-AVX1: # %bb.0: # %entry 2403; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2404; X86-AVX1-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00] 2405; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 2406; X86-AVX1-NEXT: retl # encoding: [0xc3] 2407; 2408; X86-AVX512-LABEL: test_mm_loadu_si16: 2409; X86-AVX512: # %bb.0: # %entry 2410; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2411; X86-AVX512-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00] 2412; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 2413; X86-AVX512-NEXT: retl # encoding: [0xc3] 2414; 2415; X64-SSE-LABEL: test_mm_loadu_si16: 2416; X64-SSE: # %bb.0: # %entry 2417; X64-SSE-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] 2418; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2419; X64-SSE-NEXT: retq # encoding: [0xc3] 2420; 2421; X64-AVX1-LABEL: test_mm_loadu_si16: 2422; X64-AVX1: # %bb.0: # %entry 2423; X64-AVX1-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] 2424; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 2425; X64-AVX1-NEXT: retq # encoding: [0xc3] 2426; 2427; X64-AVX512-LABEL: test_mm_loadu_si16: 2428; X64-AVX512: # %bb.0: # %entry 2429; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] 2430; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 2431; X64-AVX512-NEXT: retq # encoding: [0xc3] 2432entry: 
2433 %__v.i = bitcast i8* %A to i16* 2434 %0 = load i16, i16* %__v.i, align 1 2435 %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i32 0 2436 %1 = bitcast <8 x i16> %vecinit7.i to <2 x i64> 2437 ret <2 x i64> %1 2438} 2439 2440define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2441; SSE-LABEL: test_mm_madd_epi16: 2442; SSE: # %bb.0: 2443; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1] 2444; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2445; 2446; AVX1-LABEL: test_mm_madd_epi16: 2447; AVX1: # %bb.0: 2448; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1] 2449; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2450; 2451; AVX512-LABEL: test_mm_madd_epi16: 2452; AVX512: # %bb.0: 2453; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] 2454; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2455 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2456 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2457 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1) 2458 %bc = bitcast <4 x i32> %res to <2 x i64> 2459 ret <2 x i64> %bc 2460} 2461declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 2462 2463define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind { 2464; X86-SSE-LABEL: test_mm_maskmoveu_si128: 2465; X86-SSE: # %bb.0: 2466; X86-SSE-NEXT: pushl %edi # encoding: [0x57] 2467; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08] 2468; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1] 2469; X86-SSE-NEXT: popl %edi # encoding: [0x5f] 2470; X86-SSE-NEXT: retl # encoding: [0xc3] 2471; 2472; X86-AVX-LABEL: test_mm_maskmoveu_si128: 2473; X86-AVX: # %bb.0: 2474; X86-AVX-NEXT: pushl %edi # encoding: [0x57] 2475; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08] 2476; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] 2477; X86-AVX-NEXT: popl %edi # encoding: [0x5f] 2478; X86-AVX-NEXT: retl # encoding: [0xc3] 2479; 2480; X64-SSE-LABEL: test_mm_maskmoveu_si128: 2481; X64-SSE: # %bb.0: 2482; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1] 2483; X64-SSE-NEXT: retq # encoding: [0xc3] 2484; 2485; X64-AVX-LABEL: test_mm_maskmoveu_si128: 2486; X64-AVX: # %bb.0: 2487; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] 2488; X64-AVX-NEXT: retq # encoding: [0xc3] 2489 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2490 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 2491 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2) 2492 ret void 2493} 2494declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind 2495 2496define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2497; SSE-LABEL: test_mm_max_epi16: 2498; SSE: # %bb.0: 2499; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1] 2500; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2501; 2502; AVX1-LABEL: test_mm_max_epi16: 2503; AVX1: # %bb.0: 2504; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1] 2505; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2506; 2507; AVX512-LABEL: test_mm_max_epi16: 2508; AVX512: # %bb.0: 2509; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] 2510; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2511 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 
2512 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2513 %sel = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) 2514 %bc = bitcast <8 x i16> %sel to <2 x i64> 2515 ret <2 x i64> %bc 2516} 2517declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) 2518 2519define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2520; SSE-LABEL: test_mm_max_epu8: 2521; SSE: # %bb.0: 2522; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1] 2523; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2524; 2525; AVX1-LABEL: test_mm_max_epu8: 2526; AVX1: # %bb.0: 2527; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1] 2528; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2529; 2530; AVX512-LABEL: test_mm_max_epu8: 2531; AVX512: # %bb.0: 2532; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1] 2533; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2534 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2535 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 2536 %sel = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) 2537 %bc = bitcast <16 x i8> %sel to <2 x i64> 2538 ret <2 x i64> %bc 2539} 2540declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) 2541 2542define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 2543; SSE-LABEL: test_mm_max_pd: 2544; SSE: # %bb.0: 2545; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1] 2546; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2547; 2548; AVX1-LABEL: test_mm_max_pd: 2549; AVX1: # %bb.0: 2550; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1] 2551; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2552; 2553; AVX512-LABEL: test_mm_max_pd: 2554; AVX512: # %bb.0: 2555; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1] 2556; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2557 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 2558 ret <2 x double> %res 2559} 2560declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 2561 2562define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 2563; SSE-LABEL: test_mm_max_sd: 2564; SSE: # %bb.0: 2565; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1] 2566; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2567; 2568; AVX1-LABEL: test_mm_max_sd: 2569; AVX1: # %bb.0: 2570; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1] 2571; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2572; 2573; AVX512-LABEL: test_mm_max_sd: 2574; AVX512: # %bb.0: 2575; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1] 2576; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2577 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 2578 ret <2 x double> %res 2579} 2580declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 2581 2582define void @test_mm_mfence() nounwind { 2583; CHECK-LABEL: test_mm_mfence: 2584; CHECK: # %bb.0: 2585; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0] 2586; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2587 call void @llvm.x86.sse2.mfence() 2588 ret void 2589} 2590declare void @llvm.x86.sse2.mfence() nounwind readnone 2591 2592define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2593; SSE-LABEL: test_mm_min_epi16: 2594; SSE: # %bb.0: 2595; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1] 
2596; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2597; 2598; AVX1-LABEL: test_mm_min_epi16: 2599; AVX1: # %bb.0: 2600; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1] 2601; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2602; 2603; AVX512-LABEL: test_mm_min_epi16: 2604; AVX512: # %bb.0: 2605; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] 2606; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2607 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2608 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2609 %sel = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) 2610 %bc = bitcast <8 x i16> %sel to <2 x i64> 2611 ret <2 x i64> %bc 2612} 2613declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) 2614 2615define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2616; SSE-LABEL: test_mm_min_epu8: 2617; SSE: # %bb.0: 2618; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1] 2619; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2620; 2621; AVX1-LABEL: test_mm_min_epu8: 2622; AVX1: # %bb.0: 2623; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1] 2624; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2625; 2626; AVX512-LABEL: test_mm_min_epu8: 2627; AVX512: # %bb.0: 2628; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1] 2629; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2630 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2631 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 2632 %sel = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) 2633 %bc = bitcast <16 x i8> %sel to <2 x i64> 2634 ret <2 x i64> %bc 2635} 2636declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) 2637 2638define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 2639; SSE-LABEL: test_mm_min_pd: 2640; SSE: # %bb.0: 2641; SSE-NEXT: minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1] 2642; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2643; 2644; AVX1-LABEL: test_mm_min_pd: 2645; AVX1: # %bb.0: 2646; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1] 2647; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2648; 2649; AVX512-LABEL: test_mm_min_pd: 2650; AVX512: # %bb.0: 2651; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1] 2652; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2653 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 2654 ret <2 x double> %res 2655} 2656declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 2657 2658define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 2659; SSE-LABEL: test_mm_min_sd: 2660; SSE: # %bb.0: 2661; SSE-NEXT: minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1] 2662; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2663; 2664; AVX1-LABEL: test_mm_min_sd: 2665; AVX1: # %bb.0: 2666; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1] 2667; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2668; 2669; AVX512-LABEL: test_mm_min_sd: 2670; AVX512: # %bb.0: 2671; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1] 2672; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2673 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 2674 ret <2 x double> %res 2675} 2676declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 2677 2678define <2 x i64> 
@test_mm_move_epi64(<2 x i64> %a0) nounwind { 2679; SSE-LABEL: test_mm_move_epi64: 2680; SSE: # %bb.0: 2681; SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] 2682; SSE-NEXT: # xmm0 = xmm0[0],zero 2683; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2684; 2685; AVX1-LABEL: test_mm_move_epi64: 2686; AVX1: # %bb.0: 2687; AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] 2688; AVX1-NEXT: # xmm0 = xmm0[0],zero 2689; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2690; 2691; AVX512-LABEL: test_mm_move_epi64: 2692; AVX512: # %bb.0: 2693; AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] 2694; AVX512-NEXT: # xmm0 = xmm0[0],zero 2695; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2696 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> 2697 ret <2 x i64> %res 2698} 2699 2700define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 2701; SSE-LABEL: test_mm_move_sd: 2702; SSE: # %bb.0: 2703; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1] 2704; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1] 2705; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2706; 2707; AVX-LABEL: test_mm_move_sd: 2708; AVX: # %bb.0: 2709; AVX-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] 2710; AVX-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] 2711; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2712 %ext0 = extractelement <2 x double> %a1, i32 0 2713 %res0 = insertelement <2 x double> undef, double %ext0, i32 0 2714 %ext1 = extractelement <2 x double> %a0, i32 1 2715 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1 2716 ret <2 x double> %res1 2717} 2718 2719define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind { 2720; SSE-LABEL: test_mm_movemask_epi8: 2721; SSE: # %bb.0: 2722; SSE-NEXT: pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0] 2723; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2724; 2725; AVX-LABEL: test_mm_movemask_epi8: 2726; AVX: # %bb.0: 2727; AVX-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0] 2728; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2729 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2730 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0) 2731 ret i32 %res 2732} 2733declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone 2734 2735define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind { 2736; SSE-LABEL: test_mm_movemask_pd: 2737; SSE: # %bb.0: 2738; SSE-NEXT: movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0] 2739; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2740; 2741; AVX-LABEL: test_mm_movemask_pd: 2742; AVX: # %bb.0: 2743; AVX-NEXT: vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0] 2744; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2745 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) 2746 ret i32 %res 2747} 2748declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone 2749 2750define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2751; SSE-LABEL: test_mm_mul_epu32: 2752; SSE: # %bb.0: 2753; SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1] 2754; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2755; 2756; AVX1-LABEL: test_mm_mul_epu32: 2757; AVX1: # %bb.0: 2758; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1] 2759; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2760; 2761; AVX512-LABEL: test_mm_mul_epu32: 2762; AVX512: # %bb.0: 2763; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] 2764; 
AVX512-NEXT: vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a] 2765; AVX512-NEXT: # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 2766; AVX512-NEXT: vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a] 2767; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2768; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1] 2769; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2770 %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295> 2771 %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295> 2772 %res = mul nuw <2 x i64> %A, %B 2773 ret <2 x i64> %res 2774} 2775 2776define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 2777; SSE-LABEL: test_mm_mul_pd: 2778; SSE: # %bb.0: 2779; SSE-NEXT: mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1] 2780; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2781; 2782; AVX1-LABEL: test_mm_mul_pd: 2783; AVX1: # %bb.0: 2784; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1] 2785; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2786; 2787; AVX512-LABEL: test_mm_mul_pd: 2788; AVX512: # %bb.0: 2789; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1] 2790; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2791 %res = fmul <2 x double> %a0, %a1 2792 ret <2 x double> %res 2793} 2794 2795define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 2796; SSE-LABEL: test_mm_mul_sd: 2797; SSE: # %bb.0: 2798; SSE-NEXT: mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1] 2799; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2800; 2801; AVX1-LABEL: test_mm_mul_sd: 2802; AVX1: # %bb.0: 2803; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1] 2804; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2805; 2806; AVX512-LABEL: test_mm_mul_sd: 2807; AVX512: # %bb.0: 2808; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1] 2809; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2810 %ext0 = extractelement <2 x double> %a0, i32 0 2811 %ext1 = extractelement <2 x double> %a1, i32 0 2812 %fmul = fmul double %ext0, %ext1 2813 %res = insertelement <2 x double> %a0, double %fmul, i32 0 2814 ret <2 x double> %res 2815} 2816 2817define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2818; SSE-LABEL: test_mm_mulhi_epi16: 2819; SSE: # %bb.0: 2820; SSE-NEXT: pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1] 2821; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2822; 2823; AVX1-LABEL: test_mm_mulhi_epi16: 2824; AVX1: # %bb.0: 2825; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1] 2826; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2827; 2828; AVX512-LABEL: test_mm_mulhi_epi16: 2829; AVX512: # %bb.0: 2830; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] 2831; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2832 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2833 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2834 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1) 2835 %bc = bitcast <8 x i16> %res to <2 x i64> 2836 ret <2 x i64> %bc 2837} 2838declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone 2839 2840define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) { 2841; SSE-LABEL: test_mm_mulhi_epu16: 2842; SSE: # %bb.0: 2843; SSE-NEXT: pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1] 2844; SSE-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] 2845; 2846; AVX1-LABEL: test_mm_mulhi_epu16: 2847; AVX1: # %bb.0: 2848; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1] 2849; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2850; 2851; AVX512-LABEL: test_mm_mulhi_epu16: 2852; AVX512: # %bb.0: 2853; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] 2854; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2855 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2856 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2857 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1) 2858 %bc = bitcast <8 x i16> %res to <2 x i64> 2859 ret <2 x i64> %bc 2860} 2861declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone 2862 2863define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2864; SSE-LABEL: test_mm_mullo_epi16: 2865; SSE: # %bb.0: 2866; SSE-NEXT: pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1] 2867; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2868; 2869; AVX1-LABEL: test_mm_mullo_epi16: 2870; AVX1: # %bb.0: 2871; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1] 2872; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2873; 2874; AVX512-LABEL: test_mm_mullo_epi16: 2875; AVX512: # %bb.0: 2876; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] 2877; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2878 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2879 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2880 %res = mul <8 x i16> %arg0, %arg1 2881 %bc = bitcast <8 x i16> %res to <2 x i64> 2882 ret <2 x i64> %bc 2883} 2884 2885define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 2886; SSE-LABEL: test_mm_or_pd: 2887; SSE: # %bb.0: 2888; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 2889; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2890; 2891; AVX1-LABEL: test_mm_or_pd: 2892; AVX1: # %bb.0: 2893; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 2894; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2895; 2896; AVX512-LABEL: test_mm_or_pd: 2897; AVX512: # %bb.0: 2898; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 2899; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2900 %arg0 = bitcast <2 x double> %a0 to <4 x i32> 2901 %arg1 = bitcast <2 x double> %a1 to <4 x i32> 2902 %res = or <4 x i32> %arg0, %arg1 2903 %bc = bitcast <4 x i32> %res to <2 x double> 2904 ret <2 x double> %bc 2905} 2906 2907define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2908; SSE-LABEL: test_mm_or_si128: 2909; SSE: # %bb.0: 2910; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 2911; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2912; 2913; AVX1-LABEL: test_mm_or_si128: 2914; AVX1: # %bb.0: 2915; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 2916; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2917; 2918; AVX512-LABEL: test_mm_or_si128: 2919; AVX512: # %bb.0: 2920; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 2921; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2922 %res = or <2 x i64> %a0, %a1 2923 ret <2 x i64> %res 2924} 2925 2926define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2927; SSE-LABEL: test_mm_packs_epi16: 2928; SSE: # %bb.0: 2929; SSE-NEXT: packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1] 2930; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2931; 2932; AVX1-LABEL: 
test_mm_packs_epi16: 2933; AVX1: # %bb.0: 2934; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1] 2935; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2936; 2937; AVX512-LABEL: test_mm_packs_epi16: 2938; AVX512: # %bb.0: 2939; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 2940; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2941 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2942 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2943 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1) 2944 %bc = bitcast <16 x i8> %res to <2 x i64> 2945 ret <2 x i64> %bc 2946} 2947declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 2948 2949define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) { 2950; SSE-LABEL: test_mm_packs_epi32: 2951; SSE: # %bb.0: 2952; SSE-NEXT: packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1] 2953; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2954; 2955; AVX1-LABEL: test_mm_packs_epi32: 2956; AVX1: # %bb.0: 2957; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1] 2958; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2959; 2960; AVX512-LABEL: test_mm_packs_epi32: 2961; AVX512: # %bb.0: 2962; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] 2963; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2964 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2965 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 2966 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1) 2967 %bc = bitcast <8 x i16> %res to <2 x i64> 2968 ret <2 x i64> %bc 2969} 2970declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 2971 2972define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2973; SSE-LABEL: test_mm_packus_epi16: 2974; SSE: # %bb.0: 2975; SSE-NEXT: packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1] 2976; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2977; 2978; AVX1-LABEL: test_mm_packus_epi16: 2979; AVX1: # %bb.0: 2980; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1] 2981; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2982; 2983; AVX512-LABEL: test_mm_packus_epi16: 2984; AVX512: # %bb.0: 2985; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 2986; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2987 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2988 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2989 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1) 2990 %bc = bitcast <16 x i8> %res to <2 x i64> 2991 ret <2 x i64> %bc 2992} 2993declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 2994 2995define void @test_mm_pause() nounwind { 2996; CHECK-LABEL: test_mm_pause: 2997; CHECK: # %bb.0: 2998; CHECK-NEXT: pause # encoding: [0xf3,0x90] 2999; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3000 call void @llvm.x86.sse2.pause() 3001 ret void 3002} 3003declare void @llvm.x86.sse2.pause() nounwind readnone 3004 3005define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 3006; SSE-LABEL: test_mm_sad_epu8: 3007; SSE: # %bb.0: 3008; SSE-NEXT: psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1] 3009; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3010; 3011; AVX1-LABEL: test_mm_sad_epu8: 3012; AVX1: # %bb.0: 3013; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1] 3014; AVX1-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 3015; 3016; AVX512-LABEL: test_mm_sad_epu8: 3017; AVX512: # %bb.0: 3018; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1] 3019; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3020 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 3021 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 3022 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1) 3023 ret <2 x i64> %res 3024} 3025declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 3026 3027define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { 3028; X86-SSE-LABEL: test_mm_set_epi8: 3029; X86-SSE: # %bb.0: 3030; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3031; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3032; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3033; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3034; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3035; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3036; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3037; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3038; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3039; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3040; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3041; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3042; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3043; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3044; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3045; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3046; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3047; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3048; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3049; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3050; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3051; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3052; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3053; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3054; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3055; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3056; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 3057; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3058; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 3059; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3060; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x24] 3061; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3062; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3063; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3064; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3065; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3066; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3067; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3068; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3069; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3070; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3071; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3072; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 3073; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3074; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 3075; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3076; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3077; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3078; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3079; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3080; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 3081; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3082; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3083; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3084; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 3085; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 3086; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 3087; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3088; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 3089; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3090; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3091; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3092; X86-SSE-NEXT: retl # encoding: [0xc3] 3093; 3094; X86-AVX1-LABEL: test_mm_set_epi8: 3095; X86-AVX1: # %bb.0: 3096; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 3097; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 3098; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 3099; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 3100; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3101; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3102; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 3103; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, 
%xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3104; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3105; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3106; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3107; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3108; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3109; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3110; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 3111; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3112; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3113; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3114; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3115; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3116; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3117; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3118; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3119; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3120; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3121; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3122; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3123; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3124; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3125; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3126; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3127; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3128; X86-AVX1-NEXT: retl # encoding: [0xc3] 3129; 3130; X86-AVX512-LABEL: test_mm_set_epi8: 3131; X86-AVX512: # %bb.0: 3132; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 3133; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 3134; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 3135; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 3136; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3137; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3138; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 3139; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3140; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3141; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3142; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3143; 
X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3144; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3145; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3146; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 3147; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3148; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3149; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3150; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3151; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3152; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3153; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3154; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3155; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3156; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3157; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3158; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3159; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3160; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3161; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3162; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3163; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3164; X86-AVX512-NEXT: retl # encoding: [0xc3] 3165; 3166; X64-SSE-LABEL: test_mm_set_epi8: 3167; X64-SSE: # %bb.0: 3168; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3169; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3170; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3171; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3172; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3173; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3174; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3175; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3176; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3177; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3178; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3179; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3180; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3181; X64-SSE-NEXT: # xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3182; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3183; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3184; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3185; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3186; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3187; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3188; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3189; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3190; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3191; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3192; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3193; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3194; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 3195; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3196; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 3197; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3198; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3199; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3200; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3201; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3202; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3203; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3204; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3205; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3206; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3207; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3208; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3209; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3210; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 3211; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3212; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3213; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3214; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3215; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3216; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3217; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3218; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 3219; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3220; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3221; X64-SSE-NEXT: movd 
%eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3222; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 3223; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 3224; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 3225; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3226; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 3227; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3228; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3229; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3230; X64-SSE-NEXT: retq # encoding: [0xc3] 3231; 3232; X64-AVX1-LABEL: test_mm_set_epi8: 3233; X64-AVX1: # %bb.0: 3234; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3235; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3236; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3237; X64-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3238; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3239; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3240; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3241; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3242; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3243; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3244; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3245; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3246; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3247; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3248; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3249; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3250; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3251; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3252; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3253; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3254; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3255; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3256; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3257; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3258; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3259; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3260; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3261; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3262; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3263; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3264; X64-AVX1-NEXT: movzbl %dil, %eax # 
encoding: [0x40,0x0f,0xb6,0xc7] 3265; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3266; X64-AVX1-NEXT: retq # encoding: [0xc3] 3267; 3268; X64-AVX512-LABEL: test_mm_set_epi8: 3269; X64-AVX512: # %bb.0: 3270; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3271; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3272; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3273; X64-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3274; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3275; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3276; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3277; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3278; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3279; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3280; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3281; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3282; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3283; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3284; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3285; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3286; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3287; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3288; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3289; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3290; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3291; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3292; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3293; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3294; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3295; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3296; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3297; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3298; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3299; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3300; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3301; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3302; 
X64-AVX512-NEXT: retq # encoding: [0xc3] 3303 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 3304 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 3305 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 3306 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3 3307 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4 3308 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5 3309 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6 3310 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7 3311 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8 3312 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9 3313 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10 3314 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11 3315 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12 3316 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13 3317 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14 3318 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15 3319 %res = bitcast <16 x i8> %res15 to <2 x i64> 3320 ret <2 x i64> %res 3321} 3322 3323define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 3324; X86-SSE-LABEL: test_mm_set_epi16: 3325; X86-SSE: # %bb.0: 3326; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3327; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3328; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3329; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3330; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3331; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3332; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3333; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3334; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3335; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] 3336; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3337; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] 3338; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3339; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] 3340; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3341; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3342; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3343; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3344; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] 3345; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 3346; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] 3347; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 3348; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] 3349; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 3350; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] 3351; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 3352; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] 3353; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] 3354; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 
# encoding: [0x66,0x0f,0x6c,0xc4] 3355; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] 3356; X86-SSE-NEXT: retl # encoding: [0xc3] 3357; 3358; X86-AVX1-LABEL: test_mm_set_epi16: 3359; X86-AVX1: # %bb.0: 3360; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3361; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3362; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3363; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3364; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3365; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3366; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3367; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3368; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3369; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3370; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3371; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 3372; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3373; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3374; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3375; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3376; X86-AVX1-NEXT: retl # encoding: [0xc3] 3377; 3378; X86-AVX512-LABEL: test_mm_set_epi16: 3379; X86-AVX512: # %bb.0: 3380; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3381; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3382; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3383; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3384; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3385; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3386; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3387; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3388; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3389; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3390; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3391; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 3392; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3393; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3394; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3395; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3396; X86-AVX512-NEXT: retl # encoding: [0xc3] 3397; 3398; X64-SSE-LABEL: test_mm_set_epi16: 3399; X64-SSE: # %bb.0: 3400; X64-SSE-NEXT: movzwl 
{{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 3401; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3402; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3403; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] 3404; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 3405; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3406; X64-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] 3407; X64-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] 3408; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] 3409; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 3410; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] 3411; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3412; X64-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] 3413; X64-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] 3414; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 3415; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3416; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3417; X64-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] 3418; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] 3419; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3420; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] 3421; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3422; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] 3423; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 3424; X64-SSE-NEXT: retq # encoding: [0xc3] 3425; 3426; X64-AVX1-LABEL: test_mm_set_epi16: 3427; X64-AVX1: # %bb.0: 3428; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3429; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3430; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3431; X64-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3432; X64-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3433; X64-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3434; X64-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3435; X64-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3436; X64-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3437; X64-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3438; X64-AVX1-NEXT: retq # encoding: [0xc3] 3439; 3440; X64-AVX512-LABEL: test_mm_set_epi16: 3441; X64-AVX512: # %bb.0: 3442; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3443; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3444; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3445; X64-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3446; X64-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3447; X64-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3448; X64-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3449; X64-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3450; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3451; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3452; X64-AVX512-NEXT: retq # encoding: [0xc3] 3453 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 3454 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 3455 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 3456 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3 3457 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4 3458 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5 3459 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6 3460 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 3461 %res = bitcast <8 x i16> %res7 to <2 x i64> 3462 ret <2 x i64> %res 3463} 3464 3465define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 3466; X86-SSE-LABEL: test_mm_set_epi32: 3467; X86-SSE: # %bb.0: 3468; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 3469; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3470; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 3471; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 3472; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 3473; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3474; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c] 3475; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 3476; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 3477; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3478; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 3479; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3480; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3481; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3482; X86-SSE-NEXT: retl # encoding: [0xc3] 3483; 3484; X86-AVX1-LABEL: test_mm_set_epi32: 3485; X86-AVX1: # %bb.0: 3486; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] 3487; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3488; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] 3489; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] 3490; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] 3491; X86-AVX1-NEXT: retl # encoding: [0xc3] 3492; 3493; X86-AVX512-LABEL: test_mm_set_epi32: 3494; X86-AVX512: # %bb.0: 3495; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] 3496; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 3497; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] 3498; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] 3499; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] 3500; X86-AVX512-NEXT: retl # encoding: [0xc3] 3501; 3502; X64-SSE-LABEL: test_mm_set_epi32: 3503; X64-SSE: # %bb.0: 3504; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3505; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] 3506; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] 3507; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3508; X64-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2] 3509; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] 3510; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] 3511; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3512; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3513; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3514; X64-SSE-NEXT: retq # encoding: [0xc3] 3515; 3516; X64-AVX1-LABEL: test_mm_set_epi32: 3517; X64-AVX1: # %bb.0: 3518; X64-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 3519; X64-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] 3520; X64-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] 3521; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 3522; X64-AVX1-NEXT: retq # encoding: [0xc3] 3523; 3524; X64-AVX512-LABEL: test_mm_set_epi32: 3525; X64-AVX512: # %bb.0: 3526; X64-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 3527; X64-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] 3528; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] 3529; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 3530; X64-AVX512-NEXT: retq # encoding: [0xc3] 3531 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 3532 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 3533 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 3534 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 3535 %res = bitcast <4 x i32> %res3 to <2 x i64> 3536 ret <2 x i64> %res 3537} 3538 3539; TODO test_mm_set_epi64 3540 3541define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { 3542; X86-SSE-LABEL: test_mm_set_epi64x: 3543; X86-SSE: # %bb.0: 3544; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 3545; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 3546; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 3547; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3548; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 3549; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3550; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c] 3551; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3552; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10] 3553; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 3554; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 3555; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3556; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3557; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3558; X86-SSE-NEXT: retl # encoding: [0xc3] 3559; 3560; X86-AVX1-LABEL: test_mm_set_epi64x: 3561; X86-AVX1: # %bb.0: 3562; X86-AVX1-NEXT: vmovd 
{{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] 3563; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3564; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] 3565; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] 3566; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] 3567; X86-AVX1-NEXT: retl # encoding: [0xc3] 3568; 3569; X86-AVX512-LABEL: test_mm_set_epi64x: 3570; X86-AVX512: # %bb.0: 3571; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] 3572; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 3573; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] 3574; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] 3575; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] 3576; X86-AVX512-NEXT: retl # encoding: [0xc3] 3577; 3578; X64-SSE-LABEL: test_mm_set_epi64x: 3579; X64-SSE: # %bb.0: 3580; X64-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf] 3581; X64-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6] 3582; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3583; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3584; X64-SSE-NEXT: retq # encoding: [0xc3] 3585; 3586; X64-AVX1-LABEL: test_mm_set_epi64x: 3587; X64-AVX1: # %bb.0: 3588; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] 3589; X64-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce] 3590; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] 3591; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 3592; X64-AVX1-NEXT: retq # encoding: [0xc3] 3593; 3594; X64-AVX512-LABEL: test_mm_set_epi64x: 3595; X64-AVX512: # %bb.0: 3596; X64-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] 3597; X64-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce] 3598; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] 3599; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 3600; X64-AVX512-NEXT: retq # encoding: [0xc3] 3601 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 3602 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 3603 ret <2 x i64> %res1 3604} 3605 3606define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { 3607; X86-SSE-LABEL: test_mm_set_pd: 3608; X86-SSE: # %bb.0: 3609; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c] 3610; X86-SSE-NEXT: # xmm0 = mem[0],zero 3611; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04] 3612; X86-SSE-NEXT: # xmm1 = mem[0],zero 3613; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3614; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3615; X86-SSE-NEXT: retl # encoding: [0xc3] 3616; 3617; X86-AVX1-LABEL: test_mm_set_pd: 3618; X86-AVX1: # %bb.0: 3619; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 3620; X86-AVX1-NEXT: # xmm0 = mem[0],zero 3621; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: 
[0xc5,0xfb,0x10,0x4c,0x24,0x04] 3622; X86-AVX1-NEXT: # xmm1 = mem[0],zero 3623; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 3624; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 3625; X86-AVX1-NEXT: retl # encoding: [0xc3] 3626; 3627; X86-AVX512-LABEL: test_mm_set_pd: 3628; X86-AVX512: # %bb.0: 3629; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 3630; X86-AVX512-NEXT: # xmm0 = mem[0],zero 3631; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 3632; X86-AVX512-NEXT: # xmm1 = mem[0],zero 3633; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 3634; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 3635; X86-AVX512-NEXT: retl # encoding: [0xc3] 3636; 3637; X64-SSE-LABEL: test_mm_set_pd: 3638; X64-SSE: # %bb.0: 3639; X64-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8] 3640; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0] 3641; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 3642; X64-SSE-NEXT: retq # encoding: [0xc3] 3643; 3644; X64-AVX1-LABEL: test_mm_set_pd: 3645; X64-AVX1: # %bb.0: 3646; X64-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] 3647; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 3648; X64-AVX1-NEXT: retq # encoding: [0xc3] 3649; 3650; X64-AVX512-LABEL: test_mm_set_pd: 3651; X64-AVX512: # %bb.0: 3652; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] 3653; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 3654; X64-AVX512-NEXT: retq # encoding: [0xc3] 3655 %res0 = insertelement <2 x double> undef, double %a1, i32 0 3656 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 3657 ret <2 x double> %res1 3658} 3659 3660define <2 x double> @test_mm_set_pd1(double %a0) nounwind { 3661; X86-SSE-LABEL: test_mm_set_pd1: 3662; X86-SSE: # %bb.0: 3663; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] 3664; X86-SSE-NEXT: # xmm0 = mem[0],zero 3665; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 3666; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 3667; X86-SSE-NEXT: retl # encoding: [0xc3] 3668; 3669; X86-AVX1-LABEL: test_mm_set_pd1: 3670; X86-AVX1: # %bb.0: 3671; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] 3672; X86-AVX1-NEXT: # xmm0 = mem[0],zero 3673; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 3674; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 3675; X86-AVX1-NEXT: retl # encoding: [0xc3] 3676; 3677; X86-AVX512-LABEL: test_mm_set_pd1: 3678; X86-AVX512: # %bb.0: 3679; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] 3680; X86-AVX512-NEXT: # xmm0 = mem[0],zero 3681; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 3682; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 3683; X86-AVX512-NEXT: retl # encoding: [0xc3] 3684; 3685; X64-SSE-LABEL: test_mm_set_pd1: 3686; X64-SSE: # %bb.0: 3687; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 3688; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 3689; X64-SSE-NEXT: retq # encoding: [0xc3] 3690; 3691; X64-AVX1-LABEL: test_mm_set_pd1: 3692; X64-AVX1: # %bb.0: 3693; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 3694; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 3695; X64-AVX1-NEXT: retq # encoding: [0xc3] 3696; 3697; X64-AVX512-LABEL: 
test_mm_set_pd1: 3698; X64-AVX512: # %bb.0: 3699; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 3700; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 3701; X64-AVX512-NEXT: retq # encoding: [0xc3] 3702 %res0 = insertelement <2 x double> undef, double %a0, i32 0 3703 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 3704 ret <2 x double> %res1 3705} 3706 3707define <2 x double> @test_mm_set_sd(double %a0) nounwind { 3708; X86-SSE-LABEL: test_mm_set_sd: 3709; X86-SSE: # %bb.0: 3710; X86-SSE-NEXT: movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04] 3711; X86-SSE-NEXT: # xmm0 = mem[0],zero 3712; X86-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] 3713; X86-SSE-NEXT: # xmm0 = xmm0[0],zero 3714; X86-SSE-NEXT: retl # encoding: [0xc3] 3715; 3716; X86-AVX1-LABEL: test_mm_set_sd: 3717; X86-AVX1: # %bb.0: 3718; X86-AVX1-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04] 3719; X86-AVX1-NEXT: # xmm0 = mem[0],zero 3720; X86-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] 3721; X86-AVX1-NEXT: # xmm0 = xmm0[0],zero 3722; X86-AVX1-NEXT: retl # encoding: [0xc3] 3723; 3724; X86-AVX512-LABEL: test_mm_set_sd: 3725; X86-AVX512: # %bb.0: 3726; X86-AVX512-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04] 3727; X86-AVX512-NEXT: # xmm0 = mem[0],zero 3728; X86-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] 3729; X86-AVX512-NEXT: # xmm0 = xmm0[0],zero 3730; X86-AVX512-NEXT: retl # encoding: [0xc3] 3731; 3732; X64-SSE-LABEL: test_mm_set_sd: 3733; X64-SSE: # %bb.0: 3734; X64-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] 3735; X64-SSE-NEXT: # xmm0 = xmm0[0],zero 3736; X64-SSE-NEXT: retq # encoding: [0xc3] 3737; 3738; X64-AVX1-LABEL: test_mm_set_sd: 3739; X64-AVX1: # %bb.0: 3740; X64-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] 3741; X64-AVX1-NEXT: # xmm0 = xmm0[0],zero 3742; X64-AVX1-NEXT: retq # encoding: [0xc3] 3743; 3744; X64-AVX512-LABEL: test_mm_set_sd: 3745; X64-AVX512: # %bb.0: 3746; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] 3747; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero 3748; X64-AVX512-NEXT: retq # encoding: [0xc3] 3749 %res0 = insertelement <2 x double> undef, double %a0, i32 0 3750 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 3751 ret <2 x double> %res1 3752} 3753 3754define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { 3755; X86-SSE-LABEL: test_mm_set1_epi8: 3756; X86-SSE: # %bb.0: 3757; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3758; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3759; X86-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] 3760; X86-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3761; X86-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 3762; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3763; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3764; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3765; X86-SSE-NEXT: retl # encoding: [0xc3] 3766; 3767; X86-AVX1-LABEL: test_mm_set1_epi8: 3768; X86-AVX1: # %bb.0: 3769; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3770; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3771; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: 
[0xc5,0xf1,0xef,0xc9] 3772; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1] 3773; X86-AVX1-NEXT: retl # encoding: [0xc3] 3774; 3775; X86-AVX512-LABEL: test_mm_set1_epi8: 3776; X86-AVX512: # %bb.0: 3777; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 3778; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0] 3779; X86-AVX512-NEXT: retl # encoding: [0xc3] 3780; 3781; X64-SSE-LABEL: test_mm_set1_epi8: 3782; X64-SSE: # %bb.0: 3783; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3784; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3785; X64-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] 3786; X64-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3787; X64-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 3788; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3789; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3790; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3791; X64-SSE-NEXT: retq # encoding: [0xc3] 3792; 3793; X64-AVX1-LABEL: test_mm_set1_epi8: 3794; X64-AVX1: # %bb.0: 3795; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3796; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3797; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] 3798; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1] 3799; X64-AVX1-NEXT: retq # encoding: [0xc3] 3800; 3801; X64-AVX512-LABEL: test_mm_set1_epi8: 3802; X64-AVX512: # %bb.0: 3803; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] 3804; X64-AVX512-NEXT: retq # encoding: [0xc3] 3805 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 3806 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 3807 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 3808 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3 3809 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4 3810 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5 3811 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6 3812 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7 3813 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8 3814 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9 3815 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10 3816 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11 3817 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12 3818 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13 3819 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14 3820 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15 3821 %res = bitcast <16 x i8> %res15 to <2 x i64> 3822 ret <2 x i64> %res 3823} 3824 3825define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { 3826; X86-SSE-LABEL: test_mm_set1_epi16: 3827; X86-SSE: # %bb.0: 3828; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3829; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3830; X86-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 3831; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3832; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3833; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3834; X86-SSE-NEXT: retl # encoding: [0xc3] 3835; 3836; X86-AVX1-LABEL: test_mm_set1_epi16: 3837; X86-AVX1: # %bb.0: 3838; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3839; 
X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3840; X86-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] 3841; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3842; X86-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3843; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3844; X86-AVX1-NEXT: retl # encoding: [0xc3] 3845; 3846; X86-AVX512-LABEL: test_mm_set1_epi16: 3847; X86-AVX512: # %bb.0: 3848; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3849; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0] 3850; X86-AVX512-NEXT: retl # encoding: [0xc3] 3851; 3852; X64-SSE-LABEL: test_mm_set1_epi16: 3853; X64-SSE: # %bb.0: 3854; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3855; X64-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 3856; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3857; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3858; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3859; X64-SSE-NEXT: retq # encoding: [0xc3] 3860; 3861; X64-AVX1-LABEL: test_mm_set1_epi16: 3862; X64-AVX1: # %bb.0: 3863; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 3864; X64-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] 3865; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 3866; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3867; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3868; X64-AVX1-NEXT: retq # encoding: [0xc3] 3869; 3870; X64-AVX512-LABEL: test_mm_set1_epi16: 3871; X64-AVX512: # %bb.0: 3872; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] 3873; X64-AVX512-NEXT: retq # encoding: [0xc3] 3874 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 3875 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 3876 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 3877 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3 3878 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4 3879 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5 3880 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6 3881 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 3882 %res = bitcast <8 x i16> %res7 to <2 x i64> 3883 ret <2 x i64> %res 3884} 3885 3886define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { 3887; X86-SSE-LABEL: test_mm_set1_epi32: 3888; X86-SSE: # %bb.0: 3889; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04] 3890; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3891; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3892; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3893; X86-SSE-NEXT: retl # encoding: [0xc3] 3894; 3895; X86-AVX1-LABEL: test_mm_set1_epi32: 3896; X86-AVX1: # %bb.0: 3897; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 3898; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3899; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 3900; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3901; X86-AVX1-NEXT: retl # encoding: [0xc3] 3902; 3903; X86-AVX512-LABEL: test_mm_set1_epi32: 3904; X86-AVX512: # %bb.0: 3905; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3906; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0] 3907; X86-AVX512-NEXT: retl # encoding: [0xc3] 3908; 3909; X64-SSE-LABEL: test_mm_set1_epi32: 
3910; X64-SSE: # %bb.0: 3911; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3912; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 3913; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 3914; X64-SSE-NEXT: retq # encoding: [0xc3] 3915; 3916; X64-AVX1-LABEL: test_mm_set1_epi32: 3917; X64-AVX1: # %bb.0: 3918; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 3919; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] 3920; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 3921; X64-AVX1-NEXT: retq # encoding: [0xc3] 3922; 3923; X64-AVX512-LABEL: test_mm_set1_epi32: 3924; X64-AVX512: # %bb.0: 3925; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] 3926; X64-AVX512-NEXT: retq # encoding: [0xc3] 3927 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 3928 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 3929 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 3930 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 3931 %res = bitcast <4 x i32> %res3 to <2 x i64> 3932 ret <2 x i64> %res 3933} 3934 3935; TODO test_mm_set1_epi64 3936 3937define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { 3938; X86-SSE-LABEL: test_mm_set1_epi64x: 3939; X86-SSE: # %bb.0: 3940; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04] 3941; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3942; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08] 3943; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 3944; X86-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] 3945; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3946; X86-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] 3947; X86-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] 3948; X86-SSE-NEXT: retl # encoding: [0xc3] 3949; 3950; X86-AVX1-LABEL: test_mm_set1_epi64x: 3951; X86-AVX1: # %bb.0: 3952; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 3953; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3954; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 3955; X86-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44] 3956; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1] 3957; X86-AVX1-NEXT: retl # encoding: [0xc3] 3958; 3959; X86-AVX512-LABEL: test_mm_set1_epi64x: 3960; X86-AVX512: # %bb.0: 3961; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 3962; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 3963; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 3964; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0] 3965; X86-AVX512-NEXT: retl # encoding: [0xc3] 3966; 3967; X64-SSE-LABEL: test_mm_set1_epi64x: 3968; X64-SSE: # %bb.0: 3969; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] 3970; X64-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] 3971; X64-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] 3972; X64-SSE-NEXT: retq # encoding: [0xc3] 3973; 3974; X64-AVX1-LABEL: test_mm_set1_epi64x: 3975; X64-AVX1: # %bb.0: 3976; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] 3977; X64-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44] 3978; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1] 
3979; X64-AVX1-NEXT: retq # encoding: [0xc3] 3980; 3981; X64-AVX512-LABEL: test_mm_set1_epi64x: 3982; X64-AVX512: # %bb.0: 3983; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] 3984; X64-AVX512-NEXT: retq # encoding: [0xc3] 3985 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 3986 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 3987 ret <2 x i64> %res1 3988} 3989 3990define <2 x double> @test_mm_set1_pd(double %a0) nounwind { 3991; X86-SSE-LABEL: test_mm_set1_pd: 3992; X86-SSE: # %bb.0: 3993; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] 3994; X86-SSE-NEXT: # xmm0 = mem[0],zero 3995; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 3996; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 3997; X86-SSE-NEXT: retl # encoding: [0xc3] 3998; 3999; X86-AVX1-LABEL: test_mm_set1_pd: 4000; X86-AVX1: # %bb.0: 4001; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] 4002; X86-AVX1-NEXT: # xmm0 = mem[0],zero 4003; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 4004; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 4005; X86-AVX1-NEXT: retl # encoding: [0xc3] 4006; 4007; X86-AVX512-LABEL: test_mm_set1_pd: 4008; X86-AVX512: # %bb.0: 4009; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] 4010; X86-AVX512-NEXT: # xmm0 = mem[0],zero 4011; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 4012; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 4013; X86-AVX512-NEXT: retl # encoding: [0xc3] 4014; 4015; X64-SSE-LABEL: test_mm_set1_pd: 4016; X64-SSE: # %bb.0: 4017; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 4018; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 4019; X64-SSE-NEXT: retq # encoding: [0xc3] 4020; 4021; X64-AVX1-LABEL: test_mm_set1_pd: 4022; X64-AVX1: # %bb.0: 4023; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 4024; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 4025; X64-AVX1-NEXT: retq # encoding: [0xc3] 4026; 4027; X64-AVX512-LABEL: test_mm_set1_pd: 4028; X64-AVX512: # %bb.0: 4029; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 4030; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 4031; X64-AVX512-NEXT: retq # encoding: [0xc3] 4032 %res0 = insertelement <2 x double> undef, double %a0, i32 0 4033 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 4034 ret <2 x double> %res1 4035} 4036 4037define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { 4038; X86-SSE-LABEL: test_mm_setr_epi8: 4039; X86-SSE: # %bb.0: 4040; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4041; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4042; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 4043; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4044; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4045; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4046; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4047; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4048; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x34] 4049; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4050; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4051; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4052; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 4053; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 4054; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4055; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4056; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 4057; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4058; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4059; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4060; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4061; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4062; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 4063; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4064; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4065; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4066; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 4067; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 4068; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 4069; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4070; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4071; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4072; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 4073; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4074; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4075; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4076; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4077; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4078; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 4079; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4080; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4081; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4082; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 4083; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 4084; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4085; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4086; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 4087; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4088; X86-SSE-NEXT: punpcklbw %xmm0, 
%xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4089; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4090; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4091; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 4092; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4093; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4094; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 4095; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 4096; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 4097; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 4098; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 4099; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 4100; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4101; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4102; X86-SSE-NEXT: retl # encoding: [0xc3] 4103; 4104; X86-AVX1-LABEL: test_mm_setr_epi8: 4105; X86-AVX1: # %bb.0: 4106; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4107; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] 4108; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 4109; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4110; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 4111; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4112; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4113; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4114; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 4115; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4116; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4117; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4118; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 4119; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4120; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4121; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4122; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 4123; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4124; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4125; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4126; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 4127; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4128; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4129; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4130; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), 
%eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 4131; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4132; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4133; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4134; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 4135; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4136; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4137; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4138; X86-AVX1-NEXT: retl # encoding: [0xc3] 4139; 4140; X86-AVX512-LABEL: test_mm_setr_epi8: 4141; X86-AVX512: # %bb.0: 4142; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4143; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] 4144; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 4145; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4146; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 4147; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4148; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4149; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4150; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 4151; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4152; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4153; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4154; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 4155; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4156; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4157; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4158; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 4159; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4160; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4161; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4162; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 4163; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4164; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4165; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4166; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 4167; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4168; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4169; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4170; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 4171; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4172; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4173; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4174; X86-AVX512-NEXT: retl # encoding: [0xc3] 4175; 4176; X64-SSE-LABEL: test_mm_setr_epi8: 4177; X64-SSE: # %bb.0: 4178; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4179; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4180; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4181; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4182; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4183; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4184; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4185; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4186; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4187; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4188; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4189; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4190; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 4191; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 4192; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4193; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4194; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4195; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4196; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4197; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4198; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4199; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4200; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4201; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4202; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 4203; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4204; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 4205; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 4206; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 4207; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4208; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x10] 4209; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4210; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4211; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4212; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4213; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4214; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4215; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4216; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4217; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4218; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 4219; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 4220; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 4221; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 4222; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4223; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4224; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4225; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4226; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 4227; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 4228; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4229; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 4230; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 4231; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4232; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 4233; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 4234; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 4235; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 4236; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 4237; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 4238; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4239; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4240; X64-SSE-NEXT: retq # encoding: [0xc3] 4241; 4242; X64-AVX1-LABEL: test_mm_setr_epi8: 4243; X64-AVX1: # %bb.0: 4244; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4245; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] 4246; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6] 4247; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4248; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4249; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4250; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4251; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4252; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4253; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x04] 4254; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4255; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4256; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4257; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4258; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4259; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4260; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4261; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4262; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4263; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4264; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4265; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4266; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4267; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4268; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4269; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4270; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4271; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4272; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4273; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4274; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4275; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4276; X64-AVX1-NEXT: retq # encoding: [0xc3] 4277; 4278; X64-AVX512-LABEL: test_mm_setr_epi8: 4279; X64-AVX512: # %bb.0: 4280; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 4281; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] 4282; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4283; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 4284; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 4285; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 4286; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 4287; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 4288; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 4289; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 4290; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 4291; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 4292; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 4293; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 4294; 
X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 4295; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 4296; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 4297; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 4298; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 4299; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 4300; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 4301; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 4302; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 4303; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 4304; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 4305; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 4306; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 4307; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 4308; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 4309; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 4310; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 4311; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 4312; X64-AVX512-NEXT: retq # encoding: [0xc3] 4313 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 4314 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 4315 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 4316 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3 4317 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4 4318 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5 4319 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6 4320 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7 4321 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8 4322 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9 4323 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10 4324 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11 4325 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12 4326 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13 4327 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14 4328 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15 4329 %res = bitcast <16 x i8> %res15 to <2 x i64> 4330 ret <2 x i64> %res 4331} 4332 4333define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 4334; X86-SSE-LABEL: test_mm_setr_epi16: 4335; X86-SSE: # %bb.0: 4336; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4337; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 4338; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4339; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 4340; X86-SSE-NEXT: movzwl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4341; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 4342; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4343; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 4344; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4345; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] 4346; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4347; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] 4348; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4349; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] 4350; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4351; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4352; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 4353; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 4354; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] 4355; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 4356; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] 4357; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 4358; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] 4359; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 4360; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] 4361; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 4362; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] 4363; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] 4364; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] 4365; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] 4366; X86-SSE-NEXT: retl # encoding: [0xc3] 4367; 4368; X86-AVX1-LABEL: test_mm_setr_epi16: 4369; X86-AVX1: # %bb.0: 4370; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4371; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 4372; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4373; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4374; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4375; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4376; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4377; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 4378; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4379; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4380; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4381; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4382; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4383; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4384; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4385; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: 
[0xc5,0xf9,0xc4,0xc0,0x07] 4386; X86-AVX1-NEXT: retl # encoding: [0xc3] 4387; 4388; X86-AVX512-LABEL: test_mm_setr_epi16: 4389; X86-AVX512: # %bb.0: 4390; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 4391; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 4392; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4393; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4394; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 4395; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4396; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4397; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 4398; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 4399; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4400; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 4401; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4402; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 4403; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4404; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 4405; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4406; X86-AVX512-NEXT: retl # encoding: [0xc3] 4407; 4408; X64-SSE-LABEL: test_mm_setr_epi16: 4409; X64-SSE: # %bb.0: 4410; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 4411; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 4412; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 4413; X64-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca] 4414; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 4415; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 4416; X64-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1] 4417; X64-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0] 4418; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] 4419; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 4420; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] 4421; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4422; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] 4423; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] 4424; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 4425; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 4426; X64-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde] 4427; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 4428; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] 4429; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 4430; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # 
encoding: [0x66,0x0f,0x62,0xc1] 4431; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4432; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] 4433; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 4434; X64-SSE-NEXT: retq # encoding: [0xc3] 4435; 4436; X64-AVX1-LABEL: test_mm_setr_epi16: 4437; X64-AVX1: # %bb.0: 4438; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 4439; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4440; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 4441; X64-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01] 4442; X64-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02] 4443; X64-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] 4444; X64-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] 4445; X64-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] 4446; X64-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4447; X64-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] 4448; X64-AVX1-NEXT: retq # encoding: [0xc3] 4449; 4450; X64-AVX512-LABEL: test_mm_setr_epi16: 4451; X64-AVX512: # %bb.0: 4452; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 4453; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 4454; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 4455; X64-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01] 4456; X64-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02] 4457; X64-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03] 4458; X64-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] 4459; X64-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] 4460; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4461; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] 4462; X64-AVX512-NEXT: retq # encoding: [0xc3] 4463 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 4464 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 4465 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 4466 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3 4467 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4 4468 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5 4469 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6 4470 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7 4471 %res = bitcast <8 x i16> %res7 to <2 x i64> 4472 ret <2 x i64> %res 4473} 4474 4475define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 4476; X86-SSE-LABEL: test_mm_setr_epi32: 4477; X86-SSE: # %bb.0: 4478; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 4479; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 4480; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 4481; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 4482; 
X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 4483; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 4484; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 4485; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 4486; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 4487; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 4488; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 4489; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4490; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4491; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4492; X86-SSE-NEXT: retl # encoding: [0xc3] 4493; 4494; X86-AVX1-LABEL: test_mm_setr_epi32: 4495; X86-AVX1: # %bb.0: 4496; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 4497; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 4498; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 4499; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] 4500; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] 4501; X86-AVX1-NEXT: retl # encoding: [0xc3] 4502; 4503; X86-AVX512-LABEL: test_mm_setr_epi32: 4504; X86-AVX512: # %bb.0: 4505; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 4506; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 4507; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 4508; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] 4509; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] 4510; X86-AVX512-NEXT: retl # encoding: [0xc3] 4511; 4512; X64-SSE-LABEL: test_mm_setr_epi32: 4513; X64-SSE: # %bb.0: 4514; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] 4515; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] 4516; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] 4517; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 4518; X64-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6] 4519; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 4520; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] 4521; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4522; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4523; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4524; X64-SSE-NEXT: retq # encoding: [0xc3] 4525; 4526; X64-AVX1-LABEL: test_mm_setr_epi32: 4527; X64-AVX1: # %bb.0: 4528; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] 4529; X64-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] 4530; X64-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 4531; X64-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] 4532; X64-AVX1-NEXT: retq # encoding: [0xc3] 4533; 4534; X64-AVX512-LABEL: test_mm_setr_epi32: 4535; X64-AVX512: # %bb.0: 4536; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 4537; X64-AVX512-NEXT: vpinsrd $1, %esi, 
%xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] 4538; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 4539; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] 4540; X64-AVX512-NEXT: retq # encoding: [0xc3] 4541 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 4542 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 4543 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 4544 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3 4545 %res = bitcast <4 x i32> %res3 to <2 x i64> 4546 ret <2 x i64> %res 4547} 4548 4549; TODO test_mm_setr_epi64 4550 4551define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { 4552; X86-SSE-LABEL: test_mm_setr_epi64x: 4553; X86-SSE: # %bb.0: 4554; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 4555; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 4556; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 4557; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 4558; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 4559; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 4560; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 4561; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 4562; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 4563; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 4564; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 4565; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4566; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4567; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4568; X86-SSE-NEXT: retl # encoding: [0xc3] 4569; 4570; X86-AVX1-LABEL: test_mm_setr_epi64x: 4571; X86-AVX1: # %bb.0: 4572; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 4573; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 4574; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 4575; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] 4576; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] 4577; X86-AVX1-NEXT: retl # encoding: [0xc3] 4578; 4579; X86-AVX512-LABEL: test_mm_setr_epi64x: 4580; X86-AVX512: # %bb.0: 4581; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] 4582; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 4583; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] 4584; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] 4585; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] 4586; X86-AVX512-NEXT: retl # encoding: [0xc3] 4587; 4588; X64-SSE-LABEL: test_mm_setr_epi64x: 4589; X64-SSE: # %bb.0: 4590; X64-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce] 4591; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] 4592; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 4593; X64-SSE-NEXT: # xmm0 = 
xmm0[0],xmm1[0] 4594; X64-SSE-NEXT: retq # encoding: [0xc3] 4595; 4596; X64-AVX1-LABEL: test_mm_setr_epi64x: 4597; X64-AVX1: # %bb.0: 4598; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] 4599; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] 4600; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] 4601; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 4602; X64-AVX1-NEXT: retq # encoding: [0xc3] 4603; 4604; X64-AVX512-LABEL: test_mm_setr_epi64x: 4605; X64-AVX512: # %bb.0: 4606; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] 4607; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] 4608; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] 4609; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 4610; X64-AVX512-NEXT: retq # encoding: [0xc3] 4611 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 4612 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 4613 ret <2 x i64> %res1 4614} 4615 4616define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { 4617; X86-SSE-LABEL: test_mm_setr_pd: 4618; X86-SSE: # %bb.0: 4619; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c] 4620; X86-SSE-NEXT: # xmm1 = mem[0],zero 4621; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] 4622; X86-SSE-NEXT: # xmm0 = mem[0],zero 4623; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4624; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4625; X86-SSE-NEXT: retl # encoding: [0xc3] 4626; 4627; X86-AVX1-LABEL: test_mm_setr_pd: 4628; X86-AVX1: # %bb.0: 4629; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 4630; X86-AVX1-NEXT: # xmm0 = mem[0],zero 4631; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 4632; X86-AVX1-NEXT: # xmm1 = mem[0],zero 4633; X86-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] 4634; X86-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 4635; X86-AVX1-NEXT: retl # encoding: [0xc3] 4636; 4637; X86-AVX512-LABEL: test_mm_setr_pd: 4638; X86-AVX512: # %bb.0: 4639; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 4640; X86-AVX512-NEXT: # xmm0 = mem[0],zero 4641; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 4642; X86-AVX512-NEXT: # xmm1 = mem[0],zero 4643; X86-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] 4644; X86-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 4645; X86-AVX512-NEXT: retl # encoding: [0xc3] 4646; 4647; X64-SSE-LABEL: test_mm_setr_pd: 4648; X64-SSE: # %bb.0: 4649; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 4650; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 4651; X64-SSE-NEXT: retq # encoding: [0xc3] 4652; 4653; X64-AVX1-LABEL: test_mm_setr_pd: 4654; X64-AVX1: # %bb.0: 4655; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 4656; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 4657; X64-AVX1-NEXT: retq # encoding: [0xc3] 4658; 4659; X64-AVX512-LABEL: test_mm_setr_pd: 4660; X64-AVX512: # %bb.0: 4661; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 4662; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 4663; X64-AVX512-NEXT: retq # 
encoding: [0xc3] 4664 %res0 = insertelement <2 x double> undef, double %a0, i32 0 4665 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 4666 ret <2 x double> %res1 4667} 4668 4669define <2 x double> @test_mm_setzero_pd() { 4670; SSE-LABEL: test_mm_setzero_pd: 4671; SSE: # %bb.0: 4672; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4673; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4674; 4675; AVX1-LABEL: test_mm_setzero_pd: 4676; AVX1: # %bb.0: 4677; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4678; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4679; 4680; AVX512-LABEL: test_mm_setzero_pd: 4681; AVX512: # %bb.0: 4682; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4683; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4684 ret <2 x double> zeroinitializer 4685} 4686 4687define <2 x i64> @test_mm_setzero_si128() { 4688; SSE-LABEL: test_mm_setzero_si128: 4689; SSE: # %bb.0: 4690; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4691; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4692; 4693; AVX1-LABEL: test_mm_setzero_si128: 4694; AVX1: # %bb.0: 4695; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4696; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4697; 4698; AVX512-LABEL: test_mm_setzero_si128: 4699; AVX512: # %bb.0: 4700; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4701; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4702 ret <2 x i64> zeroinitializer 4703} 4704 4705define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { 4706; SSE-LABEL: test_mm_shuffle_epi32: 4707; SSE: # %bb.0: 4708; SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 4709; SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 4710; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4711; 4712; AVX1-LABEL: test_mm_shuffle_epi32: 4713; AVX1: # %bb.0: 4714; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 4715; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 4716; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4717; 4718; AVX512-LABEL: test_mm_shuffle_epi32: 4719; AVX512: # %bb.0: 4720; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 4721; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4722 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4723 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer 4724 %bc = bitcast <4 x i32> %res to <2 x i64> 4725 ret <2 x i64> %bc 4726} 4727 4728define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { 4729; SSE-LABEL: test_mm_shuffle_pd: 4730; SSE: # %bb.0: 4731; SSE-NEXT: shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e] 4732; SSE-NEXT: # xmm0 = xmm0[2,3],xmm1[0,1] 4733; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4734; 4735; AVX1-LABEL: test_mm_shuffle_pd: 4736; AVX1: # %bb.0: 4737; AVX1-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4738; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[0] 4739; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4740; 4741; AVX512-LABEL: test_mm_shuffle_pd: 4742; AVX512: # %bb.0: 4743; AVX512-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4744; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[0] 4745; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4746 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 4747 ret <2 x double> %res 4748} 4749 4750define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { 
4751; SSE-LABEL: test_mm_shufflehi_epi16: 4752; SSE: # %bb.0: 4753; SSE-NEXT: pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00] 4754; SSE-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4755; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4756; 4757; AVX1-LABEL: test_mm_shufflehi_epi16: 4758; AVX1: # %bb.0: 4759; AVX1-NEXT: vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4760; AVX1-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4761; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4762; 4763; AVX512-LABEL: test_mm_shufflehi_epi16: 4764; AVX512: # %bb.0: 4765; AVX512-NEXT: vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4766; AVX512-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4767; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4768 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4769 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 4770 %bc = bitcast <8 x i16> %res to <2 x i64> 4771 ret <2 x i64> %bc 4772} 4773 4774define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { 4775; SSE-LABEL: test_mm_shufflelo_epi16: 4776; SSE: # %bb.0: 4777; SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 4778; SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4779; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4780; 4781; AVX1-LABEL: test_mm_shufflelo_epi16: 4782; AVX1: # %bb.0: 4783; AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4784; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4785; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4786; 4787; AVX512-LABEL: test_mm_shufflelo_epi16: 4788; AVX512: # %bb.0: 4789; AVX512-NEXT: vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4790; AVX512-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4791; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4792 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4793 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 4794 %bc = bitcast <8 x i16> %res to <2 x i64> 4795 ret <2 x i64> %bc 4796} 4797 4798define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { 4799; SSE-LABEL: test_mm_sll_epi16: 4800; SSE: # %bb.0: 4801; SSE-NEXT: psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1] 4802; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4803; 4804; AVX1-LABEL: test_mm_sll_epi16: 4805; AVX1: # %bb.0: 4806; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1] 4807; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4808; 4809; AVX512-LABEL: test_mm_sll_epi16: 4810; AVX512: # %bb.0: 4811; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] 4812; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4813 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4814 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 4815 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) 4816 %bc = bitcast <8 x i16> %res to <2 x i64> 4817 ret <2 x i64> %bc 4818} 4819declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 4820 4821define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { 4822; SSE-LABEL: test_mm_sll_epi32: 4823; SSE: # %bb.0: 4824; SSE-NEXT: pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1] 4825; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4826; 4827; AVX1-LABEL: test_mm_sll_epi32: 4828; AVX1: # %bb.0: 4829; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1] 4830; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 
4831; 4832; AVX512-LABEL: test_mm_sll_epi32: 4833; AVX512: # %bb.0: 4834; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] 4835; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4836 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4837 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 4838 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) 4839 %bc = bitcast <4 x i32> %res to <2 x i64> 4840 ret <2 x i64> %bc 4841} 4842declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 4843 4844define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { 4845; SSE-LABEL: test_mm_sll_epi64: 4846; SSE: # %bb.0: 4847; SSE-NEXT: psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1] 4848; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4849; 4850; AVX1-LABEL: test_mm_sll_epi64: 4851; AVX1: # %bb.0: 4852; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1] 4853; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4854; 4855; AVX512-LABEL: test_mm_sll_epi64: 4856; AVX512: # %bb.0: 4857; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] 4858; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4859 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 4860 ret <2 x i64> %res 4861} 4862declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 4863 4864define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { 4865; SSE-LABEL: test_mm_slli_epi16: 4866; SSE: # %bb.0: 4867; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01] 4868; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4869; 4870; AVX1-LABEL: test_mm_slli_epi16: 4871; AVX1: # %bb.0: 4872; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4873; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4874; 4875; AVX512-LABEL: test_mm_slli_epi16: 4876; AVX512: # %bb.0: 4877; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4878; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4879 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4880 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) 4881 %bc = bitcast <8 x i16> %res to <2 x i64> 4882 ret <2 x i64> %bc 4883} 4884declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 4885 4886define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { 4887; SSE-LABEL: test_mm_slli_epi32: 4888; SSE: # %bb.0: 4889; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01] 4890; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4891; 4892; AVX1-LABEL: test_mm_slli_epi32: 4893; AVX1: # %bb.0: 4894; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4895; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4896; 4897; AVX512-LABEL: test_mm_slli_epi32: 4898; AVX512: # %bb.0: 4899; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4900; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4901 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4902 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) 4903 %bc = bitcast <4 x i32> %res to <2 x i64> 4904 ret <2 x i64> %bc 4905} 4906declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 4907 4908define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { 4909; SSE-LABEL: test_mm_slli_epi64: 4910; SSE: # %bb.0: 4911; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01] 4912; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4913; 4914; AVX1-LABEL: 
test_mm_slli_epi64: 4915; AVX1: # %bb.0: 4916; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4917; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4918; 4919; AVX512-LABEL: test_mm_slli_epi64: 4920; AVX512: # %bb.0: 4921; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4922; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4923 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) 4924 ret <2 x i64> %res 4925} 4926declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 4927 4928define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { 4929; SSE-LABEL: test_mm_slli_si128: 4930; SSE: # %bb.0: 4931; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05] 4932; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4933; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4934; 4935; AVX1-LABEL: test_mm_slli_si128: 4936; AVX1: # %bb.0: 4937; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4938; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4939; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4940; 4941; AVX512-LABEL: test_mm_slli_si128: 4942; AVX512: # %bb.0: 4943; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4944; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4945; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4946 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 4947 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 4948 %bc = bitcast <16 x i8> %res to <2 x i64> 4949 ret <2 x i64> %bc 4950} 4951 4952define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { 4953; SSE-LABEL: test_mm_sqrt_pd: 4954; SSE: # %bb.0: 4955; SSE-NEXT: sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0] 4956; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4957; 4958; AVX1-LABEL: test_mm_sqrt_pd: 4959; AVX1: # %bb.0: 4960; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0] 4961; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4962; 4963; AVX512-LABEL: test_mm_sqrt_pd: 4964; AVX512: # %bb.0: 4965; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] 4966; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4967 %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) 4968 ret <2 x double> %res 4969} 4970declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone 4971 4972define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 4973; SSE-LABEL: test_mm_sqrt_sd: 4974; SSE: # %bb.0: 4975; SSE-NEXT: sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8] 4976; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] 4977; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4978; 4979; AVX1-LABEL: test_mm_sqrt_sd: 4980; AVX1: # %bb.0: 4981; AVX1-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0] 4982; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4983; 4984; AVX512-LABEL: test_mm_sqrt_sd: 4985; AVX512: # %bb.0: 4986; AVX512-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0] 4987; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4988 %ext = extractelement <2 x double> %a0, i32 0 4989 %sqrt = call double @llvm.sqrt.f64(double %ext) 4990 %ins = insertelement <2 x double> %a1, double %sqrt, i32 0 
4991 ret <2 x double> %ins 4992} 4993declare double @llvm.sqrt.f64(double) nounwind readnone 4994 4995; This doesn't match a clang test, but helps with fast-isel coverage. 4996define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { 4997; X86-SSE-LABEL: test_mm_sqrt_sd_scalar: 4998; X86-SSE: # %bb.0: 4999; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 5000; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 5001; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 5002; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 5003; X86-SSE-NEXT: movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08] 5004; X86-SSE-NEXT: # xmm0 = mem[0],zero 5005; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 5006; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24] 5007; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 5008; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 5009; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 5010; X86-SSE-NEXT: retl # encoding: [0xc3] 5011; 5012; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar: 5013; X86-AVX1: # %bb.0: 5014; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] 5015; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 5016; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 5017; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 5018; X86-AVX1-NEXT: vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08] 5019; X86-AVX1-NEXT: # xmm0 = mem[0],zero 5020; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 5021; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24] 5022; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 5023; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 5024; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] 5025; X86-AVX1-NEXT: retl # encoding: [0xc3] 5026; 5027; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar: 5028; X86-AVX512: # %bb.0: 5029; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] 5030; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 5031; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 5032; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 5033; X86-AVX512-NEXT: vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08] 5034; X86-AVX512-NEXT: # xmm0 = mem[0],zero 5035; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 5036; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24] 5037; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 5038; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 5039; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] 5040; X86-AVX512-NEXT: retl # encoding: [0xc3] 5041; 5042; X64-SSE-LABEL: test_mm_sqrt_sd_scalar: 5043; X64-SSE: # %bb.0: 5044; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 5045; X64-SSE-NEXT: retq # encoding: [0xc3] 5046; 5047; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar: 5048; X64-AVX1: # %bb.0: 5049; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 5050; X64-AVX1-NEXT: retq # encoding: [0xc3] 5051; 5052; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar: 5053; X64-AVX512: # %bb.0: 5054; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 5055; X64-AVX512-NEXT: retq # encoding: [0xc3] 5056 %sqrt = call double @llvm.sqrt.f64(double %a0) 5057 ret double %sqrt 5058} 5059 5060define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, 
<2 x i64> %a1) { 5061; SSE-LABEL: test_mm_sra_epi16: 5062; SSE: # %bb.0: 5063; SSE-NEXT: psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1] 5064; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5065; 5066; AVX1-LABEL: test_mm_sra_epi16: 5067; AVX1: # %bb.0: 5068; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1] 5069; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5070; 5071; AVX512-LABEL: test_mm_sra_epi16: 5072; AVX512: # %bb.0: 5073; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] 5074; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5075 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5076 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 5077 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) 5078 %bc = bitcast <8 x i16> %res to <2 x i64> 5079 ret <2 x i64> %bc 5080} 5081declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 5082 5083define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { 5084; SSE-LABEL: test_mm_sra_epi32: 5085; SSE: # %bb.0: 5086; SSE-NEXT: psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1] 5087; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5088; 5089; AVX1-LABEL: test_mm_sra_epi32: 5090; AVX1: # %bb.0: 5091; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1] 5092; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5093; 5094; AVX512-LABEL: test_mm_sra_epi32: 5095; AVX512: # %bb.0: 5096; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] 5097; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5098 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5099 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 5100 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) 5101 %bc = bitcast <4 x i32> %res to <2 x i64> 5102 ret <2 x i64> %bc 5103} 5104declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 5105 5106define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { 5107; SSE-LABEL: test_mm_srai_epi16: 5108; SSE: # %bb.0: 5109; SSE-NEXT: psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01] 5110; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5111; 5112; AVX1-LABEL: test_mm_srai_epi16: 5113; AVX1: # %bb.0: 5114; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5115; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5116; 5117; AVX512-LABEL: test_mm_srai_epi16: 5118; AVX512: # %bb.0: 5119; AVX512-NEXT: vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5120; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5121 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5122 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) 5123 %bc = bitcast <8 x i16> %res to <2 x i64> 5124 ret <2 x i64> %bc 5125} 5126declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 5127 5128define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { 5129; SSE-LABEL: test_mm_srai_epi32: 5130; SSE: # %bb.0: 5131; SSE-NEXT: psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01] 5132; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5133; 5134; AVX1-LABEL: test_mm_srai_epi32: 5135; AVX1: # %bb.0: 5136; AVX1-NEXT: vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5137; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5138; 5139; AVX512-LABEL: test_mm_srai_epi32: 5140; AVX512: # %bb.0: 5141; AVX512-NEXT: vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5142; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5143 %arg0 = 
bitcast <2 x i64> %a0 to <4 x i32> 5144 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) 5145 %bc = bitcast <4 x i32> %res to <2 x i64> 5146 ret <2 x i64> %bc 5147} 5148declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone 5149 5150define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { 5151; SSE-LABEL: test_mm_srl_epi16: 5152; SSE: # %bb.0: 5153; SSE-NEXT: psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1] 5154; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5155; 5156; AVX1-LABEL: test_mm_srl_epi16: 5157; AVX1: # %bb.0: 5158; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1] 5159; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5160; 5161; AVX512-LABEL: test_mm_srl_epi16: 5162; AVX512: # %bb.0: 5163; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] 5164; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5165 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5166 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 5167 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) 5168 %bc = bitcast <8 x i16> %res to <2 x i64> 5169 ret <2 x i64> %bc 5170} 5171declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 5172 5173define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { 5174; SSE-LABEL: test_mm_srl_epi32: 5175; SSE: # %bb.0: 5176; SSE-NEXT: psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1] 5177; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5178; 5179; AVX1-LABEL: test_mm_srl_epi32: 5180; AVX1: # %bb.0: 5181; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1] 5182; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5183; 5184; AVX512-LABEL: test_mm_srl_epi32: 5185; AVX512: # %bb.0: 5186; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] 5187; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5188 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5189 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 5190 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) 5191 %bc = bitcast <4 x i32> %res to <2 x i64> 5192 ret <2 x i64> %bc 5193} 5194declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 5195 5196define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { 5197; SSE-LABEL: test_mm_srl_epi64: 5198; SSE: # %bb.0: 5199; SSE-NEXT: psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1] 5200; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5201; 5202; AVX1-LABEL: test_mm_srl_epi64: 5203; AVX1: # %bb.0: 5204; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1] 5205; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5206; 5207; AVX512-LABEL: test_mm_srl_epi64: 5208; AVX512: # %bb.0: 5209; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] 5210; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5211 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 5212 ret <2 x i64> %res 5213} 5214declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 5215 5216define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { 5217; SSE-LABEL: test_mm_srli_epi16: 5218; SSE: # %bb.0: 5219; SSE-NEXT: psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01] 5220; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5221; 5222; AVX1-LABEL: test_mm_srli_epi16: 5223; AVX1: # %bb.0: 5224; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5225; AVX1-NEXT: ret{{[l|q]}} # encoding: 
[0xc3] 5226; 5227; AVX512-LABEL: test_mm_srli_epi16: 5228; AVX512: # %bb.0: 5229; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5230; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5231 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5232 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) 5233 %bc = bitcast <8 x i16> %res to <2 x i64> 5234 ret <2 x i64> %bc 5235} 5236declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 5237 5238define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { 5239; SSE-LABEL: test_mm_srli_epi32: 5240; SSE: # %bb.0: 5241; SSE-NEXT: psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01] 5242; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5243; 5244; AVX1-LABEL: test_mm_srli_epi32: 5245; AVX1: # %bb.0: 5246; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5247; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5248; 5249; AVX512-LABEL: test_mm_srli_epi32: 5250; AVX512: # %bb.0: 5251; AVX512-NEXT: vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5252; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5253 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5254 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) 5255 %bc = bitcast <4 x i32> %res to <2 x i64> 5256 ret <2 x i64> %bc 5257} 5258declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone 5259 5260define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { 5261; SSE-LABEL: test_mm_srli_epi64: 5262; SSE: # %bb.0: 5263; SSE-NEXT: psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01] 5264; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5265; 5266; AVX1-LABEL: test_mm_srli_epi64: 5267; AVX1: # %bb.0: 5268; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5269; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5270; 5271; AVX512-LABEL: test_mm_srli_epi64: 5272; AVX512: # %bb.0: 5273; AVX512-NEXT: vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5274; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5275 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) 5276 ret <2 x i64> %res 5277} 5278declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone 5279 5280define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { 5281; SSE-LABEL: test_mm_srli_si128: 5282; SSE: # %bb.0: 5283; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05] 5284; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5285; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5286; 5287; AVX1-LABEL: test_mm_srli_si128: 5288; AVX1: # %bb.0: 5289; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5290; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5291; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5292; 5293; AVX512-LABEL: test_mm_srli_si128: 5294; AVX512: # %bb.0: 5295; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5296; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5297; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5298 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5299 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 5300 %bc = bitcast <16 x i8> %res to <2 x i64> 5301 ret <2 x i64> %bc 5302} 5303 
5304define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { 5305; X86-SSE-LABEL: test_mm_store_pd: 5306; X86-SSE: # %bb.0: 5307; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5308; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5309; X86-SSE-NEXT: retl # encoding: [0xc3] 5310; 5311; X86-AVX1-LABEL: test_mm_store_pd: 5312; X86-AVX1: # %bb.0: 5313; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5314; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5315; X86-AVX1-NEXT: retl # encoding: [0xc3] 5316; 5317; X86-AVX512-LABEL: test_mm_store_pd: 5318; X86-AVX512: # %bb.0: 5319; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5320; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5321; X86-AVX512-NEXT: retl # encoding: [0xc3] 5322; 5323; X64-SSE-LABEL: test_mm_store_pd: 5324; X64-SSE: # %bb.0: 5325; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5326; X64-SSE-NEXT: retq # encoding: [0xc3] 5327; 5328; X64-AVX1-LABEL: test_mm_store_pd: 5329; X64-AVX1: # %bb.0: 5330; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5331; X64-AVX1-NEXT: retq # encoding: [0xc3] 5332; 5333; X64-AVX512-LABEL: test_mm_store_pd: 5334; X64-AVX512: # %bb.0: 5335; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5336; X64-AVX512-NEXT: retq # encoding: [0xc3] 5337 %arg0 = bitcast double* %a0 to <2 x double>* 5338 store <2 x double> %a1, <2 x double>* %arg0, align 16 5339 ret void 5340} 5341 5342define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { 5343; X86-SSE-LABEL: test_mm_store_pd1: 5344; X86-SSE: # %bb.0: 5345; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5346; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5347; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5348; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5349; X86-SSE-NEXT: retl # encoding: [0xc3] 5350; 5351; X86-AVX1-LABEL: test_mm_store_pd1: 5352; X86-AVX1: # %bb.0: 5353; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5354; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5355; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5356; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5357; X86-AVX1-NEXT: retl # encoding: [0xc3] 5358; 5359; X86-AVX512-LABEL: test_mm_store_pd1: 5360; X86-AVX512: # %bb.0: 5361; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5362; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5363; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5364; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5365; X86-AVX512-NEXT: retl # encoding: [0xc3] 5366; 5367; X64-SSE-LABEL: test_mm_store_pd1: 5368; X64-SSE: # %bb.0: 5369; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5370; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5371; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5372; X64-SSE-NEXT: retq # encoding: [0xc3] 5373; 5374; X64-AVX1-LABEL: test_mm_store_pd1: 5375; X64-AVX1: # %bb.0: 5376; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5377; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5378; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5379; X64-AVX1-NEXT: retq # encoding: [0xc3] 5380; 5381; 
X64-AVX512-LABEL: test_mm_store_pd1: 5382; X64-AVX512: # %bb.0: 5383; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5384; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5385; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5386; X64-AVX512-NEXT: retq # encoding: [0xc3] 5387 %arg0 = bitcast double * %a0 to <2 x double>* 5388 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5389 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5390 ret void 5391} 5392 5393define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { 5394; X86-SSE-LABEL: test_mm_store_sd: 5395; X86-SSE: # %bb.0: 5396; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5397; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5398; X86-SSE-NEXT: retl # encoding: [0xc3] 5399; 5400; X86-AVX1-LABEL: test_mm_store_sd: 5401; X86-AVX1: # %bb.0: 5402; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5403; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5404; X86-AVX1-NEXT: retl # encoding: [0xc3] 5405; 5406; X86-AVX512-LABEL: test_mm_store_sd: 5407; X86-AVX512: # %bb.0: 5408; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5409; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5410; X86-AVX512-NEXT: retl # encoding: [0xc3] 5411; 5412; X64-SSE-LABEL: test_mm_store_sd: 5413; X64-SSE: # %bb.0: 5414; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5415; X64-SSE-NEXT: retq # encoding: [0xc3] 5416; 5417; X64-AVX1-LABEL: test_mm_store_sd: 5418; X64-AVX1: # %bb.0: 5419; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5420; X64-AVX1-NEXT: retq # encoding: [0xc3] 5421; 5422; X64-AVX512-LABEL: test_mm_store_sd: 5423; X64-AVX512: # %bb.0: 5424; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5425; X64-AVX512-NEXT: retq # encoding: [0xc3] 5426 %ext = extractelement <2 x double> %a1, i32 0 5427 store double %ext, double* %a0, align 1 5428 ret void 5429} 5430 5431define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5432; X86-SSE-LABEL: test_mm_store_si128: 5433; X86-SSE: # %bb.0: 5434; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5435; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5436; X86-SSE-NEXT: retl # encoding: [0xc3] 5437; 5438; X86-AVX1-LABEL: test_mm_store_si128: 5439; X86-AVX1: # %bb.0: 5440; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5441; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5442; X86-AVX1-NEXT: retl # encoding: [0xc3] 5443; 5444; X86-AVX512-LABEL: test_mm_store_si128: 5445; X86-AVX512: # %bb.0: 5446; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5447; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5448; X86-AVX512-NEXT: retl # encoding: [0xc3] 5449; 5450; X64-SSE-LABEL: test_mm_store_si128: 5451; X64-SSE: # %bb.0: 5452; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5453; X64-SSE-NEXT: retq # encoding: [0xc3] 5454; 5455; X64-AVX1-LABEL: test_mm_store_si128: 5456; X64-AVX1: # %bb.0: 5457; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5458; X64-AVX1-NEXT: retq # encoding: [0xc3] 5459; 5460; 
X64-AVX512-LABEL: test_mm_store_si128: 5461; X64-AVX512: # %bb.0: 5462; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5463; X64-AVX512-NEXT: retq # encoding: [0xc3] 5464 store <2 x i64> %a1, <2 x i64>* %a0, align 16 5465 ret void 5466} 5467 5468define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { 5469; X86-SSE-LABEL: test_mm_store1_pd: 5470; X86-SSE: # %bb.0: 5471; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5472; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5473; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5474; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5475; X86-SSE-NEXT: retl # encoding: [0xc3] 5476; 5477; X86-AVX1-LABEL: test_mm_store1_pd: 5478; X86-AVX1: # %bb.0: 5479; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5480; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5481; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5482; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5483; X86-AVX1-NEXT: retl # encoding: [0xc3] 5484; 5485; X86-AVX512-LABEL: test_mm_store1_pd: 5486; X86-AVX512: # %bb.0: 5487; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5488; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5489; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5490; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5491; X86-AVX512-NEXT: retl # encoding: [0xc3] 5492; 5493; X64-SSE-LABEL: test_mm_store1_pd: 5494; X64-SSE: # %bb.0: 5495; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5496; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5497; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5498; X64-SSE-NEXT: retq # encoding: [0xc3] 5499; 5500; X64-AVX1-LABEL: test_mm_store1_pd: 5501; X64-AVX1: # %bb.0: 5502; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5503; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5504; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5505; X64-AVX1-NEXT: retq # encoding: [0xc3] 5506; 5507; X64-AVX512-LABEL: test_mm_store1_pd: 5508; X64-AVX512: # %bb.0: 5509; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5510; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5511; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5512; X64-AVX512-NEXT: retq # encoding: [0xc3] 5513 %arg0 = bitcast double * %a0 to <2 x double>* 5514 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5515 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5516 ret void 5517} 5518 5519define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { 5520; X86-SSE-LABEL: test_mm_storeh_sd: 5521; X86-SSE: # %bb.0: 5522; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5523; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5524; X86-SSE-NEXT: # xmm0 = xmm0[1,1] 5525; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5526; X86-SSE-NEXT: retl # encoding: [0xc3] 5527; 5528; X86-AVX1-LABEL: test_mm_storeh_sd: 5529; X86-AVX1: # %bb.0: 5530; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5531; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5532; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5533; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # 
encoding: [0xc5,0xfb,0x11,0x00] 5534; X86-AVX1-NEXT: retl # encoding: [0xc3] 5535; 5536; X86-AVX512-LABEL: test_mm_storeh_sd: 5537; X86-AVX512: # %bb.0: 5538; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5539; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5540; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5541; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5542; X86-AVX512-NEXT: retl # encoding: [0xc3] 5543; 5544; X64-SSE-LABEL: test_mm_storeh_sd: 5545; X64-SSE: # %bb.0: 5546; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5547; X64-SSE-NEXT: # xmm0 = xmm0[1,1] 5548; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5549; X64-SSE-NEXT: retq # encoding: [0xc3] 5550; 5551; X64-AVX1-LABEL: test_mm_storeh_sd: 5552; X64-AVX1: # %bb.0: 5553; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5554; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5555; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5556; X64-AVX1-NEXT: retq # encoding: [0xc3] 5557; 5558; X64-AVX512-LABEL: test_mm_storeh_sd: 5559; X64-AVX512: # %bb.0: 5560; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5561; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5562; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5563; X64-AVX512-NEXT: retq # encoding: [0xc3] 5564 %ext = extractelement <2 x double> %a1, i32 1 5565 store double %ext, double* %a0, align 8 5566 ret void 5567} 5568 5569define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { 5570; X86-SSE-LABEL: test_mm_storel_epi64: 5571; X86-SSE: # %bb.0: 5572; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5573; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 5574; X86-SSE-NEXT: retl # encoding: [0xc3] 5575; 5576; X86-AVX1-LABEL: test_mm_storel_epi64: 5577; X86-AVX1: # %bb.0: 5578; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5579; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 5580; X86-AVX1-NEXT: retl # encoding: [0xc3] 5581; 5582; X86-AVX512-LABEL: test_mm_storel_epi64: 5583; X86-AVX512: # %bb.0: 5584; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5585; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 5586; X86-AVX512-NEXT: retl # encoding: [0xc3] 5587; 5588; X64-SSE-LABEL: test_mm_storel_epi64: 5589; X64-SSE: # %bb.0: 5590; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 5591; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5592; X64-SSE-NEXT: retq # encoding: [0xc3] 5593; 5594; X64-AVX1-LABEL: test_mm_storel_epi64: 5595; X64-AVX1: # %bb.0: 5596; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5597; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5598; X64-AVX1-NEXT: retq # encoding: [0xc3] 5599; 5600; X64-AVX512-LABEL: test_mm_storel_epi64: 5601; X64-AVX512: # %bb.0: 5602; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5603; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5604; X64-AVX512-NEXT: retq # encoding: [0xc3] 5605 %ext = extractelement <2 x i64> %a1, i32 0 5606 %bc = bitcast <2 x i64> *%a0 to i64* 5607 store i64 %ext, i64* %bc, 
align 8 5608 ret void 5609} 5610 5611define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { 5612; X86-SSE-LABEL: test_mm_storel_sd: 5613; X86-SSE: # %bb.0: 5614; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5615; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5616; X86-SSE-NEXT: retl # encoding: [0xc3] 5617; 5618; X86-AVX1-LABEL: test_mm_storel_sd: 5619; X86-AVX1: # %bb.0: 5620; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5621; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5622; X86-AVX1-NEXT: retl # encoding: [0xc3] 5623; 5624; X86-AVX512-LABEL: test_mm_storel_sd: 5625; X86-AVX512: # %bb.0: 5626; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5627; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5628; X86-AVX512-NEXT: retl # encoding: [0xc3] 5629; 5630; X64-SSE-LABEL: test_mm_storel_sd: 5631; X64-SSE: # %bb.0: 5632; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5633; X64-SSE-NEXT: retq # encoding: [0xc3] 5634; 5635; X64-AVX1-LABEL: test_mm_storel_sd: 5636; X64-AVX1: # %bb.0: 5637; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5638; X64-AVX1-NEXT: retq # encoding: [0xc3] 5639; 5640; X64-AVX512-LABEL: test_mm_storel_sd: 5641; X64-AVX512: # %bb.0: 5642; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5643; X64-AVX512-NEXT: retq # encoding: [0xc3] 5644 %ext = extractelement <2 x double> %a1, i32 0 5645 store double %ext, double* %a0, align 8 5646 ret void 5647} 5648 5649define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { 5650; X86-SSE-LABEL: test_mm_storer_pd: 5651; X86-SSE: # %bb.0: 5652; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5653; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] 5654; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] 5655; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5656; X86-SSE-NEXT: retl # encoding: [0xc3] 5657; 5658; X86-AVX1-LABEL: test_mm_storer_pd: 5659; X86-AVX1: # %bb.0: 5660; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5661; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5662; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5663; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5664; X86-AVX1-NEXT: retl # encoding: [0xc3] 5665; 5666; X86-AVX512-LABEL: test_mm_storer_pd: 5667; X86-AVX512: # %bb.0: 5668; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5669; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5670; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5671; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5672; X86-AVX512-NEXT: retl # encoding: [0xc3] 5673; 5674; X64-SSE-LABEL: test_mm_storer_pd: 5675; X64-SSE: # %bb.0: 5676; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] 5677; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] 5678; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5679; X64-SSE-NEXT: retq # encoding: [0xc3] 5680; 5681; X64-AVX1-LABEL: test_mm_storer_pd: 5682; X64-AVX1: # %bb.0: 5683; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5684; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5685; X64-AVX1-NEXT: vmovapd %xmm0, 
(%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5686; X64-AVX1-NEXT: retq # encoding: [0xc3] 5687; 5688; X64-AVX512-LABEL: test_mm_storer_pd: 5689; X64-AVX512: # %bb.0: 5690; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5691; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5692; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5693; X64-AVX512-NEXT: retq # encoding: [0xc3] 5694 %arg0 = bitcast double* %a0 to <2 x double>* 5695 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0> 5696 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5697 ret void 5698} 5699 5700define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { 5701; X86-SSE-LABEL: test_mm_storeu_pd: 5702; X86-SSE: # %bb.0: 5703; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5704; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5705; X86-SSE-NEXT: retl # encoding: [0xc3] 5706; 5707; X86-AVX1-LABEL: test_mm_storeu_pd: 5708; X86-AVX1: # %bb.0: 5709; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5710; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5711; X86-AVX1-NEXT: retl # encoding: [0xc3] 5712; 5713; X86-AVX512-LABEL: test_mm_storeu_pd: 5714; X86-AVX512: # %bb.0: 5715; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5716; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5717; X86-AVX512-NEXT: retl # encoding: [0xc3] 5718; 5719; X64-SSE-LABEL: test_mm_storeu_pd: 5720; X64-SSE: # %bb.0: 5721; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5722; X64-SSE-NEXT: retq # encoding: [0xc3] 5723; 5724; X64-AVX1-LABEL: test_mm_storeu_pd: 5725; X64-AVX1: # %bb.0: 5726; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5727; X64-AVX1-NEXT: retq # encoding: [0xc3] 5728; 5729; X64-AVX512-LABEL: test_mm_storeu_pd: 5730; X64-AVX512: # %bb.0: 5731; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5732; X64-AVX512-NEXT: retq # encoding: [0xc3] 5733 %arg0 = bitcast double* %a0 to <2 x double>* 5734 store <2 x double> %a1, <2 x double>* %arg0, align 1 5735 ret void 5736} 5737 5738define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5739; X86-SSE-LABEL: test_mm_storeu_si128: 5740; X86-SSE: # %bb.0: 5741; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5742; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5743; X86-SSE-NEXT: retl # encoding: [0xc3] 5744; 5745; X86-AVX1-LABEL: test_mm_storeu_si128: 5746; X86-AVX1: # %bb.0: 5747; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5748; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5749; X86-AVX1-NEXT: retl # encoding: [0xc3] 5750; 5751; X86-AVX512-LABEL: test_mm_storeu_si128: 5752; X86-AVX512: # %bb.0: 5753; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5754; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5755; X86-AVX512-NEXT: retl # encoding: [0xc3] 5756; 5757; X64-SSE-LABEL: test_mm_storeu_si128: 5758; X64-SSE: # %bb.0: 5759; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5760; X64-SSE-NEXT: retq # encoding: [0xc3] 5761; 5762; X64-AVX1-LABEL: test_mm_storeu_si128: 5763; X64-AVX1: # %bb.0: 5764; 
X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5765; X64-AVX1-NEXT: retq # encoding: [0xc3] 5766; 5767; X64-AVX512-LABEL: test_mm_storeu_si128: 5768; X64-AVX512: # %bb.0: 5769; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5770; X64-AVX512-NEXT: retq # encoding: [0xc3] 5771 store <2 x i64> %a1, <2 x i64>* %a0, align 1 5772 ret void 5773} 5774 5775define void @test_mm_storeu_si64(i8* nocapture %A, <2 x i64> %B) { 5776; X86-SSE-LABEL: test_mm_storeu_si64: 5777; X86-SSE: # %bb.0: # %entry 5778; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5779; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 5780; X86-SSE-NEXT: retl # encoding: [0xc3] 5781; 5782; X86-AVX1-LABEL: test_mm_storeu_si64: 5783; X86-AVX1: # %bb.0: # %entry 5784; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5785; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 5786; X86-AVX1-NEXT: retl # encoding: [0xc3] 5787; 5788; X86-AVX512-LABEL: test_mm_storeu_si64: 5789; X86-AVX512: # %bb.0: # %entry 5790; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5791; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 5792; X86-AVX512-NEXT: retl # encoding: [0xc3] 5793; 5794; X64-SSE-LABEL: test_mm_storeu_si64: 5795; X64-SSE: # %bb.0: # %entry 5796; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 5797; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5798; X64-SSE-NEXT: retq # encoding: [0xc3] 5799; 5800; X64-AVX1-LABEL: test_mm_storeu_si64: 5801; X64-AVX1: # %bb.0: # %entry 5802; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5803; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5804; X64-AVX1-NEXT: retq # encoding: [0xc3] 5805; 5806; X64-AVX512-LABEL: test_mm_storeu_si64: 5807; X64-AVX512: # %bb.0: # %entry 5808; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5809; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5810; X64-AVX512-NEXT: retq # encoding: [0xc3] 5811entry: 5812 %vecext.i = extractelement <2 x i64> %B, i32 0 5813 %__v.i = bitcast i8* %A to i64* 5814 store i64 %vecext.i, i64* %__v.i, align 1 5815 ret void 5816} 5817 5818define void @test_mm_storeu_si32(i8* nocapture %A, <2 x i64> %B) { 5819; X86-SSE-LABEL: test_mm_storeu_si32: 5820; X86-SSE: # %bb.0: # %entry 5821; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5822; X86-SSE-NEXT: movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1] 5823; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 5824; X86-SSE-NEXT: retl # encoding: [0xc3] 5825; 5826; X86-AVX1-LABEL: test_mm_storeu_si32: 5827; X86-AVX1: # %bb.0: # %entry 5828; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5829; X86-AVX1-NEXT: vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1] 5830; X86-AVX1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 5831; X86-AVX1-NEXT: retl # encoding: [0xc3] 5832; 5833; X86-AVX512-LABEL: test_mm_storeu_si32: 5834; X86-AVX512: # %bb.0: # %entry 5835; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5836; X86-AVX512-NEXT: vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1] 5837; X86-AVX512-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 5838; X86-AVX512-NEXT: retl # encoding: [0xc3] 5839; 5840; 
X64-SSE-LABEL: test_mm_storeu_si32: 5841; X64-SSE: # %bb.0: # %entry 5842; X64-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] 5843; X64-SSE-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] 5844; X64-SSE-NEXT: retq # encoding: [0xc3] 5845; 5846; X64-AVX1-LABEL: test_mm_storeu_si32: 5847; X64-AVX1: # %bb.0: # %entry 5848; X64-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] 5849; X64-AVX1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] 5850; X64-AVX1-NEXT: retq # encoding: [0xc3] 5851; 5852; X64-AVX512-LABEL: test_mm_storeu_si32: 5853; X64-AVX512: # %bb.0: # %entry 5854; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] 5855; X64-AVX512-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] 5856; X64-AVX512-NEXT: retq # encoding: [0xc3] 5857entry: 5858 %0 = bitcast <2 x i64> %B to <4 x i32> 5859 %vecext.i = extractelement <4 x i32> %0, i32 0 5860 %__v.i = bitcast i8* %A to i32* 5861 store i32 %vecext.i, i32* %__v.i, align 1 5862 ret void 5863} 5864 5865define void @test_mm_storeu_si16(i8* nocapture %A, <2 x i64> %B) { 5866; X86-SSE-LABEL: test_mm_storeu_si16: 5867; X86-SSE: # %bb.0: # %entry 5868; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5869; X86-SSE-NEXT: movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1] 5870; X86-SSE-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08] 5871; X86-SSE-NEXT: retl # encoding: [0xc3] 5872; 5873; X86-AVX1-LABEL: test_mm_storeu_si16: 5874; X86-AVX1: # %bb.0: # %entry 5875; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5876; X86-AVX1-NEXT: vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1] 5877; X86-AVX1-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08] 5878; X86-AVX1-NEXT: retl # encoding: [0xc3] 5879; 5880; X86-AVX512-LABEL: test_mm_storeu_si16: 5881; X86-AVX512: # %bb.0: # %entry 5882; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5883; X86-AVX512-NEXT: vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1] 5884; X86-AVX512-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08] 5885; X86-AVX512-NEXT: retl # encoding: [0xc3] 5886; 5887; X64-SSE-LABEL: test_mm_storeu_si16: 5888; X64-SSE: # %bb.0: # %entry 5889; X64-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] 5890; X64-SSE-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07] 5891; X64-SSE-NEXT: retq # encoding: [0xc3] 5892; 5893; X64-AVX1-LABEL: test_mm_storeu_si16: 5894; X64-AVX1: # %bb.0: # %entry 5895; X64-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] 5896; X64-AVX1-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07] 5897; X64-AVX1-NEXT: retq # encoding: [0xc3] 5898; 5899; X64-AVX512-LABEL: test_mm_storeu_si16: 5900; X64-AVX512: # %bb.0: # %entry 5901; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] 5902; X64-AVX512-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07] 5903; X64-AVX512-NEXT: retq # encoding: [0xc3] 5904entry: 5905 %0 = bitcast <2 x i64> %B to <8 x i16> 5906 %vecext.i = extractelement <8 x i16> %0, i32 0 5907 %__v.i = bitcast i8* %A to i16* 5908 store i16 %vecext.i, i16* %__v.i, align 1 5909 ret void 5910} 5911 5912define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) { 5913; X86-SSE-LABEL: test_mm_stream_pd: 5914; X86-SSE: # %bb.0: 5915; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5916; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5917; X86-SSE-NEXT: retl # encoding: [0xc3] 5918; 
5919; X86-AVX1-LABEL: test_mm_stream_pd: 5920; X86-AVX1: # %bb.0: 5921; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5922; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5923; X86-AVX1-NEXT: retl # encoding: [0xc3] 5924; 5925; X86-AVX512-LABEL: test_mm_stream_pd: 5926; X86-AVX512: # %bb.0: 5927; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5928; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5929; X86-AVX512-NEXT: retl # encoding: [0xc3] 5930; 5931; X64-SSE-LABEL: test_mm_stream_pd: 5932; X64-SSE: # %bb.0: 5933; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5934; X64-SSE-NEXT: retq # encoding: [0xc3] 5935; 5936; X64-AVX1-LABEL: test_mm_stream_pd: 5937; X64-AVX1: # %bb.0: 5938; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5939; X64-AVX1-NEXT: retq # encoding: [0xc3] 5940; 5941; X64-AVX512-LABEL: test_mm_stream_pd: 5942; X64-AVX512: # %bb.0: 5943; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5944; X64-AVX512-NEXT: retq # encoding: [0xc3] 5945 %arg0 = bitcast double* %a0 to <2 x double>* 5946 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0 5947 ret void 5948} 5949 5950define void @test_mm_stream_si32(i32 *%a0, i32 %a1) { 5951; X86-LABEL: test_mm_stream_si32: 5952; X86: # %bb.0: 5953; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 5954; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 5955; X86-NEXT: movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01] 5956; X86-NEXT: retl # encoding: [0xc3] 5957; 5958; X64-LABEL: test_mm_stream_si32: 5959; X64: # %bb.0: 5960; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37] 5961; X64-NEXT: retq # encoding: [0xc3] 5962 store i32 %a1, i32* %a0, align 1, !nontemporal !0 5963 ret void 5964} 5965 5966define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5967; X86-SSE-LABEL: test_mm_stream_si128: 5968; X86-SSE: # %bb.0: 5969; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5970; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5971; X86-SSE-NEXT: retl # encoding: [0xc3] 5972; 5973; X86-AVX1-LABEL: test_mm_stream_si128: 5974; X86-AVX1: # %bb.0: 5975; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5976; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5977; X86-AVX1-NEXT: retl # encoding: [0xc3] 5978; 5979; X86-AVX512-LABEL: test_mm_stream_si128: 5980; X86-AVX512: # %bb.0: 5981; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5982; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5983; X86-AVX512-NEXT: retl # encoding: [0xc3] 5984; 5985; X64-SSE-LABEL: test_mm_stream_si128: 5986; X64-SSE: # %bb.0: 5987; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5988; X64-SSE-NEXT: retq # encoding: [0xc3] 5989; 5990; X64-AVX1-LABEL: test_mm_stream_si128: 5991; X64-AVX1: # %bb.0: 5992; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5993; X64-AVX1-NEXT: retq # encoding: [0xc3] 5994; 5995; X64-AVX512-LABEL: test_mm_stream_si128: 5996; X64-AVX512: # %bb.0: 5997; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5998; X64-AVX512-NEXT: retq # encoding: [0xc3] 5999 store <2 x i64> %a1, <2 x i64>* 
%a0, align 16, !nontemporal !0 6000 ret void 6001} 6002 6003define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6004; SSE-LABEL: test_mm_sub_epi8: 6005; SSE: # %bb.0: 6006; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1] 6007; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6008; 6009; AVX1-LABEL: test_mm_sub_epi8: 6010; AVX1: # %bb.0: 6011; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1] 6012; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6013; 6014; AVX512-LABEL: test_mm_sub_epi8: 6015; AVX512: # %bb.0: 6016; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 6017; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6018 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 6019 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 6020 %res = sub <16 x i8> %arg0, %arg1 6021 %bc = bitcast <16 x i8> %res to <2 x i64> 6022 ret <2 x i64> %bc 6023} 6024 6025define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6026; SSE-LABEL: test_mm_sub_epi16: 6027; SSE: # %bb.0: 6028; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1] 6029; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6030; 6031; AVX1-LABEL: test_mm_sub_epi16: 6032; AVX1: # %bb.0: 6033; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1] 6034; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6035; 6036; AVX512-LABEL: test_mm_sub_epi16: 6037; AVX512: # %bb.0: 6038; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] 6039; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6040 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 6041 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 6042 %res = sub <8 x i16> %arg0, %arg1 6043 %bc = bitcast <8 x i16> %res to <2 x i64> 6044 ret <2 x i64> %bc 6045} 6046 6047define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6048; SSE-LABEL: test_mm_sub_epi32: 6049; SSE: # %bb.0: 6050; SSE-NEXT: psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1] 6051; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6052; 6053; AVX1-LABEL: test_mm_sub_epi32: 6054; AVX1: # %bb.0: 6055; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1] 6056; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6057; 6058; AVX512-LABEL: test_mm_sub_epi32: 6059; AVX512: # %bb.0: 6060; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1] 6061; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6062 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 6063 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 6064 %res = sub <4 x i32> %arg0, %arg1 6065 %bc = bitcast <4 x i32> %res to <2 x i64> 6066 ret <2 x i64> %bc 6067} 6068 6069define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6070; SSE-LABEL: test_mm_sub_epi64: 6071; SSE: # %bb.0: 6072; SSE-NEXT: psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1] 6073; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6074; 6075; AVX1-LABEL: test_mm_sub_epi64: 6076; AVX1: # %bb.0: 6077; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1] 6078; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6079; 6080; AVX512-LABEL: test_mm_sub_epi64: 6081; AVX512: # %bb.0: 6082; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1] 6083; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6084 %res = sub <2 x i64> %a0, %a1 6085 ret <2 x i64> %res 6086} 6087 6088define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 6089; SSE-LABEL: 
test_mm_sub_pd: 6090; SSE: # %bb.0: 6091; SSE-NEXT: subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1] 6092; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6093; 6094; AVX1-LABEL: test_mm_sub_pd: 6095; AVX1: # %bb.0: 6096; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1] 6097; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6098; 6099; AVX512-LABEL: test_mm_sub_pd: 6100; AVX512: # %bb.0: 6101; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1] 6102; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6103 %res = fsub <2 x double> %a0, %a1 6104 ret <2 x double> %res 6105} 6106 6107define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6108; SSE-LABEL: test_mm_sub_sd: 6109; SSE: # %bb.0: 6110; SSE-NEXT: subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1] 6111; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6112; 6113; AVX1-LABEL: test_mm_sub_sd: 6114; AVX1: # %bb.0: 6115; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1] 6116; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6117; 6118; AVX512-LABEL: test_mm_sub_sd: 6119; AVX512: # %bb.0: 6120; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1] 6121; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6122 %ext0 = extractelement <2 x double> %a0, i32 0 6123 %ext1 = extractelement <2 x double> %a1, i32 0 6124 %fsub = fsub double %ext0, %ext1 6125 %res = insertelement <2 x double> %a0, double %fsub, i32 0 6126 ret <2 x double> %res 6127} 6128 6129define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6130; SSE-LABEL: test_mm_subs_epi8: 6131; SSE: # %bb.0: 6132; SSE-NEXT: psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1] 6133; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6134; 6135; AVX1-LABEL: test_mm_subs_epi8: 6136; AVX1: # %bb.0: 6137; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1] 6138; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6139; 6140; AVX512-LABEL: test_mm_subs_epi8: 6141; AVX512: # %bb.0: 6142; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] 6143; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6144 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 6145 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 6146 %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) 6147 %bc = bitcast <16 x i8> %res to <2 x i64> 6148 ret <2 x i64> %bc 6149} 6150declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 6151 6152define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6153; SSE-LABEL: test_mm_subs_epi16: 6154; SSE: # %bb.0: 6155; SSE-NEXT: psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1] 6156; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6157; 6158; AVX1-LABEL: test_mm_subs_epi16: 6159; AVX1: # %bb.0: 6160; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1] 6161; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6162; 6163; AVX512-LABEL: test_mm_subs_epi16: 6164; AVX512: # %bb.0: 6165; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] 6166; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 6168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 6169 %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) 6170 %bc = bitcast <8 x i16> %res to <2 x i64> 6171 ret <2 x i64> %bc 6172} 6173declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) 
nounwind readnone 6174 6175define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6176; SSE-LABEL: test_mm_subs_epu8: 6177; SSE: # %bb.0: 6178; SSE-NEXT: psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1] 6179; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6180; 6181; AVX1-LABEL: test_mm_subs_epu8: 6182; AVX1: # %bb.0: 6183; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1] 6184; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6185; 6186; AVX512-LABEL: test_mm_subs_epu8: 6187; AVX512: # %bb.0: 6188; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] 6189; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6190 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 6191 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 6192 %res = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1) 6193 %bc = bitcast <16 x i8> %res to <2 x i64> 6194 ret <2 x i64> %bc 6195} 6196declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) 6197 6198define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6199; SSE-LABEL: test_mm_subs_epu16: 6200; SSE: # %bb.0: 6201; SSE-NEXT: psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1] 6202; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6203; 6204; AVX1-LABEL: test_mm_subs_epu16: 6205; AVX1: # %bb.0: 6206; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1] 6207; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6208; 6209; AVX512-LABEL: test_mm_subs_epu16: 6210; AVX512: # %bb.0: 6211; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] 6212; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6213 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 6214 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 6215 %res = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1) 6216 %bc = bitcast <8 x i16> %res to <2 x i64> 6217 ret <2 x i64> %bc 6218} 6219declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) 6220 6221define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6222; SSE-LABEL: test_mm_ucomieq_sd: 6223; SSE: # %bb.0: 6224; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1] 6225; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 6226; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 6227; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 6228; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6229; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6230; 6231; AVX1-LABEL: test_mm_ucomieq_sd: 6232; AVX1: # %bb.0: 6233; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1] 6234; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 6235; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 6236; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 6237; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6238; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6239; 6240; AVX512-LABEL: test_mm_ucomieq_sd: 6241; AVX512: # %bb.0: 6242; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] 6243; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 6244; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 6245; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 6246; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6247; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6248 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 6249 ret i32 %res 6250} 6251declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) 
nounwind readnone 6252 6253define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6254; SSE-LABEL: test_mm_ucomige_sd: 6255; SSE: # %bb.0: 6256; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6257; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1] 6258; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6259; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6260; 6261; AVX1-LABEL: test_mm_ucomige_sd: 6262; AVX1: # %bb.0: 6263; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6264; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1] 6265; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6266; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6267; 6268; AVX512-LABEL: test_mm_ucomige_sd: 6269; AVX512: # %bb.0: 6270; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6271; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] 6272; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6273; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6274 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) 6275 ret i32 %res 6276} 6277declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone 6278 6279define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6280; SSE-LABEL: test_mm_ucomigt_sd: 6281; SSE: # %bb.0: 6282; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6283; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1] 6284; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6285; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6286; 6287; AVX1-LABEL: test_mm_ucomigt_sd: 6288; AVX1: # %bb.0: 6289; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6290; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1] 6291; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6292; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6293; 6294; AVX512-LABEL: test_mm_ucomigt_sd: 6295; AVX512: # %bb.0: 6296; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6297; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] 6298; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6299; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6300 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) 6301 ret i32 %res 6302} 6303declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone 6304 6305define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6306; SSE-LABEL: test_mm_ucomile_sd: 6307; SSE: # %bb.0: 6308; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6309; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8] 6310; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6311; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6312; 6313; AVX1-LABEL: test_mm_ucomile_sd: 6314; AVX1: # %bb.0: 6315; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6316; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8] 6317; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6318; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6319; 6320; AVX512-LABEL: test_mm_ucomile_sd: 6321; AVX512: # %bb.0: 6322; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6323; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] 6324; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 6325; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6326 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) 6327 ret i32 %res 6328} 6329declare 
i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone 6330 6331define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6332; SSE-LABEL: test_mm_ucomilt_sd: 6333; SSE: # %bb.0: 6334; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6335; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8] 6336; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6337; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6338; 6339; AVX1-LABEL: test_mm_ucomilt_sd: 6340; AVX1: # %bb.0: 6341; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6342; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8] 6343; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6344; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6345; 6346; AVX512-LABEL: test_mm_ucomilt_sd: 6347; AVX512: # %bb.0: 6348; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 6349; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] 6350; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 6351; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6352 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) 6353 ret i32 %res 6354} 6355declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone 6356 6357define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 6358; SSE-LABEL: test_mm_ucomineq_sd: 6359; SSE: # %bb.0: 6360; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1] 6361; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 6362; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 6363; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 6364; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6365; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6366; 6367; AVX1-LABEL: test_mm_ucomineq_sd: 6368; AVX1: # %bb.0: 6369; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1] 6370; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 6371; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 6372; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 6373; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6374; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6375; 6376; AVX512-LABEL: test_mm_ucomineq_sd: 6377; AVX512: # %bb.0: 6378; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] 6379; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 6380; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 6381; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 6382; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 6383; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6384 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) 6385 ret i32 %res 6386} 6387declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone 6388 6389define <2 x double> @test_mm_undefined_pd() { 6390; CHECK-LABEL: test_mm_undefined_pd: 6391; CHECK: # %bb.0: 6392; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6393 ret <2 x double> undef 6394} 6395 6396define <2 x i64> @test_mm_undefined_si128() { 6397; CHECK-LABEL: test_mm_undefined_si128: 6398; CHECK: # %bb.0: 6399; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6400 ret <2 x i64> undef 6401} 6402 6403define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) { 6404; SSE-LABEL: test_mm_unpackhi_epi8: 6405; SSE: # %bb.0: 6406; SSE-NEXT: punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1] 6407; SSE-NEXT: # xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 6408; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6409; 6410; AVX1-LABEL: test_mm_unpackhi_epi8: 6411; AVX1: # %bb.0: 6412; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1] 6413; AVX1-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 6414; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6415; 6416; AVX512-LABEL: test_mm_unpackhi_epi8: 6417; AVX512: # %bb.0: 6418; AVX512-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1] 6419; AVX512-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 6420; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6421 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 6422 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 6423 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 6424 %bc = bitcast <16 x i8> %res to <2 x i64> 6425 ret <2 x i64> %bc 6426} 6427 6428define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) { 6429; SSE-LABEL: test_mm_unpackhi_epi16: 6430; SSE: # %bb.0: 6431; SSE-NEXT: punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1] 6432; SSE-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6433; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6434; 6435; AVX1-LABEL: test_mm_unpackhi_epi16: 6436; AVX1: # %bb.0: 6437; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1] 6438; AVX1-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6439; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6440; 6441; AVX512-LABEL: test_mm_unpackhi_epi16: 6442; AVX512: # %bb.0: 6443; AVX512-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1] 6444; AVX512-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6445; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6446 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 6447 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 6448 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 6449 %bc = bitcast <8 x i16> %res to <2 x i64> 6450 ret <2 x i64> %bc 6451} 6452 6453define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) { 6454; SSE-LABEL: test_mm_unpackhi_epi32: 6455; SSE: # %bb.0: 6456; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 6457; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6458; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6459; 6460; AVX1-LABEL: test_mm_unpackhi_epi32: 6461; AVX1: # %bb.0: 6462; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 6463; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6464; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6465; 6466; AVX512-LABEL: test_mm_unpackhi_epi32: 6467; AVX512: # %bb.0: 6468; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 6469; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6470; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6471 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 6472 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 
6473 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 6474 %bc = bitcast <4 x i32> %res to <2 x i64> 6475 ret <2 x i64> %bc 6476} 6477 6478define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) { 6479; SSE-LABEL: test_mm_unpackhi_epi64: 6480; SSE: # %bb.0: 6481; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1] 6482; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1] 6483; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6484; 6485; AVX1-LABEL: test_mm_unpackhi_epi64: 6486; AVX1: # %bb.0: 6487; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 6488; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 6489; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6490; 6491; AVX512-LABEL: test_mm_unpackhi_epi64: 6492; AVX512: # %bb.0: 6493; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 6494; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 6495; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6496 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3> 6497 ret <2 x i64> %res 6498} 6499 6500define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) { 6501; SSE-LABEL: test_mm_unpackhi_pd: 6502; SSE: # %bb.0: 6503; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1] 6504; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1] 6505; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6506; 6507; AVX1-LABEL: test_mm_unpackhi_pd: 6508; AVX1: # %bb.0: 6509; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 6510; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 6511; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6512; 6513; AVX512-LABEL: test_mm_unpackhi_pd: 6514; AVX512: # %bb.0: 6515; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 6516; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 6517; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6518 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 6519 ret <2 x double> %res 6520} 6521 6522define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) { 6523; SSE-LABEL: test_mm_unpacklo_epi8: 6524; SSE: # %bb.0: 6525; SSE-NEXT: punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1] 6526; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6527; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6528; 6529; AVX1-LABEL: test_mm_unpacklo_epi8: 6530; AVX1: # %bb.0: 6531; AVX1-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1] 6532; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6533; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6534; 6535; AVX512-LABEL: test_mm_unpacklo_epi8: 6536; AVX512: # %bb.0: 6537; AVX512-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1] 6538; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 6539; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6540 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 6541 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 6542 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 6543 %bc = bitcast <16 x i8> %res to <2 x i64> 6544 ret <2 x i64> %bc 
6545} 6546 6547define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) { 6548; SSE-LABEL: test_mm_unpacklo_epi16: 6549; SSE: # %bb.0: 6550; SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1] 6551; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6552; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6553; 6554; AVX1-LABEL: test_mm_unpacklo_epi16: 6555; AVX1: # %bb.0: 6556; AVX1-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1] 6557; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6558; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6559; 6560; AVX512-LABEL: test_mm_unpacklo_epi16: 6561; AVX512: # %bb.0: 6562; AVX512-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1] 6563; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6564; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6565 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 6566 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 6567 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 6568 %bc = bitcast <8 x i16> %res to <2 x i64> 6569 ret <2 x i64> %bc 6570} 6571 6572define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) { 6573; SSE-LABEL: test_mm_unpacklo_epi32: 6574; SSE: # %bb.0: 6575; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 6576; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6577; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6578; 6579; AVX1-LABEL: test_mm_unpacklo_epi32: 6580; AVX1: # %bb.0: 6581; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 6582; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6583; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6584; 6585; AVX512-LABEL: test_mm_unpacklo_epi32: 6586; AVX512: # %bb.0: 6587; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 6588; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6589; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6590 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 6591 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 6592 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 6593 %bc = bitcast <4 x i32> %res to <2 x i64> 6594 ret <2 x i64> %bc 6595} 6596 6597define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) { 6598; SSE-LABEL: test_mm_unpacklo_epi64: 6599; SSE: # %bb.0: 6600; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 6601; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 6602; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6603; 6604; AVX1-LABEL: test_mm_unpacklo_epi64: 6605; AVX1: # %bb.0: 6606; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 6607; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 6608; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6609; 6610; AVX512-LABEL: test_mm_unpacklo_epi64: 6611; AVX512: # %bb.0: 6612; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 6613; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 6614; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6615 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> 6616 ret <2 x i64> %res 6617} 6618 6619define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) { 6620; SSE-LABEL: test_mm_unpacklo_pd: 6621; SSE: # %bb.0: 6622; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 6623; 
SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 6624; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6625; 6626; AVX1-LABEL: test_mm_unpacklo_pd: 6627; AVX1: # %bb.0: 6628; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 6629; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 6630; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6631; 6632; AVX512-LABEL: test_mm_unpacklo_pd: 6633; AVX512: # %bb.0: 6634; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 6635; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 6636; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6637 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 6638 ret <2 x double> %res 6639} 6640 6641define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 6642; SSE-LABEL: test_mm_xor_pd: 6643; SSE: # %bb.0: 6644; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 6645; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6646; 6647; AVX1-LABEL: test_mm_xor_pd: 6648; AVX1: # %bb.0: 6649; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 6650; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6651; 6652; AVX512-LABEL: test_mm_xor_pd: 6653; AVX512: # %bb.0: 6654; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 6655; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6656 %arg0 = bitcast <2 x double> %a0 to <4 x i32> 6657 %arg1 = bitcast <2 x double> %a1 to <4 x i32> 6658 %res = xor <4 x i32> %arg0, %arg1 6659 %bc = bitcast <4 x i32> %res to <2 x double> 6660 ret <2 x double> %bc 6661} 6662 6663define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { 6664; SSE-LABEL: test_mm_xor_si128: 6665; SSE: # %bb.0: 6666; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 6667; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6668; 6669; AVX1-LABEL: test_mm_xor_si128: 6670; AVX1: # %bb.0: 6671; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 6672; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6673; 6674; AVX512-LABEL: test_mm_xor_si128: 6675; AVX512: # %bb.0: 6676; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 6677; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6678 %res = xor <2 x i64> %a0, %a1 6679 ret <2 x i64> %res 6680} 6681 6682!0 = !{i32 1} 6683 6684
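; The trailing !0 = !{i32 1} node is the !nontemporal metadata referenced by the
; test_mm_stream_pd/si32/si128 stores above; it is what makes those stores lower
; to the non-temporal movnti/movntps forms checked in their assertions.
;
; A minimal, hypothetical C-level sketch (kept as a comment so this file stays
; valid IR) of how a few of the intrinsics exercised above would be reached from
; <emmintrin.h>. The wrapper names are illustrative only and are not part of this
; test or of clang; the expected instructions are taken from the checks above.
;
;   #include <emmintrin.h>
;
;   void    stream_pd(double *p, __m128d v)  { _mm_stream_pd(p, v); }          /* movntps / vmovntps */
;   void    stream_i32(int *p, int v)        { _mm_stream_si32(p, v); }        /* movnti */
;   __m128i sub_i8(__m128i a, __m128i b)     { return _mm_sub_epi8(a, b); }    /* psubb / vpsubb */
;   int     ucomieq(__m128d a, __m128d b)    { return _mm_ucomieq_sd(a, b); }  /* ucomisd + setnp/sete */
;   __m128i unpackhi8(__m128i a, __m128i b)  { return _mm_unpackhi_epi8(a, b); } /* punpckhbw / vpunpckhbw */
;   __m128i xor_128(__m128i a, __m128i b)    { return _mm_xor_si128(a, b); }   /* xorps / vxorps here */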