1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
8
9; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
10
11define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
12; SSE-LABEL: test_mm_add_epi8:
13; SSE:       # %bb.0:
14; SSE-NEXT:    paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
15; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
16;
17; AVX1-LABEL: test_mm_add_epi8:
18; AVX1:       # %bb.0:
19; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
20; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
21;
22; AVX512-LABEL: test_mm_add_epi8:
23; AVX512:       # %bb.0:
24; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
25; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
26  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
27  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
28  %res = add <16 x i8> %arg0, %arg1
29  %bc = bitcast <16 x i8> %res to <2 x i64>
30  ret <2 x i64> %bc
31}
32
33define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
34; SSE-LABEL: test_mm_add_epi16:
35; SSE:       # %bb.0:
36; SSE-NEXT:    paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
37; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
38;
39; AVX1-LABEL: test_mm_add_epi16:
40; AVX1:       # %bb.0:
41; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
42; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
43;
44; AVX512-LABEL: test_mm_add_epi16:
45; AVX512:       # %bb.0:
46; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
47; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
48  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
49  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
50  %res = add <8 x i16> %arg0, %arg1
51  %bc = bitcast <8 x i16> %res to <2 x i64>
52  ret <2 x i64> %bc
53}
54
55define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
56; SSE-LABEL: test_mm_add_epi32:
57; SSE:       # %bb.0:
58; SSE-NEXT:    paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
59; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
60;
61; AVX1-LABEL: test_mm_add_epi32:
62; AVX1:       # %bb.0:
63; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
64; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
65;
66; AVX512-LABEL: test_mm_add_epi32:
67; AVX512:       # %bb.0:
68; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
69; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
70  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
71  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
72  %res = add <4 x i32> %arg0, %arg1
73  %bc = bitcast <4 x i32> %res to <2 x i64>
74  ret <2 x i64> %bc
75}
76
77define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
78; SSE-LABEL: test_mm_add_epi64:
79; SSE:       # %bb.0:
80; SSE-NEXT:    paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
81; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
82;
83; AVX1-LABEL: test_mm_add_epi64:
84; AVX1:       # %bb.0:
85; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
86; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
87;
88; AVX512-LABEL: test_mm_add_epi64:
89; AVX512:       # %bb.0:
90; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
91; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
92  %res = add <2 x i64> %a0, %a1
93  ret <2 x i64> %res
94}
95
96define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
97; SSE-LABEL: test_mm_add_pd:
98; SSE:       # %bb.0:
99; SSE-NEXT:    addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
100; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
101;
102; AVX1-LABEL: test_mm_add_pd:
103; AVX1:       # %bb.0:
104; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
105; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
106;
107; AVX512-LABEL: test_mm_add_pd:
108; AVX512:       # %bb.0:
109; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
110; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
111  %res = fadd <2 x double> %a0, %a1
112  ret <2 x double> %res
113}
114
115define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
116; SSE-LABEL: test_mm_add_sd:
117; SSE:       # %bb.0:
118; SSE-NEXT:    addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
119; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
120;
121; AVX1-LABEL: test_mm_add_sd:
122; AVX1:       # %bb.0:
123; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
124; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
125;
126; AVX512-LABEL: test_mm_add_sd:
127; AVX512:       # %bb.0:
128; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
129; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
130  %ext0 = extractelement <2 x double> %a0, i32 0
131  %ext1 = extractelement <2 x double> %a1, i32 0
132  %fadd = fadd double %ext0, %ext1
133  %res = insertelement <2 x double> %a0, double %fadd, i32 0
134  ret <2 x double> %res
135}
136
137define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
138; SSE-LABEL: test_mm_adds_epi8:
139; SSE:       # %bb.0:
140; SSE-NEXT:    paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
141; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
142;
143; AVX1-LABEL: test_mm_adds_epi8:
144; AVX1:       # %bb.0:
145; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
146; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
147;
148; AVX512-LABEL: test_mm_adds_epi8:
149; AVX512:       # %bb.0:
150; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
151; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
152  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
153  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
154  %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
155  %bc = bitcast <16 x i8> %res to <2 x i64>
156  ret <2 x i64> %bc
157}
158declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
159
160define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
161; SSE-LABEL: test_mm_adds_epi16:
162; SSE:       # %bb.0:
163; SSE-NEXT:    paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
164; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
165;
166; AVX1-LABEL: test_mm_adds_epi16:
167; AVX1:       # %bb.0:
168; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
169; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
170;
171; AVX512-LABEL: test_mm_adds_epi16:
172; AVX512:       # %bb.0:
173; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
174; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
175  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
176  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
177  %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
178  %bc = bitcast <8 x i16> %res to <2 x i64>
179  ret <2 x i64> %bc
180}
181declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
182
183define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
184; SSE-LABEL: test_mm_adds_epu8:
185; SSE:       # %bb.0:
186; SSE-NEXT:    paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
187; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
188;
189; AVX1-LABEL: test_mm_adds_epu8:
190; AVX1:       # %bb.0:
191; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
192; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
193;
194; AVX512-LABEL: test_mm_adds_epu8:
195; AVX512:       # %bb.0:
196; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
197; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
198  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
199  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
200  %res = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
201  %bc = bitcast <16 x i8> %res to <2 x i64>
202  ret <2 x i64> %bc
203}
204declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
205
206define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
207; SSE-LABEL: test_mm_adds_epu16:
208; SSE:       # %bb.0:
209; SSE-NEXT:    paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
210; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
211;
212; AVX1-LABEL: test_mm_adds_epu16:
213; AVX1:       # %bb.0:
214; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
215; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
216;
217; AVX512-LABEL: test_mm_adds_epu16:
218; AVX512:       # %bb.0:
219; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
220; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
221  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
222  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
223  %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
224  %bc = bitcast <8 x i16> %res to <2 x i64>
225  ret <2 x i64> %bc
226}
227declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
228
229define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
230; SSE-LABEL: test_mm_and_pd:
231; SSE:       # %bb.0:
232; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
233; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
234;
235; AVX1-LABEL: test_mm_and_pd:
236; AVX1:       # %bb.0:
237; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
238; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
239;
240; AVX512-LABEL: test_mm_and_pd:
241; AVX512:       # %bb.0:
242; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
243; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
244  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
245  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
246  %res = and <4 x i32> %arg0, %arg1
247  %bc = bitcast <4 x i32> %res to <2 x double>
248  ret <2 x double> %bc
249}
250
251define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
252; SSE-LABEL: test_mm_and_si128:
253; SSE:       # %bb.0:
254; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
255; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
256;
257; AVX1-LABEL: test_mm_and_si128:
258; AVX1:       # %bb.0:
259; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
260; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
261;
262; AVX512-LABEL: test_mm_and_si128:
263; AVX512:       # %bb.0:
264; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
265; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
266  %res = and <2 x i64> %a0, %a1
267  ret <2 x i64> %res
268}
269
270define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
271; SSE-LABEL: test_mm_andnot_pd:
272; SSE:       # %bb.0:
273; SSE-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
274; SSE-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
275; SSE-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
276; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
277;
278; AVX1-LABEL: test_mm_andnot_pd:
279; AVX1:       # %bb.0:
280; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
281; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
282; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
283; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
284;
285; AVX512-LABEL: test_mm_andnot_pd:
286; AVX512:       # %bb.0:
287; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
288; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
289; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
290  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
291  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
292  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
293  %res = and <4 x i32> %not, %arg1
294  %bc = bitcast <4 x i32> %res to <2 x double>
295  ret <2 x double> %bc
296}
297
298define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
299; SSE-LABEL: test_mm_andnot_si128:
300; SSE:       # %bb.0:
301; SSE-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
302; SSE-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
303; SSE-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
304; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
305;
306; AVX1-LABEL: test_mm_andnot_si128:
307; AVX1:       # %bb.0:
308; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
309; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
310; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
311; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
312;
313; AVX512-LABEL: test_mm_andnot_si128:
314; AVX512:       # %bb.0:
315; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
316; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
317; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
318  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
319  %res = and <2 x i64> %not, %a1
320  ret <2 x i64> %res
321}
322
323define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
324; SSE-LABEL: test_mm_avg_epu8:
325; SSE:       # %bb.0:
326; SSE-NEXT:    pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
327; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
328;
329; AVX1-LABEL: test_mm_avg_epu8:
330; AVX1:       # %bb.0:
331; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
332; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
333;
334; AVX512-LABEL: test_mm_avg_epu8:
335; AVX512:       # %bb.0:
336; AVX512-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
337; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
338  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
339  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
340  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
341  %bc = bitcast <16 x i8> %res to <2 x i64>
342  ret <2 x i64> %bc
343}
344declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
345
346define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
347; SSE-LABEL: test_mm_avg_epu16:
348; SSE:       # %bb.0:
349; SSE-NEXT:    pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
350; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
351;
352; AVX1-LABEL: test_mm_avg_epu16:
353; AVX1:       # %bb.0:
354; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
355; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
356;
357; AVX512-LABEL: test_mm_avg_epu16:
358; AVX512:       # %bb.0:
359; AVX512-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
360; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
361  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
362  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
363  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
364  %bc = bitcast <8 x i16> %res to <2 x i64>
365  ret <2 x i64> %bc
366}
367declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
368
369define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
370; SSE-LABEL: test_mm_bslli_si128:
371; SSE:       # %bb.0:
372; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
373; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
374; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
375;
376; AVX1-LABEL: test_mm_bslli_si128:
377; AVX1:       # %bb.0:
378; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
379; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
380; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
381;
382; AVX512-LABEL: test_mm_bslli_si128:
383; AVX512:       # %bb.0:
384; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
385; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
386; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
387  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
388  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
389  %bc = bitcast <16 x i8> %res to <2 x i64>
390  ret <2 x i64> %bc
391}
392
393define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
394; SSE-LABEL: test_mm_bsrli_si128:
395; SSE:       # %bb.0:
396; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
397; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
398; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
399;
400; AVX1-LABEL: test_mm_bsrli_si128:
401; AVX1:       # %bb.0:
402; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
403; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
404; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
405;
406; AVX512-LABEL: test_mm_bsrli_si128:
407; AVX512:       # %bb.0:
408; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
409; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
410; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
411  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
412  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
413  %bc = bitcast <16 x i8> %res to <2 x i64>
414  ret <2 x i64> %bc
415}
416
417define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
418; CHECK-LABEL: test_mm_castpd_ps:
419; CHECK:       # %bb.0:
420; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
421  %res = bitcast <2 x double> %a0 to <4 x float>
422  ret <4 x float> %res
423}
424
425define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
426; CHECK-LABEL: test_mm_castpd_si128:
427; CHECK:       # %bb.0:
428; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
429  %res = bitcast <2 x double> %a0 to <2 x i64>
430  ret <2 x i64> %res
431}
432
433define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
434; CHECK-LABEL: test_mm_castps_pd:
435; CHECK:       # %bb.0:
436; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
437  %res = bitcast <4 x float> %a0 to <2 x double>
438  ret <2 x double> %res
439}
440
441define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
442; CHECK-LABEL: test_mm_castps_si128:
443; CHECK:       # %bb.0:
444; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
445  %res = bitcast <4 x float> %a0 to <2 x i64>
446  ret <2 x i64> %res
447}
448
449define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
450; CHECK-LABEL: test_mm_castsi128_pd:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
453  %res = bitcast <2 x i64> %a0 to <2 x double>
454  ret <2 x double> %res
455}
456
457define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
458; CHECK-LABEL: test_mm_castsi128_ps:
459; CHECK:       # %bb.0:
460; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
461  %res = bitcast <2 x i64> %a0 to <4 x float>
462  ret <4 x float> %res
463}
464
465define void @test_mm_clflush(i8* %a0) nounwind {
466; X86-LABEL: test_mm_clflush:
467; X86:       # %bb.0:
468; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
469; X86-NEXT:    clflush (%eax) # encoding: [0x0f,0xae,0x38]
470; X86-NEXT:    retl # encoding: [0xc3]
471;
472; X64-LABEL: test_mm_clflush:
473; X64:       # %bb.0:
474; X64-NEXT:    clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
475; X64-NEXT:    retq # encoding: [0xc3]
476  call void @llvm.x86.sse2.clflush(i8* %a0)
477  ret void
478}
479declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
480
481define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
482; SSE-LABEL: test_mm_cmpeq_epi8:
483; SSE:       # %bb.0:
484; SSE-NEXT:    pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
485; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
486;
487; AVX1-LABEL: test_mm_cmpeq_epi8:
488; AVX1:       # %bb.0:
489; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
490; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
491;
492; AVX512-LABEL: test_mm_cmpeq_epi8:
493; AVX512:       # %bb.0:
494; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
495; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
496; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
497  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
498  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
499  %cmp = icmp eq <16 x i8> %arg0, %arg1
500  %res = sext <16 x i1> %cmp to <16 x i8>
501  %bc = bitcast <16 x i8> %res to <2 x i64>
502  ret <2 x i64> %bc
503}
504
505define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
506; SSE-LABEL: test_mm_cmpeq_epi16:
507; SSE:       # %bb.0:
508; SSE-NEXT:    pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
509; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
510;
511; AVX1-LABEL: test_mm_cmpeq_epi16:
512; AVX1:       # %bb.0:
513; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
514; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
515;
516; AVX512-LABEL: test_mm_cmpeq_epi16:
517; AVX512:       # %bb.0:
518; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
519; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
520; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
521  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
522  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
523  %cmp = icmp eq <8 x i16> %arg0, %arg1
524  %res = sext <8 x i1> %cmp to <8 x i16>
525  %bc = bitcast <8 x i16> %res to <2 x i64>
526  ret <2 x i64> %bc
527}
528
529define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
530; SSE-LABEL: test_mm_cmpeq_epi32:
531; SSE:       # %bb.0:
532; SSE-NEXT:    pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
533; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
534;
535; AVX1-LABEL: test_mm_cmpeq_epi32:
536; AVX1:       # %bb.0:
537; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
538; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
539;
540; AVX512-LABEL: test_mm_cmpeq_epi32:
541; AVX512:       # %bb.0:
542; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
543; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
544; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
545  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
546  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
547  %cmp = icmp eq <4 x i32> %arg0, %arg1
548  %res = sext <4 x i1> %cmp to <4 x i32>
549  %bc = bitcast <4 x i32> %res to <2 x i64>
550  ret <2 x i64> %bc
551}
552
553define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
554; SSE-LABEL: test_mm_cmpeq_pd:
555; SSE:       # %bb.0:
556; SSE-NEXT:    cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
557; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
558;
559; AVX1-LABEL: test_mm_cmpeq_pd:
560; AVX1:       # %bb.0:
561; AVX1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
562; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
563;
564; AVX512-LABEL: test_mm_cmpeq_pd:
565; AVX512:       # %bb.0:
566; AVX512-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
567; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
568; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
569  %fcmp = fcmp oeq <2 x double> %a0, %a1
570  %sext = sext <2 x i1> %fcmp to <2 x i64>
571  %res = bitcast <2 x i64> %sext to <2 x double>
572  ret <2 x double> %res
573}
574
575define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
576; SSE-LABEL: test_mm_cmpeq_sd:
577; SSE:       # %bb.0:
578; SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
579; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
580;
581; AVX-LABEL: test_mm_cmpeq_sd:
582; AVX:       # %bb.0:
583; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
584; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
585  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
586  ret <2 x double> %res
587}
588declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
589
590define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
591; SSE-LABEL: test_mm_cmpge_pd:
592; SSE:       # %bb.0:
593; SSE-NEXT:    cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
594; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
595; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
596;
597; AVX1-LABEL: test_mm_cmpge_pd:
598; AVX1:       # %bb.0:
599; AVX1-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
600; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
601;
602; AVX512-LABEL: test_mm_cmpge_pd:
603; AVX512:       # %bb.0:
604; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
605; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
606; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
607  %fcmp = fcmp ole <2 x double> %a1, %a0
608  %sext = sext <2 x i1> %fcmp to <2 x i64>
609  %res = bitcast <2 x i64> %sext to <2 x double>
610  ret <2 x double> %res
611}
612
613define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
614; SSE-LABEL: test_mm_cmpge_sd:
615; SSE:       # %bb.0:
616; SSE-NEXT:    cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
617; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
618; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
619; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
620;
621; AVX-LABEL: test_mm_cmpge_sd:
622; AVX:       # %bb.0:
623; AVX-NEXT:    vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
624; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
625; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
626; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
627  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
628  %ext0 = extractelement <2 x double> %cmp, i32 0
629  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
630  %ext1 = extractelement <2 x double> %a0, i32 1
631  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
632  ret <2 x double> %ins1
633}
634
635define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
636; SSE-LABEL: test_mm_cmpgt_epi8:
637; SSE:       # %bb.0:
638; SSE-NEXT:    pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
639; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
640;
641; AVX1-LABEL: test_mm_cmpgt_epi8:
642; AVX1:       # %bb.0:
643; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
644; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
645;
646; AVX512-LABEL: test_mm_cmpgt_epi8:
647; AVX512:       # %bb.0:
648; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
649; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
650; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
651  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
652  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
653  %cmp = icmp sgt <16 x i8> %arg0, %arg1
654  %res = sext <16 x i1> %cmp to <16 x i8>
655  %bc = bitcast <16 x i8> %res to <2 x i64>
656  ret <2 x i64> %bc
657}
658
659define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
660; SSE-LABEL: test_mm_cmpgt_epi16:
661; SSE:       # %bb.0:
662; SSE-NEXT:    pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
663; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
664;
665; AVX1-LABEL: test_mm_cmpgt_epi16:
666; AVX1:       # %bb.0:
667; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
668; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
669;
670; AVX512-LABEL: test_mm_cmpgt_epi16:
671; AVX512:       # %bb.0:
672; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
673; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
674; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
675  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
676  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
677  %cmp = icmp sgt <8 x i16> %arg0, %arg1
678  %res = sext <8 x i1> %cmp to <8 x i16>
679  %bc = bitcast <8 x i16> %res to <2 x i64>
680  ret <2 x i64> %bc
681}
682
683define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
684; SSE-LABEL: test_mm_cmpgt_epi32:
685; SSE:       # %bb.0:
686; SSE-NEXT:    pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
687; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
688;
689; AVX1-LABEL: test_mm_cmpgt_epi32:
690; AVX1:       # %bb.0:
691; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
692; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
693;
694; AVX512-LABEL: test_mm_cmpgt_epi32:
695; AVX512:       # %bb.0:
696; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
697; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
698; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
699  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
700  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
701  %cmp = icmp sgt <4 x i32> %arg0, %arg1
702  %res = sext <4 x i1> %cmp to <4 x i32>
703  %bc = bitcast <4 x i32> %res to <2 x i64>
704  ret <2 x i64> %bc
705}
706
707define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
708; SSE-LABEL: test_mm_cmpgt_pd:
709; SSE:       # %bb.0:
710; SSE-NEXT:    cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
711; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
712; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
713;
714; AVX1-LABEL: test_mm_cmpgt_pd:
715; AVX1:       # %bb.0:
716; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
717; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
718;
719; AVX512-LABEL: test_mm_cmpgt_pd:
720; AVX512:       # %bb.0:
721; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
722; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
723; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
724  %fcmp = fcmp olt <2 x double> %a1, %a0
725  %sext = sext <2 x i1> %fcmp to <2 x i64>
726  %res = bitcast <2 x i64> %sext to <2 x double>
727  ret <2 x double> %res
728}
729
730define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
731; SSE-LABEL: test_mm_cmpgt_sd:
732; SSE:       # %bb.0:
733; SSE-NEXT:    cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
734; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
735; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
736; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
737;
738; AVX-LABEL: test_mm_cmpgt_sd:
739; AVX:       # %bb.0:
740; AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
741; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
742; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
743; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
744  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
745  %ext0 = extractelement <2 x double> %cmp, i32 0
746  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
747  %ext1 = extractelement <2 x double> %a0, i32 1
748  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
749  ret <2 x double> %ins1
750}
751
752define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
753; SSE-LABEL: test_mm_cmple_pd:
754; SSE:       # %bb.0:
755; SSE-NEXT:    cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
756; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
757;
758; AVX1-LABEL: test_mm_cmple_pd:
759; AVX1:       # %bb.0:
760; AVX1-NEXT:    vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
761; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
762;
763; AVX512-LABEL: test_mm_cmple_pd:
764; AVX512:       # %bb.0:
765; AVX512-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
766; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
767; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
768  %fcmp = fcmp ole <2 x double> %a0, %a1
769  %sext = sext <2 x i1> %fcmp to <2 x i64>
770  %res = bitcast <2 x i64> %sext to <2 x double>
771  ret <2 x double> %res
772}
773
774define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
775; SSE-LABEL: test_mm_cmple_sd:
776; SSE:       # %bb.0:
777; SSE-NEXT:    cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
778; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
779;
780; AVX-LABEL: test_mm_cmple_sd:
781; AVX:       # %bb.0:
782; AVX-NEXT:    vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
783; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
784  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
785  ret <2 x double> %res
786}
787
788define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
789; SSE-LABEL: test_mm_cmplt_epi8:
790; SSE:       # %bb.0:
791; SSE-NEXT:    pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
792; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
793; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
794;
795; AVX1-LABEL: test_mm_cmplt_epi8:
796; AVX1:       # %bb.0:
797; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
798; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
799;
800; AVX512-LABEL: test_mm_cmplt_epi8:
801; AVX512:       # %bb.0:
802; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
803; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
804; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
805  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
806  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
807  %cmp = icmp sgt <16 x i8> %arg1, %arg0
808  %res = sext <16 x i1> %cmp to <16 x i8>
809  %bc = bitcast <16 x i8> %res to <2 x i64>
810  ret <2 x i64> %bc
811}
812
813define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
814; SSE-LABEL: test_mm_cmplt_epi16:
815; SSE:       # %bb.0:
816; SSE-NEXT:    pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
817; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
818; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
819;
820; AVX1-LABEL: test_mm_cmplt_epi16:
821; AVX1:       # %bb.0:
822; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
823; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
824;
825; AVX512-LABEL: test_mm_cmplt_epi16:
826; AVX512:       # %bb.0:
827; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
828; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
829; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
830  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
831  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
832  %cmp = icmp sgt <8 x i16> %arg1, %arg0
833  %res = sext <8 x i1> %cmp to <8 x i16>
834  %bc = bitcast <8 x i16> %res to <2 x i64>
835  ret <2 x i64> %bc
836}
837
838define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
839; SSE-LABEL: test_mm_cmplt_epi32:
840; SSE:       # %bb.0:
841; SSE-NEXT:    pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
842; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
843; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
844;
845; AVX1-LABEL: test_mm_cmplt_epi32:
846; AVX1:       # %bb.0:
847; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
848; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
849;
850; AVX512-LABEL: test_mm_cmplt_epi32:
851; AVX512:       # %bb.0:
852; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
853; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
854; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
855  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
856  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
857  %cmp = icmp sgt <4 x i32> %arg1, %arg0
858  %res = sext <4 x i1> %cmp to <4 x i32>
859  %bc = bitcast <4 x i32> %res to <2 x i64>
860  ret <2 x i64> %bc
861}
862
863define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
864; SSE-LABEL: test_mm_cmplt_pd:
865; SSE:       # %bb.0:
866; SSE-NEXT:    cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
867; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
868;
869; AVX1-LABEL: test_mm_cmplt_pd:
870; AVX1:       # %bb.0:
871; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
872; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
873;
874; AVX512-LABEL: test_mm_cmplt_pd:
875; AVX512:       # %bb.0:
876; AVX512-NEXT:    vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
877; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
878; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
879  %fcmp = fcmp olt <2 x double> %a0, %a1
880  %sext = sext <2 x i1> %fcmp to <2 x i64>
881  %res = bitcast <2 x i64> %sext to <2 x double>
882  ret <2 x double> %res
883}
884
885define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
886; SSE-LABEL: test_mm_cmplt_sd:
887; SSE:       # %bb.0:
888; SSE-NEXT:    cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
889; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
890;
891; AVX-LABEL: test_mm_cmplt_sd:
892; AVX:       # %bb.0:
893; AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
894; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
895  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
896  ret <2 x double> %res
897}
898
899define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
900; SSE-LABEL: test_mm_cmpneq_pd:
901; SSE:       # %bb.0:
902; SSE-NEXT:    cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
903; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
904;
905; AVX1-LABEL: test_mm_cmpneq_pd:
906; AVX1:       # %bb.0:
907; AVX1-NEXT:    vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
908; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
909;
910; AVX512-LABEL: test_mm_cmpneq_pd:
911; AVX512:       # %bb.0:
912; AVX512-NEXT:    vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
913; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
914; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
915  %fcmp = fcmp une <2 x double> %a0, %a1
916  %sext = sext <2 x i1> %fcmp to <2 x i64>
917  %res = bitcast <2 x i64> %sext to <2 x double>
918  ret <2 x double> %res
919}
920
921define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
922; SSE-LABEL: test_mm_cmpneq_sd:
923; SSE:       # %bb.0:
924; SSE-NEXT:    cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
925; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
926;
927; AVX-LABEL: test_mm_cmpneq_sd:
928; AVX:       # %bb.0:
929; AVX-NEXT:    vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
930; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
931  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
932  ret <2 x double> %res
933}
934
935define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
936; SSE-LABEL: test_mm_cmpnge_pd:
937; SSE:       # %bb.0:
938; SSE-NEXT:    cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
939; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
940; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
941;
942; AVX1-LABEL: test_mm_cmpnge_pd:
943; AVX1:       # %bb.0:
944; AVX1-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
945; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
946;
947; AVX512-LABEL: test_mm_cmpnge_pd:
948; AVX512:       # %bb.0:
949; AVX512-NEXT:    vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
950; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
951; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
952  %fcmp = fcmp ugt <2 x double> %a1, %a0
953  %sext = sext <2 x i1> %fcmp to <2 x i64>
954  %res = bitcast <2 x i64> %sext to <2 x double>
955  ret <2 x double> %res
956}
957
958define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
959; SSE-LABEL: test_mm_cmpnge_sd:
960; SSE:       # %bb.0:
961; SSE-NEXT:    cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
962; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
963; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
964; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
965;
966; AVX-LABEL: test_mm_cmpnge_sd:
967; AVX:       # %bb.0:
968; AVX-NEXT:    vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
969; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
970; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
971; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
972  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
973  %ext0 = extractelement <2 x double> %cmp, i32 0
974  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
975  %ext1 = extractelement <2 x double> %a0, i32 1
976  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
977  ret <2 x double> %ins1
978}
979
980define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
981; SSE-LABEL: test_mm_cmpngt_pd:
982; SSE:       # %bb.0:
983; SSE-NEXT:    cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
984; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
985; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
986;
987; AVX1-LABEL: test_mm_cmpngt_pd:
988; AVX1:       # %bb.0:
989; AVX1-NEXT:    vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
990; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
991;
992; AVX512-LABEL: test_mm_cmpngt_pd:
993; AVX512:       # %bb.0:
994; AVX512-NEXT:    vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
995; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
996; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
997  %fcmp = fcmp uge <2 x double> %a1, %a0
998  %sext = sext <2 x i1> %fcmp to <2 x i64>
999  %res = bitcast <2 x i64> %sext to <2 x double>
1000  ret <2 x double> %res
1001}
1002
1003define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1004; SSE-LABEL: test_mm_cmpngt_sd:
1005; SSE:       # %bb.0:
1006; SSE-NEXT:    cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
1007; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
1008; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
1009; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1010;
1011; AVX-LABEL: test_mm_cmpngt_sd:
1012; AVX:       # %bb.0:
1013; AVX-NEXT:    vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
1014; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
1015; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
1016; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1017  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
1018  %ext0 = extractelement <2 x double> %cmp, i32 0
1019  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
1020  %ext1 = extractelement <2 x double> %a0, i32 1
1021  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
1022  ret <2 x double> %ins1
1023}
1024
1025define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1026; SSE-LABEL: test_mm_cmpnle_pd:
1027; SSE:       # %bb.0:
1028; SSE-NEXT:    cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
1029; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1030;
1031; AVX1-LABEL: test_mm_cmpnle_pd:
1032; AVX1:       # %bb.0:
1033; AVX1-NEXT:    vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
1034; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1035;
1036; AVX512-LABEL: test_mm_cmpnle_pd:
1037; AVX512:       # %bb.0:
1038; AVX512-NEXT:    vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
1039; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1040; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1041  %fcmp = fcmp ugt <2 x double> %a0, %a1
1042  %sext = sext <2 x i1> %fcmp to <2 x i64>
1043  %res = bitcast <2 x i64> %sext to <2 x double>
1044  ret <2 x double> %res
1045}
1046
1047define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1048; SSE-LABEL: test_mm_cmpnle_sd:
1049; SSE:       # %bb.0:
1050; SSE-NEXT:    cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
1051; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1052;
1053; AVX-LABEL: test_mm_cmpnle_sd:
1054; AVX:       # %bb.0:
1055; AVX-NEXT:    vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
1056; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1057  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
1058  ret <2 x double> %res
1059}
1060
1061define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1062; SSE-LABEL: test_mm_cmpnlt_pd:
1063; SSE:       # %bb.0:
1064; SSE-NEXT:    cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
1065; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1066;
1067; AVX1-LABEL: test_mm_cmpnlt_pd:
1068; AVX1:       # %bb.0:
1069; AVX1-NEXT:    vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
1070; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1071;
1072; AVX512-LABEL: test_mm_cmpnlt_pd:
1073; AVX512:       # %bb.0:
1074; AVX512-NEXT:    vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
1075; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1076; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1077  %fcmp = fcmp uge <2 x double> %a0, %a1
1078  %sext = sext <2 x i1> %fcmp to <2 x i64>
1079  %res = bitcast <2 x i64> %sext to <2 x double>
1080  ret <2 x double> %res
1081}
1082
1083define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1084; SSE-LABEL: test_mm_cmpnlt_sd:
1085; SSE:       # %bb.0:
1086; SSE-NEXT:    cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
1087; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1088;
1089; AVX-LABEL: test_mm_cmpnlt_sd:
1090; AVX:       # %bb.0:
1091; AVX-NEXT:    vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
1092; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1093  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
1094  ret <2 x double> %res
1095}
1096
1097define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1098; SSE-LABEL: test_mm_cmpord_pd:
1099; SSE:       # %bb.0:
1100; SSE-NEXT:    cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
1101; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1102;
1103; AVX1-LABEL: test_mm_cmpord_pd:
1104; AVX1:       # %bb.0:
1105; AVX1-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
1106; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1107;
1108; AVX512-LABEL: test_mm_cmpord_pd:
1109; AVX512:       # %bb.0:
1110; AVX512-NEXT:    vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
1111; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1112; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1113  %fcmp = fcmp ord <2 x double> %a0, %a1
1114  %sext = sext <2 x i1> %fcmp to <2 x i64>
1115  %res = bitcast <2 x i64> %sext to <2 x double>
1116  ret <2 x double> %res
1117}
1118
1119define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1120; SSE-LABEL: test_mm_cmpord_sd:
1121; SSE:       # %bb.0:
1122; SSE-NEXT:    cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
1123; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1124;
1125; AVX-LABEL: test_mm_cmpord_sd:
1126; AVX:       # %bb.0:
1127; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
1128; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1129  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
1130  ret <2 x double> %res
1131}
1132
1133define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1134; SSE-LABEL: test_mm_cmpunord_pd:
1135; SSE:       # %bb.0:
1136; SSE-NEXT:    cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
1137; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1138;
1139; AVX1-LABEL: test_mm_cmpunord_pd:
1140; AVX1:       # %bb.0:
1141; AVX1-NEXT:    vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
1142; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1143;
1144; AVX512-LABEL: test_mm_cmpunord_pd:
1145; AVX512:       # %bb.0:
1146; AVX512-NEXT:    vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
1147; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1148; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1149  %fcmp = fcmp uno <2 x double> %a0, %a1
1150  %sext = sext <2 x i1> %fcmp to <2 x i64>
1151  %res = bitcast <2 x i64> %sext to <2 x double>
1152  ret <2 x double> %res
1153}
1154
1155define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1156; SSE-LABEL: test_mm_cmpunord_sd:
1157; SSE:       # %bb.0:
1158; SSE-NEXT:    cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
1159; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1160;
1161; AVX-LABEL: test_mm_cmpunord_sd:
1162; AVX:       # %bb.0:
1163; AVX-NEXT:    vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
1164; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1165  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
1166  ret <2 x double> %res
1167}
1168
1169define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1170; SSE-LABEL: test_mm_comieq_sd:
1171; SSE:       # %bb.0:
1172; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
1173; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
1174; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
1175; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
1176; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1177; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1178;
1179; AVX1-LABEL: test_mm_comieq_sd:
1180; AVX1:       # %bb.0:
1181; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
1182; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
1183; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
1184; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
1185; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1186; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1187;
1188; AVX512-LABEL: test_mm_comieq_sd:
1189; AVX512:       # %bb.0:
1190; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
1191; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
1192; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
1193; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
1194; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1195; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1196  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
1197  ret i32 %res
1198}
1199declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
1200
1201define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1202; SSE-LABEL: test_mm_comige_sd:
1203; SSE:       # %bb.0:
1204; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1205; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
1206; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1207; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1208;
1209; AVX1-LABEL: test_mm_comige_sd:
1210; AVX1:       # %bb.0:
1211; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1212; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
1213; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1214; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1215;
1216; AVX512-LABEL: test_mm_comige_sd:
1217; AVX512:       # %bb.0:
1218; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1219; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
1220; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1221; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1222  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
1223  ret i32 %res
1224}
1225declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
1226
1227define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1228; SSE-LABEL: test_mm_comigt_sd:
1229; SSE:       # %bb.0:
1230; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1231; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
1232; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1233; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1234;
1235; AVX1-LABEL: test_mm_comigt_sd:
1236; AVX1:       # %bb.0:
1237; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1238; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
1239; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1240; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1241;
1242; AVX512-LABEL: test_mm_comigt_sd:
1243; AVX512:       # %bb.0:
1244; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1245; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
1246; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1247; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1248  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
1249  ret i32 %res
1250}
1251declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
1252
1253define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1254; SSE-LABEL: test_mm_comile_sd:
1255; SSE:       # %bb.0:
1256; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1257; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
1258; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1259; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1260;
1261; AVX1-LABEL: test_mm_comile_sd:
1262; AVX1:       # %bb.0:
1263; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1264; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
1265; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1266; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1267;
1268; AVX512-LABEL: test_mm_comile_sd:
1269; AVX512:       # %bb.0:
1270; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1271; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
1272; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
1273; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1274  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1275  ret i32 %res
1276}
1277declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1278
1279define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1280; SSE-LABEL: test_mm_comilt_sd:
1281; SSE:       # %bb.0:
1282; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1283; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
1284; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1285; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1286;
1287; AVX1-LABEL: test_mm_comilt_sd:
1288; AVX1:       # %bb.0:
1289; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1290; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
1291; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1292; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1293;
1294; AVX512-LABEL: test_mm_comilt_sd:
1295; AVX512:       # %bb.0:
1296; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
1297; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
1298; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
1299; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1300  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1301  ret i32 %res
1302}
1303declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1304
1305define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1306; SSE-LABEL: test_mm_comineq_sd:
1307; SSE:       # %bb.0:
1308; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
1309; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
1310; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
1311; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
1312; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1313; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1314;
1315; AVX1-LABEL: test_mm_comineq_sd:
1316; AVX1:       # %bb.0:
1317; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
1318; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
1319; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
1320; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
1321; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1322; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1323;
1324; AVX512-LABEL: test_mm_comineq_sd:
1325; AVX512:       # %bb.0:
1326; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
1327; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
1328; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
1329; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
1330; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
1331; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1332  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1333  ret i32 %res
1334}
1335declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1336
1337define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1338; SSE-LABEL: test_mm_cvtepi32_pd:
1339; SSE:       # %bb.0:
1340; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
1341; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1342;
1343; AVX1-LABEL: test_mm_cvtepi32_pd:
1344; AVX1:       # %bb.0:
1345; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
1346; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1347;
1348; AVX512-LABEL: test_mm_cvtepi32_pd:
1349; AVX512:       # %bb.0:
1350; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
1351; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1352  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1353  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1354  %res = sitofp <2 x i32> %ext to <2 x double>
1355  ret <2 x double> %res
1356}
1357
1358define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1359; SSE-LABEL: test_mm_cvtepi32_ps:
1360; SSE:       # %bb.0:
1361; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
1362; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1363;
1364; AVX1-LABEL: test_mm_cvtepi32_ps:
1365; AVX1:       # %bb.0:
1366; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
1367; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1368;
1369; AVX512-LABEL: test_mm_cvtepi32_ps:
1370; AVX512:       # %bb.0:
1371; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
1372; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1373  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1374  %res = sitofp <4 x i32> %arg0 to <4 x float>
1375  ret <4 x float> %res
1376}
1377
1378define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1379; SSE-LABEL: test_mm_cvtpd_epi32:
1380; SSE:       # %bb.0:
1381; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
1382; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1383;
1384; AVX1-LABEL: test_mm_cvtpd_epi32:
1385; AVX1:       # %bb.0:
1386; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
1387; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1388;
1389; AVX512-LABEL: test_mm_cvtpd_epi32:
1390; AVX512:       # %bb.0:
1391; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
1392; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1393  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1394  %bc = bitcast <4 x i32> %res to <2 x i64>
1395  ret <2 x i64> %bc
1396}
1397declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1398
1399define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1400; SSE-LABEL: test_mm_cvtpd_ps:
1401; SSE:       # %bb.0:
1402; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
1403; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1404;
1405; AVX1-LABEL: test_mm_cvtpd_ps:
1406; AVX1:       # %bb.0:
1407; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
1408; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1409;
1410; AVX512-LABEL: test_mm_cvtpd_ps:
1411; AVX512:       # %bb.0:
1412; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
1413; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1414  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1415  ret <4 x float> %res
1416}
1417declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1418
1419define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1420; SSE-LABEL: test_mm_cvtps_epi32:
1421; SSE:       # %bb.0:
1422; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
1423; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1424;
1425; AVX1-LABEL: test_mm_cvtps_epi32:
1426; AVX1:       # %bb.0:
1427; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
1428; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1429;
1430; AVX512-LABEL: test_mm_cvtps_epi32:
1431; AVX512:       # %bb.0:
1432; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
1433; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1434  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1435  %bc = bitcast <4 x i32> %res to <2 x i64>
1436  ret <2 x i64> %bc
1437}
1438declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1439
1440define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1441; SSE-LABEL: test_mm_cvtps_pd:
1442; SSE:       # %bb.0:
1443; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
1444; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1445;
1446; AVX1-LABEL: test_mm_cvtps_pd:
1447; AVX1:       # %bb.0:
1448; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
1449; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1450;
1451; AVX512-LABEL: test_mm_cvtps_pd:
1452; AVX512:       # %bb.0:
1453; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
1454; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1455  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1456  %res = fpext <2 x float> %ext to <2 x double>
1457  ret <2 x double> %res
1458}
1459
1460define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1461; X86-SSE-LABEL: test_mm_cvtsd_f64:
1462; X86-SSE:       # %bb.0:
1463; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
1464; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
1465; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
1466; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
1467; X86-SSE-NEXT:    movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
1468; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
1469; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
1470; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
1471; X86-SSE-NEXT:    retl # encoding: [0xc3]
1472;
1473; X86-AVX1-LABEL: test_mm_cvtsd_f64:
1474; X86-AVX1:       # %bb.0:
1475; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
1476; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
1477; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
1478; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
1479; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
1480; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
1481; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
1482; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
1483; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1484;
1485; X86-AVX512-LABEL: test_mm_cvtsd_f64:
1486; X86-AVX512:       # %bb.0:
1487; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
1488; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
1489; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
1490; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
1491; X86-AVX512-NEXT:    vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
1492; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
1493; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
1494; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
1495; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1496;
1497; X64-LABEL: test_mm_cvtsd_f64:
1498; X64:       # %bb.0:
1499; X64-NEXT:    retq # encoding: [0xc3]
1500  %res = extractelement <2 x double> %a0, i32 0
1501  ret double %res
1502}
1503
1504define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1505; SSE-LABEL: test_mm_cvtsd_si32:
1506; SSE:       # %bb.0:
1507; SSE-NEXT:    cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
1508; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1509;
1510; AVX1-LABEL: test_mm_cvtsd_si32:
1511; AVX1:       # %bb.0:
1512; AVX1-NEXT:    vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
1513; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1514;
1515; AVX512-LABEL: test_mm_cvtsd_si32:
1516; AVX512:       # %bb.0:
1517; AVX512-NEXT:    vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
1518; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1519  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1520  ret i32 %res
1521}
1522declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1523
1524define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
1525; SSE-LABEL: test_mm_cvtsd_ss:
1526; SSE:       # %bb.0:
1527; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
1528; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1529;
1530; AVX1-LABEL: test_mm_cvtsd_ss:
1531; AVX1:       # %bb.0:
1532; AVX1-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
1533; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1534;
1535; AVX512-LABEL: test_mm_cvtsd_ss:
1536; AVX512:       # %bb.0:
1537; AVX512-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
1538; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1539  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1540  ret <4 x float> %res
1541}
1542declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
1543
1544define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
1545; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
1546; X86-SSE:       # %bb.0:
1547; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1548; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
1549; X86-SSE-NEXT:    retl # encoding: [0xc3]
1550;
1551; X86-AVX1-LABEL: test_mm_cvtsd_ss_load:
1552; X86-AVX1:       # %bb.0:
1553; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1554; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
1555; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1556;
1557; X86-AVX512-LABEL: test_mm_cvtsd_ss_load:
1558; X86-AVX512:       # %bb.0:
1559; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1560; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
1561; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1562;
1563; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
1564; X64-SSE:       # %bb.0:
1565; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
1566; X64-SSE-NEXT:    retq # encoding: [0xc3]
1567;
1568; X64-AVX1-LABEL: test_mm_cvtsd_ss_load:
1569; X64-AVX1:       # %bb.0:
1570; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
1571; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1572;
1573; X64-AVX512-LABEL: test_mm_cvtsd_ss_load:
1574; X64-AVX512:       # %bb.0:
1575; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
1576; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1577  %a1 = load <2 x double>, <2 x double>* %p1
1578  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1579  ret <4 x float> %res
1580}
1581
1582define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1583; SSE-LABEL: test_mm_cvtsi128_si32:
1584; SSE:       # %bb.0:
1585; SSE-NEXT:    movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
1586; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1587;
1588; AVX1-LABEL: test_mm_cvtsi128_si32:
1589; AVX1:       # %bb.0:
1590; AVX1-NEXT:    vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
1591; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1592;
1593; AVX512-LABEL: test_mm_cvtsi128_si32:
1594; AVX512:       # %bb.0:
1595; AVX512-NEXT:    vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
1596; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1597  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1598  %res = extractelement <4 x i32> %arg0, i32 0
1599  ret i32 %res
1600}
1601
1602define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1603; X86-SSE-LABEL: test_mm_cvtsi32_sd:
1604; X86-SSE:       # %bb.0:
1605; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
1606; X86-SSE-NEXT:    retl # encoding: [0xc3]
1607;
1608; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
1609; X86-AVX1:       # %bb.0:
1610; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
1611; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1612;
1613; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
1614; X86-AVX512:       # %bb.0:
1615; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
1616; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1617;
1618; X64-SSE-LABEL: test_mm_cvtsi32_sd:
1619; X64-SSE:       # %bb.0:
1620; X64-SSE-NEXT:    cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
1621; X64-SSE-NEXT:    retq # encoding: [0xc3]
1622;
1623; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
1624; X64-AVX1:       # %bb.0:
1625; X64-AVX1-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
1626; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1627;
1628; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
1629; X64-AVX512:       # %bb.0:
1630; X64-AVX512-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
1631; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1632  %cvt = sitofp i32 %a1 to double
1633  %res = insertelement <2 x double> %a0, double %cvt, i32 0
1634  ret <2 x double> %res
1635}
1636
1637define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1638; X86-SSE-LABEL: test_mm_cvtsi32_si128:
1639; X86-SSE:       # %bb.0:
1640; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
1641; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
1642; X86-SSE-NEXT:    retl # encoding: [0xc3]
1643;
1644; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
1645; X86-AVX1:       # %bb.0:
1646; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
1647; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
1648; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1649;
1650; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
1651; X86-AVX512:       # %bb.0:
1652; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
1653; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
1654; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1655;
1656; X64-SSE-LABEL: test_mm_cvtsi32_si128:
1657; X64-SSE:       # %bb.0:
1658; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
1659; X64-SSE-NEXT:    retq # encoding: [0xc3]
1660;
1661; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
1662; X64-AVX1:       # %bb.0:
1663; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
1664; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1665;
1666; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
1667; X64-AVX512:       # %bb.0:
1668; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
1669; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1670  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1671  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1672  %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1673  %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1674  %res = bitcast <4 x i32> %res3 to <2 x i64>
1675  ret <2 x i64> %res
1676}
1677
1678define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1679; SSE-LABEL: test_mm_cvtss_sd:
1680; SSE:       # %bb.0:
1681; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
1682; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1683;
1684; AVX1-LABEL: test_mm_cvtss_sd:
1685; AVX1:       # %bb.0:
1686; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
1687; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1688;
1689; AVX512-LABEL: test_mm_cvtss_sd:
1690; AVX512:       # %bb.0:
1691; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
1692; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1693  %ext = extractelement <4 x float> %a1, i32 0
1694  %cvt = fpext float %ext to double
1695  %res = insertelement <2 x double> %a0, double %cvt, i32 0
1696  ret <2 x double> %res
1697}
1698
1699define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1700; SSE-LABEL: test_mm_cvttpd_epi32:
1701; SSE:       # %bb.0:
1702; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
1703; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1704;
1705; AVX1-LABEL: test_mm_cvttpd_epi32:
1706; AVX1:       # %bb.0:
1707; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
1708; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1709;
1710; AVX512-LABEL: test_mm_cvttpd_epi32:
1711; AVX512:       # %bb.0:
1712; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
1713; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1714  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1715  %bc = bitcast <4 x i32> %res to <2 x i64>
1716  ret <2 x i64> %bc
1717}
1718declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1719
1720define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1721; SSE-LABEL: test_mm_cvttps_epi32:
1722; SSE:       # %bb.0:
1723; SSE-NEXT:    cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
1724; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1725;
1726; AVX1-LABEL: test_mm_cvttps_epi32:
1727; AVX1:       # %bb.0:
1728; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
1729; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1730;
1731; AVX512-LABEL: test_mm_cvttps_epi32:
1732; AVX512:       # %bb.0:
1733; AVX512-NEXT:    vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
1734; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1735  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1736  %bc = bitcast <4 x i32> %res to <2 x i64>
1737  ret <2 x i64> %bc
1738}
1739declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1740
1741define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1742; SSE-LABEL: test_mm_cvttsd_si32:
1743; SSE:       # %bb.0:
1744; SSE-NEXT:    cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
1745; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1746;
1747; AVX1-LABEL: test_mm_cvttsd_si32:
1748; AVX1:       # %bb.0:
1749; AVX1-NEXT:    vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
1750; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1751;
1752; AVX512-LABEL: test_mm_cvttsd_si32:
1753; AVX512:       # %bb.0:
1754; AVX512-NEXT:    vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
1755; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1756  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1757  ret i32 %res
1758}
1759declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
1760
1761define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1762; SSE-LABEL: test_mm_div_pd:
1763; SSE:       # %bb.0:
1764; SSE-NEXT:    divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
1765; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1766;
1767; AVX1-LABEL: test_mm_div_pd:
1768; AVX1:       # %bb.0:
1769; AVX1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
1770; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1771;
1772; AVX512-LABEL: test_mm_div_pd:
1773; AVX512:       # %bb.0:
1774; AVX512-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
1775; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1776  %res = fdiv <2 x double> %a0, %a1
1777  ret <2 x double> %res
1778}
1779
1780define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1781; SSE-LABEL: test_mm_div_sd:
1782; SSE:       # %bb.0:
1783; SSE-NEXT:    divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
1784; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1785;
1786; AVX1-LABEL: test_mm_div_sd:
1787; AVX1:       # %bb.0:
1788; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
1789; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1790;
1791; AVX512-LABEL: test_mm_div_sd:
1792; AVX512:       # %bb.0:
1793; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
1794; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1795  %ext0 = extractelement <2 x double> %a0, i32 0
1796  %ext1 = extractelement <2 x double> %a1, i32 0
1797  %fdiv = fdiv double %ext0, %ext1
1798  %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1799  ret <2 x double> %res
1800}
1801
1802define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1803; SSE-LABEL: test_mm_extract_epi16:
1804; SSE:       # %bb.0:
1805; SSE-NEXT:    pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
1806; SSE-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1807; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1808;
1809; AVX1-LABEL: test_mm_extract_epi16:
1810; AVX1:       # %bb.0:
1811; AVX1-NEXT:    vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
1812; AVX1-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1813; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1814;
1815; AVX512-LABEL: test_mm_extract_epi16:
1816; AVX512:       # %bb.0:
1817; AVX512-NEXT:    vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
1818; AVX512-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1819; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1820  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1821  %ext = extractelement <8 x i16> %arg0, i32 1
1822  %res = zext i16 %ext to i32
1823  ret i32 %res
1824}
1825
1826define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1827; X86-SSE-LABEL: test_mm_insert_epi16:
1828; X86-SSE:       # %bb.0:
1829; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1830; X86-SSE-NEXT:    pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
1831; X86-SSE-NEXT:    retl # encoding: [0xc3]
1832;
1833; X86-AVX1-LABEL: test_mm_insert_epi16:
1834; X86-AVX1:       # %bb.0:
1835; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1836; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
1837; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1838;
1839; X86-AVX512-LABEL: test_mm_insert_epi16:
1840; X86-AVX512:       # %bb.0:
1841; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1842; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
1843; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1844;
1845; X64-SSE-LABEL: test_mm_insert_epi16:
1846; X64-SSE:       # %bb.0:
1847; X64-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
1848; X64-SSE-NEXT:    retq # encoding: [0xc3]
1849;
1850; X64-AVX1-LABEL: test_mm_insert_epi16:
1851; X64-AVX1:       # %bb.0:
1852; X64-AVX1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1853; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1854;
1855; X64-AVX512-LABEL: test_mm_insert_epi16:
1856; X64-AVX512:       # %bb.0:
1857; X64-AVX512-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1858; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1859  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1860  %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1861  %bc = bitcast <8 x i16> %res to <2 x i64>
1862  ret <2 x i64> %bc
1863}
1864
1865define void @test_mm_lfence() nounwind {
1866; CHECK-LABEL: test_mm_lfence:
1867; CHECK:       # %bb.0:
1868; CHECK-NEXT:    lfence # encoding: [0x0f,0xae,0xe8]
1869; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1870  call void @llvm.x86.sse2.lfence()
1871  ret void
1872}
1873declare void @llvm.x86.sse2.lfence() nounwind readnone
1874
1875define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1876; X86-SSE-LABEL: test_mm_load_pd:
1877; X86-SSE:       # %bb.0:
1878; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1879; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
1880; X86-SSE-NEXT:    retl # encoding: [0xc3]
1881;
1882; X86-AVX1-LABEL: test_mm_load_pd:
1883; X86-AVX1:       # %bb.0:
1884; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1885; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
1886; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1887;
1888; X86-AVX512-LABEL: test_mm_load_pd:
1889; X86-AVX512:       # %bb.0:
1890; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1891; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
1892; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1893;
1894; X64-SSE-LABEL: test_mm_load_pd:
1895; X64-SSE:       # %bb.0:
1896; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
1897; X64-SSE-NEXT:    retq # encoding: [0xc3]
1898;
1899; X64-AVX1-LABEL: test_mm_load_pd:
1900; X64-AVX1:       # %bb.0:
1901; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
1902; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1903;
1904; X64-AVX512-LABEL: test_mm_load_pd:
1905; X64-AVX512:       # %bb.0:
1906; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
1907; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1908  %arg0 = bitcast double* %a0 to <2 x double>*
1909  %res = load <2 x double>, <2 x double>* %arg0, align 16
1910  ret <2 x double> %res
1911}
1912
1913define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1914; X86-SSE-LABEL: test_mm_load_sd:
1915; X86-SSE:       # %bb.0:
1916; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1917; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
1918; X86-SSE-NEXT:    # xmm0 = mem[0],zero
1919; X86-SSE-NEXT:    retl # encoding: [0xc3]
1920;
1921; X86-AVX1-LABEL: test_mm_load_sd:
1922; X86-AVX1:       # %bb.0:
1923; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1924; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
1925; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
1926; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1927;
1928; X86-AVX512-LABEL: test_mm_load_sd:
1929; X86-AVX512:       # %bb.0:
1930; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1931; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
1932; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
1933; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1934;
1935; X64-SSE-LABEL: test_mm_load_sd:
1936; X64-SSE:       # %bb.0:
1937; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
1938; X64-SSE-NEXT:    # xmm0 = mem[0],zero
1939; X64-SSE-NEXT:    retq # encoding: [0xc3]
1940;
1941; X64-AVX1-LABEL: test_mm_load_sd:
1942; X64-AVX1:       # %bb.0:
1943; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
1944; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
1945; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1946;
1947; X64-AVX512-LABEL: test_mm_load_sd:
1948; X64-AVX512:       # %bb.0:
1949; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
1950; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
1951; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1952  %ld = load double, double* %a0, align 1
1953  %res0 = insertelement <2 x double> undef, double %ld, i32 0
1954  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1955  ret <2 x double> %res1
1956}
1957
1958define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1959; X86-SSE-LABEL: test_mm_load_si128:
1960; X86-SSE:       # %bb.0:
1961; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1962; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
1963; X86-SSE-NEXT:    retl # encoding: [0xc3]
1964;
1965; X86-AVX1-LABEL: test_mm_load_si128:
1966; X86-AVX1:       # %bb.0:
1967; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1968; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
1969; X86-AVX1-NEXT:    retl # encoding: [0xc3]
1970;
1971; X86-AVX512-LABEL: test_mm_load_si128:
1972; X86-AVX512:       # %bb.0:
1973; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1974; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
1975; X86-AVX512-NEXT:    retl # encoding: [0xc3]
1976;
1977; X64-SSE-LABEL: test_mm_load_si128:
1978; X64-SSE:       # %bb.0:
1979; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
1980; X64-SSE-NEXT:    retq # encoding: [0xc3]
1981;
1982; X64-AVX1-LABEL: test_mm_load_si128:
1983; X64-AVX1:       # %bb.0:
1984; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
1985; X64-AVX1-NEXT:    retq # encoding: [0xc3]
1986;
1987; X64-AVX512-LABEL: test_mm_load_si128:
1988; X64-AVX512:       # %bb.0:
1989; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
1990; X64-AVX512-NEXT:    retq # encoding: [0xc3]
1991  %res = load <2 x i64>, <2 x i64>* %a0, align 16
1992  ret <2 x i64> %res
1993}
1994
1995define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1996; X86-SSE-LABEL: test_mm_load1_pd:
1997; X86-SSE:       # %bb.0:
1998; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1999; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
2000; X86-SSE-NEXT:    # xmm0 = mem[0],zero
2001; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
2002; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
2003; X86-SSE-NEXT:    retl # encoding: [0xc3]
2004;
2005; X86-AVX1-LABEL: test_mm_load1_pd:
2006; X86-AVX1:       # %bb.0:
2007; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2008; X86-AVX1-NEXT:    vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
2009; X86-AVX1-NEXT:    # xmm0 = mem[0,0]
2010; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2011;
2012; X86-AVX512-LABEL: test_mm_load1_pd:
2013; X86-AVX512:       # %bb.0:
2014; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2015; X86-AVX512-NEXT:    vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
2016; X86-AVX512-NEXT:    # xmm0 = mem[0,0]
2017; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2018;
2019; X64-SSE-LABEL: test_mm_load1_pd:
2020; X64-SSE:       # %bb.0:
2021; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
2022; X64-SSE-NEXT:    # xmm0 = mem[0],zero
2023; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
2024; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
2025; X64-SSE-NEXT:    retq # encoding: [0xc3]
2026;
2027; X64-AVX1-LABEL: test_mm_load1_pd:
2028; X64-AVX1:       # %bb.0:
2029; X64-AVX1-NEXT:    vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
2030; X64-AVX1-NEXT:    # xmm0 = mem[0,0]
2031; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2032;
2033; X64-AVX512-LABEL: test_mm_load1_pd:
2034; X64-AVX512:       # %bb.0:
2035; X64-AVX512-NEXT:    vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
2036; X64-AVX512-NEXT:    # xmm0 = mem[0,0]
2037; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2038  %ld = load double, double* %a0, align 8
2039  %res0 = insertelement <2 x double> undef, double %ld, i32 0
2040  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
2041  ret <2 x double> %res1
2042}
2043
2044define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
2045; X86-SSE-LABEL: test_mm_loadh_pd:
2046; X86-SSE:       # %bb.0:
2047; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2048; X86-SSE-NEXT:    movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
2049; X86-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2050; X86-SSE-NEXT:    retl # encoding: [0xc3]
2051;
2052; X86-AVX1-LABEL: test_mm_loadh_pd:
2053; X86-AVX1:       # %bb.0:
2054; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2055; X86-AVX1-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
2056; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2057; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2058;
2059; X86-AVX512-LABEL: test_mm_loadh_pd:
2060; X86-AVX512:       # %bb.0:
2061; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2062; X86-AVX512-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
2063; X86-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2064; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2065;
2066; X64-SSE-LABEL: test_mm_loadh_pd:
2067; X64-SSE:       # %bb.0:
2068; X64-SSE-NEXT:    movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
2069; X64-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2070; X64-SSE-NEXT:    retq # encoding: [0xc3]
2071;
2072; X64-AVX1-LABEL: test_mm_loadh_pd:
2073; X64-AVX1:       # %bb.0:
2074; X64-AVX1-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
2075; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2076; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2077;
2078; X64-AVX512-LABEL: test_mm_loadh_pd:
2079; X64-AVX512:       # %bb.0:
2080; X64-AVX512-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
2081; X64-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
2082; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2083  %ld = load double, double* %a1, align 8
2084  %res = insertelement <2 x double> %a0, double %ld, i32 1
2085  ret <2 x double> %res
2086}
2087
2088define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
2089; X86-SSE-LABEL: test_mm_loadl_epi64:
2090; X86-SSE:       # %bb.0:
2091; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2092; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
2093; X86-SSE-NEXT:    # xmm0 = mem[0],zero
2094; X86-SSE-NEXT:    retl # encoding: [0xc3]
2095;
2096; X86-AVX1-LABEL: test_mm_loadl_epi64:
2097; X86-AVX1:       # %bb.0:
2098; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2099; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
2100; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
2101; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2102;
2103; X86-AVX512-LABEL: test_mm_loadl_epi64:
2104; X86-AVX512:       # %bb.0:
2105; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2106; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
2107; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
2108; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2109;
2110; X64-SSE-LABEL: test_mm_loadl_epi64:
2111; X64-SSE:       # %bb.0:
2112; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
2113; X64-SSE-NEXT:    # xmm0 = mem[0],zero
2114; X64-SSE-NEXT:    retq # encoding: [0xc3]
2115;
2116; X64-AVX1-LABEL: test_mm_loadl_epi64:
2117; X64-AVX1:       # %bb.0:
2118; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
2119; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
2120; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2121;
2122; X64-AVX512-LABEL: test_mm_loadl_epi64:
2123; X64-AVX512:       # %bb.0:
2124; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
2125; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
2126; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2127  %bc = bitcast <2 x i64>* %a1 to i64*
2128  %ld = load i64, i64* %bc, align 1
2129  %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
2130  %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
2131  ret <2 x i64> %res1
2132}
2133
2134define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
2135; X86-SSE-LABEL: test_mm_loadl_pd:
2136; X86-SSE:       # %bb.0:
2137; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2138; X86-SSE-NEXT:    movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
2139; X86-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2140; X86-SSE-NEXT:    retl # encoding: [0xc3]
2141;
2142; X86-AVX1-LABEL: test_mm_loadl_pd:
2143; X86-AVX1:       # %bb.0:
2144; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2145; X86-AVX1-NEXT:    vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
2146; X86-AVX1-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2147; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2148;
2149; X86-AVX512-LABEL: test_mm_loadl_pd:
2150; X86-AVX512:       # %bb.0:
2151; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2152; X86-AVX512-NEXT:    vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
2153; X86-AVX512-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2154; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2155;
2156; X64-SSE-LABEL: test_mm_loadl_pd:
2157; X64-SSE:       # %bb.0:
2158; X64-SSE-NEXT:    movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
2159; X64-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2160; X64-SSE-NEXT:    retq # encoding: [0xc3]
2161;
2162; X64-AVX1-LABEL: test_mm_loadl_pd:
2163; X64-AVX1:       # %bb.0:
2164; X64-AVX1-NEXT:    vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
2165; X64-AVX1-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2166; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2167;
2168; X64-AVX512-LABEL: test_mm_loadl_pd:
2169; X64-AVX512:       # %bb.0:
2170; X64-AVX512-NEXT:    vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
2171; X64-AVX512-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
2172; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2173  %ld = load double, double* %a1, align 8
2174  %res = insertelement <2 x double> %a0, double %ld, i32 0
2175  ret <2 x double> %res
2176}
2177
2178define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
2179; X86-SSE-LABEL: test_mm_loadr_pd:
2180; X86-SSE:       # %bb.0:
2181; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2182; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
2183; X86-SSE-NEXT:    shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
2184; X86-SSE-NEXT:    # xmm0 = xmm0[2,3,0,1]
2185; X86-SSE-NEXT:    retl # encoding: [0xc3]
2186;
2187; X86-AVX1-LABEL: test_mm_loadr_pd:
2188; X86-AVX1:       # %bb.0:
2189; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2190; X86-AVX1-NEXT:    vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
2191; X86-AVX1-NEXT:    # xmm0 = mem[1,0]
2192; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2193;
2194; X86-AVX512-LABEL: test_mm_loadr_pd:
2195; X86-AVX512:       # %bb.0:
2196; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2197; X86-AVX512-NEXT:    vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
2198; X86-AVX512-NEXT:    # xmm0 = mem[1,0]
2199; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2200;
2201; X64-SSE-LABEL: test_mm_loadr_pd:
2202; X64-SSE:       # %bb.0:
2203; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
2204; X64-SSE-NEXT:    shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
2205; X64-SSE-NEXT:    # xmm0 = xmm0[2,3,0,1]
2206; X64-SSE-NEXT:    retq # encoding: [0xc3]
2207;
2208; X64-AVX1-LABEL: test_mm_loadr_pd:
2209; X64-AVX1:       # %bb.0:
2210; X64-AVX1-NEXT:    vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
2211; X64-AVX1-NEXT:    # xmm0 = mem[1,0]
2212; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2213;
2214; X64-AVX512-LABEL: test_mm_loadr_pd:
2215; X64-AVX512:       # %bb.0:
2216; X64-AVX512-NEXT:    vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
2217; X64-AVX512-NEXT:    # xmm0 = mem[1,0]
2218; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2219  %arg0 = bitcast double* %a0 to <2 x double>*
2220  %ld = load <2 x double>, <2 x double>* %arg0, align 16
2221  %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
2222  ret <2 x double> %res
2223}
2224
2225define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
2226; X86-SSE-LABEL: test_mm_loadu_pd:
2227; X86-SSE:       # %bb.0:
2228; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2229; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
2230; X86-SSE-NEXT:    retl # encoding: [0xc3]
2231;
2232; X86-AVX1-LABEL: test_mm_loadu_pd:
2233; X86-AVX1:       # %bb.0:
2234; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2235; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
2236; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2237;
2238; X86-AVX512-LABEL: test_mm_loadu_pd:
2239; X86-AVX512:       # %bb.0:
2240; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2241; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
2242; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2243;
2244; X64-SSE-LABEL: test_mm_loadu_pd:
2245; X64-SSE:       # %bb.0:
2246; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
2247; X64-SSE-NEXT:    retq # encoding: [0xc3]
2248;
2249; X64-AVX1-LABEL: test_mm_loadu_pd:
2250; X64-AVX1:       # %bb.0:
2251; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
2252; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2253;
2254; X64-AVX512-LABEL: test_mm_loadu_pd:
2255; X64-AVX512:       # %bb.0:
2256; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
2257; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2258  %arg0 = bitcast double* %a0 to <2 x double>*
2259  %res = load <2 x double>, <2 x double>* %arg0, align 1
2260  ret <2 x double> %res
2261}
2262
2263define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
2264; X86-SSE-LABEL: test_mm_loadu_si128:
2265; X86-SSE:       # %bb.0:
2266; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2267; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
2268; X86-SSE-NEXT:    retl # encoding: [0xc3]
2269;
2270; X86-AVX1-LABEL: test_mm_loadu_si128:
2271; X86-AVX1:       # %bb.0:
2272; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2273; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
2274; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2275;
2276; X86-AVX512-LABEL: test_mm_loadu_si128:
2277; X86-AVX512:       # %bb.0:
2278; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2279; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
2280; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2281;
2282; X64-SSE-LABEL: test_mm_loadu_si128:
2283; X64-SSE:       # %bb.0:
2284; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
2285; X64-SSE-NEXT:    retq # encoding: [0xc3]
2286;
2287; X64-AVX1-LABEL: test_mm_loadu_si128:
2288; X64-AVX1:       # %bb.0:
2289; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
2290; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2291;
2292; X64-AVX512-LABEL: test_mm_loadu_si128:
2293; X64-AVX512:       # %bb.0:
2294; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
2295; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2296  %res = load <2 x i64>, <2 x i64>* %a0, align 1
2297  ret <2 x i64> %res
2298}
2299
2300define <2 x i64> @test_mm_loadu_si64(i8* nocapture readonly %A) {
2301; X86-SSE-LABEL: test_mm_loadu_si64:
2302; X86-SSE:       # %bb.0: # %entry
2303; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2304; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
2305; X86-SSE-NEXT:    # xmm0 = mem[0],zero
2306; X86-SSE-NEXT:    retl # encoding: [0xc3]
2307;
2308; X86-AVX1-LABEL: test_mm_loadu_si64:
2309; X86-AVX1:       # %bb.0: # %entry
2310; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2311; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
2312; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
2313; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2314;
2315; X86-AVX512-LABEL: test_mm_loadu_si64:
2316; X86-AVX512:       # %bb.0: # %entry
2317; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2318; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
2319; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
2320; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2321;
2322; X64-SSE-LABEL: test_mm_loadu_si64:
2323; X64-SSE:       # %bb.0: # %entry
2324; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
2325; X64-SSE-NEXT:    # xmm0 = mem[0],zero
2326; X64-SSE-NEXT:    retq # encoding: [0xc3]
2327;
2328; X64-AVX1-LABEL: test_mm_loadu_si64:
2329; X64-AVX1:       # %bb.0: # %entry
2330; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
2331; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
2332; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2333;
2334; X64-AVX512-LABEL: test_mm_loadu_si64:
2335; X64-AVX512:       # %bb.0: # %entry
2336; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
2337; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
2338; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2339entry:
2340  %__v.i = bitcast i8* %A to i64*
2341  %0 = load i64, i64* %__v.i, align 1
2342  %vecinit1.i = insertelement <2 x i64> <i64 undef, i64 0>, i64 %0, i32 0
2343  ret <2 x i64> %vecinit1.i
2344}
2345
2346define <2 x i64> @test_mm_loadu_si32(i8* nocapture readonly %A) {
2347; X86-SSE-LABEL: test_mm_loadu_si32:
2348; X86-SSE:       # %bb.0: # %entry
2349; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2350; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
2351; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
2352; X86-SSE-NEXT:    retl # encoding: [0xc3]
2353;
2354; X86-AVX1-LABEL: test_mm_loadu_si32:
2355; X86-AVX1:       # %bb.0: # %entry
2356; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2357; X86-AVX1-NEXT:    vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00]
2358; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
2359; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2360;
2361; X86-AVX512-LABEL: test_mm_loadu_si32:
2362; X86-AVX512:       # %bb.0: # %entry
2363; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2364; X86-AVX512-NEXT:    vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
2365; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
2366; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2367;
2368; X64-SSE-LABEL: test_mm_loadu_si32:
2369; X64-SSE:       # %bb.0: # %entry
2370; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
2371; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
2372; X64-SSE-NEXT:    retq # encoding: [0xc3]
2373;
2374; X64-AVX1-LABEL: test_mm_loadu_si32:
2375; X64-AVX1:       # %bb.0: # %entry
2376; X64-AVX1-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
2377; X64-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
2378; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2379;
2380; X64-AVX512-LABEL: test_mm_loadu_si32:
2381; X64-AVX512:       # %bb.0: # %entry
2382; X64-AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
2383; X64-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
2384; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2385entry:
2386  %__v.i = bitcast i8* %A to i32*
2387  %0 = load i32, i32* %__v.i, align 1
2388  %vecinit3.i = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0
2389  %1 = bitcast <4 x i32> %vecinit3.i to <2 x i64>
2390  ret <2 x i64> %1
2391}
2392
2393define <2 x i64> @test_mm_loadu_si16(i8* nocapture readonly %A) {
2394; X86-SSE-LABEL: test_mm_loadu_si16:
2395; X86-SSE:       # %bb.0: # %entry
2396; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2397; X86-SSE-NEXT:    movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2398; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
2399; X86-SSE-NEXT:    retl # encoding: [0xc3]
2400;
2401; X86-AVX1-LABEL: test_mm_loadu_si16:
2402; X86-AVX1:       # %bb.0: # %entry
2403; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2404; X86-AVX1-NEXT:    movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2405; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
2406; X86-AVX1-NEXT:    retl # encoding: [0xc3]
2407;
2408; X86-AVX512-LABEL: test_mm_loadu_si16:
2409; X86-AVX512:       # %bb.0: # %entry
2410; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2411; X86-AVX512-NEXT:    movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2412; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
2413; X86-AVX512-NEXT:    retl # encoding: [0xc3]
2414;
2415; X64-SSE-LABEL: test_mm_loadu_si16:
2416; X64-SSE:       # %bb.0: # %entry
2417; X64-SSE-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2418; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
2419; X64-SSE-NEXT:    retq # encoding: [0xc3]
2420;
2421; X64-AVX1-LABEL: test_mm_loadu_si16:
2422; X64-AVX1:       # %bb.0: # %entry
2423; X64-AVX1-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2424; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
2425; X64-AVX1-NEXT:    retq # encoding: [0xc3]
2426;
2427; X64-AVX512-LABEL: test_mm_loadu_si16:
2428; X64-AVX512:       # %bb.0: # %entry
2429; X64-AVX512-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2430; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
2431; X64-AVX512-NEXT:    retq # encoding: [0xc3]
2432entry:
2433  %__v.i = bitcast i8* %A to i16*
2434  %0 = load i16, i16* %__v.i, align 1
2435  %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i32 0
2436  %1 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
2437  ret <2 x i64> %1
2438}
2439
2440define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2441; SSE-LABEL: test_mm_madd_epi16:
2442; SSE:       # %bb.0:
2443; SSE-NEXT:    pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
2444; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2445;
2446; AVX1-LABEL: test_mm_madd_epi16:
2447; AVX1:       # %bb.0:
2448; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
2449; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2450;
2451; AVX512-LABEL: test_mm_madd_epi16:
2452; AVX512:       # %bb.0:
2453; AVX512-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
2454; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2455  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2456  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2457  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
2458  %bc = bitcast <4 x i32> %res to <2 x i64>
2459  ret <2 x i64> %bc
2460}
2461declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
2462
2463define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
2464; X86-SSE-LABEL: test_mm_maskmoveu_si128:
2465; X86-SSE:       # %bb.0:
2466; X86-SSE-NEXT:    pushl %edi # encoding: [0x57]
2467; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
2468; X86-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
2469; X86-SSE-NEXT:    popl %edi # encoding: [0x5f]
2470; X86-SSE-NEXT:    retl # encoding: [0xc3]
2471;
2472; X86-AVX-LABEL: test_mm_maskmoveu_si128:
2473; X86-AVX:       # %bb.0:
2474; X86-AVX-NEXT:    pushl %edi # encoding: [0x57]
2475; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
2476; X86-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
2477; X86-AVX-NEXT:    popl %edi # encoding: [0x5f]
2478; X86-AVX-NEXT:    retl # encoding: [0xc3]
2479;
2480; X64-SSE-LABEL: test_mm_maskmoveu_si128:
2481; X64-SSE:       # %bb.0:
2482; X64-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
2483; X64-SSE-NEXT:    retq # encoding: [0xc3]
2484;
2485; X64-AVX-LABEL: test_mm_maskmoveu_si128:
2486; X64-AVX:       # %bb.0:
2487; X64-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
2488; X64-AVX-NEXT:    retq # encoding: [0xc3]
2489  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2490  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2491  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
2492  ret void
2493}
2494declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
2495
2496define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2497; SSE-LABEL: test_mm_max_epi16:
2498; SSE:       # %bb.0:
2499; SSE-NEXT:    pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
2500; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2501;
2502; AVX1-LABEL: test_mm_max_epi16:
2503; AVX1:       # %bb.0:
2504; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
2505; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2506;
2507; AVX512-LABEL: test_mm_max_epi16:
2508; AVX512:       # %bb.0:
2509; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2510; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2511  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2512  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2513  %sel = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
2514  %bc = bitcast <8 x i16> %sel to <2 x i64>
2515  ret <2 x i64> %bc
2516}
2517declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
2518
2519define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2520; SSE-LABEL: test_mm_max_epu8:
2521; SSE:       # %bb.0:
2522; SSE-NEXT:    pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
2523; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2524;
2525; AVX1-LABEL: test_mm_max_epu8:
2526; AVX1:       # %bb.0:
2527; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
2528; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2529;
2530; AVX512-LABEL: test_mm_max_epu8:
2531; AVX512:       # %bb.0:
2532; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
2533; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2534  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2535  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2536  %sel = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
2537  %bc = bitcast <16 x i8> %sel to <2 x i64>
2538  ret <2 x i64> %bc
2539}
2540declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
2541
2542define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2543; SSE-LABEL: test_mm_max_pd:
2544; SSE:       # %bb.0:
2545; SSE-NEXT:    maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
2546; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2547;
2548; AVX1-LABEL: test_mm_max_pd:
2549; AVX1:       # %bb.0:
2550; AVX1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
2551; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2552;
2553; AVX512-LABEL: test_mm_max_pd:
2554; AVX512:       # %bb.0:
2555; AVX512-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
2556; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2557  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
2558  ret <2 x double> %res
2559}
2560declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
2561
2562define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2563; SSE-LABEL: test_mm_max_sd:
2564; SSE:       # %bb.0:
2565; SSE-NEXT:    maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
2566; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2567;
2568; AVX1-LABEL: test_mm_max_sd:
2569; AVX1:       # %bb.0:
2570; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
2571; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2572;
2573; AVX512-LABEL: test_mm_max_sd:
2574; AVX512:       # %bb.0:
2575; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
2576; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2577  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
2578  ret <2 x double> %res
2579}
2580declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
2581
2582define void @test_mm_mfence() nounwind {
2583; CHECK-LABEL: test_mm_mfence:
2584; CHECK:       # %bb.0:
2585; CHECK-NEXT:    mfence # encoding: [0x0f,0xae,0xf0]
2586; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2587  call void @llvm.x86.sse2.mfence()
2588  ret void
2589}
2590declare void @llvm.x86.sse2.mfence() nounwind readnone
2591
2592define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2593; SSE-LABEL: test_mm_min_epi16:
2594; SSE:       # %bb.0:
2595; SSE-NEXT:    pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
2596; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2597;
2598; AVX1-LABEL: test_mm_min_epi16:
2599; AVX1:       # %bb.0:
2600; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
2601; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2602;
2603; AVX512-LABEL: test_mm_min_epi16:
2604; AVX512:       # %bb.0:
2605; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
2606; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2607  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2608  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2609  %sel = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
2610  %bc = bitcast <8 x i16> %sel to <2 x i64>
2611  ret <2 x i64> %bc
2612}
2613declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
2614
2615define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2616; SSE-LABEL: test_mm_min_epu8:
2617; SSE:       # %bb.0:
2618; SSE-NEXT:    pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
2619; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2620;
2621; AVX1-LABEL: test_mm_min_epu8:
2622; AVX1:       # %bb.0:
2623; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
2624; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2625;
2626; AVX512-LABEL: test_mm_min_epu8:
2627; AVX512:       # %bb.0:
2628; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
2629; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2630  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2631  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2632  %sel = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
2633  %bc = bitcast <16 x i8> %sel to <2 x i64>
2634  ret <2 x i64> %bc
2635}
2636declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
2637
2638define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2639; SSE-LABEL: test_mm_min_pd:
2640; SSE:       # %bb.0:
2641; SSE-NEXT:    minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
2642; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2643;
2644; AVX1-LABEL: test_mm_min_pd:
2645; AVX1:       # %bb.0:
2646; AVX1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
2647; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2648;
2649; AVX512-LABEL: test_mm_min_pd:
2650; AVX512:       # %bb.0:
2651; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
2652; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2653  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
2654  ret <2 x double> %res
2655}
2656declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
2657
2658define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2659; SSE-LABEL: test_mm_min_sd:
2660; SSE:       # %bb.0:
2661; SSE-NEXT:    minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
2662; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2663;
2664; AVX1-LABEL: test_mm_min_sd:
2665; AVX1:       # %bb.0:
2666; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
2667; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2668;
2669; AVX512-LABEL: test_mm_min_sd:
2670; AVX512:       # %bb.0:
2671; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
2672; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2673  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
2674  ret <2 x double> %res
2675}
2676declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
2677
2678define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
2679; SSE-LABEL: test_mm_move_epi64:
2680; SSE:       # %bb.0:
2681; SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
2682; SSE-NEXT:    # xmm0 = xmm0[0],zero
2683; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2684;
2685; AVX1-LABEL: test_mm_move_epi64:
2686; AVX1:       # %bb.0:
2687; AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
2688; AVX1-NEXT:    # xmm0 = xmm0[0],zero
2689; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2690;
2691; AVX512-LABEL: test_mm_move_epi64:
2692; AVX512:       # %bb.0:
2693; AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
2694; AVX512-NEXT:    # xmm0 = xmm0[0],zero
2695; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2696  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
2697  ret <2 x i64> %res
2698}
2699
2700define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2701; SSE-LABEL: test_mm_move_sd:
2702; SSE:       # %bb.0:
2703; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
2704; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
2705; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2706;
2707; AVX-LABEL: test_mm_move_sd:
2708; AVX:       # %bb.0:
2709; AVX-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
2710; AVX-NEXT:    # xmm0 = xmm1[0,1],xmm0[2,3]
2711; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2712  %ext0 = extractelement <2 x double> %a1, i32 0
2713  %res0 = insertelement <2 x double> undef, double %ext0, i32 0
2714  %ext1 = extractelement <2 x double> %a0, i32 1
2715  %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
2716  ret <2 x double> %res1
2717}
2718
2719define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
2720; SSE-LABEL: test_mm_movemask_epi8:
2721; SSE:       # %bb.0:
2722; SSE-NEXT:    pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
2723; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2724;
2725; AVX-LABEL: test_mm_movemask_epi8:
2726; AVX:       # %bb.0:
2727; AVX-NEXT:    vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
2728; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2729  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2730  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
2731  ret i32 %res
2732}
2733declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
2734
2735define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
2736; SSE-LABEL: test_mm_movemask_pd:
2737; SSE:       # %bb.0:
2738; SSE-NEXT:    movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
2739; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2740;
2741; AVX-LABEL: test_mm_movemask_pd:
2742; AVX:       # %bb.0:
2743; AVX-NEXT:    vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
2744; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2745  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
2746  ret i32 %res
2747}
2748declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
2749
2750define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2751; SSE-LABEL: test_mm_mul_epu32:
2752; SSE:       # %bb.0:
2753; SSE-NEXT:    pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
2754; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2755;
2756; AVX1-LABEL: test_mm_mul_epu32:
2757; AVX1:       # %bb.0:
2758; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
2759; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2760;
2761; AVX512-LABEL: test_mm_mul_epu32:
2762; AVX512:       # %bb.0:
2763; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
2764; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
2765; AVX512-NEXT:    # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
2766; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
2767; AVX512-NEXT:    # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2768; AVX512-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
2769; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2770  %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
2771  %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
2772  %res = mul nuw <2 x i64> %A, %B
2773  ret <2 x i64> %res
2774}
2775
2776define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2777; SSE-LABEL: test_mm_mul_pd:
2778; SSE:       # %bb.0:
2779; SSE-NEXT:    mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
2780; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2781;
2782; AVX1-LABEL: test_mm_mul_pd:
2783; AVX1:       # %bb.0:
2784; AVX1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
2785; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2786;
2787; AVX512-LABEL: test_mm_mul_pd:
2788; AVX512:       # %bb.0:
2789; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
2790; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2791  %res = fmul <2 x double> %a0, %a1
2792  ret <2 x double> %res
2793}
2794
2795define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2796; SSE-LABEL: test_mm_mul_sd:
2797; SSE:       # %bb.0:
2798; SSE-NEXT:    mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1]
2799; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2800;
2801; AVX1-LABEL: test_mm_mul_sd:
2802; AVX1:       # %bb.0:
2803; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1]
2804; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2805;
2806; AVX512-LABEL: test_mm_mul_sd:
2807; AVX512:       # %bb.0:
2808; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
2809; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2810  %ext0 = extractelement <2 x double> %a0, i32 0
2811  %ext1 = extractelement <2 x double> %a1, i32 0
2812  %fmul = fmul double %ext0, %ext1
2813  %res = insertelement <2 x double> %a0, double %fmul, i32 0
2814  ret <2 x double> %res
2815}
2816
2817define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2818; SSE-LABEL: test_mm_mulhi_epi16:
2819; SSE:       # %bb.0:
2820; SSE-NEXT:    pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1]
2821; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2822;
2823; AVX1-LABEL: test_mm_mulhi_epi16:
2824; AVX1:       # %bb.0:
2825; AVX1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1]
2826; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2827;
2828; AVX512-LABEL: test_mm_mulhi_epi16:
2829; AVX512:       # %bb.0:
2830; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
2831; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2832  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2833  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2834  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
2835  %bc = bitcast <8 x i16> %res to <2 x i64>
2836  ret <2 x i64> %bc
2837}
2838declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
2839
2840define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
2841; SSE-LABEL: test_mm_mulhi_epu16:
2842; SSE:       # %bb.0:
2843; SSE-NEXT:    pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1]
2844; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2845;
2846; AVX1-LABEL: test_mm_mulhi_epu16:
2847; AVX1:       # %bb.0:
2848; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1]
2849; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2850;
2851; AVX512-LABEL: test_mm_mulhi_epu16:
2852; AVX512:       # %bb.0:
2853; AVX512-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
2854; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2855  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2856  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2857  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
2858  %bc = bitcast <8 x i16> %res to <2 x i64>
2859  ret <2 x i64> %bc
2860}
2861declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
2862
2863define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2864; SSE-LABEL: test_mm_mullo_epi16:
2865; SSE:       # %bb.0:
2866; SSE-NEXT:    pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1]
2867; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2868;
2869; AVX1-LABEL: test_mm_mullo_epi16:
2870; AVX1:       # %bb.0:
2871; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1]
2872; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2873;
2874; AVX512-LABEL: test_mm_mullo_epi16:
2875; AVX512:       # %bb.0:
2876; AVX512-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
2877; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2878  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2879  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2880  %res = mul <8 x i16> %arg0, %arg1
2881  %bc = bitcast <8 x i16> %res to <2 x i64>
2882  ret <2 x i64> %bc
2883}
2884
2885define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2886; SSE-LABEL: test_mm_or_pd:
2887; SSE:       # %bb.0:
2888; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
2889; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2890;
2891; AVX1-LABEL: test_mm_or_pd:
2892; AVX1:       # %bb.0:
2893; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
2894; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2895;
2896; AVX512-LABEL: test_mm_or_pd:
2897; AVX512:       # %bb.0:
2898; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
2899; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2900  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
2901  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
2902  %res = or <4 x i32> %arg0, %arg1
2903  %bc = bitcast <4 x i32> %res to <2 x double>
2904  ret <2 x double> %bc
2905}
2906
2907define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2908; SSE-LABEL: test_mm_or_si128:
2909; SSE:       # %bb.0:
2910; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
2911; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2912;
2913; AVX1-LABEL: test_mm_or_si128:
2914; AVX1:       # %bb.0:
2915; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
2916; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2917;
2918; AVX512-LABEL: test_mm_or_si128:
2919; AVX512:       # %bb.0:
2920; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
2921; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2922  %res = or <2 x i64> %a0, %a1
2923  ret <2 x i64> %res
2924}
2925
2926define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2927; SSE-LABEL: test_mm_packs_epi16:
2928; SSE:       # %bb.0:
2929; SSE-NEXT:    packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1]
2930; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2931;
2932; AVX1-LABEL: test_mm_packs_epi16:
2933; AVX1:       # %bb.0:
2934; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1]
2935; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2936;
2937; AVX512-LABEL: test_mm_packs_epi16:
2938; AVX512:       # %bb.0:
2939; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
2940; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2941  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2942  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2943  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2944  %bc = bitcast <16 x i8> %res to <2 x i64>
2945  ret <2 x i64> %bc
2946}
2947declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2948
2949define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2950; SSE-LABEL: test_mm_packs_epi32:
2951; SSE:       # %bb.0:
2952; SSE-NEXT:    packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1]
2953; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2954;
2955; AVX1-LABEL: test_mm_packs_epi32:
2956; AVX1:       # %bb.0:
2957; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1]
2958; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2959;
2960; AVX512-LABEL: test_mm_packs_epi32:
2961; AVX512:       # %bb.0:
2962; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
2963; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2964  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2965  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2966  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
2967  %bc = bitcast <8 x i16> %res to <2 x i64>
2968  ret <2 x i64> %bc
2969}
2970declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
2971
2972define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2973; SSE-LABEL: test_mm_packus_epi16:
2974; SSE:       # %bb.0:
2975; SSE-NEXT:    packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1]
2976; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2977;
2978; AVX1-LABEL: test_mm_packus_epi16:
2979; AVX1:       # %bb.0:
2980; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1]
2981; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2982;
2983; AVX512-LABEL: test_mm_packus_epi16:
2984; AVX512:       # %bb.0:
2985; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
2986; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2987  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2988  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2989  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2990  %bc = bitcast <16 x i8> %res to <2 x i64>
2991  ret <2 x i64> %bc
2992}
2993declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2994
2995define void @test_mm_pause() nounwind {
2996; CHECK-LABEL: test_mm_pause:
2997; CHECK:       # %bb.0:
2998; CHECK-NEXT:    pause # encoding: [0xf3,0x90]
2999; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3000  call void @llvm.x86.sse2.pause()
3001  ret void
3002}
3003declare void @llvm.x86.sse2.pause() nounwind readnone
3004
3005define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3006; SSE-LABEL: test_mm_sad_epu8:
3007; SSE:       # %bb.0:
3008; SSE-NEXT:    psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1]
3009; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3010;
3011; AVX1-LABEL: test_mm_sad_epu8:
3012; AVX1:       # %bb.0:
3013; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1]
3014; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3015;
3016; AVX512-LABEL: test_mm_sad_epu8:
3017; AVX512:       # %bb.0:
3018; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
3019; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3020  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3021  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3022  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
3023  ret <2 x i64> %res
3024}
3025declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
3026
3027define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
3028; X86-SSE-LABEL: test_mm_set_epi8:
3029; X86-SSE:       # %bb.0:
3030; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3031; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3032; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3033; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3034; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3035; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3036; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3037; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3038; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3039; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3040; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3041; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3042; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3043; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3044; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3045; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3046; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3047; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3048; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3049; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3050; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3051; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3052; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3053; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3054; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3055; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3056; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
3057; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3058; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
3059; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3060; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3061; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3062; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3063; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3064; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3065; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3066; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3067; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3068; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3069; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3070; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3071; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3072; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
3073; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3074; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3075; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3076; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3077; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3078; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3079; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3080; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3081; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3082; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3083; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3084; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
3085; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
3086; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
3087; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
3088; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
3089; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3090; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3091; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3092; X86-SSE-NEXT:    retl # encoding: [0xc3]
3093;
3094; X86-AVX1-LABEL: test_mm_set_epi8:
3095; X86-AVX1:       # %bb.0:
3096; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3097; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
3098; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
3099; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3100; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3101; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3102; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3103; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3104; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3105; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3106; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3107; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3108; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3109; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3110; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3111; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3112; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3113; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3114; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3115; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3116; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3117; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3118; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3119; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3120; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3121; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3122; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3123; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3124; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3125; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3126; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3127; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3128; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3129;
3130; X86-AVX512-LABEL: test_mm_set_epi8:
3131; X86-AVX512:       # %bb.0:
3132; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3133; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
3134; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
3135; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3136; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3137; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3138; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3139; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3140; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3141; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3142; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3143; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3144; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3145; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3146; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3147; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3148; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3149; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3150; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3151; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3152; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3153; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3154; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3155; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3156; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3157; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3158; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3159; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3160; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3161; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3162; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3163; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3164; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3165;
3166; X64-SSE-LABEL: test_mm_set_epi8:
3167; X64-SSE:       # %bb.0:
3168; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3169; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3170; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
3171; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3172; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3173; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3174; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
3175; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3176; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
3177; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3178; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3179; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3180; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3181; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3182; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
3183; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3184; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
3185; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3186; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3187; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3188; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3189; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3190; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3191; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3192; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3193; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3194; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
3195; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3196; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
3197; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3198; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3199; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3200; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3201; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3202; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3203; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3204; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3205; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3206; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3207; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3208; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3209; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3210; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
3211; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3212; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3213; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3214; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3215; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3216; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3217; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3218; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
3219; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3220; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3221; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3222; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
3223; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
3224; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
3225; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
3226; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
3227; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3228; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3229; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3230; X64-SSE-NEXT:    retq # encoding: [0xc3]
3231;
3232; X64-AVX1-LABEL: test_mm_set_epi8:
3233; X64-AVX1:       # %bb.0:
3234; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
3235; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3236; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3237; X64-AVX1-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
3238; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3239; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3240; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3241; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3242; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3243; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3244; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3245; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3246; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3247; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3248; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3249; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3250; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3251; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3252; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3253; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3254; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
3255; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3256; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
3257; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3258; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
3259; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3260; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
3261; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3262; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
3263; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3264; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3265; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3266; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3267;
3268; X64-AVX512-LABEL: test_mm_set_epi8:
3269; X64-AVX512:       # %bb.0:
3270; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
3271; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3272; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3273; X64-AVX512-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
3274; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3275; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3276; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3277; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3278; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3279; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3280; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3281; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3282; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3283; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3284; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3285; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3286; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3287; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3288; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3289; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3290; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
3291; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3292; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
3293; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3294; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
3295; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3296; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
3297; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3298; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
3299; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3300; X64-AVX512-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3301; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3302; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3303  %res0  = insertelement <16 x i8> undef,  i8 %a15, i32 0
3304  %res1  = insertelement <16 x i8> %res0,  i8 %a14, i32 1
3305  %res2  = insertelement <16 x i8> %res1,  i8 %a13, i32 2
3306  %res3  = insertelement <16 x i8> %res2,  i8 %a12, i32 3
3307  %res4  = insertelement <16 x i8> %res3,  i8 %a11, i32 4
3308  %res5  = insertelement <16 x i8> %res4,  i8 %a10, i32 5
3309  %res6  = insertelement <16 x i8> %res5,  i8 %a9 , i32 6
3310  %res7  = insertelement <16 x i8> %res6,  i8 %a8 , i32 7
3311  %res8  = insertelement <16 x i8> %res7,  i8 %a7 , i32 8
3312  %res9  = insertelement <16 x i8> %res8,  i8 %a6 , i32 9
3313  %res10 = insertelement <16 x i8> %res9,  i8 %a5 , i32 10
3314  %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
3315  %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
3316  %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
3317  %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
3318  %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
3319  %res = bitcast <16 x i8> %res15 to <2 x i64>
3320  ret <2 x i64> %res
3321}
3322
3323define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
3324; X86-SSE-LABEL: test_mm_set_epi16:
3325; X86-SSE:       # %bb.0:
3326; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3327; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3328; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3329; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3330; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3331; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3332; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3333; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3334; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3335; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
3336; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3337; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
3338; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3339; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
3340; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3341; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3342; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3343; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3344; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
3345; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
3346; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
3347; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
3348; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
3349; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
3350; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
3351; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
3352; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
3353; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
3354; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
3355; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
3356; X86-SSE-NEXT:    retl # encoding: [0xc3]
3357;
3358; X86-AVX1-LABEL: test_mm_set_epi16:
3359; X86-AVX1:       # %bb.0:
3360; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3361; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3362; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3363; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
3364; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3365; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
3366; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3367; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
3368; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3369; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
3370; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3371; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
3372; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3373; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
3374; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3375; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
3376; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3377;
3378; X86-AVX512-LABEL: test_mm_set_epi16:
3379; X86-AVX512:       # %bb.0:
3380; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3381; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3382; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3383; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
3384; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3385; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
3386; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3387; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
3388; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3389; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
3390; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3391; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
3392; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3393; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
3394; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3395; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
3396; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3397;
3398; X64-SSE-LABEL: test_mm_set_epi16:
3399; X64-SSE:       # %bb.0:
3400; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
3401; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3402; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3403; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
3404; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3405; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3406; X64-SSE-NEXT:    movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
3407; X64-SSE-NEXT:    movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
3408; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
3409; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
3410; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
3411; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3412; X64-SSE-NEXT:    movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
3413; X64-SSE-NEXT:    movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
3414; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3415; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3416; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3417; X64-SSE-NEXT:    movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2]
3418; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
3419; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3420; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
3421; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3422; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
3423; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
3424; X64-SSE-NEXT:    retq # encoding: [0xc3]
3425;
3426; X64-AVX1-LABEL: test_mm_set_epi16:
3427; X64-AVX1:       # %bb.0:
3428; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3429; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
3430; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3431; X64-AVX1-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3432; X64-AVX1-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3433; X64-AVX1-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3434; X64-AVX1-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3435; X64-AVX1-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3436; X64-AVX1-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3437; X64-AVX1-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3438; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3439;
3440; X64-AVX512-LABEL: test_mm_set_epi16:
3441; X64-AVX512:       # %bb.0:
3442; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3443; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
3444; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3445; X64-AVX512-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3446; X64-AVX512-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3447; X64-AVX512-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3448; X64-AVX512-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3449; X64-AVX512-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3450; X64-AVX512-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3451; X64-AVX512-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3452; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3453  %res0  = insertelement <8 x i16> undef, i16 %a7, i32 0
3454  %res1  = insertelement <8 x i16> %res0, i16 %a6, i32 1
3455  %res2  = insertelement <8 x i16> %res1, i16 %a5, i32 2
3456  %res3  = insertelement <8 x i16> %res2, i16 %a4, i32 3
3457  %res4  = insertelement <8 x i16> %res3, i16 %a3, i32 4
3458  %res5  = insertelement <8 x i16> %res4, i16 %a2, i32 5
3459  %res6  = insertelement <8 x i16> %res5, i16 %a1, i32 6
3460  %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
3461  %res = bitcast <8 x i16> %res7 to <2 x i64>
3462  ret <2 x i64> %res
3463}
3464
3465define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
3466; X86-SSE-LABEL: test_mm_set_epi32:
3467; X86-SSE:       # %bb.0:
3468; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
3469; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3470; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
3471; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
3472; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
3473; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3474; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c]
3475; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
3476; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
3477; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3478; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
3479; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3480; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
3481; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3482; X86-SSE-NEXT:    retl # encoding: [0xc3]
3483;
3484; X86-AVX1-LABEL: test_mm_set_epi32:
3485; X86-AVX1:       # %bb.0:
3486; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
3487; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
3488; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
3489; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
3490; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
3491; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3492;
3493; X86-AVX512-LABEL: test_mm_set_epi32:
3494; X86-AVX512:       # %bb.0:
3495; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
3496; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
3497; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
3498; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
3499; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
3500; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3501;
3502; X64-SSE-LABEL: test_mm_set_epi32:
3503; X64-SSE:       # %bb.0:
3504; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3505; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
3506; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
3507; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3508; X64-SSE-NEXT:    movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
3509; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
3510; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
3511; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3512; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3513; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3514; X64-SSE-NEXT:    retq # encoding: [0xc3]
3515;
3516; X64-AVX1-LABEL: test_mm_set_epi32:
3517; X64-AVX1:       # %bb.0:
3518; X64-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
3519; X64-AVX1-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
3520; X64-AVX1-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
3521; X64-AVX1-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
3522; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3523;
3524; X64-AVX512-LABEL: test_mm_set_epi32:
3525; X64-AVX512:       # %bb.0:
3526; X64-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
3527; X64-AVX512-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
3528; X64-AVX512-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
3529; X64-AVX512-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
3530; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3531  %res0  = insertelement <4 x i32> undef, i32 %a3, i32 0
3532  %res1  = insertelement <4 x i32> %res0, i32 %a2, i32 1
3533  %res2  = insertelement <4 x i32> %res1, i32 %a1, i32 2
3534  %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
3535  %res = bitcast <4 x i32> %res3 to <2 x i64>
3536  ret <2 x i64> %res
3537}
3538
3539; TODO test_mm_set_epi64
3540
3541define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
3542; X86-SSE-LABEL: test_mm_set_epi64x:
3543; X86-SSE:       # %bb.0:
3544; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
3545; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
3546; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
3547; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3548; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
3549; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3550; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c]
3551; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3552; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10]
3553; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
3554; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
3555; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3556; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
3557; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3558; X86-SSE-NEXT:    retl # encoding: [0xc3]
3559;
3560; X86-AVX1-LABEL: test_mm_set_epi64x:
3561; X86-AVX1:       # %bb.0:
3562; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
3563; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
3564; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
3565; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
3566; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
3567; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3568;
3569; X86-AVX512-LABEL: test_mm_set_epi64x:
3570; X86-AVX512:       # %bb.0:
3571; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
3572; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
3573; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
3574; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
3575; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
3576; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3577;
3578; X64-SSE-LABEL: test_mm_set_epi64x:
3579; X64-SSE:       # %bb.0:
3580; X64-SSE-NEXT:    movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
3581; X64-SSE-NEXT:    movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
3582; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3583; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3584; X64-SSE-NEXT:    retq # encoding: [0xc3]
3585;
3586; X64-AVX1-LABEL: test_mm_set_epi64x:
3587; X64-AVX1:       # %bb.0:
3588; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
3589; X64-AVX1-NEXT:    vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
3590; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
3591; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
3592; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3593;
3594; X64-AVX512-LABEL: test_mm_set_epi64x:
3595; X64-AVX512:       # %bb.0:
3596; X64-AVX512-NEXT:    vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
3597; X64-AVX512-NEXT:    vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
3598; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
3599; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
3600; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3601  %res0  = insertelement <2 x i64> undef, i64 %a1, i32 0
3602  %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
3603  ret <2 x i64> %res1
3604}
3605
3606define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
3607; X86-SSE-LABEL: test_mm_set_pd:
3608; X86-SSE:       # %bb.0:
3609; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c]
3610; X86-SSE-NEXT:    # xmm0 = mem[0],zero
3611; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04]
3612; X86-SSE-NEXT:    # xmm1 = mem[0],zero
3613; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
3614; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3615; X86-SSE-NEXT:    retl # encoding: [0xc3]
3616;
3617; X86-AVX1-LABEL: test_mm_set_pd:
3618; X86-AVX1:       # %bb.0:
3619; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
3620; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
3621; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
3622; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
3623; X86-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
3624; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3625; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3626;
3627; X86-AVX512-LABEL: test_mm_set_pd:
3628; X86-AVX512:       # %bb.0:
3629; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
3630; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
3631; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
3632; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
3633; X86-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
3634; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
3635; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3636;
3637; X64-SSE-LABEL: test_mm_set_pd:
3638; X64-SSE:       # %bb.0:
3639; X64-SSE-NEXT:    movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
3640; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0]
3641; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
3642; X64-SSE-NEXT:    retq # encoding: [0xc3]
3643;
3644; X64-AVX1-LABEL: test_mm_set_pd:
3645; X64-AVX1:       # %bb.0:
3646; X64-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
3647; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
3648; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3649;
3650; X64-AVX512-LABEL: test_mm_set_pd:
3651; X64-AVX512:       # %bb.0:
3652; X64-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
3653; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
3654; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3655  %res0  = insertelement <2 x double> undef, double %a1, i32 0
3656  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
3657  ret <2 x double> %res1
3658}
3659
3660define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
3661; X86-SSE-LABEL: test_mm_set_pd1:
3662; X86-SSE:       # %bb.0:
3663; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
3664; X86-SSE-NEXT:    # xmm0 = mem[0],zero
3665; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
3666; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
3667; X86-SSE-NEXT:    retl # encoding: [0xc3]
3668;
3669; X86-AVX1-LABEL: test_mm_set_pd1:
3670; X86-AVX1:       # %bb.0:
3671; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
3672; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
3673; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
3674; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
3675; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3676;
3677; X86-AVX512-LABEL: test_mm_set_pd1:
3678; X86-AVX512:       # %bb.0:
3679; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
3680; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
3681; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
3682; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
3683; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3684;
3685; X64-SSE-LABEL: test_mm_set_pd1:
3686; X64-SSE:       # %bb.0:
3687; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
3688; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
3689; X64-SSE-NEXT:    retq # encoding: [0xc3]
3690;
3691; X64-AVX1-LABEL: test_mm_set_pd1:
3692; X64-AVX1:       # %bb.0:
3693; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
3694; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
3695; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3696;
3697; X64-AVX512-LABEL: test_mm_set_pd1:
3698; X64-AVX512:       # %bb.0:
3699; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
3700; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
3701; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3702  %res0  = insertelement <2 x double> undef, double %a0, i32 0
3703  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
3704  ret <2 x double> %res1
3705}
3706
3707define <2 x double> @test_mm_set_sd(double %a0) nounwind {
3708; X86-SSE-LABEL: test_mm_set_sd:
3709; X86-SSE:       # %bb.0:
3710; X86-SSE-NEXT:    movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
3711; X86-SSE-NEXT:    # xmm0 = mem[0],zero
3712; X86-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
3713; X86-SSE-NEXT:    # xmm0 = xmm0[0],zero
3714; X86-SSE-NEXT:    retl # encoding: [0xc3]
3715;
3716; X86-AVX1-LABEL: test_mm_set_sd:
3717; X86-AVX1:       # %bb.0:
3718; X86-AVX1-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
3719; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
3720; X86-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
3721; X86-AVX1-NEXT:    # xmm0 = xmm0[0],zero
3722; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3723;
3724; X86-AVX512-LABEL: test_mm_set_sd:
3725; X86-AVX512:       # %bb.0:
3726; X86-AVX512-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
3727; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
3728; X86-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
3729; X86-AVX512-NEXT:    # xmm0 = xmm0[0],zero
3730; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3731;
3732; X64-SSE-LABEL: test_mm_set_sd:
3733; X64-SSE:       # %bb.0:
3734; X64-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
3735; X64-SSE-NEXT:    # xmm0 = xmm0[0],zero
3736; X64-SSE-NEXT:    retq # encoding: [0xc3]
3737;
3738; X64-AVX1-LABEL: test_mm_set_sd:
3739; X64-AVX1:       # %bb.0:
3740; X64-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
3741; X64-AVX1-NEXT:    # xmm0 = xmm0[0],zero
3742; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3743;
3744; X64-AVX512-LABEL: test_mm_set_sd:
3745; X64-AVX512:       # %bb.0:
3746; X64-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
3747; X64-AVX512-NEXT:    # xmm0 = xmm0[0],zero
3748; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3749  %res0  = insertelement <2 x double> undef, double %a0, i32 0
3750  %res1  = insertelement <2 x double> %res0, double 0.0, i32 1
3751  ret <2 x double> %res1
3752}
3753
3754define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
3755; X86-SSE-LABEL: test_mm_set1_epi8:
3756; X86-SSE:       # %bb.0:
3757; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3758; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3759; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
3760; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3761; X86-SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
3762; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3763; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3764; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3765; X86-SSE-NEXT:    retl # encoding: [0xc3]
3766;
3767; X86-AVX1-LABEL: test_mm_set1_epi8:
3768; X86-AVX1:       # %bb.0:
3769; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3770; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3771; X86-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
3772; X86-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
3773; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3774;
3775; X86-AVX512-LABEL: test_mm_set1_epi8:
3776; X86-AVX512:       # %bb.0:
3777; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
3778; X86-AVX512-NEXT:    vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
3779; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3780;
3781; X64-SSE-LABEL: test_mm_set1_epi8:
3782; X64-SSE:       # %bb.0:
3783; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3784; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3785; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
3786; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3787; X64-SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
3788; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3789; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3790; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3791; X64-SSE-NEXT:    retq # encoding: [0xc3]
3792;
3793; X64-AVX1-LABEL: test_mm_set1_epi8:
3794; X64-AVX1:       # %bb.0:
3795; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3796; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3797; X64-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
3798; X64-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
3799; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3800;
3801; X64-AVX512-LABEL: test_mm_set1_epi8:
3802; X64-AVX512:       # %bb.0:
3803; X64-AVX512-NEXT:    vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
3804; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3805  %res0  = insertelement <16 x i8> undef,  i8 %a0, i32 0
3806  %res1  = insertelement <16 x i8> %res0,  i8 %a0, i32 1
3807  %res2  = insertelement <16 x i8> %res1,  i8 %a0, i32 2
3808  %res3  = insertelement <16 x i8> %res2,  i8 %a0, i32 3
3809  %res4  = insertelement <16 x i8> %res3,  i8 %a0, i32 4
3810  %res5  = insertelement <16 x i8> %res4,  i8 %a0, i32 5
3811  %res6  = insertelement <16 x i8> %res5,  i8 %a0, i32 6
3812  %res7  = insertelement <16 x i8> %res6,  i8 %a0, i32 7
3813  %res8  = insertelement <16 x i8> %res7,  i8 %a0, i32 8
3814  %res9  = insertelement <16 x i8> %res8,  i8 %a0, i32 9
3815  %res10 = insertelement <16 x i8> %res9,  i8 %a0, i32 10
3816  %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
3817  %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
3818  %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
3819  %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
3820  %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
3821  %res = bitcast <16 x i8> %res15 to <2 x i64>
3822  ret <2 x i64> %res
3823}
3824
3825define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
3826; X86-SSE-LABEL: test_mm_set1_epi16:
3827; X86-SSE:       # %bb.0:
3828; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3829; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3830; X86-SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
3831; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3832; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3833; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3834; X86-SSE-NEXT:    retl # encoding: [0xc3]
3835;
3836; X86-AVX1-LABEL: test_mm_set1_epi16:
3837; X86-AVX1:       # %bb.0:
3838; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3839; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3840; X86-AVX1-NEXT:    vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
3841; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3842; X86-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
3843; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
3844; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3845;
3846; X86-AVX512-LABEL: test_mm_set1_epi16:
3847; X86-AVX512:       # %bb.0:
3848; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3849; X86-AVX512-NEXT:    vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0]
3850; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3851;
3852; X64-SSE-LABEL: test_mm_set1_epi16:
3853; X64-SSE:       # %bb.0:
3854; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3855; X64-SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
3856; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3857; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3858; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3859; X64-SSE-NEXT:    retq # encoding: [0xc3]
3860;
3861; X64-AVX1-LABEL: test_mm_set1_epi16:
3862; X64-AVX1:       # %bb.0:
3863; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
3864; X64-AVX1-NEXT:    vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
3865; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
3866; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
3867; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
3868; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3869;
3870; X64-AVX512-LABEL: test_mm_set1_epi16:
3871; X64-AVX512:       # %bb.0:
3872; X64-AVX512-NEXT:    vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
3873; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3874  %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
3875  %res1  = insertelement <8 x i16> %res0, i16 %a0, i32 1
3876  %res2  = insertelement <8 x i16> %res1, i16 %a0, i32 2
3877  %res3  = insertelement <8 x i16> %res2, i16 %a0, i32 3
3878  %res4  = insertelement <8 x i16> %res3, i16 %a0, i32 4
3879  %res5  = insertelement <8 x i16> %res4, i16 %a0, i32 5
3880  %res6  = insertelement <8 x i16> %res5, i16 %a0, i32 6
3881  %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
3882  %res = bitcast <8 x i16> %res7 to <2 x i64>
3883  ret <2 x i64> %res
3884}
3885
3886define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
3887; X86-SSE-LABEL: test_mm_set1_epi32:
3888; X86-SSE:       # %bb.0:
3889; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
3890; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3891; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3892; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3893; X86-SSE-NEXT:    retl # encoding: [0xc3]
3894;
3895; X86-AVX1-LABEL: test_mm_set1_epi32:
3896; X86-AVX1:       # %bb.0:
3897; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
3898; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
3899; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
3900; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
3901; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3902;
3903; X86-AVX512-LABEL: test_mm_set1_epi32:
3904; X86-AVX512:       # %bb.0:
3905; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3906; X86-AVX512-NEXT:    vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0]
3907; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3908;
3909; X64-SSE-LABEL: test_mm_set1_epi32:
3910; X64-SSE:       # %bb.0:
3911; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3912; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
3913; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
3914; X64-SSE-NEXT:    retq # encoding: [0xc3]
3915;
3916; X64-AVX1-LABEL: test_mm_set1_epi32:
3917; X64-AVX1:       # %bb.0:
3918; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
3919; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
3920; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
3921; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3922;
3923; X64-AVX512-LABEL: test_mm_set1_epi32:
3924; X64-AVX512:       # %bb.0:
3925; X64-AVX512-NEXT:    vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
3926; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3927  %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
3928  %res1  = insertelement <4 x i32> %res0, i32 %a0, i32 1
3929  %res2  = insertelement <4 x i32> %res1, i32 %a0, i32 2
3930  %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
3931  %res = bitcast <4 x i32> %res3 to <2 x i64>
3932  ret <2 x i64> %res
3933}
3934
3935; TODO test_mm_set1_epi64
3936
3937define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
3938; X86-SSE-LABEL: test_mm_set1_epi64x:
3939; X86-SSE:       # %bb.0:
3940; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
3941; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
3942; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
3943; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
3944; X86-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
3945; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3946; X86-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
3947; X86-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
3948; X86-SSE-NEXT:    retl # encoding: [0xc3]
3949;
3950; X86-AVX1-LABEL: test_mm_set1_epi64x:
3951; X86-AVX1:       # %bb.0:
3952; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
3953; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
3954; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
3955; X86-AVX1-NEXT:    vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
3956; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1,0,1]
3957; X86-AVX1-NEXT:    retl # encoding: [0xc3]
3958;
3959; X86-AVX512-LABEL: test_mm_set1_epi64x:
3960; X86-AVX512:       # %bb.0:
3961; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
3962; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
3963; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
3964; X86-AVX512-NEXT:    vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
3965; X86-AVX512-NEXT:    retl # encoding: [0xc3]
3966;
3967; X64-SSE-LABEL: test_mm_set1_epi64x:
3968; X64-SSE:       # %bb.0:
3969; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
3970; X64-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
3971; X64-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
3972; X64-SSE-NEXT:    retq # encoding: [0xc3]
3973;
3974; X64-AVX1-LABEL: test_mm_set1_epi64x:
3975; X64-AVX1:       # %bb.0:
3976; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
3977; X64-AVX1-NEXT:    vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
3978; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1,0,1]
3979; X64-AVX1-NEXT:    retq # encoding: [0xc3]
3980;
3981; X64-AVX512-LABEL: test_mm_set1_epi64x:
3982; X64-AVX512:       # %bb.0:
3983; X64-AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
3984; X64-AVX512-NEXT:    retq # encoding: [0xc3]
3985  %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
3986  %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
3987  ret <2 x i64> %res1
3988}
3989
3990define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
3991; X86-SSE-LABEL: test_mm_set1_pd:
3992; X86-SSE:       # %bb.0:
3993; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
3994; X86-SSE-NEXT:    # xmm0 = mem[0],zero
3995; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
3996; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
3997; X86-SSE-NEXT:    retl # encoding: [0xc3]
3998;
3999; X86-AVX1-LABEL: test_mm_set1_pd:
4000; X86-AVX1:       # %bb.0:
4001; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4002; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
4003; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4004; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
4005; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4006;
4007; X86-AVX512-LABEL: test_mm_set1_pd:
4008; X86-AVX512:       # %bb.0:
4009; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4010; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
4011; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4012; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
4013; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4014;
4015; X64-SSE-LABEL: test_mm_set1_pd:
4016; X64-SSE:       # %bb.0:
4017; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4018; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
4019; X64-SSE-NEXT:    retq # encoding: [0xc3]
4020;
4021; X64-AVX1-LABEL: test_mm_set1_pd:
4022; X64-AVX1:       # %bb.0:
4023; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4024; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
4025; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4026;
4027; X64-AVX512-LABEL: test_mm_set1_pd:
4028; X64-AVX512:       # %bb.0:
4029; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4030; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
4031; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4032  %res0  = insertelement <2 x double> undef, double %a0, i32 0
4033  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
4034  ret <2 x double> %res1
4035}
4036
4037define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
4038; X86-SSE-LABEL: test_mm_setr_epi8:
4039; X86-SSE:       # %bb.0:
4040; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4041; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4042; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4043; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4044; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4045; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4046; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4047; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4048; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4049; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4050; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4051; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4052; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
4053; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
4054; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4055; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4056; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4057; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4058; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4059; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4060; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4061; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4062; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4063; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4064; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4065; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4066; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
4067; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
4068; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
4069; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4070; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4071; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4072; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4073; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4074; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4075; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4076; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4077; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4078; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4079; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4080; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4081; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4082; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
4083; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
4084; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4085; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4086; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4087; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4088; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4089; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4090; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4091; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4092; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4093; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4094; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
4095; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
4096; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
4097; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4098; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
4099; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4100; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4101; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4102; X86-SSE-NEXT:    retl # encoding: [0xc3]
4103;
4104; X86-AVX1-LABEL: test_mm_setr_epi8:
4105; X86-AVX1:       # %bb.0:
4106; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4107; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
4108; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
4109; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4110; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4111; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4112; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4113; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4114; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4115; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4116; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4117; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4118; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4119; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4120; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4121; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4122; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4123; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4124; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4125; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4126; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4127; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4128; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4129; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4130; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4131; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4132; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4133; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4134; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4135; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4136; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4137; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4138; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4139;
4140; X86-AVX512-LABEL: test_mm_setr_epi8:
4141; X86-AVX512:       # %bb.0:
4142; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4143; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
4144; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
4145; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4146; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4147; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4148; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4149; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4150; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4151; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4152; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4153; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4154; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4155; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4156; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4157; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4158; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4159; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4160; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4161; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4162; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4163; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4164; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4165; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4166; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4167; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4168; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4169; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4170; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4171; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4172; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4173; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4174; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4175;
4176; X64-SSE-LABEL: test_mm_setr_epi8:
4177; X64-SSE:       # %bb.0:
4178; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4179; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4180; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4181; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4182; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4183; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4184; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4185; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4186; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4187; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4188; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4189; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4190; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
4191; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
4192; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4193; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4194; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4195; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4196; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4197; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4198; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4199; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4200; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4201; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4202; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4203; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4204; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
4205; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
4206; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
4207; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4208; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4209; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4210; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4211; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4212; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4213; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4214; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
4215; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4216; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
4217; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4218; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4219; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4220; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
4221; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
4222; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
4223; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4224; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
4225; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4226; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4227; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4228; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
4229; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4230; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
4231; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4232; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
4233; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
4234; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
4235; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4236; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
4237; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4238; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4239; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4240; X64-SSE-NEXT:    retq # encoding: [0xc3]
4241;
4242; X64-AVX1-LABEL: test_mm_setr_epi8:
4243; X64-AVX1:       # %bb.0:
4244; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
4245; X64-AVX1-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
4246; X64-AVX1-NEXT:    vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
4247; X64-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4248; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
4249; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4250; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
4251; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4252; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
4253; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4254; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
4255; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4256; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4257; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4258; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4259; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4260; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4261; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4262; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4263; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4264; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4265; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4266; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4267; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4268; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4269; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4270; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4271; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4272; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4273; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4274; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4275; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4276; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4277;
4278; X64-AVX512-LABEL: test_mm_setr_epi8:
4279; X64-AVX512:       # %bb.0:
4280; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
4281; X64-AVX512-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
4282; X64-AVX512-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4283; X64-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4284; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
4285; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4286; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
4287; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4288; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
4289; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4290; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
4291; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4292; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4293; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4294; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4295; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4296; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4297; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4298; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4299; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4300; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4301; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4302; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4303; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4304; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4305; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4306; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4307; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4308; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4309; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4310; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4311; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4312; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4313  %res0  = insertelement <16 x i8> undef,  i8 %a0 , i32 0
4314  %res1  = insertelement <16 x i8> %res0,  i8 %a1 , i32 1
4315  %res2  = insertelement <16 x i8> %res1,  i8 %a2 , i32 2
4316  %res3  = insertelement <16 x i8> %res2,  i8 %a3 , i32 3
4317  %res4  = insertelement <16 x i8> %res3,  i8 %a4 , i32 4
4318  %res5  = insertelement <16 x i8> %res4,  i8 %a5 , i32 5
4319  %res6  = insertelement <16 x i8> %res5,  i8 %a6 , i32 6
4320  %res7  = insertelement <16 x i8> %res6,  i8 %a7 , i32 7
4321  %res8  = insertelement <16 x i8> %res7,  i8 %a8 , i32 8
4322  %res9  = insertelement <16 x i8> %res8,  i8 %a9 , i32 9
4323  %res10 = insertelement <16 x i8> %res9,  i8 %a10, i32 10
4324  %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
4325  %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
4326  %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
4327  %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
4328  %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
4329  %res = bitcast <16 x i8> %res15 to <2 x i64>
4330  ret <2 x i64> %res
4331}
4332
4333define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
4334; X86-SSE-LABEL: test_mm_setr_epi16:
4335; X86-SSE:       # %bb.0:
4336; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4337; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4338; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4339; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4340; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4341; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4342; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4343; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4344; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4345; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
4346; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4347; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
4348; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4349; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
4350; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4351; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4352; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
4353; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
4354; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
4355; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
4356; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
4357; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
4358; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
4359; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
4360; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
4361; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
4362; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
4363; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
4364; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
4365; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
4366; X86-SSE-NEXT:    retl # encoding: [0xc3]
4367;
4368; X86-AVX1-LABEL: test_mm_setr_epi16:
4369; X86-AVX1:       # %bb.0:
4370; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4371; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
4372; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4373; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
4374; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4375; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
4376; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4377; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
4378; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4379; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
4380; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4381; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
4382; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4383; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4384; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4385; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
4386; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4387;
4388; X86-AVX512-LABEL: test_mm_setr_epi16:
4389; X86-AVX512:       # %bb.0:
4390; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4391; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
4392; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4393; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
4394; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4395; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
4396; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4397; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
4398; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4399; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
4400; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4401; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
4402; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4403; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4404; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4405; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
4406; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4407;
4408; X64-SSE-LABEL: test_mm_setr_epi16:
4409; X64-SSE:       # %bb.0:
4410; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4411; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
4412; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4413; X64-SSE-NEXT:    movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
4414; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
4415; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4416; X64-SSE-NEXT:    movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
4417; X64-SSE-NEXT:    movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
4418; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
4419; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
4420; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
4421; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4422; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
4423; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
4424; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
4425; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4426; X64-SSE-NEXT:    movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
4427; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4428; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
4429; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
4430; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
4431; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4432; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
4433; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
4434; X64-SSE-NEXT:    retq # encoding: [0xc3]
4435;
4436; X64-AVX1-LABEL: test_mm_setr_epi16:
4437; X64-AVX1:       # %bb.0:
4438; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
4439; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4440; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4441; X64-AVX1-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
4442; X64-AVX1-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
4443; X64-AVX1-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
4444; X64-AVX1-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
4445; X64-AVX1-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
4446; X64-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4447; X64-AVX1-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
4448; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4449;
4450; X64-AVX512-LABEL: test_mm_setr_epi16:
4451; X64-AVX512:       # %bb.0:
4452; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
4453; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4454; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4455; X64-AVX512-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
4456; X64-AVX512-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
4457; X64-AVX512-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
4458; X64-AVX512-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
4459; X64-AVX512-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
4460; X64-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4461; X64-AVX512-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
4462; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4463  %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
4464  %res1  = insertelement <8 x i16> %res0, i16 %a1, i32 1
4465  %res2  = insertelement <8 x i16> %res1, i16 %a2, i32 2
4466  %res3  = insertelement <8 x i16> %res2, i16 %a3, i32 3
4467  %res4  = insertelement <8 x i16> %res3, i16 %a4, i32 4
4468  %res5  = insertelement <8 x i16> %res4, i16 %a5, i32 5
4469  %res6  = insertelement <8 x i16> %res5, i16 %a6, i32 6
4470  %res7  = insertelement <8 x i16> %res6, i16 %a7, i32 7
4471  %res = bitcast <8 x i16> %res7 to <2 x i64>
4472  ret <2 x i64> %res
4473}
4474
4475define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
4476; X86-SSE-LABEL: test_mm_setr_epi32:
4477; X86-SSE:       # %bb.0:
4478; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
4479; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4480; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
4481; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
4482; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
4483; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4484; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
4485; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
4486; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
4487; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4488; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
4489; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4490; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4491; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4492; X86-SSE-NEXT:    retl # encoding: [0xc3]
4493;
4494; X86-AVX1-LABEL: test_mm_setr_epi32:
4495; X86-AVX1:       # %bb.0:
4496; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4497; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
4498; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4499; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4500; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4501; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4502;
4503; X86-AVX512-LABEL: test_mm_setr_epi32:
4504; X86-AVX512:       # %bb.0:
4505; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4506; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
4507; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4508; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4509; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4510; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4511;
4512; X64-SSE-LABEL: test_mm_setr_epi32:
4513; X64-SSE:       # %bb.0:
4514; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
4515; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
4516; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
4517; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4518; X64-SSE-NEXT:    movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
4519; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4520; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
4521; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4522; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4523; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4524; X64-SSE-NEXT:    retq # encoding: [0xc3]
4525;
4526; X64-AVX1-LABEL: test_mm_setr_epi32:
4527; X64-AVX1:       # %bb.0:
4528; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4529; X64-AVX1-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
4530; X64-AVX1-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
4531; X64-AVX1-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
4532; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4533;
4534; X64-AVX512-LABEL: test_mm_setr_epi32:
4535; X64-AVX512:       # %bb.0:
4536; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4537; X64-AVX512-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
4538; X64-AVX512-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
4539; X64-AVX512-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
4540; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4541  %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
4542  %res1  = insertelement <4 x i32> %res0, i32 %a1, i32 1
4543  %res2  = insertelement <4 x i32> %res1, i32 %a2, i32 2
4544  %res3  = insertelement <4 x i32> %res2, i32 %a3, i32 3
4545  %res = bitcast <4 x i32> %res3 to <2 x i64>
4546  ret <2 x i64> %res
4547}
4548
4549; TODO test_mm_setr_epi64
4550
4551define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
4552; X86-SSE-LABEL: test_mm_setr_epi64x:
4553; X86-SSE:       # %bb.0:
4554; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
4555; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
4556; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
4557; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4558; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
4559; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4560; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
4561; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4562; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
4563; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
4564; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
4565; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4566; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4567; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4568; X86-SSE-NEXT:    retl # encoding: [0xc3]
4569;
4570; X86-AVX1-LABEL: test_mm_setr_epi64x:
4571; X86-AVX1:       # %bb.0:
4572; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4573; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
4574; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4575; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4576; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4577; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4578;
4579; X86-AVX512-LABEL: test_mm_setr_epi64x:
4580; X86-AVX512:       # %bb.0:
4581; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4582; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
4583; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4584; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4585; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4586; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4587;
4588; X64-SSE-LABEL: test_mm_setr_epi64x:
4589; X64-SSE:       # %bb.0:
4590; X64-SSE-NEXT:    movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
4591; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
4592; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4593; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4594; X64-SSE-NEXT:    retq # encoding: [0xc3]
4595;
4596; X64-AVX1-LABEL: test_mm_setr_epi64x:
4597; X64-AVX1:       # %bb.0:
4598; X64-AVX1-NEXT:    vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
4599; X64-AVX1-NEXT:    vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
4600; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
4601; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
4602; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4603;
4604; X64-AVX512-LABEL: test_mm_setr_epi64x:
4605; X64-AVX512:       # %bb.0:
4606; X64-AVX512-NEXT:    vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
4607; X64-AVX512-NEXT:    vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
4608; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
4609; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
4610; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4611  %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
4612  %res1  = insertelement <2 x i64> %res0, i64 %a1, i32 1
4613  ret <2 x i64> %res1
4614}
4615
4616define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
4617; X86-SSE-LABEL: test_mm_setr_pd:
4618; X86-SSE:       # %bb.0:
4619; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c]
4620; X86-SSE-NEXT:    # xmm1 = mem[0],zero
4621; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
4622; X86-SSE-NEXT:    # xmm0 = mem[0],zero
4623; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4624; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4625; X86-SSE-NEXT:    retl # encoding: [0xc3]
4626;
4627; X86-AVX1-LABEL: test_mm_setr_pd:
4628; X86-AVX1:       # %bb.0:
4629; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
4630; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
4631; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
4632; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
4633; X86-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
4634; X86-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
4635; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4636;
4637; X86-AVX512-LABEL: test_mm_setr_pd:
4638; X86-AVX512:       # %bb.0:
4639; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
4640; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
4641; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
4642; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
4643; X86-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
4644; X86-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
4645; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4646;
4647; X64-SSE-LABEL: test_mm_setr_pd:
4648; X64-SSE:       # %bb.0:
4649; X64-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4650; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4651; X64-SSE-NEXT:    retq # encoding: [0xc3]
4652;
4653; X64-AVX1-LABEL: test_mm_setr_pd:
4654; X64-AVX1:       # %bb.0:
4655; X64-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
4656; X64-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4657; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4658;
4659; X64-AVX512-LABEL: test_mm_setr_pd:
4660; X64-AVX512:       # %bb.0:
4661; X64-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
4662; X64-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4663; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4664  %res0  = insertelement <2 x double> undef, double %a0, i32 0
4665  %res1  = insertelement <2 x double> %res0, double %a1, i32 1
4666  ret <2 x double> %res1
4667}
4668
4669define <2 x double> @test_mm_setzero_pd() {
4670; SSE-LABEL: test_mm_setzero_pd:
4671; SSE:       # %bb.0:
4672; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
4673; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4674;
4675; AVX1-LABEL: test_mm_setzero_pd:
4676; AVX1:       # %bb.0:
4677; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
4678; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4679;
4680; AVX512-LABEL: test_mm_setzero_pd:
4681; AVX512:       # %bb.0:
4682; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
4683; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4684  ret <2 x double> zeroinitializer
4685}
4686
4687define <2 x i64> @test_mm_setzero_si128() {
4688; SSE-LABEL: test_mm_setzero_si128:
4689; SSE:       # %bb.0:
4690; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
4691; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4692;
4693; AVX1-LABEL: test_mm_setzero_si128:
4694; AVX1:       # %bb.0:
4695; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
4696; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4697;
4698; AVX512-LABEL: test_mm_setzero_si128:
4699; AVX512:       # %bb.0:
4700; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
4701; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4702  ret <2 x i64> zeroinitializer
4703}
4704
4705define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
4706; SSE-LABEL: test_mm_shuffle_epi32:
4707; SSE:       # %bb.0:
4708; SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4709; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
4710; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4711;
4712; AVX1-LABEL: test_mm_shuffle_epi32:
4713; AVX1:       # %bb.0:
4714; AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
4715; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
4716; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4717;
4718; AVX512-LABEL: test_mm_shuffle_epi32:
4719; AVX512:       # %bb.0:
4720; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
4721; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4722  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
4723  %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
4724  %bc = bitcast <4 x i32> %res to <2 x i64>
4725  ret <2 x i64> %bc
4726}
4727
4728define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
4729; SSE-LABEL: test_mm_shuffle_pd:
4730; SSE:       # %bb.0:
4731; SSE-NEXT:    shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e]
4732; SSE-NEXT:    # xmm0 = xmm0[2,3],xmm1[0,1]
4733; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4734;
4735; AVX1-LABEL: test_mm_shuffle_pd:
4736; AVX1:       # %bb.0:
4737; AVX1-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
4738; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[0]
4739; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4740;
4741; AVX512-LABEL: test_mm_shuffle_pd:
4742; AVX512:       # %bb.0:
4743; AVX512-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
4744; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[0]
4745; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4746  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
4747  ret <2 x double> %res
4748}
4749
4750define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
4751; SSE-LABEL: test_mm_shufflehi_epi16:
4752; SSE:       # %bb.0:
4753; SSE-NEXT:    pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00]
4754; SSE-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
4755; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4756;
4757; AVX1-LABEL: test_mm_shufflehi_epi16:
4758; AVX1:       # %bb.0:
4759; AVX1-NEXT:    vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00]
4760; AVX1-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
4761; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4762;
4763; AVX512-LABEL: test_mm_shufflehi_epi16:
4764; AVX512:       # %bb.0:
4765; AVX512-NEXT:    vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00]
4766; AVX512-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
4767; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4768  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
4769  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
4770  %bc = bitcast <8 x i16> %res to <2 x i64>
4771  ret <2 x i64> %bc
4772}
4773
4774define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
4775; SSE-LABEL: test_mm_shufflelo_epi16:
4776; SSE:       # %bb.0:
4777; SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4778; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4779; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4780;
4781; AVX1-LABEL: test_mm_shufflelo_epi16:
4782; AVX1:       # %bb.0:
4783; AVX1-NEXT:    vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
4784; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4785; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4786;
4787; AVX512-LABEL: test_mm_shufflelo_epi16:
4788; AVX512:       # %bb.0:
4789; AVX512-NEXT:    vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00]
4790; AVX512-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4791; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4792  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
4793  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
4794  %bc = bitcast <8 x i16> %res to <2 x i64>
4795  ret <2 x i64> %bc
4796}
4797
4798define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
4799; SSE-LABEL: test_mm_sll_epi16:
4800; SSE:       # %bb.0:
4801; SSE-NEXT:    psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1]
4802; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4803;
4804; AVX1-LABEL: test_mm_sll_epi16:
4805; AVX1:       # %bb.0:
4806; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1]
4807; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4808;
4809; AVX512-LABEL: test_mm_sll_epi16:
4810; AVX512:       # %bb.0:
4811; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
4812; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4813  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
4814  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
4815  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
4816  %bc = bitcast <8 x i16> %res to <2 x i64>
4817  ret <2 x i64> %bc
4818}
4819declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
4820
4821define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
4822; SSE-LABEL: test_mm_sll_epi32:
4823; SSE:       # %bb.0:
4824; SSE-NEXT:    pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1]
4825; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4826;
4827; AVX1-LABEL: test_mm_sll_epi32:
4828; AVX1:       # %bb.0:
4829; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1]
4830; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4831;
4832; AVX512-LABEL: test_mm_sll_epi32:
4833; AVX512:       # %bb.0:
4834; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
4835; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4836  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
4837  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
4838  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
4839  %bc = bitcast <4 x i32> %res to <2 x i64>
4840  ret <2 x i64> %bc
4841}
4842declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
4843
4844define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
4845; SSE-LABEL: test_mm_sll_epi64:
4846; SSE:       # %bb.0:
4847; SSE-NEXT:    psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1]
4848; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4849;
4850; AVX1-LABEL: test_mm_sll_epi64:
4851; AVX1:       # %bb.0:
4852; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1]
4853; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4854;
4855; AVX512-LABEL: test_mm_sll_epi64:
4856; AVX512:       # %bb.0:
4857; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
4858; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4859  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
4860  ret <2 x i64> %res
4861}
4862declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
4863
4864define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
4865; SSE-LABEL: test_mm_slli_epi16:
4866; SSE:       # %bb.0:
4867; SSE-NEXT:    psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01]
4868; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4869;
4870; AVX1-LABEL: test_mm_slli_epi16:
4871; AVX1:       # %bb.0:
4872; AVX1-NEXT:    vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01]
4873; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4874;
4875; AVX512-LABEL: test_mm_slli_epi16:
4876; AVX512:       # %bb.0:
4877; AVX512-NEXT:    vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01]
4878; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4879  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
4880  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
4881  %bc = bitcast <8 x i16> %res to <2 x i64>
4882  ret <2 x i64> %bc
4883}
4884declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
4885
4886define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
4887; SSE-LABEL: test_mm_slli_epi32:
4888; SSE:       # %bb.0:
4889; SSE-NEXT:    pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01]
4890; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4891;
4892; AVX1-LABEL: test_mm_slli_epi32:
4893; AVX1:       # %bb.0:
4894; AVX1-NEXT:    vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01]
4895; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4896;
4897; AVX512-LABEL: test_mm_slli_epi32:
4898; AVX512:       # %bb.0:
4899; AVX512-NEXT:    vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01]
4900; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4901  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
4902  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
4903  %bc = bitcast <4 x i32> %res to <2 x i64>
4904  ret <2 x i64> %bc
4905}
4906declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
4907
4908define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
4909; SSE-LABEL: test_mm_slli_epi64:
4910; SSE:       # %bb.0:
4911; SSE-NEXT:    psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01]
4912; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4913;
4914; AVX1-LABEL: test_mm_slli_epi64:
4915; AVX1:       # %bb.0:
4916; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01]
4917; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4918;
4919; AVX512-LABEL: test_mm_slli_epi64:
4920; AVX512:       # %bb.0:
4921; AVX512-NEXT:    vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01]
4922; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4923  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
4924  ret <2 x i64> %res
4925}
4926declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
4927
4928define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
4929; SSE-LABEL: test_mm_slli_si128:
4930; SSE:       # %bb.0:
4931; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
4932; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
4933; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4934;
4935; AVX1-LABEL: test_mm_slli_si128:
4936; AVX1:       # %bb.0:
4937; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
4938; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
4939; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4940;
4941; AVX512-LABEL: test_mm_slli_si128:
4942; AVX512:       # %bb.0:
4943; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
4944; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
4945; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4946  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
4947  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
4948  %bc = bitcast <16 x i8> %res to <2 x i64>
4949  ret <2 x i64> %bc
4950}
4951
4952define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
4953; SSE-LABEL: test_mm_sqrt_pd:
4954; SSE:       # %bb.0:
4955; SSE-NEXT:    sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0]
4956; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4957;
4958; AVX1-LABEL: test_mm_sqrt_pd:
4959; AVX1:       # %bb.0:
4960; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0]
4961; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4962;
4963; AVX512-LABEL: test_mm_sqrt_pd:
4964; AVX512:       # %bb.0:
4965; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
4966; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4967  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
4968  ret <2 x double> %res
4969}
4970declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
4971
4972define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
4973; SSE-LABEL: test_mm_sqrt_sd:
4974; SSE:       # %bb.0:
4975; SSE-NEXT:    sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8]
4976; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
4977; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4978;
4979; AVX1-LABEL: test_mm_sqrt_sd:
4980; AVX1:       # %bb.0:
4981; AVX1-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0]
4982; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4983;
4984; AVX512-LABEL: test_mm_sqrt_sd:
4985; AVX512:       # %bb.0:
4986; AVX512-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0]
4987; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4988  %ext = extractelement <2 x double> %a0, i32 0
4989  %sqrt = call double @llvm.sqrt.f64(double %ext)
4990  %ins = insertelement <2 x double> %a1, double %sqrt, i32 0
4991  ret <2 x double> %ins
4992}
4993declare double @llvm.sqrt.f64(double) nounwind readnone
4994
4995; This doesn't match a clang test, but helps with fast-isel coverage.
4996define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
4997; X86-SSE-LABEL: test_mm_sqrt_sd_scalar:
4998; X86-SSE:       # %bb.0:
4999; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
5000; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
5001; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5002; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
5003; X86-SSE-NEXT:    movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08]
5004; X86-SSE-NEXT:    # xmm0 = mem[0],zero
5005; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
5006; X86-SSE-NEXT:    movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24]
5007; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
5008; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
5009; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
5010; X86-SSE-NEXT:    retl # encoding: [0xc3]
5011;
5012; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar:
5013; X86-AVX1:       # %bb.0:
5014; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
5015; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
5016; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5017; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
5018; X86-AVX1-NEXT:    vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08]
5019; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
5020; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
5021; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24]
5022; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
5023; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
5024; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
5025; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5026;
5027; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar:
5028; X86-AVX512:       # %bb.0:
5029; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
5030; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
5031; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5032; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
5033; X86-AVX512-NEXT:    vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08]
5034; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
5035; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
5036; X86-AVX512-NEXT:    vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24]
5037; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
5038; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
5039; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
5040; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5041;
5042; X64-SSE-LABEL: test_mm_sqrt_sd_scalar:
5043; X64-SSE:       # %bb.0:
5044; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
5045; X64-SSE-NEXT:    retq # encoding: [0xc3]
5046;
5047; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
5048; X64-AVX1:       # %bb.0:
5049; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
5050; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5051;
5052; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
5053; X64-AVX512:       # %bb.0:
5054; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
5055; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5056  %sqrt = call double @llvm.sqrt.f64(double %a0)
5057  ret double %sqrt
5058}
5059
5060define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
5061; SSE-LABEL: test_mm_sra_epi16:
5062; SSE:       # %bb.0:
5063; SSE-NEXT:    psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1]
5064; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5065;
5066; AVX1-LABEL: test_mm_sra_epi16:
5067; AVX1:       # %bb.0:
5068; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1]
5069; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5070;
5071; AVX512-LABEL: test_mm_sra_epi16:
5072; AVX512:       # %bb.0:
5073; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
5074; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5075  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5076  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
5077  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
5078  %bc = bitcast <8 x i16> %res to <2 x i64>
5079  ret <2 x i64> %bc
5080}
5081declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
5082
5083define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
5084; SSE-LABEL: test_mm_sra_epi32:
5085; SSE:       # %bb.0:
5086; SSE-NEXT:    psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1]
5087; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5088;
5089; AVX1-LABEL: test_mm_sra_epi32:
5090; AVX1:       # %bb.0:
5091; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1]
5092; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5093;
5094; AVX512-LABEL: test_mm_sra_epi32:
5095; AVX512:       # %bb.0:
5096; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
5097; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5098  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5099  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
5100  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
5101  %bc = bitcast <4 x i32> %res to <2 x i64>
5102  ret <2 x i64> %bc
5103}
5104declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
5105
5106define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
5107; SSE-LABEL: test_mm_srai_epi16:
5108; SSE:       # %bb.0:
5109; SSE-NEXT:    psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01]
5110; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5111;
5112; AVX1-LABEL: test_mm_srai_epi16:
5113; AVX1:       # %bb.0:
5114; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01]
5115; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5116;
5117; AVX512-LABEL: test_mm_srai_epi16:
5118; AVX512:       # %bb.0:
5119; AVX512-NEXT:    vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01]
5120; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5121  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5122  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
5123  %bc = bitcast <8 x i16> %res to <2 x i64>
5124  ret <2 x i64> %bc
5125}
5126declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
5127
5128define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
5129; SSE-LABEL: test_mm_srai_epi32:
5130; SSE:       # %bb.0:
5131; SSE-NEXT:    psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01]
5132; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5133;
5134; AVX1-LABEL: test_mm_srai_epi32:
5135; AVX1:       # %bb.0:
5136; AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01]
5137; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5138;
5139; AVX512-LABEL: test_mm_srai_epi32:
5140; AVX512:       # %bb.0:
5141; AVX512-NEXT:    vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01]
5142; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5143  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5144  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
5145  %bc = bitcast <4 x i32> %res to <2 x i64>
5146  ret <2 x i64> %bc
5147}
5148declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
5149
5150define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
5151; SSE-LABEL: test_mm_srl_epi16:
5152; SSE:       # %bb.0:
5153; SSE-NEXT:    psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1]
5154; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5155;
5156; AVX1-LABEL: test_mm_srl_epi16:
5157; AVX1:       # %bb.0:
5158; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1]
5159; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5160;
5161; AVX512-LABEL: test_mm_srl_epi16:
5162; AVX512:       # %bb.0:
5163; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
5164; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5165  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5166  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
5167  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
5168  %bc = bitcast <8 x i16> %res to <2 x i64>
5169  ret <2 x i64> %bc
5170}
5171declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
5172
5173define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
5174; SSE-LABEL: test_mm_srl_epi32:
5175; SSE:       # %bb.0:
5176; SSE-NEXT:    psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1]
5177; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5178;
5179; AVX1-LABEL: test_mm_srl_epi32:
5180; AVX1:       # %bb.0:
5181; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1]
5182; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5183;
5184; AVX512-LABEL: test_mm_srl_epi32:
5185; AVX512:       # %bb.0:
5186; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
5187; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5188  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5189  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
5190  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
5191  %bc = bitcast <4 x i32> %res to <2 x i64>
5192  ret <2 x i64> %bc
5193}
5194declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
5195
5196define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
5197; SSE-LABEL: test_mm_srl_epi64:
5198; SSE:       # %bb.0:
5199; SSE-NEXT:    psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1]
5200; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5201;
5202; AVX1-LABEL: test_mm_srl_epi64:
5203; AVX1:       # %bb.0:
5204; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1]
5205; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5206;
5207; AVX512-LABEL: test_mm_srl_epi64:
5208; AVX512:       # %bb.0:
5209; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
5210; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5211  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
5212  ret <2 x i64> %res
5213}
5214declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
5215
5216define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
5217; SSE-LABEL: test_mm_srli_epi16:
5218; SSE:       # %bb.0:
5219; SSE-NEXT:    psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01]
5220; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5221;
5222; AVX1-LABEL: test_mm_srli_epi16:
5223; AVX1:       # %bb.0:
5224; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01]
5225; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5226;
5227; AVX512-LABEL: test_mm_srli_epi16:
5228; AVX512:       # %bb.0:
5229; AVX512-NEXT:    vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01]
5230; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5231  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5232  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
5233  %bc = bitcast <8 x i16> %res to <2 x i64>
5234  ret <2 x i64> %bc
5235}
5236declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
5237
5238define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
5239; SSE-LABEL: test_mm_srli_epi32:
5240; SSE:       # %bb.0:
5241; SSE-NEXT:    psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01]
5242; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5243;
5244; AVX1-LABEL: test_mm_srli_epi32:
5245; AVX1:       # %bb.0:
5246; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01]
5247; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5248;
5249; AVX512-LABEL: test_mm_srli_epi32:
5250; AVX512:       # %bb.0:
5251; AVX512-NEXT:    vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01]
5252; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5253  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5254  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
5255  %bc = bitcast <4 x i32> %res to <2 x i64>
5256  ret <2 x i64> %bc
5257}
5258declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
5259
5260define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
5261; SSE-LABEL: test_mm_srli_epi64:
5262; SSE:       # %bb.0:
5263; SSE-NEXT:    psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01]
5264; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5265;
5266; AVX1-LABEL: test_mm_srli_epi64:
5267; AVX1:       # %bb.0:
5268; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01]
5269; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5270;
5271; AVX512-LABEL: test_mm_srli_epi64:
5272; AVX512:       # %bb.0:
5273; AVX512-NEXT:    vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01]
5274; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5275  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
5276  ret <2 x i64> %res
5277}
5278declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
5279
5280define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
5281; SSE-LABEL: test_mm_srli_si128:
5282; SSE:       # %bb.0:
5283; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
5284; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
5285; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5286;
5287; AVX1-LABEL: test_mm_srli_si128:
5288; AVX1:       # %bb.0:
5289; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
5290; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
5291; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5292;
5293; AVX512-LABEL: test_mm_srli_si128:
5294; AVX512:       # %bb.0:
5295; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
5296; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
5297; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5298  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
5299  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
5300  %bc = bitcast <16 x i8> %res to <2 x i64>
5301  ret <2 x i64> %bc
5302}
5303
5304define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
5305; X86-SSE-LABEL: test_mm_store_pd:
5306; X86-SSE:       # %bb.0:
5307; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5308; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
5309; X86-SSE-NEXT:    retl # encoding: [0xc3]
5310;
5311; X86-AVX1-LABEL: test_mm_store_pd:
5312; X86-AVX1:       # %bb.0:
5313; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5314; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
5315; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5316;
5317; X86-AVX512-LABEL: test_mm_store_pd:
5318; X86-AVX512:       # %bb.0:
5319; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5320; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
5321; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5322;
5323; X64-SSE-LABEL: test_mm_store_pd:
5324; X64-SSE:       # %bb.0:
5325; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
5326; X64-SSE-NEXT:    retq # encoding: [0xc3]
5327;
5328; X64-AVX1-LABEL: test_mm_store_pd:
5329; X64-AVX1:       # %bb.0:
5330; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
5331; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5332;
5333; X64-AVX512-LABEL: test_mm_store_pd:
5334; X64-AVX512:       # %bb.0:
5335; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
5336; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5337  %arg0 = bitcast double* %a0 to <2 x double>*
5338  store <2 x double> %a1, <2 x double>* %arg0, align 16
5339  ret void
5340}
5341
5342define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
5343; X86-SSE-LABEL: test_mm_store_pd1:
5344; X86-SSE:       # %bb.0:
5345; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5346; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
5347; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
5348; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
5349; X86-SSE-NEXT:    retl # encoding: [0xc3]
5350;
5351; X86-AVX1-LABEL: test_mm_store_pd1:
5352; X86-AVX1:       # %bb.0:
5353; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5354; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
5355; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
5356; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
5357; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5358;
5359; X86-AVX512-LABEL: test_mm_store_pd1:
5360; X86-AVX512:       # %bb.0:
5361; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5362; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
5363; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
5364; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
5365; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5366;
5367; X64-SSE-LABEL: test_mm_store_pd1:
5368; X64-SSE:       # %bb.0:
5369; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
5370; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
5371; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
5372; X64-SSE-NEXT:    retq # encoding: [0xc3]
5373;
5374; X64-AVX1-LABEL: test_mm_store_pd1:
5375; X64-AVX1:       # %bb.0:
5376; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
5377; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
5378; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
5379; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5380;
5381; X64-AVX512-LABEL: test_mm_store_pd1:
5382; X64-AVX512:       # %bb.0:
5383; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
5384; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
5385; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
5386; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5387  %arg0 = bitcast double * %a0 to <2 x double>*
5388  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
5389  store <2 x double> %shuf, <2 x double>* %arg0, align 16
5390  ret void
5391}
5392
5393define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
5394; X86-SSE-LABEL: test_mm_store_sd:
5395; X86-SSE:       # %bb.0:
5396; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5397; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
5398; X86-SSE-NEXT:    retl # encoding: [0xc3]
5399;
5400; X86-AVX1-LABEL: test_mm_store_sd:
5401; X86-AVX1:       # %bb.0:
5402; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5403; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
5404; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5405;
5406; X86-AVX512-LABEL: test_mm_store_sd:
5407; X86-AVX512:       # %bb.0:
5408; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5409; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
5410; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5411;
5412; X64-SSE-LABEL: test_mm_store_sd:
5413; X64-SSE:       # %bb.0:
5414; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
5415; X64-SSE-NEXT:    retq # encoding: [0xc3]
5416;
5417; X64-AVX1-LABEL: test_mm_store_sd:
5418; X64-AVX1:       # %bb.0:
5419; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
5420; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5421;
5422; X64-AVX512-LABEL: test_mm_store_sd:
5423; X64-AVX512:       # %bb.0:
5424; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
5425; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5426  %ext = extractelement <2 x double> %a1, i32 0
5427  store double %ext, double* %a0, align 1
5428  ret void
5429}
5430
5431define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
5432; X86-SSE-LABEL: test_mm_store_si128:
5433; X86-SSE:       # %bb.0:
5434; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5435; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
5436; X86-SSE-NEXT:    retl # encoding: [0xc3]
5437;
5438; X86-AVX1-LABEL: test_mm_store_si128:
5439; X86-AVX1:       # %bb.0:
5440; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5441; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
5442; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5443;
5444; X86-AVX512-LABEL: test_mm_store_si128:
5445; X86-AVX512:       # %bb.0:
5446; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5447; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
5448; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5449;
5450; X64-SSE-LABEL: test_mm_store_si128:
5451; X64-SSE:       # %bb.0:
5452; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
5453; X64-SSE-NEXT:    retq # encoding: [0xc3]
5454;
5455; X64-AVX1-LABEL: test_mm_store_si128:
5456; X64-AVX1:       # %bb.0:
5457; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
5458; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5459;
5460; X64-AVX512-LABEL: test_mm_store_si128:
5461; X64-AVX512:       # %bb.0:
5462; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
5463; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5464  store <2 x i64> %a1, <2 x i64>* %a0, align 16
5465  ret void
5466}
5467
5468define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
5469; X86-SSE-LABEL: test_mm_store1_pd:
5470; X86-SSE:       # %bb.0:
5471; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5472; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
5473; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
5474; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
5475; X86-SSE-NEXT:    retl # encoding: [0xc3]
5476;
5477; X86-AVX1-LABEL: test_mm_store1_pd:
5478; X86-AVX1:       # %bb.0:
5479; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5480; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
5481; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
5482; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
5483; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5484;
5485; X86-AVX512-LABEL: test_mm_store1_pd:
5486; X86-AVX512:       # %bb.0:
5487; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5488; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
5489; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
5490; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
5491; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5492;
5493; X64-SSE-LABEL: test_mm_store1_pd:
5494; X64-SSE:       # %bb.0:
5495; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
5496; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
5497; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
5498; X64-SSE-NEXT:    retq # encoding: [0xc3]
5499;
5500; X64-AVX1-LABEL: test_mm_store1_pd:
5501; X64-AVX1:       # %bb.0:
5502; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
5503; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
5504; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
5505; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5506;
5507; X64-AVX512-LABEL: test_mm_store1_pd:
5508; X64-AVX512:       # %bb.0:
5509; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
5510; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
5511; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
5512; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5513  %arg0 = bitcast double * %a0 to <2 x double>*
5514  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
5515  store <2 x double> %shuf, <2 x double>* %arg0, align 16
5516  ret void
5517}
5518
5519define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
5520; X86-SSE-LABEL: test_mm_storeh_sd:
5521; X86-SSE:       # %bb.0:
5522; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5523; X86-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
5524; X86-SSE-NEXT:    # xmm0 = xmm0[1,1]
5525; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
5526; X86-SSE-NEXT:    retl # encoding: [0xc3]
5527;
5528; X86-AVX1-LABEL: test_mm_storeh_sd:
5529; X86-AVX1:       # %bb.0:
5530; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5531; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5532; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
5533; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
5534; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5535;
5536; X86-AVX512-LABEL: test_mm_storeh_sd:
5537; X86-AVX512:       # %bb.0:
5538; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5539; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5540; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
5541; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
5542; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5543;
5544; X64-SSE-LABEL: test_mm_storeh_sd:
5545; X64-SSE:       # %bb.0:
5546; X64-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
5547; X64-SSE-NEXT:    # xmm0 = xmm0[1,1]
5548; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
5549; X64-SSE-NEXT:    retq # encoding: [0xc3]
5550;
5551; X64-AVX1-LABEL: test_mm_storeh_sd:
5552; X64-AVX1:       # %bb.0:
5553; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5554; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
5555; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
5556; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5557;
5558; X64-AVX512-LABEL: test_mm_storeh_sd:
5559; X64-AVX512:       # %bb.0:
5560; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5561; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
5562; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
5563; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5564  %ext = extractelement <2 x double> %a1, i32 1
5565  store double %ext, double* %a0, align 8
5566  ret void
5567}
5568
5569define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
5570; X86-SSE-LABEL: test_mm_storel_epi64:
5571; X86-SSE:       # %bb.0:
5572; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5573; X86-SSE-NEXT:    movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
5574; X86-SSE-NEXT:    retl # encoding: [0xc3]
5575;
5576; X86-AVX1-LABEL: test_mm_storel_epi64:
5577; X86-AVX1:       # %bb.0:
5578; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5579; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
5580; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5581;
5582; X86-AVX512-LABEL: test_mm_storel_epi64:
5583; X86-AVX512:       # %bb.0:
5584; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5585; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
5586; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5587;
5588; X64-SSE-LABEL: test_mm_storel_epi64:
5589; X64-SSE:       # %bb.0:
5590; X64-SSE-NEXT:    movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
5591; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5592; X64-SSE-NEXT:    retq # encoding: [0xc3]
5593;
5594; X64-AVX1-LABEL: test_mm_storel_epi64:
5595; X64-AVX1:       # %bb.0:
5596; X64-AVX1-NEXT:    vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
5597; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5598; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5599;
5600; X64-AVX512-LABEL: test_mm_storel_epi64:
5601; X64-AVX512:       # %bb.0:
5602; X64-AVX512-NEXT:    vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
5603; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5604; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5605  %ext = extractelement <2 x i64> %a1, i32 0
5606  %bc = bitcast <2 x i64> *%a0 to i64*
5607  store i64 %ext, i64* %bc, align 8
5608  ret void
5609}
5610
5611define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
5612; X86-SSE-LABEL: test_mm_storel_sd:
5613; X86-SSE:       # %bb.0:
5614; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5615; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
5616; X86-SSE-NEXT:    retl # encoding: [0xc3]
5617;
5618; X86-AVX1-LABEL: test_mm_storel_sd:
5619; X86-AVX1:       # %bb.0:
5620; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5621; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
5622; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5623;
5624; X86-AVX512-LABEL: test_mm_storel_sd:
5625; X86-AVX512:       # %bb.0:
5626; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5627; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
5628; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5629;
5630; X64-SSE-LABEL: test_mm_storel_sd:
5631; X64-SSE:       # %bb.0:
5632; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
5633; X64-SSE-NEXT:    retq # encoding: [0xc3]
5634;
5635; X64-AVX1-LABEL: test_mm_storel_sd:
5636; X64-AVX1:       # %bb.0:
5637; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
5638; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5639;
5640; X64-AVX512-LABEL: test_mm_storel_sd:
5641; X64-AVX512:       # %bb.0:
5642; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
5643; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5644  %ext = extractelement <2 x double> %a1, i32 0
5645  store double %ext, double* %a0, align 8
5646  ret void
5647}
5648
5649define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
5650; X86-SSE-LABEL: test_mm_storer_pd:
5651; X86-SSE:       # %bb.0:
5652; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5653; X86-SSE-NEXT:    shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
5654; X86-SSE-NEXT:    # xmm0 = xmm0[2,3,0,1]
5655; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
5656; X86-SSE-NEXT:    retl # encoding: [0xc3]
5657;
5658; X86-AVX1-LABEL: test_mm_storer_pd:
5659; X86-AVX1:       # %bb.0:
5660; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5661; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5662; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
5663; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
5664; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5665;
5666; X86-AVX512-LABEL: test_mm_storer_pd:
5667; X86-AVX512:       # %bb.0:
5668; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5669; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5670; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
5671; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
5672; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5673;
5674; X64-SSE-LABEL: test_mm_storer_pd:
5675; X64-SSE:       # %bb.0:
5676; X64-SSE-NEXT:    shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
5677; X64-SSE-NEXT:    # xmm0 = xmm0[2,3,0,1]
5678; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
5679; X64-SSE-NEXT:    retq # encoding: [0xc3]
5680;
5681; X64-AVX1-LABEL: test_mm_storer_pd:
5682; X64-AVX1:       # %bb.0:
5683; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5684; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
5685; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
5686; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5687;
5688; X64-AVX512-LABEL: test_mm_storer_pd:
5689; X64-AVX512:       # %bb.0:
5690; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
5691; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
5692; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
5693; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5694  %arg0 = bitcast double* %a0 to <2 x double>*
5695  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
5696  store <2 x double> %shuf, <2 x double>* %arg0, align 16
5697  ret void
5698}
5699
5700define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
5701; X86-SSE-LABEL: test_mm_storeu_pd:
5702; X86-SSE:       # %bb.0:
5703; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5704; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
5705; X86-SSE-NEXT:    retl # encoding: [0xc3]
5706;
5707; X86-AVX1-LABEL: test_mm_storeu_pd:
5708; X86-AVX1:       # %bb.0:
5709; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5710; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
5711; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5712;
5713; X86-AVX512-LABEL: test_mm_storeu_pd:
5714; X86-AVX512:       # %bb.0:
5715; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5716; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
5717; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5718;
5719; X64-SSE-LABEL: test_mm_storeu_pd:
5720; X64-SSE:       # %bb.0:
5721; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
5722; X64-SSE-NEXT:    retq # encoding: [0xc3]
5723;
5724; X64-AVX1-LABEL: test_mm_storeu_pd:
5725; X64-AVX1:       # %bb.0:
5726; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
5727; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5728;
5729; X64-AVX512-LABEL: test_mm_storeu_pd:
5730; X64-AVX512:       # %bb.0:
5731; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
5732; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5733  %arg0 = bitcast double* %a0 to <2 x double>*
5734  store <2 x double> %a1, <2 x double>* %arg0, align 1
5735  ret void
5736}
5737
5738define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
5739; X86-SSE-LABEL: test_mm_storeu_si128:
5740; X86-SSE:       # %bb.0:
5741; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5742; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
5743; X86-SSE-NEXT:    retl # encoding: [0xc3]
5744;
5745; X86-AVX1-LABEL: test_mm_storeu_si128:
5746; X86-AVX1:       # %bb.0:
5747; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5748; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
5749; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5750;
5751; X86-AVX512-LABEL: test_mm_storeu_si128:
5752; X86-AVX512:       # %bb.0:
5753; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5754; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
5755; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5756;
5757; X64-SSE-LABEL: test_mm_storeu_si128:
5758; X64-SSE:       # %bb.0:
5759; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
5760; X64-SSE-NEXT:    retq # encoding: [0xc3]
5761;
5762; X64-AVX1-LABEL: test_mm_storeu_si128:
5763; X64-AVX1:       # %bb.0:
5764; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
5765; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5766;
5767; X64-AVX512-LABEL: test_mm_storeu_si128:
5768; X64-AVX512:       # %bb.0:
5769; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
5770; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5771  store <2 x i64> %a1, <2 x i64>* %a0, align 1
5772  ret void
5773}
5774
5775define void @test_mm_storeu_si64(i8* nocapture %A, <2 x i64> %B) {
5776; X86-SSE-LABEL: test_mm_storeu_si64:
5777; X86-SSE:       # %bb.0: # %entry
5778; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5779; X86-SSE-NEXT:    movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
5780; X86-SSE-NEXT:    retl # encoding: [0xc3]
5781;
5782; X86-AVX1-LABEL: test_mm_storeu_si64:
5783; X86-AVX1:       # %bb.0: # %entry
5784; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5785; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
5786; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5787;
5788; X86-AVX512-LABEL: test_mm_storeu_si64:
5789; X86-AVX512:       # %bb.0: # %entry
5790; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5791; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
5792; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5793;
5794; X64-SSE-LABEL: test_mm_storeu_si64:
5795; X64-SSE:       # %bb.0: # %entry
5796; X64-SSE-NEXT:    movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
5797; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5798; X64-SSE-NEXT:    retq # encoding: [0xc3]
5799;
5800; X64-AVX1-LABEL: test_mm_storeu_si64:
5801; X64-AVX1:       # %bb.0: # %entry
5802; X64-AVX1-NEXT:    vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
5803; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5804; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5805;
5806; X64-AVX512-LABEL: test_mm_storeu_si64:
5807; X64-AVX512:       # %bb.0: # %entry
5808; X64-AVX512-NEXT:    vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
5809; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
5810; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5811entry:
5812  %vecext.i = extractelement <2 x i64> %B, i32 0
5813  %__v.i = bitcast i8* %A to i64*
5814  store i64 %vecext.i, i64* %__v.i, align 1
5815  ret void
5816}
5817
5818define void @test_mm_storeu_si32(i8* nocapture %A, <2 x i64> %B) {
5819; X86-SSE-LABEL: test_mm_storeu_si32:
5820; X86-SSE:       # %bb.0: # %entry
5821; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5822; X86-SSE-NEXT:    movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1]
5823; X86-SSE-NEXT:    movl %ecx, (%eax) # encoding: [0x89,0x08]
5824; X86-SSE-NEXT:    retl # encoding: [0xc3]
5825;
5826; X86-AVX1-LABEL: test_mm_storeu_si32:
5827; X86-AVX1:       # %bb.0: # %entry
5828; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5829; X86-AVX1-NEXT:    vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1]
5830; X86-AVX1-NEXT:    movl %ecx, (%eax) # encoding: [0x89,0x08]
5831; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5832;
5833; X86-AVX512-LABEL: test_mm_storeu_si32:
5834; X86-AVX512:       # %bb.0: # %entry
5835; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5836; X86-AVX512-NEXT:    vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1]
5837; X86-AVX512-NEXT:    movl %ecx, (%eax) # encoding: [0x89,0x08]
5838; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5839;
5840; X64-SSE-LABEL: test_mm_storeu_si32:
5841; X64-SSE:       # %bb.0: # %entry
5842; X64-SSE-NEXT:    movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
5843; X64-SSE-NEXT:    movl %eax, (%rdi) # encoding: [0x89,0x07]
5844; X64-SSE-NEXT:    retq # encoding: [0xc3]
5845;
5846; X64-AVX1-LABEL: test_mm_storeu_si32:
5847; X64-AVX1:       # %bb.0: # %entry
5848; X64-AVX1-NEXT:    vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
5849; X64-AVX1-NEXT:    movl %eax, (%rdi) # encoding: [0x89,0x07]
5850; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5851;
5852; X64-AVX512-LABEL: test_mm_storeu_si32:
5853; X64-AVX512:       # %bb.0: # %entry
5854; X64-AVX512-NEXT:    vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
5855; X64-AVX512-NEXT:    movl %eax, (%rdi) # encoding: [0x89,0x07]
5856; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5857entry:
5858  %0 = bitcast <2 x i64> %B to <4 x i32>
5859  %vecext.i = extractelement <4 x i32> %0, i32 0
5860  %__v.i = bitcast i8* %A to i32*
5861  store i32 %vecext.i, i32* %__v.i, align 1
5862  ret void
5863}
5864
5865define void @test_mm_storeu_si16(i8* nocapture %A, <2 x i64> %B) {
5866; X86-SSE-LABEL: test_mm_storeu_si16:
5867; X86-SSE:       # %bb.0: # %entry
5868; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5869; X86-SSE-NEXT:    movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1]
5870; X86-SSE-NEXT:    movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
5871; X86-SSE-NEXT:    retl # encoding: [0xc3]
5872;
5873; X86-AVX1-LABEL: test_mm_storeu_si16:
5874; X86-AVX1:       # %bb.0: # %entry
5875; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5876; X86-AVX1-NEXT:    vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1]
5877; X86-AVX1-NEXT:    movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
5878; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5879;
5880; X86-AVX512-LABEL: test_mm_storeu_si16:
5881; X86-AVX512:       # %bb.0: # %entry
5882; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5883; X86-AVX512-NEXT:    vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1]
5884; X86-AVX512-NEXT:    movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
5885; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5886;
5887; X64-SSE-LABEL: test_mm_storeu_si16:
5888; X64-SSE:       # %bb.0: # %entry
5889; X64-SSE-NEXT:    movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
5890; X64-SSE-NEXT:    movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
5891; X64-SSE-NEXT:    retq # encoding: [0xc3]
5892;
5893; X64-AVX1-LABEL: test_mm_storeu_si16:
5894; X64-AVX1:       # %bb.0: # %entry
5895; X64-AVX1-NEXT:    vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
5896; X64-AVX1-NEXT:    movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
5897; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5898;
5899; X64-AVX512-LABEL: test_mm_storeu_si16:
5900; X64-AVX512:       # %bb.0: # %entry
5901; X64-AVX512-NEXT:    vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
5902; X64-AVX512-NEXT:    movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
5903; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5904entry:
5905  %0 = bitcast <2 x i64> %B to <8 x i16>
5906  %vecext.i = extractelement <8 x i16> %0, i32 0
5907  %__v.i = bitcast i8* %A to i16*
5908  store i16 %vecext.i, i16* %__v.i, align 1
5909  ret void
5910}
5911
5912define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
5913; X86-SSE-LABEL: test_mm_stream_pd:
5914; X86-SSE:       # %bb.0:
5915; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5916; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
5917; X86-SSE-NEXT:    retl # encoding: [0xc3]
5918;
5919; X86-AVX1-LABEL: test_mm_stream_pd:
5920; X86-AVX1:       # %bb.0:
5921; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5922; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
5923; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5924;
5925; X86-AVX512-LABEL: test_mm_stream_pd:
5926; X86-AVX512:       # %bb.0:
5927; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5928; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
5929; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5930;
5931; X64-SSE-LABEL: test_mm_stream_pd:
5932; X64-SSE:       # %bb.0:
5933; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
5934; X64-SSE-NEXT:    retq # encoding: [0xc3]
5935;
5936; X64-AVX1-LABEL: test_mm_stream_pd:
5937; X64-AVX1:       # %bb.0:
5938; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
5939; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5940;
5941; X64-AVX512-LABEL: test_mm_stream_pd:
5942; X64-AVX512:       # %bb.0:
5943; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
5944; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5945  %arg0 = bitcast double* %a0 to <2 x double>*
5946  store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
5947  ret void
5948}
5949
5950define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
5951; X86-LABEL: test_mm_stream_si32:
5952; X86:       # %bb.0:
5953; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
5954; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
5955; X86-NEXT:    movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01]
5956; X86-NEXT:    retl # encoding: [0xc3]
5957;
5958; X64-LABEL: test_mm_stream_si32:
5959; X64:       # %bb.0:
5960; X64-NEXT:    movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
5961; X64-NEXT:    retq # encoding: [0xc3]
5962  store i32 %a1, i32* %a0, align 1, !nontemporal !0
5963  ret void
5964}
5965
5966define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
5967; X86-SSE-LABEL: test_mm_stream_si128:
5968; X86-SSE:       # %bb.0:
5969; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5970; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
5971; X86-SSE-NEXT:    retl # encoding: [0xc3]
5972;
5973; X86-AVX1-LABEL: test_mm_stream_si128:
5974; X86-AVX1:       # %bb.0:
5975; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5976; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
5977; X86-AVX1-NEXT:    retl # encoding: [0xc3]
5978;
5979; X86-AVX512-LABEL: test_mm_stream_si128:
5980; X86-AVX512:       # %bb.0:
5981; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5982; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
5983; X86-AVX512-NEXT:    retl # encoding: [0xc3]
5984;
5985; X64-SSE-LABEL: test_mm_stream_si128:
5986; X64-SSE:       # %bb.0:
5987; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
5988; X64-SSE-NEXT:    retq # encoding: [0xc3]
5989;
5990; X64-AVX1-LABEL: test_mm_stream_si128:
5991; X64-AVX1:       # %bb.0:
5992; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
5993; X64-AVX1-NEXT:    retq # encoding: [0xc3]
5994;
5995; X64-AVX512-LABEL: test_mm_stream_si128:
5996; X64-AVX512:       # %bb.0:
5997; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
5998; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5999  store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
6000  ret void
6001}
6002
6003define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6004; SSE-LABEL: test_mm_sub_epi8:
6005; SSE:       # %bb.0:
6006; SSE-NEXT:    psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
6007; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6008;
6009; AVX1-LABEL: test_mm_sub_epi8:
6010; AVX1:       # %bb.0:
6011; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
6012; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6013;
6014; AVX512-LABEL: test_mm_sub_epi8:
6015; AVX512:       # %bb.0:
6016; AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
6017; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6018  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6019  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
6020  %res = sub <16 x i8> %arg0, %arg1
6021  %bc = bitcast <16 x i8> %res to <2 x i64>
6022  ret <2 x i64> %bc
6023}
6024
6025define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6026; SSE-LABEL: test_mm_sub_epi16:
6027; SSE:       # %bb.0:
6028; SSE-NEXT:    psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
6029; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6030;
6031; AVX1-LABEL: test_mm_sub_epi16:
6032; AVX1:       # %bb.0:
6033; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
6034; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6035;
6036; AVX512-LABEL: test_mm_sub_epi16:
6037; AVX512:       # %bb.0:
6038; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
6039; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6040  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6041  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6042  %res = sub <8 x i16> %arg0, %arg1
6043  %bc = bitcast <8 x i16> %res to <2 x i64>
6044  ret <2 x i64> %bc
6045}
6046
6047define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6048; SSE-LABEL: test_mm_sub_epi32:
6049; SSE:       # %bb.0:
6050; SSE-NEXT:    psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
6051; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6052;
6053; AVX1-LABEL: test_mm_sub_epi32:
6054; AVX1:       # %bb.0:
6055; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
6056; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6057;
6058; AVX512-LABEL: test_mm_sub_epi32:
6059; AVX512:       # %bb.0:
6060; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
6061; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6062  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6063  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
6064  %res = sub <4 x i32> %arg0, %arg1
6065  %bc = bitcast <4 x i32> %res to <2 x i64>
6066  ret <2 x i64> %bc
6067}
6068
6069define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6070; SSE-LABEL: test_mm_sub_epi64:
6071; SSE:       # %bb.0:
6072; SSE-NEXT:    psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
6073; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6074;
6075; AVX1-LABEL: test_mm_sub_epi64:
6076; AVX1:       # %bb.0:
6077; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
6078; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6079;
6080; AVX512-LABEL: test_mm_sub_epi64:
6081; AVX512:       # %bb.0:
6082; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
6083; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6084  %res = sub <2 x i64> %a0, %a1
6085  ret <2 x i64> %res
6086}
6087
6088define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
6089; SSE-LABEL: test_mm_sub_pd:
6090; SSE:       # %bb.0:
6091; SSE-NEXT:    subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
6092; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6093;
6094; AVX1-LABEL: test_mm_sub_pd:
6095; AVX1:       # %bb.0:
6096; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
6097; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6098;
6099; AVX512-LABEL: test_mm_sub_pd:
6100; AVX512:       # %bb.0:
6101; AVX512-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
6102; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6103  %res = fsub <2 x double> %a0, %a1
6104  ret <2 x double> %res
6105}
6106
6107define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6108; SSE-LABEL: test_mm_sub_sd:
6109; SSE:       # %bb.0:
6110; SSE-NEXT:    subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
6111; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6112;
6113; AVX1-LABEL: test_mm_sub_sd:
6114; AVX1:       # %bb.0:
6115; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
6116; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6117;
6118; AVX512-LABEL: test_mm_sub_sd:
6119; AVX512:       # %bb.0:
6120; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
6121; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6122  %ext0 = extractelement <2 x double> %a0, i32 0
6123  %ext1 = extractelement <2 x double> %a1, i32 0
6124  %fsub = fsub double %ext0, %ext1
6125  %res = insertelement <2 x double> %a0, double %fsub, i32 0
6126  ret <2 x double> %res
6127}
6128
6129define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6130; SSE-LABEL: test_mm_subs_epi8:
6131; SSE:       # %bb.0:
6132; SSE-NEXT:    psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
6133; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6134;
6135; AVX1-LABEL: test_mm_subs_epi8:
6136; AVX1:       # %bb.0:
6137; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
6138; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6139;
6140; AVX512-LABEL: test_mm_subs_epi8:
6141; AVX512:       # %bb.0:
6142; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
6143; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6144  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6145  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
6146  %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
6147  %bc = bitcast <16 x i8> %res to <2 x i64>
6148  ret <2 x i64> %bc
6149}
6150declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
6151
6152define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6153; SSE-LABEL: test_mm_subs_epi16:
6154; SSE:       # %bb.0:
6155; SSE-NEXT:    psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
6156; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6157;
6158; AVX1-LABEL: test_mm_subs_epi16:
6159; AVX1:       # %bb.0:
6160; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
6161; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6162;
6163; AVX512-LABEL: test_mm_subs_epi16:
6164; AVX512:       # %bb.0:
6165; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
6166; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6167  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6168  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6169  %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
6170  %bc = bitcast <8 x i16> %res to <2 x i64>
6171  ret <2 x i64> %bc
6172}
6173declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
6174
6175define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6176; SSE-LABEL: test_mm_subs_epu8:
6177; SSE:       # %bb.0:
6178; SSE-NEXT:    psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
6179; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6180;
6181; AVX1-LABEL: test_mm_subs_epu8:
6182; AVX1:       # %bb.0:
6183; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
6184; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6185;
6186; AVX512-LABEL: test_mm_subs_epu8:
6187; AVX512:       # %bb.0:
6188; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
6189; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6190  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6191  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
6192  %res = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
6193  %bc = bitcast <16 x i8> %res to <2 x i64>
6194  ret <2 x i64> %bc
6195}
6196declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
6197
6198define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6199; SSE-LABEL: test_mm_subs_epu16:
6200; SSE:       # %bb.0:
6201; SSE-NEXT:    psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
6202; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6203;
6204; AVX1-LABEL: test_mm_subs_epu16:
6205; AVX1:       # %bb.0:
6206; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
6207; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6208;
6209; AVX512-LABEL: test_mm_subs_epu16:
6210; AVX512:       # %bb.0:
6211; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
6212; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6213  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6214  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6215  %res = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
6216  %bc = bitcast <8 x i16> %res to <2 x i64>
6217  ret <2 x i64> %bc
6218}
6219declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
6220
6221define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6222; SSE-LABEL: test_mm_ucomieq_sd:
6223; SSE:       # %bb.0:
6224; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
6225; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
6226; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
6227; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
6228; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6229; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6230;
6231; AVX1-LABEL: test_mm_ucomieq_sd:
6232; AVX1:       # %bb.0:
6233; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
6234; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
6235; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
6236; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
6237; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6238; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6239;
6240; AVX512-LABEL: test_mm_ucomieq_sd:
6241; AVX512:       # %bb.0:
6242; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
6243; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
6244; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
6245; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
6246; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6247; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6248  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
6249  ret i32 %res
6250}
6251declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
6252
6253define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6254; SSE-LABEL: test_mm_ucomige_sd:
6255; SSE:       # %bb.0:
6256; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6257; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
6258; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6259; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6260;
6261; AVX1-LABEL: test_mm_ucomige_sd:
6262; AVX1:       # %bb.0:
6263; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6264; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
6265; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6266; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6267;
6268; AVX512-LABEL: test_mm_ucomige_sd:
6269; AVX512:       # %bb.0:
6270; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6271; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
6272; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6273; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6274  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
6275  ret i32 %res
6276}
6277declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
6278
6279define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6280; SSE-LABEL: test_mm_ucomigt_sd:
6281; SSE:       # %bb.0:
6282; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6283; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
6284; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6285; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6286;
6287; AVX1-LABEL: test_mm_ucomigt_sd:
6288; AVX1:       # %bb.0:
6289; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6290; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
6291; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6292; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6293;
6294; AVX512-LABEL: test_mm_ucomigt_sd:
6295; AVX512:       # %bb.0:
6296; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6297; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
6298; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6299; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6300  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
6301  ret i32 %res
6302}
6303declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
6304
6305define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6306; SSE-LABEL: test_mm_ucomile_sd:
6307; SSE:       # %bb.0:
6308; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6309; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
6310; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6311; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6312;
6313; AVX1-LABEL: test_mm_ucomile_sd:
6314; AVX1:       # %bb.0:
6315; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6316; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
6317; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6318; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6319;
6320; AVX512-LABEL: test_mm_ucomile_sd:
6321; AVX512:       # %bb.0:
6322; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6323; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
6324; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
6325; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6326  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
6327  ret i32 %res
6328}
6329declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
6330
6331define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6332; SSE-LABEL: test_mm_ucomilt_sd:
6333; SSE:       # %bb.0:
6334; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6335; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
6336; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6337; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6338;
6339; AVX1-LABEL: test_mm_ucomilt_sd:
6340; AVX1:       # %bb.0:
6341; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6342; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
6343; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6344; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6345;
6346; AVX512-LABEL: test_mm_ucomilt_sd:
6347; AVX512:       # %bb.0:
6348; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
6349; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
6350; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
6351; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6352  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
6353  ret i32 %res
6354}
6355declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
6356
6357define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
6358; SSE-LABEL: test_mm_ucomineq_sd:
6359; SSE:       # %bb.0:
6360; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
6361; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
6362; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
6363; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
6364; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6365; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6366;
6367; AVX1-LABEL: test_mm_ucomineq_sd:
6368; AVX1:       # %bb.0:
6369; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
6370; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
6371; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
6372; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
6373; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6374; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6375;
6376; AVX512-LABEL: test_mm_ucomineq_sd:
6377; AVX512:       # %bb.0:
6378; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
6379; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
6380; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
6381; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
6382; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
6383; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6384  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
6385  ret i32 %res
6386}
6387declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
6388
6389define <2 x double> @test_mm_undefined_pd() {
6390; CHECK-LABEL: test_mm_undefined_pd:
6391; CHECK:       # %bb.0:
6392; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6393  ret <2 x double> undef
6394}
6395
6396define <2 x i64> @test_mm_undefined_si128() {
6397; CHECK-LABEL: test_mm_undefined_si128:
6398; CHECK:       # %bb.0:
6399; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6400  ret <2 x i64> undef
6401}
6402
6403define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
6404; SSE-LABEL: test_mm_unpackhi_epi8:
6405; SSE:       # %bb.0:
6406; SSE-NEXT:    punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
6407; SSE-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
6408; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6409;
6410; AVX1-LABEL: test_mm_unpackhi_epi8:
6411; AVX1:       # %bb.0:
6412; AVX1-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
6413; AVX1-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
6414; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6415;
6416; AVX512-LABEL: test_mm_unpackhi_epi8:
6417; AVX512:       # %bb.0:
6418; AVX512-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
6419; AVX512-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
6420; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6421  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6422  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
6423  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6424  %bc = bitcast <16 x i8> %res to <2 x i64>
6425  ret <2 x i64> %bc
6426}
6427
6428define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
6429; SSE-LABEL: test_mm_unpackhi_epi16:
6430; SSE:       # %bb.0:
6431; SSE-NEXT:    punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
6432; SSE-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6433; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6434;
6435; AVX1-LABEL: test_mm_unpackhi_epi16:
6436; AVX1:       # %bb.0:
6437; AVX1-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
6438; AVX1-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6439; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6440;
6441; AVX512-LABEL: test_mm_unpackhi_epi16:
6442; AVX512:       # %bb.0:
6443; AVX512-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
6444; AVX512-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6445; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6446  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6447  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6448  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
6449  %bc = bitcast <8 x i16> %res to <2 x i64>
6450  ret <2 x i64> %bc
6451}
6452
6453define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
6454; SSE-LABEL: test_mm_unpackhi_epi32:
6455; SSE:       # %bb.0:
6456; SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
6457; SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6458; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6459;
6460; AVX1-LABEL: test_mm_unpackhi_epi32:
6461; AVX1:       # %bb.0:
6462; AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
6463; AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6464; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6465;
6466; AVX512-LABEL: test_mm_unpackhi_epi32:
6467; AVX512:       # %bb.0:
6468; AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
6469; AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6470; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6471  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6472  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
6473  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
6474  %bc = bitcast <4 x i32> %res to <2 x i64>
6475  ret <2 x i64> %bc
6476}
6477
6478define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
6479; SSE-LABEL: test_mm_unpackhi_epi64:
6480; SSE:       # %bb.0:
6481; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
6482; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6483; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6484;
6485; AVX1-LABEL: test_mm_unpackhi_epi64:
6486; AVX1:       # %bb.0:
6487; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
6488; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6489; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6490;
6491; AVX512-LABEL: test_mm_unpackhi_epi64:
6492; AVX512:       # %bb.0:
6493; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
6494; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6495; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6496  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
6497  ret <2 x i64> %res
6498}
6499
6500define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
6501; SSE-LABEL: test_mm_unpackhi_pd:
6502; SSE:       # %bb.0:
6503; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
6504; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6505; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6506;
6507; AVX1-LABEL: test_mm_unpackhi_pd:
6508; AVX1:       # %bb.0:
6509; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
6510; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6511; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6512;
6513; AVX512-LABEL: test_mm_unpackhi_pd:
6514; AVX512:       # %bb.0:
6515; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
6516; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
6517; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6518  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
6519  ret <2 x double> %res
6520}
6521
6522define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
6523; SSE-LABEL: test_mm_unpacklo_epi8:
6524; SSE:       # %bb.0:
6525; SSE-NEXT:    punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
6526; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6527; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6528;
6529; AVX1-LABEL: test_mm_unpacklo_epi8:
6530; AVX1:       # %bb.0:
6531; AVX1-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
6532; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6533; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6534;
6535; AVX512-LABEL: test_mm_unpacklo_epi8:
6536; AVX512:       # %bb.0:
6537; AVX512-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
6538; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6539; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6540  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6541  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
6542  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
6543  %bc = bitcast <16 x i8> %res to <2 x i64>
6544  ret <2 x i64> %bc
6545}
6546
6547define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
6548; SSE-LABEL: test_mm_unpacklo_epi16:
6549; SSE:       # %bb.0:
6550; SSE-NEXT:    punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
6551; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6552; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6553;
6554; AVX1-LABEL: test_mm_unpacklo_epi16:
6555; AVX1:       # %bb.0:
6556; AVX1-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
6557; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6558; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6559;
6560; AVX512-LABEL: test_mm_unpacklo_epi16:
6561; AVX512:       # %bb.0:
6562; AVX512-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
6563; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6564; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6565  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6566  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6567  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
6568  %bc = bitcast <8 x i16> %res to <2 x i64>
6569  ret <2 x i64> %bc
6570}
6571
6572define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
6573; SSE-LABEL: test_mm_unpacklo_epi32:
6574; SSE:       # %bb.0:
6575; SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
6576; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6577; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6578;
6579; AVX1-LABEL: test_mm_unpacklo_epi32:
6580; AVX1:       # %bb.0:
6581; AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
6582; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6583; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6584;
6585; AVX512-LABEL: test_mm_unpacklo_epi32:
6586; AVX512:       # %bb.0:
6587; AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
6588; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6589; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6590  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6591  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
6592  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
6593  %bc = bitcast <4 x i32> %res to <2 x i64>
6594  ret <2 x i64> %bc
6595}
6596
6597define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
6598; SSE-LABEL: test_mm_unpacklo_epi64:
6599; SSE:       # %bb.0:
6600; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
6601; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6602; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6603;
6604; AVX1-LABEL: test_mm_unpacklo_epi64:
6605; AVX1:       # %bb.0:
6606; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
6607; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6608; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6609;
6610; AVX512-LABEL: test_mm_unpacklo_epi64:
6611; AVX512:       # %bb.0:
6612; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
6613; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6614; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6615  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
6616  ret <2 x i64> %res
6617}
6618
6619define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
6620; SSE-LABEL: test_mm_unpacklo_pd:
6621; SSE:       # %bb.0:
6622; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
6623; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6624; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6625;
6626; AVX1-LABEL: test_mm_unpacklo_pd:
6627; AVX1:       # %bb.0:
6628; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
6629; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6630; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6631;
6632; AVX512-LABEL: test_mm_unpacklo_pd:
6633; AVX512:       # %bb.0:
6634; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
6635; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
6636; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6637  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
6638  ret <2 x double> %res
6639}
6640
6641define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
6642; SSE-LABEL: test_mm_xor_pd:
6643; SSE:       # %bb.0:
6644; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
6645; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6646;
6647; AVX1-LABEL: test_mm_xor_pd:
6648; AVX1:       # %bb.0:
6649; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
6650; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6651;
6652; AVX512-LABEL: test_mm_xor_pd:
6653; AVX512:       # %bb.0:
6654; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
6655; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6656  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
6657  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
6658  %res = xor <4 x i32> %arg0, %arg1
6659  %bc = bitcast <4 x i32> %res to <2 x double>
6660  ret <2 x double> %bc
6661}
6662
6663define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
6664; SSE-LABEL: test_mm_xor_si128:
6665; SSE:       # %bb.0:
6666; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
6667; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6668;
6669; AVX1-LABEL: test_mm_xor_si128:
6670; AVX1:       # %bb.0:
6671; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
6672; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6673;
6674; AVX512-LABEL: test_mm_xor_si128:
6675; AVX512:       # %bb.0:
6676; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
6677; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6678  %res = xor <2 x i64> %a0, %a1
6679  ret <2 x i64> %res
6680}
6681
6682!0 = !{i32 1}
6683
6684