1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,AVX1,X86-AVX1
4; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,AVX512,X86-AVX512
5; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,AVX1,X64-AVX1
7; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,AVX512,X64-AVX512
8
; llvm.x86.sse2.cmp.pd with imm8 7 (the "ordered" predicate) should lower to a
; single (v)cmppd with immediate 0x07 — spelled cmpordpd by the printer.
9define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
10; SSE-LABEL: test_x86_sse2_cmp_pd:
11; SSE:       ## %bb.0:
12; SSE-NEXT:    cmpordpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xc2,0xc1,0x07]
13; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
14;
15; AVX-LABEL: test_x86_sse2_cmp_pd:
16; AVX:       ## %bb.0:
17; AVX-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
18; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
19  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
20  ret <2 x double> %res
21}
22declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
23
24
; Scalar variant of the ordered compare: llvm.x86.sse2.cmp.sd with imm8 7
; should lower to (v)cmpordsd (immediate 0x07 in the encoding).
25define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
26; SSE-LABEL: test_x86_sse2_cmp_sd:
27; SSE:       ## %bb.0:
28; SSE-NEXT:    cmpordsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
29; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
30;
31; AVX-LABEL: test_x86_sse2_cmp_sd:
32; AVX:       ## %bb.0:
33; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
34; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
35  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
36  ret <2 x double> %res
37}
38declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
39
40
; comieq: (v)comisd followed by setnp+sete+andb — ZF alone is not enough for
; equality because an unordered compare also sets ZF; the PF check (setnp)
; filters out the NaN case before the results are ANDed and zero-extended.
41define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
42; SSE-LABEL: test_x86_sse2_comieq_sd:
43; SSE:       ## %bb.0:
44; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
45; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
46; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
47; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
48; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
49; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
50;
51; AVX1-LABEL: test_x86_sse2_comieq_sd:
52; AVX1:       ## %bb.0:
53; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
54; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
55; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
56; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
57; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
58; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
59;
60; AVX512-LABEL: test_x86_sse2_comieq_sd:
61; AVX512:       ## %bb.0:
62; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
63; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
64; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
65; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
66; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
67; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
68  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
69  ret i32 %res
70}
71declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
72
73
; comige: (v)comisd then setae (unsigned-style CF check, the correct condition
; family for COMIS flag results). The xor pre-zeroes eax so only setae's byte
; write is needed for the i32 return.
74define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
75; SSE-LABEL: test_x86_sse2_comige_sd:
76; SSE:       ## %bb.0:
77; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
78; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
79; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
80; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
81;
82; AVX1-LABEL: test_x86_sse2_comige_sd:
83; AVX1:       ## %bb.0:
84; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
85; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
86; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
87; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
88;
89; AVX512-LABEL: test_x86_sse2_comige_sd:
90; AVX512:       ## %bb.0:
91; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
92; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
93; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
94; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
95  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
96  ret i32 %res
97}
98declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
99
100
; comigt: same shape as comige but with seta (CF=0 and ZF=0) for strict
; greater-than.
101define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
102; SSE-LABEL: test_x86_sse2_comigt_sd:
103; SSE:       ## %bb.0:
104; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
105; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
106; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
107; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
108;
109; AVX1-LABEL: test_x86_sse2_comigt_sd:
110; AVX1:       ## %bb.0:
111; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
112; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
113; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
114; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
115;
116; AVX512-LABEL: test_x86_sse2_comigt_sd:
117; AVX512:       ## %bb.0:
118; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
119; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
120; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
121; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
122  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
123  ret i32 %res
124}
125declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
126
127
; comile: lowered by swapping the comisd operands (%xmm0 and %xmm1 trade
; places vs. comige) so the same setae condition implements a0 <= a1.
128define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
129; SSE-LABEL: test_x86_sse2_comile_sd:
130; SSE:       ## %bb.0:
131; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
132; SSE-NEXT:    comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
133; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
134; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
135;
136; AVX1-LABEL: test_x86_sse2_comile_sd:
137; AVX1:       ## %bb.0:
138; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
139; AVX1-NEXT:    vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
140; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
141; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
142;
143; AVX512-LABEL: test_x86_sse2_comile_sd:
144; AVX512:       ## %bb.0:
145; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
146; AVX512-NEXT:    vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
147; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
148; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
149  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
150  ret i32 %res
151}
152declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
153
154
; comilt: operand-swapped comisd plus seta implements the strict a0 < a1
; (mirror of comigt, analogous to comile above).
155define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
156; SSE-LABEL: test_x86_sse2_comilt_sd:
157; SSE:       ## %bb.0:
158; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
159; SSE-NEXT:    comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
160; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
161; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
162;
163; AVX1-LABEL: test_x86_sse2_comilt_sd:
164; AVX1:       ## %bb.0:
165; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
166; AVX1-NEXT:    vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
167; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
168; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
169;
170; AVX512-LABEL: test_x86_sse2_comilt_sd:
171; AVX512:       ## %bb.0:
172; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
173; AVX512-NEXT:    vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
174; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
175; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
176  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
177  ret i32 %res
178}
179declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
180
181
; comineq: dual of comieq — setp+setne+orb, since an unordered result (PF set)
; must also count as "not equal".
182define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
183; SSE-LABEL: test_x86_sse2_comineq_sd:
184; SSE:       ## %bb.0:
185; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
186; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
187; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
188; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
189; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
190; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
191;
192; AVX1-LABEL: test_x86_sse2_comineq_sd:
193; AVX1:       ## %bb.0:
194; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
195; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
196; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
197; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
198; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
199; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
200;
201; AVX512-LABEL: test_x86_sse2_comineq_sd:
202; AVX512:       ## %bb.0:
203; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
204; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
205; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
206; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
207; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
208; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
209  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
210  ret i32 %res
211}
212declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
213
214
; llvm.x86.sse2.cvtpd2dq maps 1:1 onto (v)cvtpd2dq (round-to-current-mode
; double -> i32 conversion of the two lanes).
215define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
216; SSE-LABEL: test_x86_sse2_cvtpd2dq:
217; SSE:       ## %bb.0:
218; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
219; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
220;
221; AVX1-LABEL: test_x86_sse2_cvtpd2dq:
222; AVX1:       ## %bb.0:
223; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
224; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
225;
226; AVX512-LABEL: test_x86_sse2_cvtpd2dq:
227; AVX512:       ## %bb.0:
228; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
229; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
230  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
231  ret <4 x i32> %res
232}
233declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
234
235
; The explicit shuffle that zeroes the upper two i32 lanes must fold away —
; the checks expect a lone (v)cvtpd2dq with no extra zeroing instruction
; (the convert is known to produce zeros in the upper half already).
236define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind {
237; SSE-LABEL: test_mm_cvtpd_epi32_zext:
238; SSE:       ## %bb.0:
239; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
240; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
241;
242; AVX1-LABEL: test_mm_cvtpd_epi32_zext:
243; AVX1:       ## %bb.0:
244; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
245; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
246;
247; AVX512-LABEL: test_mm_cvtpd_epi32_zext:
248; AVX512:       ## %bb.0:
249; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
250; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
251  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
252  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
253  %bc = bitcast <4 x i32> %res to <2 x i64>
254  ret <2 x i64> %bc
255}
256
257
; Same as above but with the source loaded from memory: the load must fold
; into the convert's memory operand. The AVX printer uses the "x" suffix
; (vcvtpd2dqx) to pin the memory operand to 128 bits.
258define <2 x i64> @test_mm_cvtpd_epi32_zext_load(<2 x double>* %p0) nounwind {
259; X86-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
260; X86-SSE:       ## %bb.0:
261; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
262; X86-SSE-NEXT:    cvtpd2dq (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x00]
263; X86-SSE-NEXT:    retl ## encoding: [0xc3]
264;
265; X86-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
266; X86-AVX1:       ## %bb.0:
267; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
268; X86-AVX1-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x00]
269; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
270;
271; X86-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
272; X86-AVX512:       ## %bb.0:
273; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
274; X86-AVX512-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x00]
275; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
276;
277; X64-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
278; X64-SSE:       ## %bb.0:
279; X64-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x07]
280; X64-SSE-NEXT:    retq ## encoding: [0xc3]
281;
282; X64-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
283; X64-AVX1:       ## %bb.0:
284; X64-AVX1-NEXT:    vcvtpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x07]
285; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
286;
287; X64-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
288; X64-AVX512:       ## %bb.0:
289; X64-AVX512-NEXT:    vcvtpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x07]
290; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
291  %a0 = load <2 x double>, <2 x double>* %p0
292  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
293  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
294  %bc = bitcast <4 x i32> %res to <2 x i64>
295  ret <2 x i64> %bc
296}
297
298
; llvm.x86.sse2.cvtpd2ps maps 1:1 onto (v)cvtpd2ps (double -> float narrowing
; of the two lanes).
299define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
300; SSE-LABEL: test_x86_sse2_cvtpd2ps:
301; SSE:       ## %bb.0:
302; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
303; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
304;
305; AVX1-LABEL: test_x86_sse2_cvtpd2ps:
306; AVX1:       ## %bb.0:
307; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
308; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
309;
310; AVX512-LABEL: test_x86_sse2_cvtpd2ps:
311; AVX512:       ## %bb.0:
312; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
313; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
314  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
315  ret <4 x float> %res
316}
317declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
318
; The shuffle zeroing the upper two float lanes must fold into the convert —
; only a single (v)cvtpd2ps is expected in the output.
319define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind {
320; SSE-LABEL: test_x86_sse2_cvtpd2ps_zext:
321; SSE:       ## %bb.0:
322; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
323; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
324;
325; AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext:
326; AVX1:       ## %bb.0:
327; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
328; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
329;
330; AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext:
331; AVX512:       ## %bb.0:
332; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
333; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
334  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
335  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
336  ret <4 x float> %res
337}
338
; Memory-source variant: load folds into the convert; AVX spells the 128-bit
; memory form vcvtpd2psx. The zeroing shuffle again leaves no residue.
339define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(<2 x double>* %p0) nounwind {
340; X86-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
341; X86-SSE:       ## %bb.0:
342; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
343; X86-SSE-NEXT:    cvtpd2ps (%eax), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x00]
344; X86-SSE-NEXT:    retl ## encoding: [0xc3]
345;
346; X86-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
347; X86-AVX1:       ## %bb.0:
348; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
349; X86-AVX1-NEXT:    vcvtpd2psx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x00]
350; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
351;
352; X86-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
353; X86-AVX512:       ## %bb.0:
354; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
355; X86-AVX512-NEXT:    vcvtpd2psx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x00]
356; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
357;
358; X64-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
359; X64-SSE:       ## %bb.0:
360; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x07]
361; X64-SSE-NEXT:    retq ## encoding: [0xc3]
362;
363; X64-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
364; X64-AVX1:       ## %bb.0:
365; X64-AVX1-NEXT:    vcvtpd2psx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x07]
366; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
367;
368; X64-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
369; X64-AVX512:       ## %bb.0:
370; X64-AVX512-NEXT:    vcvtpd2psx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x07]
371; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
372  %a0 = load <2 x double>, <2 x double>* %p0
373  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
374  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
375  ret <4 x float> %res
376}
377
; llvm.x86.sse2.cvtps2dq maps 1:1 onto (v)cvtps2dq (float -> i32, current
; rounding mode).
378define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
379; SSE-LABEL: test_x86_sse2_cvtps2dq:
380; SSE:       ## %bb.0:
381; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5b,0xc0]
382; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
383;
384; AVX1-LABEL: test_x86_sse2_cvtps2dq:
385; AVX1:       ## %bb.0:
386; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0]
387; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
388;
389; AVX512-LABEL: test_x86_sse2_cvtps2dq:
390; AVX512:       ## %bb.0:
391; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
392; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
393  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
394  ret <4 x i32> %res
395}
396declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
397
398
; Scalar double -> i32 with current rounding mode: single (v)cvtsd2si into eax.
399define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
400; SSE-LABEL: test_x86_sse2_cvtsd2si:
401; SSE:       ## %bb.0:
402; SSE-NEXT:    cvtsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2d,0xc0]
403; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
404;
405; AVX1-LABEL: test_x86_sse2_cvtsd2si:
406; AVX1:       ## %bb.0:
407; AVX1-NEXT:    vcvtsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2d,0xc0]
408; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
409;
410; AVX512-LABEL: test_x86_sse2_cvtsd2si:
411; AVX512:       ## %bb.0:
412; AVX512-NEXT:    vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
413; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
414  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
415  ret i32 %res
416}
417declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
418
419
; Scalar double -> float merged into %a0's low lane: (v)cvtsd2ss. The AVX form
; carries the pass-through register as the extra (second) source operand.
420define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
421; SSE-LABEL: test_x86_sse2_cvtsd2ss:
422; SSE:       ## %bb.0:
423; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
424; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
425;
426; AVX1-LABEL: test_x86_sse2_cvtsd2ss:
427; AVX1:       ## %bb.0:
428; AVX1-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
429; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
430;
431; AVX512-LABEL: test_x86_sse2_cvtsd2ss:
432; AVX512:       ## %bb.0:
433; AVX512-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
434; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
435  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
436  ret <4 x float> %res
437}
438declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
439
440
; The vector load of %p1 should fold into (v)cvtsd2ss's scalar memory operand
; (only the low double is consumed) — note this test runs with
; -disable-peephole, so the fold must come from ISel itself.
441define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
442; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
443; X86-SSE:       ## %bb.0:
444; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
445; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
446; X86-SSE-NEXT:    retl ## encoding: [0xc3]
447;
448; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
449; X86-AVX1:       ## %bb.0:
450; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
451; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
452; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
453;
454; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
455; X86-AVX512:       ## %bb.0:
456; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
457; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
458; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
459;
460; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
461; X64-SSE:       ## %bb.0:
462; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
463; X64-SSE-NEXT:    retq ## encoding: [0xc3]
464;
465; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
466; X64-AVX1:       ## %bb.0:
467; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
468; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
469;
470; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
471; X64-AVX512:       ## %bb.0:
472; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
473; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
474  %a1 = load <2 x double>, <2 x double>* %p1
475  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
476  ret <4 x float> %res
477}
478
479
; optsize twin of the previous test: the load-fold must also happen when
; optimizing for size — expected output is identical to the non-optsize case.
480define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x double>* %p1) optsize {
481; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
482; X86-SSE:       ## %bb.0:
483; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
484; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
485; X86-SSE-NEXT:    retl ## encoding: [0xc3]
486;
487; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
488; X86-AVX1:       ## %bb.0:
489; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
490; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
491; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
492;
493; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
494; X86-AVX512:       ## %bb.0:
495; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
496; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
497; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
498;
499; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
500; X64-SSE:       ## %bb.0:
501; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
502; X64-SSE-NEXT:    retq ## encoding: [0xc3]
503;
504; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
505; X64-AVX1:       ## %bb.0:
506; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
507; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
508;
509; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
510; X64-AVX512:       ## %bb.0:
511; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
512; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
513  %a1 = load <2 x double>, <2 x double>* %p1
514  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
515  ret <4 x float> %res
516}
517
518
; Truncating (round-toward-zero) variant: llvm.x86.sse2.cvttpd2dq maps 1:1
; onto (v)cvttpd2dq.
519define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
520; SSE-LABEL: test_x86_sse2_cvttpd2dq:
521; SSE:       ## %bb.0:
522; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
523; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
524;
525; AVX1-LABEL: test_x86_sse2_cvttpd2dq:
526; AVX1:       ## %bb.0:
527; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
528; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
529;
530; AVX512-LABEL: test_x86_sse2_cvttpd2dq:
531; AVX512:       ## %bb.0:
532; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
533; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
534  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
535  ret <4 x i32> %res
536}
537declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
538
539
; The upper-lane zeroing shuffle must fold into (v)cvttpd2dq — checks expect
; the bare convert with no extra zeroing instruction.
540define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {
541; SSE-LABEL: test_mm_cvttpd_epi32_zext:
542; SSE:       ## %bb.0:
543; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
544; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
545;
546; AVX1-LABEL: test_mm_cvttpd_epi32_zext:
547; AVX1:       ## %bb.0:
548; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
549; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
550;
551; AVX512-LABEL: test_mm_cvttpd_epi32_zext:
552; AVX512:       ## %bb.0:
553; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
554; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
555  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
556  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
557  %bc = bitcast <4 x i32> %res to <2 x i64>
558  ret <2 x i64> %bc
559}
560
561
; Memory-source variant of the truncating convert: load folds into the
; instruction; AVX spells the 128-bit memory form vcvttpd2dqx.
562define <2 x i64> @test_mm_cvttpd_epi32_zext_load(<2 x double>* %p0) nounwind {
563; X86-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
564; X86-SSE:       ## %bb.0:
565; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
566; X86-SSE-NEXT:    cvttpd2dq (%eax), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x00]
567; X86-SSE-NEXT:    retl ## encoding: [0xc3]
568;
569; X86-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
570; X86-AVX1:       ## %bb.0:
571; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
572; X86-AVX1-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x00]
573; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
574;
575; X86-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
576; X86-AVX512:       ## %bb.0:
577; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
578; X86-AVX512-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x00]
579; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
580;
581; X64-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
582; X64-SSE:       ## %bb.0:
583; X64-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x07]
584; X64-SSE-NEXT:    retq ## encoding: [0xc3]
585;
586; X64-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
587; X64-AVX1:       ## %bb.0:
588; X64-AVX1-NEXT:    vcvttpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x07]
589; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
590;
591; X64-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
592; X64-AVX512:       ## %bb.0:
593; X64-AVX512-NEXT:    vcvttpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x07]
594; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
595  %a0 = load <2 x double>, <2 x double>* %p0
596  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
597  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
598  %bc = bitcast <4 x i32> %res to <2 x i64>
599  ret <2 x i64> %bc
600}
601
602
; llvm.x86.sse2.cvttps2dq maps 1:1 onto (v)cvttps2dq (truncating float -> i32).
603define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
604; SSE-LABEL: test_x86_sse2_cvttps2dq:
605; SSE:       ## %bb.0:
606; SSE-NEXT:    cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0]
607; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
608;
609; AVX1-LABEL: test_x86_sse2_cvttps2dq:
610; AVX1:       ## %bb.0:
611; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
612; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
613;
614; AVX512-LABEL: test_x86_sse2_cvttps2dq:
615; AVX512:       ## %bb.0:
616; AVX512-NEXT:    vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
617; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
618  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
619  ret <4 x i32> %res
620}
621declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
622
623
; Scalar truncating double -> i32: single (v)cvttsd2si into eax.
624define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
625; SSE-LABEL: test_x86_sse2_cvttsd2si:
626; SSE:       ## %bb.0:
627; SSE-NEXT:    cvttsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2c,0xc0]
628; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
629;
630; AVX1-LABEL: test_x86_sse2_cvttsd2si:
631; AVX1:       ## %bb.0:
632; AVX1-NEXT:    vcvttsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2c,0xc0]
633; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
634;
635; AVX512-LABEL: test_x86_sse2_cvttsd2si:
636; AVX512:       ## %bb.0:
637; AVX512-NEXT:    vcvttsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
638; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
639  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
640  ret i32 %res
641}
642declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
643
644
; llvm.x86.sse2.max.pd keeps the intrinsic's exact x86 semantics (NaN/signed-
; zero handling) and must lower to a single (v)maxpd — never a generic fmax.
645define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
646; SSE-LABEL: test_x86_sse2_max_pd:
647; SSE:       ## %bb.0:
648; SSE-NEXT:    maxpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5f,0xc1]
649; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
650;
651; AVX1-LABEL: test_x86_sse2_max_pd:
652; AVX1:       ## %bb.0:
653; AVX1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5f,0xc1]
654; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
655;
656; AVX512-LABEL: test_x86_sse2_max_pd:
657; AVX512:       ## %bb.0:
658; AVX512-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
659; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
660  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
661  ret <2 x double> %res
662}
663declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
664
665
; Scalar counterpart: llvm.x86.sse2.max.sd lowers to a single (v)maxsd.
666define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
667; SSE-LABEL: test_x86_sse2_max_sd:
668; SSE:       ## %bb.0:
669; SSE-NEXT:    maxsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5f,0xc1]
670; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
671;
672; AVX1-LABEL: test_x86_sse2_max_sd:
673; AVX1:       ## %bb.0:
674; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5f,0xc1]
675; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
676;
677; AVX512-LABEL: test_x86_sse2_max_sd:
678; AVX512:       ## %bb.0:
679; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
680; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
681  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
682  ret <2 x double> %res
683}
684declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
685
686
687define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
688; SSE-LABEL: test_x86_sse2_min_pd:
689; SSE:       ## %bb.0:
690; SSE-NEXT:    minpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5d,0xc1]
691; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
692;
693; AVX1-LABEL: test_x86_sse2_min_pd:
694; AVX1:       ## %bb.0:
695; AVX1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5d,0xc1]
696; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
697;
698; AVX512-LABEL: test_x86_sse2_min_pd:
699; AVX512:       ## %bb.0:
700; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
701; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
702  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
703  ret <2 x double> %res
704}
705declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
706
707
708define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
709; SSE-LABEL: test_x86_sse2_min_sd:
710; SSE:       ## %bb.0:
711; SSE-NEXT:    minsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5d,0xc1]
712; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
713;
714; AVX1-LABEL: test_x86_sse2_min_sd:
715; AVX1:       ## %bb.0:
716; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5d,0xc1]
717; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
718;
719; AVX512-LABEL: test_x86_sse2_min_sd:
720; AVX512:       ## %bb.0:
721; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
722; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
723  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
724  ret <2 x double> %res
725}
726declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
727
728
; Extract sign bits of both f64 lanes into a GPR (movmskpd). The same VEX
; encoding is expected for both AVX1 and AVX512 runs, hence one AVX prefix.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_movmsk_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    movmskpd %xmm0, %eax ## encoding: [0x66,0x0f,0x50,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_movmsk_pd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax ## encoding: [0xc5,0xf9,0x50,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
743
744
; Signed-saturating pack i32 -> i16 (packssdw).
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_packssdw_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_packssdw_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_packssdw_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; With constant operands the pack must constant-fold to a single load of the
; pre-packed vector from the constant pool (no packssdw should be emitted).
define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
; X86-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
  ret <8 x i16> %res
}
811
812
; Signed-saturating pack i16 -> i8 (packsswb).
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packsswb_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_packsswb_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_packsswb_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; Constant operands: the pack must fold to one constant-pool load.
define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
; X86-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
; NOTE(review): i16 -32678 below looks like a typo for -32768 (i16 min), but
; both saturate to -128 so the checked result is unaffected — left as-is.
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
  ret <16 x i8> %res
}
879
880
; Unsigned-saturating pack i16 -> i8 (packuswb).
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packuswb_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_packuswb_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x67,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_packuswb_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; Constant operands: the pack must fold to one constant-pool load
; (negative inputs clamp to 0 under unsigned saturation).
define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
; X86-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
; NOTE(review): i16 -32678 below looks like a typo for -32768 (i16 min), but
; both clamp to 0 under unsigned saturation — left as-is.
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
  ret <16 x i8> %res
}
947
948
; Unsigned byte rounding average (pavgb).
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pavg_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pavg_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; Unsigned word rounding average (pavgw).
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pavg_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pavg_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
989
990
; Multiply-add of adjacent i16 pairs into i32 lanes (pmaddwd).
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmadd_wd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaddwd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmadd_wd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmadd_wd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1010
1011
; Extract the 16 byte sign bits into a GPR (pmovmskb). Shared AVX prefix:
; AVX1 and AVX512 runs expect the identical VEX encoding.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse2_pmovmskb_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax ## encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_pmovmskb_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax ## encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1026
1027
; High half of signed i16 multiply (pmulhw).
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulh_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulh_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulh_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; High half of unsigned i16 multiply (pmulhuw).
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulhu_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulhu_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulhu_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1068
1069
; Sum of absolute byte differences into two i64 accumulators (psadbw).
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psad_bw:
; SSE:       ## %bb.0:
; SSE-NEXT:    psadbw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psad_bw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psad_bw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
1089
1090
; Logical left shift of i32 lanes by a count in an XMM register (pslld).
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psll_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


; Logical left shift of i64 lanes by a register count (psllq).
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psll_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


; Logical left shift of i16 lanes by a register count (psllw).
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psll_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
1152
1153
; Immediate-count logical left shift of i32 lanes (pslld $imm).
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


; Immediate-count logical left shift of i64 lanes (psllq $imm).
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


; Immediate-count logical left shift of i16 lanes (psllw $imm).
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
1215
1216
; Arithmetic right shift of i32 lanes by a register count (psrad).
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psra_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrad %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psra_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psra_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


; Arithmetic right shift of i16 lanes by a register count (psraw).
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psra_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psraw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psra_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psra_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
1257
1258
; Immediate-count arithmetic right shift of i32 lanes (psrad $imm).
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrad $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xe0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrai_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrad $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xe0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrai_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; Immediate-count arithmetic right shift of i16 lanes (psraw $imm).
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psraw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xe0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrai_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsraw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xe0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrai_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
1299
1300
; Logical right shift of i32 lanes by a register count (psrld).
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; Logical right shift of i64 lanes by a register count (psrlq).
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; Logical right shift of i16 lanes by a register count (psrlw).
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
1362
1363
; Same as above but with the shift-count operand loaded from memory: the load
; should fold into the psrlw/vpsrlw memory operand (no separate movdqa).
; -disable-peephole (see RUN lines) keeps the folded form stable.
define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, <8 x i16>* %p) {
; X86-SSE-LABEL: test_x86_sse2_psrl_w_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    psrlw (%eax), %xmm0 ## encoding: [0x66,0x0f,0xd1,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_psrl_w_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpsrlw (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_psrl_w_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpsrlw (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_psrl_w_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    psrlw (%rdi), %xmm0 ## encoding: [0x66,0x0f,0xd1,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_psrl_w_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_psrl_w_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <8 x i16>, <8 x i16>* %p
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
1401
1402
; Immediate-count variant: llvm.x86.sse2.psrli.d with i32 7 should lower to
; psrld $7 / vpsrld $7 (shift count encoded as an immediate, not a register).
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
1422
1423
; llvm.x86.sse2.psrli.q with i32 7 should lower to psrlq $7 / vpsrlq $7.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
1443
1444
; llvm.x86.sse2.psrli.w with i32 7 should lower to psrlw $7 / vpsrlw $7.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
1464
1465
; llvm.x86.sse2.ucomieq.sd: unordered compare via ucomisd. Equality must be
; ZF=1 AND PF=0 (PF=1 means unordered/NaN), hence setnp+sete combined with
; andb before zero-extending the i1 result into %eax.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomieq_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomieq_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1497
1498
; llvm.x86.sse2.ucomige.sd: >= maps to setae (CF=0) after ucomisd, with %eax
; zeroed first so the setcc byte yields a clean i32 0/1 result.
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomige_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomige_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomige_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
1524
1525
; llvm.x86.sse2.ucomigt.sd: > maps to seta (CF=0 and ZF=0) after ucomisd.
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomigt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomigt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomigt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
1551
1552
; llvm.x86.sse2.ucomile.sd: a0 <= a1 is implemented by swapping the operands
; (ucomisd %xmm0, %xmm1 compares a1 against a0) and using setae.
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomile_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomile_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
1578
1579
; llvm.x86.sse2.ucomilt.sd: a0 < a1 is implemented by swapping the operands
; and using seta.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomilt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomilt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
1605
1606
; llvm.x86.sse2.ucomineq.sd: inequality is ZF=0 OR PF=1 (unordered counts as
; not-equal), hence setp+setne combined with orb — the dual of the ucomieq
; lowering above.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomineq_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomineq_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
1638
; llvm.x86.sse2.pause lowers to the pause instruction (F3 90); output is the
; same on all six RUN configurations, so a single CHECK prefix is used.
define void @test_x86_sse2_pause() {
; CHECK-LABEL: test_x86_sse2_pause:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pause ## encoding: [0xf3,0x90]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind
1648
; llvm.x86.sse2.lfence lowers to a single lfence instruction.
define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    lfence ## encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind
1658
; llvm.x86.sse2.mfence lowers to a single mfence instruction.
define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    mfence ## encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind
1668
; llvm.x86.sse2.clflush lowers to clflush with a memory operand; on 32-bit
; the pointer argument is first loaded from the stack into %eax, on 64-bit
; it is already in %rdi.
define void @clflush(i8* %p) nounwind {
; X86-LABEL: clflush:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    clflush (%eax) ## encoding: [0x0f,0xae,0x38]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: clflush:
; X64:       ## %bb.0:
; X64-NEXT:    clflush (%rdi) ## encoding: [0x0f,0xae,0x3f]
; X64-NEXT:    retq ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind
1684