1; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=corei7 | FileCheck %s
2
3
; Immediate-mask blend of two <2 x double> values with a mask of 0.
; The checks inside the body expect no blendpd instruction ahead of the
; return (presumably because a zero mask keeps every lane of %a0).
define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blend_pd
; CHECK-NOT: blendpd
; CHECK: ret
  %blended = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 0)
  ret <2 x double> %blended
}
11
12
; Immediate-mask blend of two <4 x float> values with a mask of 0.
; The checks inside the body expect no blendps instruction ahead of the
; return (presumably because a zero mask keeps every lane of %a0).
define <4 x float> @test_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blend_ps
; CHECK-NOT: blendps
; CHECK: ret
  %blended = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 0)
  ret <4 x float> %blended
}
20
21
; Variable-mask blend of two <2 x double> values where the mask operand is
; the constant zeroinitializer. The checks inside the body expect no
; blendvpd instruction ahead of the return.
define <2 x double> @test_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendv_pd
; CHECK-NOT: blendvpd
; CHECK: ret
  %blended = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer)
  ret <2 x double> %blended
}
29
30
; Variable-mask blend of two <4 x float> values where the mask operand is
; the constant zeroinitializer. The checks inside the body expect no
; blendvps instruction ahead of the return.
define <4 x float> @test_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendv_ps
; CHECK-NOT: blendvps
; CHECK: ret
  %blended = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer)
  ret <4 x float> %blended
}
38
39
; Variable-mask byte blend of two <16 x i8> values where the mask operand
; is the constant zeroinitializer. The checks inside the body expect no
; pblendvb instruction ahead of the return.
define <16 x i8> @test_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendv_b
; CHECK-NOT: pblendvb
; CHECK: ret
  %blended = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer)
  ret <16 x i8> %blended
}
47
48
; Immediate-mask word blend of two <8 x i16> values with a mask of 0.
; The checks inside the body expect no pblendw instruction ahead of the
; return (presumably because a zero mask keeps every lane of %a0).
define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblend_w
; CHECK-NOT: pblendw
; CHECK: ret
  %blended = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
  ret <8 x i16> %blended
}
56
57
; Immediate-mask blend of two <2 x double> values with a mask of -1 (all
; bits set). The checks inside the body expect no blendpd instruction, only
; a register copy of %xmm1 into %xmm0 immediately followed by the return
; (presumably %xmm1 is the register carrying %a1 -- confirm against the ABI).
define <2 x double> @test2_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test2_x86_sse41_blend_pd
; CHECK-NOT: blendpd
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %blended = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 -1)
  ret <2 x double> %blended
}
66
67
; Immediate-mask blend of two <4 x float> values with a mask of -1 (all
; bits set). The checks inside the body expect no blendps instruction, only
; a register copy of %xmm1 into %xmm0 immediately followed by the return
; (presumably %xmm1 is the register carrying %a1 -- confirm against the ABI).
define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test2_x86_sse41_blend_ps
; CHECK-NOT: blendps
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %blended = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 -1)
  ret <4 x float> %blended
}
76
77
; Variable-mask blend of two <2 x double> values where the mask is an
; all-ones bit pattern (two i64 -1 lanes reinterpreted as <2 x double>).
; The checks inside the body expect no blendvpd instruction, only a
; register copy of %xmm1 into %xmm0 immediately followed by the return.
define <2 x double> @test2_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test2_x86_sse41_blendv_pd
; CHECK-NOT: blendvpd
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %all_ones = bitcast <2 x i64> <i64 -1, i64 -1> to <2 x double>
  %blended = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %all_ones)
  ret <2 x double> %blended
}
87
88
; Variable-mask blend of two <4 x float> values where the mask is an
; all-ones bit pattern (two i64 -1 lanes reinterpreted as <4 x float>).
; The checks inside the body expect no blendvps instruction, only a
; register copy of %xmm1 into %xmm0 immediately followed by the return.
define <4 x float> @test2_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test2_x86_sse41_blendv_ps
; CHECK-NOT: blendvps
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %all_ones = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x float>
  %blended = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %all_ones)
  ret <4 x float> %blended
}
98
99
; Variable-mask byte blend of two <16 x i8> values where the mask is an
; all-ones bit pattern (two i64 -1 lanes reinterpreted as <16 x i8>).
; The checks inside the body expect no pblendvb instruction, only a
; register copy of %xmm1 into %xmm0 immediately followed by the return.
; NOTE(review): the third parameter %a2 is never read in this function; it
; is kept so the signature stays exactly as it was.
define <16 x i8> @test2_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test2_x86_sse41_pblendv_b
; CHECK-NOT: pblendvb
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %all_ones = bitcast <2 x i64> <i64 -1, i64 -1> to <16 x i8>
  %blended = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %all_ones)
  ret <16 x i8> %blended
}
109
110
; Immediate-mask word blend of two <8 x i16> values with a mask of -1 (all
; bits set). The checks inside the body expect no pblendw instruction, only
; a register copy of %xmm1 into %xmm0 immediately followed by the return
; (presumably %xmm1 is the register carrying %a1 -- confirm against the ABI).
define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test2_x86_sse41_pblend_w
; CHECK-NOT: pblendw
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: ret
  %blended = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
  ret <8 x i16> %blended
}
119
120
; Immediate-mask blend (mask 7) where both data operands are the same value
; %a0, so either choice per lane yields %a0. The checks inside the body
; expect no blendpd instruction ahead of the return.
define <2 x double> @test3_x86_sse41_blend_pd(<2 x double> %a0) {
; CHECK-LABEL: test3_x86_sse41_blend_pd
; CHECK-NOT: blendpd
; CHECK: ret
  %blended = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a0, i32 7)
  ret <2 x double> %blended
}
128
129
; Immediate-mask blend (mask 7) where both data operands are the same value
; %a0, so either choice per lane yields %a0. The checks inside the body
; expect no blendps instruction ahead of the return.
define <4 x float> @test3_x86_sse41_blend_ps(<4 x float> %a0) {
; CHECK-LABEL: test3_x86_sse41_blend_ps
; CHECK-NOT: blendps
; CHECK: ret
  %blended = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a0, i32 7)
  ret <4 x float> %blended
}
137
138
; Variable-mask blend where both data operands are the same value %a0 and
; the mask is the non-constant argument %a1. Whatever the mask holds, each
; lane can only come from %a0; the checks inside the body expect no
; blendvpd instruction ahead of the return.
define <2 x double> @test3_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test3_x86_sse41_blendv_pd
; CHECK-NOT: blendvpd
; CHECK: ret
  %blended = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %blended
}
146
147
; Variable-mask blend where both data operands are the same value %a0 and
; the mask is the non-constant argument %a1. Whatever the mask holds, each
; lane can only come from %a0; the checks inside the body expect no
; blendvps instruction ahead of the return.
define <4 x float> @test3_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test3_x86_sse41_blendv_ps
; CHECK-NOT: blendvps
; CHECK: ret
  %blended = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %blended
}
155
156
; Variable-mask byte blend where both data operands are the same value %a0
; and the mask is the non-constant argument %a1. Whatever the mask holds,
; each lane can only come from %a0; the checks inside the body expect no
; pblendvb instruction ahead of the return.
define <16 x i8> @test3_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test3_x86_sse41_pblendv_b
; CHECK-NOT: pblendvb
; CHECK: ret
  %blended = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %blended
}
164
165
; Immediate-mask word blend (mask 7) where both data operands are the same
; value %a0, so either choice per lane yields %a0. The checks inside the
; body expect no pblendw instruction ahead of the return.
define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
; CHECK-LABEL: test3_x86_sse41_pblend_w
; CHECK-NOT: pblendw
; CHECK: ret
  %blended = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
  ret <8 x i16> %blended
}
173
174
; SSE4.1 blend intrinsics whose mask is an i32 operand.
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)

; SSE4.1 blend intrinsics whose mask is a third vector operand.
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)

; NOTE(review): no function visible in this file calls phminposuw; the
; declaration is kept in case something outside this view relies on it.
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>)
182
183