1; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 |  FileCheck %s
2
; Verify that when a vector divide/remainder operation is widened, we generate
; exactly one scalar divide/remainder per original element, since
; divide/remainder can trap.
5
; vectorDiv: signed divide of two <2 x i32> vectors that are loaded through
; addrspace(1) pointers, with the quotient stored through %qdest.
; The checks below expect two 64-bit signed divides and no 32-bit idivl.
; NOTE(review): %idx.slot is read but never stored to, so the element index is
; an uninitialized value; presumably only the divide lowering matters here.
; CHECK: vectorDiv
define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
; CHECK: idivq
; CHECK: idivq
; CHECK-NOT: idivl
; CHECK: ret
entry:
  ; Stack slots for the three pointer arguments and the element index.
  %num.slot = alloca <2 x i32> addrspace(1)*, align 4
  %den.slot = alloca <2 x i32> addrspace(1)*, align 4
  %quot.slot = alloca <2 x i32> addrspace(1)*, align 4
  %idx.slot = alloca i32, align 4
  store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %num.slot
  store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %den.slot
  store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %quot.slot
  ; Address of the destination element.
  %q.base = load <2 x i32> addrspace(1)** %quot.slot
  %q.idx = load i32* %idx.slot
  %q.ptr = getelementptr <2 x i32> addrspace(1)* %q.base, i32 %q.idx
  ; Numerator vector.
  %n.base = load <2 x i32> addrspace(1)** %num.slot
  %n.idx = load i32* %idx.slot
  %n.ptr = getelementptr <2 x i32> addrspace(1)* %n.base, i32 %n.idx
  %n.vec = load <2 x i32> addrspace(1)* %n.ptr
  ; Denominator vector.
  %d.base = load <2 x i32> addrspace(1)** %den.slot
  %d.idx = load i32* %idx.slot
  %d.ptr = getelementptr <2 x i32> addrspace(1)* %d.base, i32 %d.idx
  %d.vec = load <2 x i32> addrspace(1)* %d.ptr
  ; The operation under test.
  %quot = sdiv <2 x i32> %n.vec, %d.vec
  store <2 x i32> %quot, <2 x i32> addrspace(1)* %q.ptr
  ret void
}
35
; test_char_div: signed divide of <3 x i8>; the checks expect exactly three
; idivb instructions before the return.
; CHECK: test_char_div
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: idivb
; CHECK: idivb
; CHECK: idivb
; CHECK-NOT: idivb
; CHECK: ret
  %quot = sdiv <3 x i8> %num, %div
  ret <3 x i8> %quot
}
46
; test_uchar_div: unsigned divide of <3 x i8>; the checks expect exactly three
; divb instructions before the return.
; CHECK: test_uchar_div
define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: divb
; CHECK: divb
; CHECK: divb
; CHECK-NOT: divb
; CHECK: ret
  %quot = udiv <3 x i8> %num, %div
  ret <3 x i8> %quot
}
57
; test_short_div: signed divide of <5 x i16>; the checks expect exactly five
; idivw instructions before the return.
; CHECK: test_short_div
define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %quot = sdiv <5 x i16> %num, %div
  ret <5 x i16> %quot
}
70
; test_ushort_div: unsigned divide of <4 x i16>. Note that the checks expect
; exactly four 32-bit divl instructions (not divw) before the return.
; CHECK: test_ushort_div
define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %quot = udiv <4 x i16> %num, %div
  ret <4 x i16> %quot
}
82
; test_uint_div: unsigned divide of <3 x i32>; the checks expect exactly three
; divl instructions before the return.
; CHECK: test_uint_div
define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %quot = udiv <3 x i32> %num, %div
  ret <3 x i32> %quot
}
93
; test_long_div: signed divide of <3 x i64>; the checks expect exactly three
; idivq instructions before the return.
; CHECK: test_long_div
define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: idivq
; CHECK: idivq
; CHECK: idivq
; CHECK-NOT: idivq
; CHECK: ret
  %quot = sdiv <3 x i64> %num, %div
  ret <3 x i64> %quot
}
104
; test_ulong_div: unsigned divide of <3 x i64>; the checks expect exactly three
; divq instructions before the return.
; CHECK: test_ulong_div
define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %quot = udiv <3 x i64> %num, %div
  ret <3 x i64> %quot
}
115
; test_char_rem: signed remainder of <4 x i8>. Note that the checks expect
; exactly four 32-bit idivl instructions (not idivb) before the return.
; CHECK: test_char_rem
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
  %remainder = srem <4 x i8> %num, %rem
  ret <4 x i8> %remainder
}
127
; test_short_rem: signed remainder of <5 x i16>; the checks expect exactly five
; idivw instructions before the return.
; CHECK: test_short_rem
define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %remainder = srem <5 x i16> %num, %rem
  ret <5 x i16> %remainder
}
140
; test_uint_rem: unsigned remainder of <4 x i32>; the checks expect exactly
; four divl instructions before the return. The operation is urem so that it
; matches the "uint" in the test name, in line with the other unsigned tests
; in this file, which use udiv/urem.
; CHECK: test_uint_rem
define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %rem.r = urem <4 x i32> %num, %rem
  ret <4 x i32> %rem.r
}
152
153
; test_ulong_rem: unsigned remainder of <5 x i64>; the checks expect exactly
; five divq instructions before the return.
; CHECK: test_ulong_rem
define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %remainder = urem <5 x i64> %num, %rem
  ret <5 x i64> %remainder
}
166
; test_int_div: signed divide of <3 x i32> inside a counted loop. For each
; i in [0, %n) the vector at %dest[i] is divided by the vector at %old[i] and
; the quotient is stored back to %dest[i]. The loop is skipped when %n <= 0.
; The checks expect exactly three idivl instructions before the return.
; CHECK: test_int_div
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
entry:
  ; Guard: only enter the loop when there is at least one iteration.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %bb.nph, label %for.end

bb.nph:
  br label %for.body

for.body:
  %i = phi i32 [ 0, %bb.nph ], [ %i.next, %for.body ]
  %dst.ptr = getelementptr <3 x i32>* %dest, i32 %i
  %lhs = load <3 x i32>* %dst.ptr
  %old.ptr = getelementptr inbounds <3 x i32>* %old, i32 %i
  %rhs = load <3 x i32>* %old.ptr
  ; The operation under test.
  %quot = sdiv <3 x i32> %lhs, %rhs
  store <3 x i32> %quot, <3 x i32>* %dst.ptr
  %i.next = add nsw i32 %i, 1
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
196