1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=ALL
3
4; If the target does not have a single div/rem operation,
5; the -div-rem-pairs pass will decompose the remainder calculation as:
6;   X % Y --> X - ((X / Y) * Y)
7; But if the target does have a single div/rem operation,
8; the opposite transform is likely beneficial.
9
; Signed i8 division with a hand-expanded remainder:
;   stores %x / %y to %divdst and returns %x - ((%x / %y) * %y).
; Expected codegen: both operands are sign-extended with sxtb, a single sdiv
; is emitted, and the mul+sub pair is folded into one msub. The quotient is
; written back with a byte store (strb).
10define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
11; ALL-LABEL: scalar_i8:
12; ALL:       // %bb.0:
13; ALL-NEXT:    sxtb w8, w1
14; ALL-NEXT:    sxtb w9, w0
15; ALL-NEXT:    sdiv w8, w9, w8
16; ALL-NEXT:    msub w0, w8, w1, w0
17; ALL-NEXT:    strb w8, [x2]
18; ALL-NEXT:    ret
19  %div = sdiv i8 %x, %y
20  store i8 %div, i8* %divdst, align 4
; Rebuild the remainder from the quotient: x - (div * y).
21  %t1 = mul i8 %div, %y
22  %t2 = sub i8 %x, %t1
23  ret i8 %t2
24}
25
; Signed i16 variant of the div + hand-expanded remainder pattern:
;   stores %x / %y to %divdst and returns %x - ((%x / %y) * %y).
; Expected codegen: operands are sign-extended with sxth, followed by one
; sdiv and one msub; the quotient is written back with a halfword store (strh).
26define i16 @scalar_i16(i16 %x, i16 %y, i16* %divdst) nounwind {
27; ALL-LABEL: scalar_i16:
28; ALL:       // %bb.0:
29; ALL-NEXT:    sxth w8, w1
30; ALL-NEXT:    sxth w9, w0
31; ALL-NEXT:    sdiv w8, w9, w8
32; ALL-NEXT:    msub w0, w8, w1, w0
33; ALL-NEXT:    strh w8, [x2]
34; ALL-NEXT:    ret
35  %div = sdiv i16 %x, %y
36  store i16 %div, i16* %divdst, align 4
; Rebuild the remainder from the quotient: x - (div * y).
37  %t1 = mul i16 %div, %y
38  %t2 = sub i16 %x, %t1
39  ret i16 %t2
40}
41
; Signed i32 variant of the div + hand-expanded remainder pattern:
;   stores %x / %y to %divdst and returns %x - ((%x / %y) * %y).
; Expected codegen: no extension is needed at this width; one sdiv on the
; w registers followed by one msub that produces the returned remainder.
42define i32 @scalar_i32(i32 %x, i32 %y, i32* %divdst) nounwind {
43; ALL-LABEL: scalar_i32:
44; ALL:       // %bb.0:
45; ALL-NEXT:    sdiv w8, w0, w1
46; ALL-NEXT:    msub w0, w8, w1, w0
47; ALL-NEXT:    str w8, [x2]
48; ALL-NEXT:    ret
49  %div = sdiv i32 %x, %y
50  store i32 %div, i32* %divdst, align 4
; Rebuild the remainder from the quotient: x - (div * y).
51  %t1 = mul i32 %div, %y
52  %t2 = sub i32 %x, %t1
53  ret i32 %t2
54}
55
; Signed i64 variant of the div + hand-expanded remainder pattern:
;   stores %x / %y to %divdst and returns %x - ((%x / %y) * %y).
; Expected codegen: same shape as the i32 case but on the x registers
; (one sdiv, one msub, one 64-bit str of the quotient).
56define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
57; ALL-LABEL: scalar_i64:
58; ALL:       // %bb.0:
59; ALL-NEXT:    sdiv x8, x0, x1
60; ALL-NEXT:    msub x0, x8, x1, x0
61; ALL-NEXT:    str x8, [x2]
62; ALL-NEXT:    ret
63  %div = sdiv i64 %x, %y
64  store i64 %div, i64* %divdst, align 4
; Rebuild the remainder from the quotient: x - (div * y).
65  %t1 = mul i64 %div, %y
66  %t2 = sub i64 %x, %t1
67  ret i64 %t2
68}
69
; <16 x i8> variant: stores the lane-wise signed quotient %x / %y to %divdst
; and returns %x - ((%x / %y) * %y).
; Expected codegen: the division is scalarized. Each of the 16 lanes is
; extracted with a sign-extending smov, divided with a scalar sdiv, and the
; quotient is inserted into v2 (fmov for lane 0, mov v2.b[N] for the rest).
; The remainder is then formed by a single vector mls on v0, and the quotient
; vector is stored with str q2.
70define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst) nounwind {
71; ALL-LABEL: vector_i128_i8:
72; ALL:       // %bb.0:
73; ALL-NEXT:    smov w10, v1.b[0]
74; ALL-NEXT:    smov w11, v0.b[0]
75; ALL-NEXT:    smov w8, v1.b[1]
76; ALL-NEXT:    smov w9, v0.b[1]
77; ALL-NEXT:    sdiv w10, w11, w10
78; ALL-NEXT:    smov w12, v1.b[2]
79; ALL-NEXT:    smov w13, v0.b[2]
80; ALL-NEXT:    sdiv w8, w9, w8
81; ALL-NEXT:    fmov s2, w10
82; ALL-NEXT:    smov w14, v1.b[3]
83; ALL-NEXT:    smov w15, v0.b[3]
84; ALL-NEXT:    sdiv w12, w13, w12
85; ALL-NEXT:    mov v2.b[1], w8
86; ALL-NEXT:    smov w16, v1.b[4]
87; ALL-NEXT:    smov w17, v0.b[4]
88; ALL-NEXT:    sdiv w14, w15, w14
89; ALL-NEXT:    mov v2.b[2], w12
90; ALL-NEXT:    smov w18, v1.b[5]
91; ALL-NEXT:    smov w1, v0.b[5]
92; ALL-NEXT:    sdiv w16, w17, w16
93; ALL-NEXT:    mov v2.b[3], w14
94; ALL-NEXT:    smov w2, v1.b[6]
95; ALL-NEXT:    smov w3, v0.b[6]
96; ALL-NEXT:    sdiv w18, w1, w18
97; ALL-NEXT:    mov v2.b[4], w16
98; ALL-NEXT:    smov w4, v1.b[7]
99; ALL-NEXT:    smov w5, v0.b[7]
100; ALL-NEXT:    sdiv w2, w3, w2
101; ALL-NEXT:    mov v2.b[5], w18
102; ALL-NEXT:    smov w9, v1.b[8]
103; ALL-NEXT:    smov w11, v0.b[8]
104; ALL-NEXT:    sdiv w4, w5, w4
105; ALL-NEXT:    mov v2.b[6], w2
106; ALL-NEXT:    smov w13, v1.b[9]
107; ALL-NEXT:    smov w15, v0.b[9]
108; ALL-NEXT:    sdiv w9, w11, w9
109; ALL-NEXT:    mov v2.b[7], w4
110; ALL-NEXT:    smov w17, v1.b[10]
111; ALL-NEXT:    smov w1, v0.b[10]
112; ALL-NEXT:    sdiv w13, w15, w13
113; ALL-NEXT:    mov v2.b[8], w9
114; ALL-NEXT:    smov w3, v1.b[11]
115; ALL-NEXT:    smov w5, v0.b[11]
116; ALL-NEXT:    sdiv w17, w1, w17
117; ALL-NEXT:    mov v2.b[9], w13
118; ALL-NEXT:    smov w11, v1.b[12]
119; ALL-NEXT:    smov w15, v0.b[12]
120; ALL-NEXT:    sdiv w3, w5, w3
121; ALL-NEXT:    mov v2.b[10], w17
122; ALL-NEXT:    smov w1, v1.b[13]
123; ALL-NEXT:    smov w5, v0.b[13]
124; ALL-NEXT:    sdiv w11, w15, w11
125; ALL-NEXT:    mov v2.b[11], w3
126; ALL-NEXT:    smov w15, v1.b[14]
127; ALL-NEXT:    sdiv w1, w5, w1
128; ALL-NEXT:    smov w5, v0.b[14]
129; ALL-NEXT:    mov v2.b[12], w11
130; ALL-NEXT:    sdiv w15, w5, w15
131; ALL-NEXT:    smov w8, v1.b[15]
132; ALL-NEXT:    mov v2.b[13], w1
133; ALL-NEXT:    smov w9, v0.b[15]
134; ALL-NEXT:    mov v2.b[14], w15
135; ALL-NEXT:    sdiv w8, w9, w8
136; ALL-NEXT:    mov v2.b[15], w8
137; ALL-NEXT:    mls v0.16b, v2.16b, v1.16b
138; ALL-NEXT:    str q2, [x0]
139; ALL-NEXT:    ret
140  %div = sdiv <16 x i8> %x, %y
141  store <16 x i8> %div, <16 x i8>* %divdst, align 16
; Rebuild the lane-wise remainder from the quotient: x - (div * y).
142  %t1 = mul <16 x i8> %div, %y
143  %t2 = sub <16 x i8> %x, %t1
144  ret <16 x i8> %t2
145}
146
; <8 x i16> variant: stores the lane-wise signed quotient %x / %y to %divdst
; and returns %x - ((%x / %y) * %y).
; Expected codegen: the division is scalarized per halfword lane (smov
; extract, scalar sdiv, insert into v2), then one vector mls on v0 produces
; the remainder and str q2 stores the quotient vector.
147define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, <8 x i16>* %divdst) nounwind {
148; ALL-LABEL: vector_i128_i16:
149; ALL:       // %bb.0:
150; ALL-NEXT:    smov w10, v1.h[0]
151; ALL-NEXT:    smov w11, v0.h[0]
152; ALL-NEXT:    smov w8, v1.h[1]
153; ALL-NEXT:    smov w9, v0.h[1]
154; ALL-NEXT:    sdiv w10, w11, w10
155; ALL-NEXT:    smov w12, v1.h[2]
156; ALL-NEXT:    smov w13, v0.h[2]
157; ALL-NEXT:    sdiv w8, w9, w8
158; ALL-NEXT:    fmov s2, w10
159; ALL-NEXT:    smov w14, v1.h[3]
160; ALL-NEXT:    smov w15, v0.h[3]
161; ALL-NEXT:    sdiv w12, w13, w12
162; ALL-NEXT:    mov v2.h[1], w8
163; ALL-NEXT:    smov w9, v1.h[4]
164; ALL-NEXT:    smov w11, v0.h[4]
165; ALL-NEXT:    sdiv w14, w15, w14
166; ALL-NEXT:    mov v2.h[2], w12
167; ALL-NEXT:    smov w13, v1.h[5]
168; ALL-NEXT:    smov w15, v0.h[5]
169; ALL-NEXT:    sdiv w9, w11, w9
170; ALL-NEXT:    mov v2.h[3], w14
171; ALL-NEXT:    smov w11, v1.h[6]
172; ALL-NEXT:    sdiv w13, w15, w13
173; ALL-NEXT:    smov w15, v0.h[6]
174; ALL-NEXT:    mov v2.h[4], w9
175; ALL-NEXT:    sdiv w11, w15, w11
176; ALL-NEXT:    smov w8, v1.h[7]
177; ALL-NEXT:    mov v2.h[5], w13
178; ALL-NEXT:    smov w9, v0.h[7]
179; ALL-NEXT:    mov v2.h[6], w11
180; ALL-NEXT:    sdiv w8, w9, w8
181; ALL-NEXT:    mov v2.h[7], w8
182; ALL-NEXT:    mls v0.8h, v2.8h, v1.8h
183; ALL-NEXT:    str q2, [x0]
184; ALL-NEXT:    ret
185  %div = sdiv <8 x i16> %x, %y
186  store <8 x i16> %div, <8 x i16>* %divdst, align 16
; Rebuild the lane-wise remainder from the quotient: x - (div * y).
187  %t1 = mul <8 x i16> %div, %y
188  %t2 = sub <8 x i16> %x, %t1
189  ret <8 x i16> %t2
190}
191
; <4 x i32> variant: stores the lane-wise signed quotient %x / %y to %divdst
; and returns %x - ((%x / %y) * %y).
; Expected codegen: the division is scalarized per word lane (fmov/mov
; extract, scalar sdiv, insert into v2), then one vector mls on v0 produces
; the remainder and str q2 stores the quotient vector.
192define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst) nounwind {
193; ALL-LABEL: vector_i128_i32:
194; ALL:       // %bb.0:
195; ALL-NEXT:    fmov w9, s1
196; ALL-NEXT:    fmov w10, s0
197; ALL-NEXT:    mov w8, v1.s[1]
198; ALL-NEXT:    sdiv w9, w10, w9
199; ALL-NEXT:    mov w10, v0.s[1]
200; ALL-NEXT:    sdiv w8, w10, w8
201; ALL-NEXT:    mov w10, v1.s[2]
202; ALL-NEXT:    fmov s2, w9
203; ALL-NEXT:    mov w9, v0.s[2]
204; ALL-NEXT:    sdiv w9, w9, w10
205; ALL-NEXT:    mov w10, v1.s[3]
206; ALL-NEXT:    mov v2.s[1], w8
207; ALL-NEXT:    mov w8, v0.s[3]
208; ALL-NEXT:    mov v2.s[2], w9
209; ALL-NEXT:    sdiv w8, w8, w10
210; ALL-NEXT:    mov v2.s[3], w8
211; ALL-NEXT:    mls v0.4s, v2.4s, v1.4s
212; ALL-NEXT:    str q2, [x0]
213; ALL-NEXT:    ret
214  %div = sdiv <4 x i32> %x, %y
215  store <4 x i32> %div, <4 x i32>* %divdst, align 16
; Rebuild the lane-wise remainder from the quotient: x - (div * y).
216  %t1 = mul <4 x i32> %div, %y
217  %t2 = sub <4 x i32> %x, %t1
218  ret <4 x i32> %t2
219}
220
; <2 x i64> variant: stores the lane-wise signed quotient %x / %y to %divdst
; and returns %x - ((%x / %y) * %y).
; Expected codegen differs from the narrower vector cases: no vector mls
; appears in the expected output. Both the sdiv and the mul are done per lane
; on scalar x registers, the products are reassembled into v1 and subtracted
; from v0 with a vector sub, and the quotients are then reassembled into v1
; for the store.
221define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst) nounwind {
222; ALL-LABEL: vector_i128_i64:
223; ALL:       // %bb.0:
224; ALL-NEXT:    fmov x10, d1
225; ALL-NEXT:    fmov x11, d0
226; ALL-NEXT:    mov x8, v1.d[1]
227; ALL-NEXT:    mov x9, v0.d[1]
228; ALL-NEXT:    sdiv x11, x11, x10
229; ALL-NEXT:    sdiv x9, x9, x8
230; ALL-NEXT:    mul x10, x11, x10
231; ALL-NEXT:    mul x8, x9, x8
232; ALL-NEXT:    fmov d1, x10
233; ALL-NEXT:    mov v1.d[1], x8
234; ALL-NEXT:    sub v0.2d, v0.2d, v1.2d
235; ALL-NEXT:    fmov d1, x11
236; ALL-NEXT:    mov v1.d[1], x9
237; ALL-NEXT:    str q1, [x0]
238; ALL-NEXT:    ret
239  %div = sdiv <2 x i64> %x, %y
240  store <2 x i64> %div, <2 x i64>* %divdst, align 16
; Rebuild the lane-wise remainder from the quotient: x - (div * y).
241  %t1 = mul <2 x i64> %div, %y
242  %t2 = sub <2 x i64> %x, %t1
243  ret <2 x i64> %t2
244}
245
246; Special tests.
247
; Same pattern as the plain i32 case, but the divisor is loaded from %ysrc
; and the multiply has its operands swapped (%y * %div instead of %div * %y).
; Stores %x / %y to %divdst and returns %x - (%y * (%x / %y)).
; Expected codegen: one ldr, one sdiv and one msub, i.e. the operand order of
; the mul does not prevent the mul+sub pair from folding.
248define i32 @scalar_i32_commutative(i32 %x, i32* %ysrc, i32* %divdst) nounwind {
249; ALL-LABEL: scalar_i32_commutative:
250; ALL:       // %bb.0:
251; ALL-NEXT:    ldr w8, [x1]
252; ALL-NEXT:    sdiv w9, w0, w8
253; ALL-NEXT:    msub w0, w8, w9, w0
254; ALL-NEXT:    str w9, [x2]
255; ALL-NEXT:    ret
256  %y = load i32, i32* %ysrc, align 4
257  %div = sdiv i32 %x, %y
258  store i32 %div, i32* %divdst, align 4
259  %t1 = mul i32 %y, %div ; commutative
260  %t2 = sub i32 %x, %t1
261  ret i32 %t2
262}
263
264; We do not care about extra uses.
; The product %t1 has a second use here: besides feeding the subtraction it
; is also stored to %t1dst. The quotient is stored to %divdst and the function
; returns %x - %t1.
; Expected codegen: because the product itself must be materialized for the
; store, the output has a separate mul and sub instead of a single msub.
265define i32 @extrause(i32 %x, i32 %y, i32* %divdst, i32* %t1dst) nounwind {
266; ALL-LABEL: extrause:
267; ALL:       // %bb.0:
268; ALL-NEXT:    sdiv w8, w0, w1
269; ALL-NEXT:    str w8, [x2]
270; ALL-NEXT:    mul w8, w8, w1
271; ALL-NEXT:    sub w0, w0, w8
272; ALL-NEXT:    str w8, [x3]
273; ALL-NEXT:    ret
274  %div = sdiv i32 %x, %y
275  store i32 %div, i32* %divdst, align 4
276  %t1 = mul i32 %div, %y
; Extra use of the product, in addition to the sub below.
277  store i32 %t1, i32* %t1dst, align 4
278  %t2 = sub i32 %x, %t1
279  ret i32 %t2
280}
281
282; 'rem' should appear next to 'div'.
; The division and the remainder reconstruction live in different basic
; blocks: the entry block computes and stores %x / %y, and only when
; %store_srem is true does %do_srem compute %x - (%div * %y) and store it to
; %sremdst. The function returns the quotient in both cases.
; Expected codegen: sdiv stays in the entry block and writes w0 (the return
; value), so the original %x is first copied to w8; the msub that forms the
; remainder is emitted inside the %do_srem block, guarded by cbz on w3.
; NOTE(review): the existing comment above says 'rem' should appear next to
; 'div'; the expected AArch64 output keeps them in separate blocks - confirm
; whether that comment was carried over from another target's test.
283define i32 @multiple_bb(i32 %x, i32 %y, i32* %divdst, i1 zeroext %store_srem, i32* %sremdst) nounwind {
284; ALL-LABEL: multiple_bb:
285; ALL:       // %bb.0:
286; ALL-NEXT:    mov w8, w0
287; ALL-NEXT:    sdiv w0, w0, w1
288; ALL-NEXT:    str w0, [x2]
289; ALL-NEXT:    cbz w3, .LBB10_2
290; ALL-NEXT:  // %bb.1: // %do_srem
291; ALL-NEXT:    msub w8, w0, w1, w8
292; ALL-NEXT:    str w8, [x4]
293; ALL-NEXT:  .LBB10_2: // %end
294; ALL-NEXT:    ret
295  %div = sdiv i32 %x, %y
296  store i32 %div, i32* %divdst, align 4
297  br i1 %store_srem, label %do_srem, label %end
298do_srem:
; Remainder is only needed on this path: x - (div * y).
299  %t1 = mul i32 %div, %y
300  %t2 = sub i32 %x, %t1
301  store i32 %t2, i32* %sremdst, align 4
302  br label %end
303end:
304  ret i32 %div
305}
306
; Negative test: the dividend of the sdiv is %x0 but the value the product is
; subtracted from is %x1, so the returned value is not %x0 % %y.
; Stores %x0 / %y to %divdst and returns %x1 - ((%x0 / %y) * %y).
; Expected codegen: one sdiv and one msub whose accumulator operand is w1
; (%x1) rather than the dividend register w0.
307define i32 @negative_different_x(i32 %x0, i32 %x1, i32 %y, i32* %divdst) nounwind {
308; ALL-LABEL: negative_different_x:
309; ALL:       // %bb.0:
310; ALL-NEXT:    sdiv w8, w0, w2
311; ALL-NEXT:    msub w0, w8, w2, w1
312; ALL-NEXT:    str w8, [x3]
313; ALL-NEXT:    ret
314  %div = sdiv i32 %x0, %y ; not %x1
315  store i32 %div, i32* %divdst, align 4
316  %t1 = mul i32 %div, %y
317  %t2 = sub i32 %x1, %t1 ; not %x0
318  ret i32 %t2
319}
320