1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=arm-none-none-eabi -mcpu=cortex-m33 < %s | FileCheck %s --check-prefixes=CHECK-LE
3; RUN: llc -O3 -mtriple=armeb-none-none-eabi -mcpu=cortex-m33 < %s | FileCheck %s --check-prefixes=CHECK-BE
4
; add_user: dual 16-bit multiply-accumulate loop with an extra `add` user.
;
; For i = 0 .. %arg-1 the loop loads arg3[i], arg3[i+1], arg2[i], arg2[i+1],
; sign-extends them and accumulates arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
; into %mac1.026. The sign-extended arg3[i] (%conv4) is additionally summed
; into %count, so it has a user outside the multiply-accumulate chain.
; The function returns mac + count, or 0 when %arg <= 0. %arg1 is unused.
;
; Expected code below: the little-endian output uses two 32-bit loads feeding
; smlad and folds the extra add into sxtah; the big-endian output keeps four
; ldrsh halfword loads and two smlabb instructions.
5define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
6; CHECK-LE-LABEL: add_user:
7; CHECK-LE:       @ %bb.0: @ %entry
8; CHECK-LE-NEXT:    .save {r4, lr}
9; CHECK-LE-NEXT:    push {r4, lr}
10; CHECK-LE-NEXT:    cmp r0, #1
11; CHECK-LE-NEXT:    blt .LBB0_4
12; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
13; CHECK-LE-NEXT:    subs r2, #2
14; CHECK-LE-NEXT:    subs r3, #2
15; CHECK-LE-NEXT:    mov.w r12, #0
16; CHECK-LE-NEXT:    movs r1, #0
17; CHECK-LE-NEXT:    .p2align 2
18; CHECK-LE-NEXT:  .LBB0_2: @ %for.body
19; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
20; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
21; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
22; CHECK-LE-NEXT:    subs r0, #1
23; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
24; CHECK-LE-NEXT:    sxtah r1, r1, lr
25; CHECK-LE-NEXT:    bne .LBB0_2
26; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
27; CHECK-LE-NEXT:    add.w r0, r12, r1
28; CHECK-LE-NEXT:    pop {r4, pc}
29; CHECK-LE-NEXT:    .p2align 2
30; CHECK-LE-NEXT:  .LBB0_4:
31; CHECK-LE-NEXT:    mov.w r12, #0
32; CHECK-LE-NEXT:    movs r1, #0
33; CHECK-LE-NEXT:    add.w r0, r12, r1
34; CHECK-LE-NEXT:    pop {r4, pc}
35;
36; CHECK-BE-LABEL: add_user:
37; CHECK-BE:       @ %bb.0: @ %entry
38; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
39; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
40; CHECK-BE-NEXT:    cmp r0, #1
41; CHECK-BE-NEXT:    blt .LBB0_4
42; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
43; CHECK-BE-NEXT:    subs r2, #2
44; CHECK-BE-NEXT:    subs r3, #2
45; CHECK-BE-NEXT:    mov.w r12, #0
46; CHECK-BE-NEXT:    movs r1, #0
47; CHECK-BE-NEXT:    .p2align 2
48; CHECK-BE-NEXT:  .LBB0_2: @ %for.body
49; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
50; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
51; CHECK-BE-NEXT:    ldrsh r5, [r2, #2]!
52; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
53; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
54; CHECK-BE-NEXT:    smlabb r5, r5, lr, r12
55; CHECK-BE-NEXT:    add r1, lr
56; CHECK-BE-NEXT:    subs r0, #1
57; CHECK-BE-NEXT:    smlabb r12, r6, r4, r5
58; CHECK-BE-NEXT:    bne .LBB0_2
59; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
60; CHECK-BE-NEXT:    add.w r0, r12, r1
61; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
62; CHECK-BE-NEXT:    .p2align 2
63; CHECK-BE-NEXT:  .LBB0_4:
64; CHECK-BE-NEXT:    mov.w r12, #0
65; CHECK-BE-NEXT:    movs r1, #0
66; CHECK-BE-NEXT:    add.w r0, r12, r1
67; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
68entry:
69  %cmp24 = icmp sgt i32 %arg, 0
70  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
71
72for.body.preheader:
; These two preheader loads have no users anywhere in this function.
73  %.pre = load i16, i16* %arg3, align 2
74  %.pre27 = load i16, i16* %arg2, align 2
75  br label %for.body
76
77for.cond.cleanup:
78  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
79  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
80  %res = add i32 %mac1.0.lcssa, %count.final
81  ret i32 %res
82
83for.body:
84  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
85  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
86  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
87  %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025
88  %0 = load i16, i16* %arrayidx, align 2
89  %add = add nuw nsw i32 %i.025, 1
90  %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add
91  %1 = load i16, i16* %arrayidx1, align 2
92  %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025
93  %2 = load i16, i16* %arrayidx3, align 2
94  %conv = sext i16 %2 to i32
95  %conv4 = sext i16 %0 to i32
; Extra user of %conv4 outside the multiply-accumulate chain: running sum of arg3[i].
96  %count.next = add i32 %conv4, %count
97  %mul = mul nsw i32 %conv, %conv4
98  %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add
99  %3 = load i16, i16* %arrayidx6, align 2
100  %conv7 = sext i16 %3 to i32
101  %conv8 = sext i16 %1 to i32
102  %mul9 = mul nsw i32 %conv7, %conv8
103  %add10 = add i32 %mul, %mac1.026
104  %add11 = add i32 %mul9, %add10
; The index advances by one element per iteration; the loop exits once %add reaches %arg.
105  %exitcond = icmp ne i32 %add, %arg
106  br i1 %exitcond, label %for.body, label %for.cond.cleanup
107}
108
; mul_bottom_user: dual 16-bit multiply-accumulate loop with an extra `mul` user
; of the element loaded from the lower index.
;
; The loop body matches the multiply-accumulate pattern of arg2[i]*arg3[i] +
; arg2[i+1]*arg3[i+1], and additionally multiplies the sign-extended arg3[i]
; (%conv4) into %count. %count starts at 0 in the IR, but the expected code
; below still contains the multiply in the loop.
; The function returns mac + count, or 0 when %arg <= 0. %arg1 is unused.
;
; Expected code below: the little-endian output uses 32-bit loads with smlad
; and an sxth to recover the low halfword for the extra mul; the big-endian
; output keeps four ldrsh halfword loads and two smlabb instructions.
109define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
110; CHECK-LE-LABEL: mul_bottom_user:
111; CHECK-LE:       @ %bb.0: @ %entry
112; CHECK-LE-NEXT:    .save {r4, r5, r7, lr}
113; CHECK-LE-NEXT:    push {r4, r5, r7, lr}
114; CHECK-LE-NEXT:    cmp r0, #1
115; CHECK-LE-NEXT:    blt .LBB1_4
116; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
117; CHECK-LE-NEXT:    sub.w lr, r2, #2
118; CHECK-LE-NEXT:    subs r3, #2
119; CHECK-LE-NEXT:    mov.w r12, #0
120; CHECK-LE-NEXT:    movs r1, #0
121; CHECK-LE-NEXT:    .p2align 2
122; CHECK-LE-NEXT:  .LBB1_2: @ %for.body
123; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
124; CHECK-LE-NEXT:    ldr r2, [r3, #2]!
125; CHECK-LE-NEXT:    ldr r4, [lr, #2]!
126; CHECK-LE-NEXT:    sxth r5, r2
127; CHECK-LE-NEXT:    smlad r12, r4, r2, r12
128; CHECK-LE-NEXT:    subs r0, #1
129; CHECK-LE-NEXT:    mul r1, r5, r1
130; CHECK-LE-NEXT:    bne .LBB1_2
131; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
132; CHECK-LE-NEXT:    add.w r0, r12, r1
133; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
134; CHECK-LE-NEXT:    .p2align 2
135; CHECK-LE-NEXT:  .LBB1_4:
136; CHECK-LE-NEXT:    mov.w r12, #0
137; CHECK-LE-NEXT:    movs r1, #0
138; CHECK-LE-NEXT:    add.w r0, r12, r1
139; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
140;
141; CHECK-BE-LABEL: mul_bottom_user:
142; CHECK-BE:       @ %bb.0: @ %entry
143; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
144; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
145; CHECK-BE-NEXT:    cmp r0, #1
146; CHECK-BE-NEXT:    blt .LBB1_4
147; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
148; CHECK-BE-NEXT:    subs r2, #2
149; CHECK-BE-NEXT:    subs r3, #2
150; CHECK-BE-NEXT:    mov.w r12, #0
151; CHECK-BE-NEXT:    movs r1, #0
152; CHECK-BE-NEXT:    .p2align 2
153; CHECK-BE-NEXT:  .LBB1_2: @ %for.body
154; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
155; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
156; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
157; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
158; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
159; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
160; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
161; CHECK-BE-NEXT:    subs r0, #1
162; CHECK-BE-NEXT:    mul r1, lr, r1
163; CHECK-BE-NEXT:    bne .LBB1_2
164; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
165; CHECK-BE-NEXT:    add.w r0, r12, r1
166; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
167; CHECK-BE-NEXT:    .p2align 2
168; CHECK-BE-NEXT:  .LBB1_4:
169; CHECK-BE-NEXT:    mov.w r12, #0
170; CHECK-BE-NEXT:    movs r1, #0
171; CHECK-BE-NEXT:    add.w r0, r12, r1
172; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
173entry:
174  %cmp24 = icmp sgt i32 %arg, 0
175  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
176
177for.body.preheader:
; These two preheader loads have no users anywhere in this function.
178  %.pre = load i16, i16* %arg3, align 2
179  %.pre27 = load i16, i16* %arg2, align 2
180  br label %for.body
181
182for.cond.cleanup:
183  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
184  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
185  %res = add i32 %mac1.0.lcssa, %count.final
186  ret i32 %res
187
188for.body:
189  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
190  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
191  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
192  %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025
193  %0 = load i16, i16* %arrayidx, align 2
194  %add = add nuw nsw i32 %i.025, 1
195  %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add
196  %1 = load i16, i16* %arrayidx1, align 2
197  %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025
198  %2 = load i16, i16* %arrayidx3, align 2
199  %conv = sext i16 %2 to i32
200  %conv4 = sext i16 %0 to i32
201  %mul = mul nsw i32 %conv, %conv4
202  %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add
203  %3 = load i16, i16* %arrayidx6, align 2
204  %conv7 = sext i16 %3 to i32
205  %conv8 = sext i16 %1 to i32
206  %mul9 = mul nsw i32 %conv7, %conv8
207  %add10 = add i32 %mul, %mac1.026
208  %add11 = add i32 %mul9, %add10
; Extra user of %conv4 (sign-extended arg3[i]) outside the multiply-accumulate chain.
209  %count.next = mul i32 %conv4, %count
210  %exitcond = icmp ne i32 %add, %arg
211  br i1 %exitcond, label %for.body, label %for.cond.cleanup
212}
213
; mul_top_user: dual 16-bit multiply-accumulate loop with an extra `mul` user
; of the element loaded from the higher index.
;
; The loop body matches the multiply-accumulate pattern of arg2[i]*arg3[i] +
; arg2[i+1]*arg3[i+1], and additionally multiplies the sign-extended arg2[i+1]
; (%conv7) into %count. %count starts at 0 in the IR, but the expected code
; below still contains the multiply in the loop.
; The function returns mac + count, or 0 when %arg <= 0. %arg1 is unused.
;
; Expected code below: the little-endian output uses 32-bit loads with smlad
; and an asr #16 to recover the high halfword for the extra mul; the
; big-endian output keeps four ldrsh halfword loads and two smlabb
; instructions.
214define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
215; CHECK-LE-LABEL: mul_top_user:
216; CHECK-LE:       @ %bb.0: @ %entry
217; CHECK-LE-NEXT:    .save {r4, lr}
218; CHECK-LE-NEXT:    push {r4, lr}
219; CHECK-LE-NEXT:    cmp r0, #1
220; CHECK-LE-NEXT:    blt .LBB2_4
221; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
222; CHECK-LE-NEXT:    subs r2, #2
223; CHECK-LE-NEXT:    subs r3, #2
224; CHECK-LE-NEXT:    mov.w r12, #0
225; CHECK-LE-NEXT:    movs r1, #0
226; CHECK-LE-NEXT:    .p2align 2
227; CHECK-LE-NEXT:  .LBB2_2: @ %for.body
228; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
229; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
230; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
231; CHECK-LE-NEXT:    subs r0, #1
232; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
233; CHECK-LE-NEXT:    asr.w r4, r4, #16
234; CHECK-LE-NEXT:    mul r1, r4, r1
235; CHECK-LE-NEXT:    bne .LBB2_2
236; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
237; CHECK-LE-NEXT:    add.w r0, r12, r1
238; CHECK-LE-NEXT:    pop {r4, pc}
239; CHECK-LE-NEXT:    .p2align 2
240; CHECK-LE-NEXT:  .LBB2_4:
241; CHECK-LE-NEXT:    mov.w r12, #0
242; CHECK-LE-NEXT:    movs r1, #0
243; CHECK-LE-NEXT:    add.w r0, r12, r1
244; CHECK-LE-NEXT:    pop {r4, pc}
245;
246; CHECK-BE-LABEL: mul_top_user:
247; CHECK-BE:       @ %bb.0: @ %entry
248; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
249; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
250; CHECK-BE-NEXT:    cmp r0, #1
251; CHECK-BE-NEXT:    blt .LBB2_4
252; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
253; CHECK-BE-NEXT:    subs r2, #2
254; CHECK-BE-NEXT:    subs r3, #2
255; CHECK-BE-NEXT:    mov.w r12, #0
256; CHECK-BE-NEXT:    movs r1, #0
257; CHECK-BE-NEXT:    .p2align 2
258; CHECK-BE-NEXT:  .LBB2_2: @ %for.body
259; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
260; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
261; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
262; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
263; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
264; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
265; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
266; CHECK-BE-NEXT:    subs r0, #1
267; CHECK-BE-NEXT:    mul r1, r6, r1
268; CHECK-BE-NEXT:    bne .LBB2_2
269; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
270; CHECK-BE-NEXT:    add.w r0, r12, r1
271; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
272; CHECK-BE-NEXT:    .p2align 2
273; CHECK-BE-NEXT:  .LBB2_4:
274; CHECK-BE-NEXT:    mov.w r12, #0
275; CHECK-BE-NEXT:    movs r1, #0
276; CHECK-BE-NEXT:    add.w r0, r12, r1
277; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
278entry:
279  %cmp24 = icmp sgt i32 %arg, 0
280  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
281
282for.body.preheader:
; These two preheader loads have no users anywhere in this function.
283  %.pre = load i16, i16* %arg3, align 2
284  %.pre27 = load i16, i16* %arg2, align 2
285  br label %for.body
286
287for.cond.cleanup:
288  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
289  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
290  %res = add i32 %mac1.0.lcssa, %count.final
291  ret i32 %res
292
293for.body:
294  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
295  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
296  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
297  %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025
298  %0 = load i16, i16* %arrayidx, align 2
299  %add = add nuw nsw i32 %i.025, 1
300  %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add
301  %1 = load i16, i16* %arrayidx1, align 2
302  %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025
303  %2 = load i16, i16* %arrayidx3, align 2
304  %conv = sext i16 %2 to i32
305  %conv4 = sext i16 %0 to i32
306  %mul = mul nsw i32 %conv, %conv4
307  %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add
308  %3 = load i16, i16* %arrayidx6, align 2
309  %conv7 = sext i16 %3 to i32
310  %conv8 = sext i16 %1 to i32
311  %mul9 = mul nsw i32 %conv7, %conv8
312  %add10 = add i32 %mul, %mac1.026
313  %add11 = add i32 %mul9, %add10
; Extra user of %conv7 (sign-extended arg2[i+1]) outside the multiply-accumulate chain.
314  %count.next = mul i32 %conv7, %count
315  %exitcond = icmp ne i32 %add, %arg
316  br i1 %exitcond, label %for.body, label %for.cond.cleanup
317}
318
; and_user: dual 16-bit multiply-accumulate loop where the low element is also
; masked with `and ..., 65535` and the masked value feeds an extra `mul`.
;
; The loop body matches the multiply-accumulate pattern of arg2[i]*arg3[i] +
; arg2[i+1]*arg3[i+1]. In addition, %bottom = %conv4 & 65535 (the low 16 bits
; of the sign-extended arg3[i]) is multiplied into %count. %count starts at 0
; in the IR, but the expected code below still contains the multiply.
; The function returns mac + count, or 0 when %arg <= 0. %arg1 is unused.
;
; Expected code below: the little-endian output uses 32-bit loads with smlad
; and a uxth for the mask; the big-endian output loads arg3[i] with ldrh
; (zero-extending) and keeps the remaining three loads as ldrsh, followed by
; two smlabb instructions.
319define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
320; CHECK-LE-LABEL: and_user:
321; CHECK-LE:       @ %bb.0: @ %entry
322; CHECK-LE-NEXT:    .save {r4, lr}
323; CHECK-LE-NEXT:    push {r4, lr}
324; CHECK-LE-NEXT:    cmp r0, #1
325; CHECK-LE-NEXT:    blt .LBB3_4
326; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
327; CHECK-LE-NEXT:    sub.w lr, r2, #2
328; CHECK-LE-NEXT:    subs r3, #2
329; CHECK-LE-NEXT:    mov.w r12, #0
330; CHECK-LE-NEXT:    movs r1, #0
331; CHECK-LE-NEXT:    .p2align 2
332; CHECK-LE-NEXT:  .LBB3_2: @ %for.body
333; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
334; CHECK-LE-NEXT:    ldr r2, [r3, #2]!
335; CHECK-LE-NEXT:    ldr r4, [lr, #2]!
336; CHECK-LE-NEXT:    subs r0, #1
337; CHECK-LE-NEXT:    smlad r12, r4, r2, r12
338; CHECK-LE-NEXT:    uxth r2, r2
339; CHECK-LE-NEXT:    mul r1, r2, r1
340; CHECK-LE-NEXT:    bne .LBB3_2
341; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
342; CHECK-LE-NEXT:    add.w r0, r12, r1
343; CHECK-LE-NEXT:    pop {r4, pc}
344; CHECK-LE-NEXT:    .p2align 2
345; CHECK-LE-NEXT:  .LBB3_4:
346; CHECK-LE-NEXT:    mov.w r12, #0
347; CHECK-LE-NEXT:    movs r1, #0
348; CHECK-LE-NEXT:    add.w r0, r12, r1
349; CHECK-LE-NEXT:    pop {r4, pc}
350;
351; CHECK-BE-LABEL: and_user:
352; CHECK-BE:       @ %bb.0: @ %entry
353; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
354; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
355; CHECK-BE-NEXT:    cmp r0, #1
356; CHECK-BE-NEXT:    blt .LBB3_4
357; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
358; CHECK-BE-NEXT:    subs r2, #2
359; CHECK-BE-NEXT:    subs r3, #2
360; CHECK-BE-NEXT:    mov.w r12, #0
361; CHECK-BE-NEXT:    movs r1, #0
362; CHECK-BE-NEXT:    .p2align 2
363; CHECK-BE-NEXT:  .LBB3_2: @ %for.body
364; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
365; CHECK-BE-NEXT:    ldrh lr, [r3, #2]!
366; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
367; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
368; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
369; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
370; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
371; CHECK-BE-NEXT:    subs r0, #1
372; CHECK-BE-NEXT:    mul r1, lr, r1
373; CHECK-BE-NEXT:    bne .LBB3_2
374; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
375; CHECK-BE-NEXT:    add.w r0, r12, r1
376; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
377; CHECK-BE-NEXT:    .p2align 2
378; CHECK-BE-NEXT:  .LBB3_4:
379; CHECK-BE-NEXT:    mov.w r12, #0
380; CHECK-BE-NEXT:    movs r1, #0
381; CHECK-BE-NEXT:    add.w r0, r12, r1
382; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
383entry:
384  %cmp24 = icmp sgt i32 %arg, 0
385  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
386
387for.body.preheader:
; These two preheader loads have no users anywhere in this function.
388  %.pre = load i16, i16* %arg3, align 2
389  %.pre27 = load i16, i16* %arg2, align 2
390  br label %for.body
391
392for.cond.cleanup:
393  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
394  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
395  %res = add i32 %mac1.0.lcssa, %count.final
396  ret i32 %res
397
398for.body:
399  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
400  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
401  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
402  %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025
403  %0 = load i16, i16* %arrayidx, align 2
404  %add = add nuw nsw i32 %i.025, 1
405  %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add
406  %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025
407  %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add
408  %1 = load i16, i16* %arrayidx1, align 2
409  %2 = load i16, i16* %arrayidx3, align 2
410  %conv = sext i16 %2 to i32
411  %conv4 = sext i16 %0 to i32
; Mask keeps only the low 16 bits of the sign-extended arg3[i]; this is the extra user of %conv4.
412  %bottom = and i32 %conv4, 65535
413  %mul = mul nsw i32 %conv, %conv4
414  %3 = load i16, i16* %arrayidx6, align 2
415  %conv7 = sext i16 %3 to i32
416  %conv8 = sext i16 %1 to i32
417  %mul9 = mul nsw i32 %conv7, %conv8
418  %add10 = add i32 %mul, %mac1.026
419  %add11 = add i32 %mul9, %add10
420  %count.next = mul i32 %bottom, %count
421  %exitcond = icmp ne i32 %add, %arg
422  br i1 %exitcond, label %for.body, label %for.cond.cleanup
423}
424
; multi_uses: dual 16-bit multiply-accumulate loop where the low element has
; several additional users.
;
; The loop body matches the multiply-accumulate pattern of arg2[i]*arg3[i] +
; arg2[i+1]*arg3[i+1]. The sign-extended arg3[i] (%conv4) is also used by an
; `and` with 65535 (%bottom) and by a `shl` by 16 (%shl); %count.next is
; (%bottom xor %count) * %shl.
; The function returns mac + count, or 0 when %arg <= 0. %arg1 is unused.
;
; Expected code below: the little-endian output uses 32-bit loads with smlad,
; then eor / mul / lsl #16 for the count chain; the big-endian output keeps
; four ldrsh halfword loads and two smlabb instructions, followed by
; eor / muls / lsl #16.
425define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
426; CHECK-LE-LABEL: multi_uses:
427; CHECK-LE:       @ %bb.0: @ %entry
428; CHECK-LE-NEXT:    .save {r4, lr}
429; CHECK-LE-NEXT:    push {r4, lr}
430; CHECK-LE-NEXT:    cmp r0, #1
431; CHECK-LE-NEXT:    blt .LBB4_4
432; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
433; CHECK-LE-NEXT:    subs r2, #2
434; CHECK-LE-NEXT:    subs r3, #2
435; CHECK-LE-NEXT:    mov.w lr, #0
436; CHECK-LE-NEXT:    mov.w r12, #0
437; CHECK-LE-NEXT:    .p2align 2
438; CHECK-LE-NEXT:  .LBB4_2: @ %for.body
439; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
440; CHECK-LE-NEXT:    ldr r1, [r3, #2]!
441; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
442; CHECK-LE-NEXT:    subs r0, #1
443; CHECK-LE-NEXT:    smlad lr, r4, r1, lr
444; CHECK-LE-NEXT:    eor.w r4, r1, r12
445; CHECK-LE-NEXT:    mul r1, r4, r1
446; CHECK-LE-NEXT:    lsl.w r12, r1, #16
447; CHECK-LE-NEXT:    bne .LBB4_2
448; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
449; CHECK-LE-NEXT:    add.w r0, lr, r12
450; CHECK-LE-NEXT:    pop {r4, pc}
451; CHECK-LE-NEXT:    .p2align 2
452; CHECK-LE-NEXT:  .LBB4_4:
453; CHECK-LE-NEXT:    mov.w lr, #0
454; CHECK-LE-NEXT:    mov.w r12, #0
455; CHECK-LE-NEXT:    add.w r0, lr, r12
456; CHECK-LE-NEXT:    pop {r4, pc}
457;
458; CHECK-BE-LABEL: multi_uses:
459; CHECK-BE:       @ %bb.0: @ %entry
460; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
461; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
462; CHECK-BE-NEXT:    cmp r0, #1
463; CHECK-BE-NEXT:    blt .LBB4_4
464; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
465; CHECK-BE-NEXT:    subs r2, #2
466; CHECK-BE-NEXT:    subs r3, #2
467; CHECK-BE-NEXT:    mov.w r12, #0
468; CHECK-BE-NEXT:    mov.w lr, #0
469; CHECK-BE-NEXT:    .p2align 2
470; CHECK-BE-NEXT:  .LBB4_2: @ %for.body
471; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
472; CHECK-BE-NEXT:    ldrsh r1, [r3, #2]!
473; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
474; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
475; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
476; CHECK-BE-NEXT:    smlabb r4, r4, r1, r12
477; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
478; CHECK-BE-NEXT:    eor.w r6, r1, lr
479; CHECK-BE-NEXT:    muls r1, r6, r1
480; CHECK-BE-NEXT:    subs r0, #1
481; CHECK-BE-NEXT:    lsl.w lr, r1, #16
482; CHECK-BE-NEXT:    bne .LBB4_2
483; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
484; CHECK-BE-NEXT:    add.w r0, r12, lr
485; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
486; CHECK-BE-NEXT:    .p2align 2
487; CHECK-BE-NEXT:  .LBB4_4:
488; CHECK-BE-NEXT:    mov.w r12, #0
489; CHECK-BE-NEXT:    mov.w lr, #0
490; CHECK-BE-NEXT:    add.w r0, r12, lr
491; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
492entry:
493  %cmp24 = icmp sgt i32 %arg, 0
494  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
495
496for.body.preheader:
; These two preheader loads have no users anywhere in this function.
497  %.pre = load i16, i16* %arg3, align 2
498  %.pre27 = load i16, i16* %arg2, align 2
499  br label %for.body
500
501for.cond.cleanup:
502  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
503  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
504  %res = add i32 %mac1.0.lcssa, %count.final
505  ret i32 %res
506
507for.body:
508  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
509  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
510  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
511  %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025
512  %0 = load i16, i16* %arrayidx, align 2
513  %add = add nuw nsw i32 %i.025, 1
514  %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add
515  %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025
516  %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add
517  %1 = load i16, i16* %arrayidx1, align 2
518  %2 = load i16, i16* %arrayidx3, align 2
519  %conv = sext i16 %2 to i32
520  %conv4 = sext i16 %0 to i32
; First extra user of %conv4: low 16 bits only.
521  %bottom = and i32 %conv4, 65535
522  %mul = mul nsw i32 %conv, %conv4
523  %3 = load i16, i16* %arrayidx6, align 2
524  %conv7 = sext i16 %3 to i32
525  %conv8 = sext i16 %1 to i32
526  %mul9 = mul nsw i32 %conv7, %conv8
527  %add10 = add i32 %mul, %mac1.026
; Second extra user of %conv4: value moved into the upper 16 bits.
528  %shl = shl i32 %conv4, 16
529  %add11 = add i32 %mul9, %add10
530  %xor = xor i32 %bottom, %count
531  %count.next = mul i32 %xor, %shl
532  %exitcond = icmp ne i32 %add, %arg
533  br i1 %exitcond, label %for.body, label %for.cond.cleanup
534}
535