1; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
2; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
3; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
4; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
5
6; Test that ARMCodeGenPrepare can handle:
7; - loops
8; - call operands
9; - call return values
10; - ret instructions
11; We use nuw on the arithmetic instructions to avoid complications.
12
13; Check that the arguments are extended but then nothing else is.
14; This also ensures that the pass can handle loops.
15; CHECK-COMMON-LABEL: phi_feeding_phi_args
16; CHECK-COMMON: uxtb
17; CHECK-COMMON: uxtb
18; CHECK-COMMON-NOT: uxtb
; Test input: %a and %b carry no zeroext/signext attribute. The unsigned-larger
; of the two is picked by the phi in %preheader, which in turn seeds the loop
; phi %val. The function returns void.
19define void @phi_feeding_phi_args(i8 %a, i8 %b) {
20entry:
21  %0 = icmp ugt i8 %a, %b
22  br i1 %0, label %preheader, label %empty
23
24empty:
25  br label %preheader
26
27preheader:
28  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]  ; %a when a > b (unsigned), otherwise %b
29  br label %loop
30
31loop:
32  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]  ; phi whose incoming value is itself a phi
33  %cmp = icmp ult i8 %val, 254
34  br i1 %cmp, label %if.then, label %if.else
35
36if.then:
37  %inc = sub nuw i8 %val, 2  ; despite the name, this subtracts 2
38  br label %if.end
39
40if.else:
41  %inc1 = shl nuw i8 %val, 1
42  br label %if.end
43
44if.end:
45  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
46  %cmp1 = icmp eq i8 %inc2, 255  ; leave the loop only when the merged value equals 255
47  br i1 %cmp1, label %exit, label %loop
48
49exit:
50  ret void
51}
52
53; Same as above, but as the args are zeroext, we shouldn't see any uxts.
54; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args
55; CHECK-COMMON-NOT: uxt
; Test input: same control flow and arithmetic as @phi_feeding_phi_args; the
; only difference is that both i8 parameters are marked zeroext.
56define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
57entry:
58  %0 = icmp ugt i8 %a, %b
59  br i1 %0, label %preheader, label %empty
60
61empty:
62  br label %preheader
63
64preheader:
65  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]  ; %a when a > b (unsigned), otherwise %b
66  br label %loop
67
68loop:
69  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]  ; phi whose incoming value is itself a phi
70  %cmp = icmp ult i8 %val, 254
71  br i1 %cmp, label %if.then, label %if.else
72
73if.then:
74  %inc = sub nuw i8 %val, 2  ; despite the name, this subtracts 2
75  br label %if.end
76
77if.else:
78  %inc1 = shl nuw i8 %val, 1
79  br label %if.end
80
81if.end:
82  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
83  %cmp1 = icmp eq i8 %inc2, 255  ; leave the loop only when the merged value equals 255
84  br i1 %cmp1, label %exit, label %loop
85
86exit:
87  ret void
88}
89
90; Just check that phis also work with i16s.
91; CHECK-COMMON-LABEL: phi_i16:
92; CHECK-COMMON-NOT:   uxt
; Test input: an i16 counter loop with no arguments. The counter starts at the
; constant 0, is incremented by 2 while below 128 and by 1 otherwise, and the
; loop repeats while the incremented value is below 253.
93define void @phi_i16() {
94entry:
95  br label %loop
96
97loop:
98  %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
99  %cmp = icmp ult i16 %val, 128
100  br i1 %cmp, label %if.then, label %if.else
101
102if.then:
103  %inc = add nuw i16 %val, 2
104  br label %if.end
105
106if.else:
107  %inc1 = add nuw i16 %val, 1
108  br label %if.end
109
110if.end:
111  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
112  %cmp1 = icmp ult i16 %inc2, 253
113  br i1 %cmp1, label %loop, label %exit  ; true edge is the back edge here
114
115exit:
116  ret void
117}
118
119; CHECK-COMMON-LABEL: phi_feeding_switch
120; CHECK-COMMON: ldrb
121; CHECK-COMMON: uxtb
122; CHECK-COMMON-NOT: uxt
; Test input: a loop whose header switches on an i8 phi. %phi.0 is seeded by a
; byte loaded from %memblock, %phi.1 by %arg truncated to i8, and %phi.2 by the
; constant 0. Each trip through %latch stores the incremented count to %store.
123define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) {
124entry:
125  %pre = load i8, i8* %memblock, align 1
126  %conv = trunc i16 %arg to i8
127  br label %header
128
129header:
130  %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
131  %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
132  %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
133  switch i8 %phi.0, label %default [
134    i8 43, label %for.inc.i
135    i8 45, label %for.inc.i.i
136  ]
137
138for.inc.i:
139  %xor = xor i8 %phi.1, 1
140  br label %latch
141
142for.inc.i.i:
143  %and = and i8 %phi.1, 3
144  br label %latch
145
146default:
147  %sub = sub i8 %phi.0, 1  ; no nuw flag on this subtraction
148  %cmp2 = icmp ugt i8 %sub, 4
149  br i1 %cmp2, label %latch, label %exit  ; the only edge that leaves the loop
150
151latch:
152  %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
153  %count = add nuw i8 %phi.2, 1
154  store i8 %count, i8* %store, align 1
155  br label %header
156
157exit:
158  ret void
159}
160
161; CHECK-COMMON-LABEL: ret_i8
162; CHECK-COMMON-NOT:   uxt
; Test input: an i8 counter loop whose final value is returned. The return type
; carries no zeroext/signext attribute. The counter starts at 0 and is
; incremented by 2 while below 128, by 1 otherwise.
163define i8 @ret_i8() {
164entry:
165  br label %loop
166
167loop:
168  %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
169  %cmp = icmp ult i8 %val, 128
170  br i1 %cmp, label %if.then, label %if.else
171
172if.then:
173  %inc = add nuw i8 %val, 2
174  br label %if.end
175
176if.else:
177  %inc1 = add nuw i8 %val, 1
178  br label %if.end
179
180if.end:
181  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
182  %cmp1 = icmp ult i8 %inc2, 253
183  br i1 %cmp1, label %exit, label %loop  ; note: true edge exits (opposite of @phi_i16)
184
185exit:
186  ret i8 %inc2  ; the loop-carried value is used by the ret
187}
188
189; Check that %exp requires a uxth in all cases, and that a uxth will also be
190; required to promote %1 for the call - unless we can generate a uadd16.
191; CHECK-COMMON-LABEL: zext_load_sink_call:
192; CHECK-COMMON:       uxt
193; CHECK-DSP-IMM:      uadd16
194; CHECK-COMMON:       cmp
195; CHECK-DSP:          uxt
196; CHECK-DSP-IMM-NOT:  uxt
; Test input: compares an i16 loaded from %ptr with %exp. When they differ,
; both the loaded value and %exp + 3 are zero-extended to i32 and passed to
; @dummy. Returns the call result, or 0 when the values were equal.
197define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) {
198entry:
199  %0 = load i16, i16* %ptr, align 4
200  %1 = add i16 %exp, 3  ; no nuw flag on this add
201  %cmp = icmp eq i16 %0, %exp
202  br i1 %cmp, label %exit, label %if.then
203
204if.then:
205  %conv0 = zext i16 %0 to i32
206  %conv1 = zext i16 %1 to i32
207  %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1)
208  br label %exit
209
210exit:
211  %exitval = phi i32 [ %call, %if.then ], [ 0, %entry  ]
212  ret i32 %exitval
213}
214
215
216; Check that the pass doesn't try to promote the immediate parameters.
217; CHECK-COMMON-LABEL: call_with_imms
218; CHECK-COMMON-NOT:   uxt
; Test input: calls @dummy2 with %arg and two constant-zero i8 operands marked
; zeroext, then returns the call result if it is 0 and 1 otherwise.
219define i8 @call_with_imms(i8* %arg) {
220  %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0)
221  %cmp = icmp eq i8 %call, 0
222  %res = select i1 %cmp, i8 %call, i8 1  ; %call is only selected when it equals 0
223  ret i8 %res
224}
225
226; Test that the call result is still extended.
227; CHECK-COMMON-LABEL: test_call:
228; CHECK-COMMON: bl
229; CHECK-COMMON-NEXT: sxtb r1, r0
; Test input: forwards the zeroext i8 argument to @dummy_i8 and returns, as an
; i16, whether the i8 result is unsigned-less-than 128. The call site puts no
; zeroext/signext attribute on the return value.
230define i16 @test_call(i8 zeroext %arg) {
231  %call = call i8 @dummy_i8(i8 %arg)
232  %cmp = icmp ult i8 %call, 128
233  %conv = zext i1 %cmp to i16  ; widen the i1 compare result to the i16 return type
234  ret i16 %conv
235}
236
237; Test that the transformation bails when it finds that i16 is larger than i8.
238; TODO: We should be able to remove the uxtb in these cases.
239; CHECK-COMMON-LABEL: promote_i8_sink_i16_1
240; CHECK-COMMON: bl dummy_i8
241; CHECK-COMMON: adds r0, #1
242; CHECK-COMMON: uxtb r0, r0
243; CHECK-COMMON: cmp r0
; Test input: the i8 result of @dummy_i8 is incremented, zero-extended to i16,
; and compared against the i16 %arg1. The compare selects between %arg1 and
; %arg2, and the selection is passed to @dummy3, whose result is returned.
244define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) {
245  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
246  %add = add nuw i8 %call, 1
247  %conv = zext i8 %add to i16  ; the i8 value is widened before the compare
248  %cmp = icmp ne i16 %conv, %arg1  ; compare happens at i16
249  %sel = select i1 %cmp, i16 %arg1, i16 %arg2
250  %res = tail call zeroext i16 @dummy3(i16 %sel)
251  ret i16 %res
252}
253
254; CHECK-COMMON-LABEL: promote_i8_sink_i16_2
255; CHECK-COMMON: bl dummy_i8
256; CHECK-COMMON: adds r0, #1
257; CHECK-COMMON: uxtb r0, r0
258; CHECK-COMMON: cmp r0
; Test input: variant of @promote_i8_sink_i16_1 in which %arg1 is an i8. The
; compare is done at i8, and it is %arg1 (not the incremented call result) that
; is zero-extended to i16 to feed the select.
259define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) {
260  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
261  %add = add nuw i8 %call, 1
262  %cmp = icmp ne i8 %add, %arg1  ; compare happens at i8
263  %conv = zext i8 %arg1 to i16  ; %arg1 is used both at i8 (compare) and at i16 (select)
264  %sel = select i1 %cmp, i16 %conv, i16 %arg2
265  %res = tail call zeroext i16 @dummy3(i16 %sel)
266  ret i16 %res
267}
268
; Globals accessed by @zext_i64: @uc is loaded from, @LL is stored to.
269@uc = global i8 42, align 1
270@LL = global i64 0, align 8
271
272; CHECK-COMMON-LABEL: zext_i64
273; CHECK-COMMON: ldrb
274; CHECK-COMMON: strd
; Test input: loads the byte @uc, stores its zero-extension to the i64 global
; @LL, and separately passes the result of comparing the byte with 42 to
; @assert (called through a bitcast of its variadic declaration).
275define void @zext_i64() {
276entry:
277  %0 = load i8, i8* @uc, align 1
278  %conv = zext i8 %0 to i64  ; the loaded i8 is widened straight to i64
279  store i64 %conv, i64* @LL, align 8
280  %cmp = icmp eq i8 %0, 42  ; second use of the same loaded i8
281  %conv1 = zext i1 %cmp to i32
282  %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1)
283  ret void
284}
285
; Globals accessed by @constexpr: @a holds the i16 pointer that is loaded
; through, and the address of @b is used as a constant expression.
286@a = global i16* null, align 4
287@b = global i32 0, align 4
288
289; CHECK-COMMON-LABEL: constexpr
290; CHECK-COMMON: uxth
; Test input: an i16 `or` whose second operand is a constant expression
; (ptrtoint of @b truncated to i16) rather than an instruction or argument.
; The or result is stored back and also compared against 4.
; NOTE(review): attribute group #2 is referenced at the call but is not defined
; in the visible part of this file.
291define i32 @constexpr() {
292entry:
293  store i32 ptrtoint (i32* @b to i32), i32* @b, align 4  ; writes the address of @b into @b
294  %0 = load i16*, i16** @a, align 4
295  %1 = load i16, i16* %0, align 2
296  %or = or i16 %1, ptrtoint (i32* @b to i16)  ; constant-expression operand
297  store i16 %or, i16* %0, align 2
298  %cmp = icmp ne i16 %or, 4
299  %conv3 = zext i1 %cmp to i32
300  %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2
301  ret i32 undef
302}
303
304; Check that d.sroa.0.0.be is promoted and passed directly into the tail call.
305; CHECK-COMMON-LABEL: check_zext_phi_call_arg
306; CHECK-COMMON-NOT: uxt
; Test input: a loop with no exit edge and no ret. The i16 loop phi starts at
; 30; when it is non-zero it is zero-extended to i32 and passed to @f, whose
; zeroext i16 result becomes the next value, otherwise the next value is 0.
; NOTE(review): attribute group #2 is referenced at the call but is not defined
; in the visible part of this file.
307define i32 @check_zext_phi_call_arg() {
308entry:
309  br label %for.cond
310
311for.cond:                                         ; preds = %for.cond.backedge, %entry
312  %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ]
313  %tobool = icmp eq i16 %d.sroa.0.0, 0
314  br i1 %tobool, label %for.cond.backedge, label %if.then
315
316for.cond.backedge:                                ; preds = %for.cond, %if.then
317  %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ]
318  br label %for.cond
319
320if.then:                                          ; preds = %for.cond
321  %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32  ; the loop phi, widened for the call
322  %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2
323  br label %for.cond.backedge
324}
325
326
327; The call to safe_lshift_func takes two parameters, but they're the same value; one of them is just zero-extended.
328; CHECK-COMMON-LABEL: call_zext_i8_i32
; Test input: the i8 result of @safe_mul_func_uint8_t_u_u is passed to
; @safe_lshift_func twice in one call, once as the i8 itself and once
; zero-extended to i32. Neither %p_45 nor %p_46 is used in the body, and both
; paths end in unreachable.
329define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) {
330for.cond8.preheader:
331  %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef)
332  %tobool219 = icmp eq i8 %call217, 0
333  br i1 %tobool219, label %for.end411, label %for.cond273.preheader
334
335for.cond273.preheader:                            ; preds = %for.cond8.preheader
336  %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ]  ; single-entry phi of the call result
337  %conv218.le = zext i8 %call217.lcssa to i32
338  %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le)  ; same value at i8 and at i32
339  unreachable
340
341for.end411:                                       ; preds = %for.cond8.preheader
342  %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4)
343  unreachable
344}
345
; Single-i32 struct type and hidden globals. In the visible part of this file
; only @g_893 is referenced (loaded by @call_return_pointer).
346%struct.anon = type { i32 }
347
348@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
349@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
350@g_82 = hidden local_unnamed_addr global i32 0, align 4
351
352; Test that the transform bails on finding a call which returns an i16**
353; CHECK-COMMON-LABEL: call_return_pointer
354; CHECK-COMMON: sxth
355; CHECK-COMMON-NOT: uxt
; Test input: contains a call (@func_62) that returns a pointer type, i16**,
; whose result is unused. Afterwards an i8 loop phi seeded by %p_13 is either
; reset to 0 or replaced by the truncated signext i16 result of
; @safe_sub_func_int16_t_s_s. The loop has no exit edge and there is no ret.
; NOTE(review): attribute group #0 is referenced on the definition but is not
; defined in the visible part of this file.
356define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 {
357entry:
358  %conv1 = zext i8 %p_13 to i16
359  %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef)  ; zero-extended value passed to a signext parameter
360  %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4
361  %conv2 = trunc i32 %0 to i16
362  br label %for.cond
363
364for.cond:                                         ; preds = %for.cond.backedge, %entry
365  %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ]
366  %tobool = icmp eq i8 %p_13.addr.0, 0
367  br i1 %tobool, label %for.cond.backedge, label %if.then
368
369for.cond.backedge:                                ; preds = %for.cond, %if.then
370  %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ]
371  br label %for.cond
372
373if.then:                                          ; preds = %for.cond
374  %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2)
375  %conv4 = trunc i16 %call3 to i8  ; narrow the i16 call result back to the i8 loop value
376  br label %for.cond.backedge
377}
378
; External callees used by the tests above; they are declarations only, so no
; bodies are visible here. The first group is used by @call_return_pointer and
; @call_zext_i8_i32.
379declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66)
380declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2)
381declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64)
382declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32)
383declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext)
384
; Variadic declarations; @constexpr and @check_zext_phi_call_arg call these
; through a bitcast to a fixed-argument function type.
385declare dso_local i32 @e(...) local_unnamed_addr #1
386declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
387
; Generic helpers. The declarations carry no zeroext/signext attributes; some
; call sites add them on the call itself.
388declare i32 @dummy(i32, i32)
389declare i8 @dummy_i8(i8)
390declare i8 @dummy2(i8*, i8, i8)
391declare i16 @dummy3(i16)
392declare i32 @assert(...)
393