1; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
2; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
3; rdar://13082402
4
; Signed i32 -> float. The value is expected to be loaded straight into s0
; and converted in place.
define float @t1(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
  %ival = load i32* %src, align 4
  %fval = sitofp i32 %ival to float
  ret float %fval
}
14
; Unsigned i32 -> float. The value is expected to be loaded straight into s0
; and converted in place.
define float @t2(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
  %ival = load i32* %src, align 4
  %fval = uitofp i32 %ival to float
  ret float %fval
}
24
; Signed i64 -> double. The value is expected to be loaded straight into d0
; and converted in place.
define double @t3(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
  %ival = load i64* %src, align 4
  %fval = sitofp i64 %ival to double
  ret double %fval
}
34
; Unsigned i64 -> double. The value is expected to be loaded straight into d0
; and converted in place.
define double @t4(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
  %ival = load i64* %src, align 4
  %fval = uitofp i64 %ival to double
  ret double %fval
}
44
45; rdar://13136456
; Signed i32 -> double under optsize. The value is expected to be loaded into
; a general-purpose w register and converted from there.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
entry:
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
  %ival = load i32* %src, align 4
  %fval = sitofp i32 %ival to double
  ret double %fval
}
55
56; Check that we load in FP register when we want to convert into
57; floating point value.
58; This is much faster than loading on GPR and making the conversion
59; GPR -> FPR.
60; <rdar://problem/14599607>
61;
; Check the following patterns for signed/unsigned:
63; 1. load with scaled imm to float.
64; 2. load with scaled register to float.
65; 3. load with scaled imm to double.
66; 4. load with scaled register to double.
67; 5. load with unscaled imm to float.
68; 6. load with unscaled imm to double.
69; With loading size: 8, 16, 32, and 64-bits.
70
71; ********* 1. load with scaled imm to float. *********
; Unsigned i8 at element index 1 -> float, then squared. Expected: a byte load
; into an FP register followed by ucvtf on that same register.
define float @fct1(i8* nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
84
; Unsigned i16 at element index 1 -> float, then squared. Expected: a halfword
; load into an FP register followed by ucvtf on that same register.
define float @fct2(i16* nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
97
; Unsigned i32 at element index 1 -> float, then squared. Expected: a word
; load into an FP register followed by ucvtf on that same register.
define float @fct3(i32* nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
110
111; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at element index 1 -> float, then squared. Expected: a load
; into a general-purpose x register that feeds ucvtf directly.
define float @fct4(i64* nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
124
125; ********* 2. load with scaled register to float. *********
; Unsigned i8 at a register index -> float, then squared. Expected: a
; register-offset byte load into an FP register, then ucvtf.
define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
138
; Unsigned i16 at a register index -> float, then squared. Expected: a scaled
; (lsl #1) register-offset load into an FP register, then ucvtf.
define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
151
; Unsigned i32 at a register index -> float, then squared. Expected: a scaled
; (lsl #2) register-offset load into an FP register, then ucvtf.
define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
164
165; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at a register index -> float, then squared. Expected: a scaled
; (lsl #3) load into a general-purpose x register that feeds ucvtf directly.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
178
179
180; ********* 3. load with scaled imm to double. *********
; Unsigned i8 at element index 1 -> double, then squared. Expected: a byte
; load into an FP register followed by ucvtf on the d view of that register.
define double @fct9(i8* nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
193
; Unsigned i16 at element index 1 -> double, then squared. Expected: a
; halfword load into an FP register followed by ucvtf on its d view.
define double @fct10(i16* nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
206
; Unsigned i32 at element index 1 -> double, then squared. Expected: a word
; load into an FP register followed by ucvtf on its d view.
define double @fct11(i32* nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
219
; Unsigned i64 at element index 1 -> double, then squared. Expected: a
; doubleword load into an FP register followed by ucvtf on that register.
define double @fct12(i64* nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
232
233; ********* 4. load with scaled register to double. *********
; Unsigned i8 at a register index -> double, then squared. Expected: a
; register-offset byte load into an FP register, then ucvtf on its d view.
define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
246
; Unsigned i16 at a register index -> double, then squared. Expected: a scaled
; (lsl #1) register-offset load into an FP register, then ucvtf on its d view.
define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
259
; Unsigned i32 at a register index -> double, then squared. Expected: a scaled
; (lsl #2) register-offset load into an FP register, then ucvtf on its d view.
define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
272
; Unsigned i64 at a register index -> double, then squared. Expected: a scaled
; (lsl #3) register-offset load into an FP register, then ucvtf.
define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
285
286; ********* 5. load with unscaled imm to float. *********
; Unsigned i8 at byte offset -1 -> float, then squared. The address is built
; with integer arithmetic; an unscaled ldur into an FP register is expected.
define float @fct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
301
; Unsigned i16 at byte offset 1 -> float, then squared. The address is built
; with integer arithmetic; an unscaled ldur into an FP register is expected.
define float @fct18(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
315
; Unsigned i32 at byte offset 1 -> float, then squared. The address is built
; with integer arithmetic; an unscaled ldur into an FP register is expected.
define float @fct19(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
329
330; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at byte offset 1 -> float, then squared. Expected: an unscaled
; ldur into a general-purpose x register that feeds ucvtf directly.
define float @fct20(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
345
346; ********* 6. load with unscaled imm to double. *********
; Unsigned i8 at byte offset -1 -> double, then squared. Expected: an unscaled
; ldur into an FP register followed by ucvtf on its d view.
define double @fct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
361
; Unsigned i16 at byte offset 1 -> double, then squared. Expected: an unscaled
; ldur into an FP register followed by ucvtf on its d view.
define double @fct22(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
375
; Unsigned i32 at byte offset 1 -> double, then squared. Expected: an unscaled
; ldur into an FP register followed by ucvtf on its d view.
define double @fct23(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
389
; Unsigned i64 at byte offset 1 -> double, then squared. Expected: an unscaled
; ldur into an FP register followed by ucvtf on that register.
define double @fct24(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
404
405; ********* 1s. load with scaled imm to float. *********
; Signed i8 at element index 1 -> float, then squared.
; Cyclone expectation: FP-register byte load, two sshll widening steps, scvtf.
; Cortex-A57 expectation: sign-extending ldrsb into a w register, then scvtf.
define float @sfct1(i8* nocapture %sp0) {
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct1:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
424
; Signed i16 at element index 1 -> float, then squared. Expected: FP-register
; halfword load, one sshll widening step, then scvtf on the widened register.
define float @sfct2(i16* nocapture %sp0) {
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
438
; Signed i32 at element index 1 -> float, then squared. Expected: a word load
; into an FP register followed by scvtf on that same register.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
451
452; i64 -> f32 is not supported on floating point unit.
; Signed i64 at element index 1 -> float, then squared. Expected: a load into
; a general-purpose x register that feeds scvtf directly.
define float @sfct4(i64* nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
465
466; ********* 2s. load with scaled register to float. *********
; Signed i8 at a register index -> float, then squared.
; Cyclone expectation: FP-register byte load, two sshll widening steps, scvtf.
; Cortex-A57 expectation: sign-extending ldrsb into a w register, then scvtf.
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct5:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
485
; Signed i16 at a register index -> float, then squared. Expected: scaled
; (lsl #1) FP-register load, one sshll widening step, then scvtf.
define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
499
; Signed i32 at a register index -> float, then squared. Expected: a scaled
; (lsl #2) register-offset load into an FP register, then scvtf on it.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
512
513; i64 -> f32 is not supported on floating point unit.
; Signed i64 at a register index -> float, then squared. Expected: a scaled
; (lsl #3) load into a general-purpose x register that feeds scvtf directly.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
526
527; ********* 3s. load with scaled imm to double. *********
; Signed i8 at element index 1 -> double, then squared. Expected: a
; sign-extending ldrsb into a w register that feeds scvtf directly.
define double @sfct9(i8* nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
540
; Signed i16 at element index 1 -> double, then squared.
; Cyclone expectation: FP-register load, two sshll widening steps, scvtf.
; Cortex-A57 expectation: sign-extending ldrsh into a w register, then scvtf.
define double @sfct10(i16* nocapture %sp0) {
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct10:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
559
; Signed i32 at element index 1 -> double, then squared. Expected: FP-register
; word load, one sshll widening step, then scvtf on the widened register.
define double @sfct11(i32* nocapture %sp0) {
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
573
; Signed i64 at element index 1 -> double, then squared. Expected: a
; doubleword load into an FP register followed by scvtf on that register.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
586
587; ********* 4s. load with scaled register to double. *********
; Signed i8 at a register index -> double, then squared. Expected: a
; register-offset ldrsb into a w register that feeds scvtf directly.
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
600
; Signed i16 at a register index -> double, then squared.
; Cyclone expectation: scaled FP-register load, two sshll steps, scvtf.
; Cortex-A57 expectation: scaled ldrsh into a w register, then scvtf.
define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct14:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
619
; Signed i32 at a register index -> double, then squared. Expected: scaled
; (lsl #2) FP-register load, one sshll widening step, then scvtf.
define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
633
; Signed i64 at a register index -> double, then squared. Expected: a scaled
; (lsl #3) register-offset load into an FP register, then scvtf on it.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
646
647; ********* 5s. load with unscaled imm to float. *********
; Signed i8 at byte offset -1 -> float, then squared.
; Cyclone expectation: unscaled FP-register load, two sshll steps, scvtf.
; Cortex-A57 expectation: unscaled ldursb into a w register, then scvtf.
define float @sfct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct17:
; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
668
; Signed i16 at byte offset 1 -> float, then squared. Expected: unscaled
; FP-register load, one sshll widening step, then scvtf.
define float @sfct18(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
683
; Signed i32 at byte offset 1 -> float, then squared. Expected: an unscaled
; ldur into an FP register followed by scvtf on that same register.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define float @sfct19(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
697
698; i64 -> f32 is not supported on floating point unit.
; Signed i64 at byte offset 1 -> float, then squared. Expected: an unscaled
; ldur into a general-purpose x register that feeds scvtf directly.
define float @sfct20(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
713
714; ********* 6s. load with unscaled imm to double. *********
; Signed i8 at byte offset -1 -> double, then squared. Expected: an unscaled
; ldursb into a w register that feeds scvtf directly.
define double @sfct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
729
; Signed i16 at byte offset 1 -> double, then squared.
; Cyclone expectation: unscaled FP-register load, two sshll steps, scvtf.
; Cortex-A57 expectation: unscaled ldursh into a w register, then scvtf.
define double @sfct22(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct22:
; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
749
; Signed i32 at byte offset 1 -> double, then squared. Expected: unscaled
; FP-register load, one sshll widening step, then scvtf.
define double @sfct23(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
764
; Signed i64 at byte offset 1 -> double, then squared. Expected: an unscaled
; ldur into an FP register followed by scvtf on that same register.
; The scvtf source is matched against the register captured by the load in
; this function; no widening step is expected here, so there is no separate
; sign-extended register to capture.
define double @sfct24(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
779
780; Check that we do not use SSHLL code sequence when code size is a concern.
; Signed i8 at byte offset -1 -> float under optsize. Expected: an unscaled
; ldursb into a w register that feeds scvtf directly.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
entry:
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
795
; Signed i32 at element index 1 -> double under minsize. Expected: a load into
; a w register that feeds scvtf directly, with no sshll widening step.
; The label directive names this function in full so it anchors on this
; function's own symbol rather than matching only as a substring.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
808
809; Adding fp128 custom lowering makes these a little fragile since we have to
810; return the correct mix of Legal/Expand from the custom method.
811;
812; rdar://problem/14991489
813
; Unsigned i128 -> float. Expected: a call to __floatuntisf (the symbol may
; carry one extra leading underscore).
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %res = uitofp i128 %in to float
  ret float %res
}
820
; Signed i128 -> double. Expected: a call to __floattidf (the symbol may carry
; one extra leading underscore).
define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %res = sitofp i128 %in to double
  ret double %res
}
827
; Unsigned i128 -> fp128. Expected: a call to __floatuntitf (the symbol may
; carry one extra leading underscore).
define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %res = uitofp i128 %in to fp128
  ret fp128 %res
}
834
; float -> signed i128. Expected: a call to __fixsfti (the symbol may carry
; one extra leading underscore).
; The label directive ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %res = fptosi float %in to i128
  ret i128 %res
}
841
; double -> unsigned i128. Expected: a call to __fixunsdfti (the symbol may
; carry one extra leading underscore).
; The label directive ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %res = fptoui double %in to i128
  ret i128 %res
}
848
; fp128 -> signed i128. Expected: a call to __fixtfti (the symbol may carry
; one extra leading underscore).
; The label directive ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %res = fptosi fp128 %in to i128
  ret i128 %res
}
855
856