; REQUIRES: asserts
; The regression tests need to test for the order of emitted instructions and
; are therefore fragile and reliant on instruction scheduling. The test cases
; have been minimized as much as possible, but most of them could still break
; if the instruction scheduling heuristics for cortex-a53 change.
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
; RUN:  | FileCheck %s --check-prefix CHECK
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
; RUN:  | FileCheck %s --check-prefix CHECK-NOWORKAROUND
; The following run lines just verify whether this pass runs by default for
; the given CPUs. Given the fragility of the tests, this is only checked on a
; test case where the scheduler has no freedom at all to reschedule the
; instructions, so potentially very different scheduling heuristics will not
; break the test case.
; RUN: llc < %s -mcpu=generic    | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a53 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a57 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cyclone    | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
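;
; Background sketch (registers are illustrative only, not matched by the
; checks below): erratum 835769 concerns a 64-bit multiply-accumulate
; (madd/msub, or the widening smaddl/smsubl/umaddl/umsubl forms) directly
; following a memory access, which can produce a wrong result, e.g.
;
;   ldr  x1, [x2]
;   madd x0, x1, x3, x0
;
; The workaround pass breaks the sequence apart:
;
;   ldr  x1, [x2]
;   nop
;   madd x0, x1, x3, x0
;
; The checks in this file therefore only match mnemonics, not operands.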

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_load_madd_64:
; CHECK:	ldr
; CHECK-NEXT:	nop
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_64:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	madd
; CHECK-BASIC-PASS-DISABLED-LABEL: f_load_madd_64:
; CHECK-BASIC-PASS-DISABLED:  ldr
; CHECK-BASIC-PASS-DISABLED-NEXT:  madd
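; (A 64-bit madd directly after a load is exactly the sequence the erratum
; describes, so the workaround has to separate the two with a nop.)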


define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_load_madd_32:
; CHECK:	ldr
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_32:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	madd
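; (The 32-bit multiply-accumulate forms are not affected by the erratum, so
; no nop is expected here even with the workaround enabled.)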


define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_msub_64:
; CHECK:	ldr
; CHECK-NEXT:	nop
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_64:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	msub


define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_msub_32:
; CHECK:	ldr
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_32:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	msub


define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_load_mul_64:
; CHECK:	ldr
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_64:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	mul


define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_load_mul_32:
; CHECK:	ldr
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_32:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	mul


define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64* %c, align 8
  %mul = sub i64 0, %b
  %sub = mul i64 %0, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_mneg_64:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_64:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: nop
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg
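; (Note: the FIXME-CHECK prefixes above are never passed to FileCheck by any
; RUN line, so these lines are inert placeholders until then.)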


define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32* %c, align 4
  %mul = sub i32 0, %b
  %sub = mul i32 %0, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_mneg_32:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_32:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg


define i64 @f_load_smaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, %a
  %0 = load i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add3 = add nsw i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_smaddl:
; CHECK:	ldrsw
; CHECK-NEXT:	nop
; CHECK-NEXT:	smaddl
; CHECK-NOWORKAROUND-LABEL: f_load_smaddl:
; CHECK-NOWORKAROUND:	ldrsw
; CHECK-NOWORKAROUND-NEXT:	smaddl
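; (smaddl is a widening multiply-accumulate with a 64-bit accumulator, and
; the preceding ldrsw is a load, so the workaround must insert a nop.)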


define i64 @f_load_smsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add = add nsw i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_smsubl_64:
; CHECK:	ldrsw
; CHECK-NEXT:	nop
; CHECK-NEXT:	smsubl
; CHECK-NOWORKAROUND-LABEL: f_load_smsubl_64:
; CHECK-NOWORKAROUND:	ldrsw
; CHECK-NOWORKAROUND-NEXT:	smsubl


define i64 @f_load_smull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = load i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smull:
; CHECK:	ldrsw
; CHECK-NEXT:	smull
; CHECK-NOWORKAROUND-LABEL: f_load_smull:
; CHECK-NOWORKAROUND:	ldrsw
; CHECK-NOWORKAROUND-NEXT:	smull
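; (smull is a widening multiply without an accumulator operand, so it is not
; affected and no nop is inserted.)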


define i64 @f_load_smnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_smnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        smnegl instructions


define i64 @f_load_umaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %add = add i64 %mul, %a
  %0 = load i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add3 = add i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_umaddl:
; CHECK:	ldr
; CHECK-NEXT:	nop
; CHECK-NEXT:	umaddl
; CHECK-NOWORKAROUND-LABEL: f_load_umaddl:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	umaddl


define i64 @f_load_umsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add = add i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_umsubl_64:
; CHECK:	ldr
; CHECK-NEXT:	nop
; CHECK-NEXT:	umsubl
; CHECK-NOWORKAROUND-LABEL: f_load_umsubl_64:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	umsubl


define i64 @f_load_umull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %0 = load i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umull:
; CHECK:	ldr
; CHECK-NEXT:	umull
; CHECK-NOWORKAROUND-LABEL: f_load_umull:
; CHECK-NOWORKAROUND:	ldr
; CHECK-NOWORKAROUND-NEXT:	umull


define i64 @f_load_umnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_umnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        umnegl instructions


define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_store_madd_64:
; CHECK:	str
; CHECK-NEXT:	nop
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_64:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	madd
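; (Stores count as memory accesses for the erratum just like loads, so a
; 64-bit madd directly after an str also needs an intervening nop.)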


define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_store_madd_32:
; CHECK:	str
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_32:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	madd


define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_store_msub_64:
; CHECK:	str
; CHECK-NEXT:	nop
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_64:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	msub


define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_store_msub_32:
; CHECK:	str
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_32:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	msub


define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_store_mul_64:
; CHECK:	str
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_64:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	mul


define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_store_mul_32:
; CHECK:	str
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_32:
; CHECK-NOWORKAROUND:	str
; CHECK-NOWORKAROUND-NEXT:	mul


define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_prefetch_madd_64:
; CHECK:	prfm
; CHECK-NEXT:   nop
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_64:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	madd
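; (Prefetches are memory accesses as well: a prfm followed by a 64-bit madd
; still requires the intervening nop.)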

declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2

define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_prefetch_madd_32:
; CHECK:	prfm
; CHECK-NEXT:	madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_32:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	madd

define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_prefetch_msub_64:
; CHECK:	prfm
; CHECK-NEXT:   nop
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_64:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	msub

define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_prefetch_msub_32:
; CHECK:	prfm
; CHECK-NEXT:	msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_32:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	msub

define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_prefetch_mul_64:
; CHECK:	prfm
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_64:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	mul

define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_prefetch_mul_32:
; CHECK:	prfm
; CHECK-NEXT:	mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_32:
; CHECK-NOWORKAROUND:	prfm
; CHECK-NOWORKAROUND-NEXT:	mul

define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64* %c, align 8
  br label %block1

block1:
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  %tmp = ptrtoint i8* blockaddress(@fall_through, %block1) to i64
  %ret = add nsw i64 %tmp, %add
  ret i64 %ret
}
; CHECK-LABEL:	fall_through
; CHECK:	ldr
; CHECK-NEXT:	nop
; CHECK-NEXT:	.Ltmp
; CHECK-NEXT: 	BB
; CHECK-NEXT: 	madd
; CHECK-NOWORKAROUND-LABEL:	fall_through
; CHECK-NOWORKAROUND: 	ldr
; CHECK-NOWORKAROUND-NEXT:	.Ltmp
; CHECK-NOWORKAROUND-NEXT:	BB
; CHECK-NOWORKAROUND-NEXT:	madd
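; (The fall-through case verifies that the pass looks across basic-block
; boundaries: the load ends one block and the madd starts the next, with
; only labels in between, yet the nop must still be inserted.)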

; No checks for this; just make sure it doesn't crash.
define i32 @crash_check(i8** nocapture readnone %data) #0 {
entry:
  br label %while.cond

while.cond:
  br label %while.cond
}

attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }


; CHECK-LABEL: ... Statistics Collected ...
; CHECK: 11 aarch64-fix-cortex-a53-835769 - Number of Nops added to work around erratum 835769
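; (The count of 11 matches the eleven 64-bit multiply-accumulate test cases
; above that directly follow a load, store, or prefetch, including the
; cross-block fall_through case.)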