; RUN: opt -instcombine -mtriple=thumbv8.1m.main -S %s | FileCheck --check-prefix=IR %s
; RUN: opt -instcombine -mtriple=thumbv8.1m.main    %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -O3 -o - | FileCheck --check-prefix=ASM %s

%struct.foo = type { [2 x <4 x i32>] }

define arm_aapcs_vfpcc i32 @test_vadciq_multiple(%struct.foo %a, %struct.foo %b, i32 %carry) {
entry:
  %a.0 = extractvalue %struct.foo %a, 0, 0
  %a.1 = extractvalue %struct.foo %a, 0, 1
  %b.0 = extractvalue %struct.foo %b, 0, 0
  %b.1 = extractvalue %struct.foo %b, 0, 1

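  ; The incoming carry is a bare 0/1 value: shift it up to bit 29, where the
  ; carry flag lives in FPSCR, before passing it to the intrinsic, and pull
  ; the updated flag back out of the same bit of the FPSCR value returned by
  ; each call.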
  %fpscr.in.0 = shl i32 %carry, 29
  %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
  %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
  %shifted.out.0 = lshr i32 %fpscr.out.0, 29
  %carry.out.0 = and i32 1, %shifted.out.0
  %fpscr.in.1 = shl i32 %carry.out.0, 29
  %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.in.1)
  %fpscr.out.1 = extractvalue { <4 x i32>, i32 } %outpair.1, 1
  %shifted.out.1 = lshr i32 %fpscr.out.1, 29
  %carry.out.1 = and i32 1, %shifted.out.1
  ret i32 %carry.out.1
}

define arm_aapcs_vfpcc i32 @test_vadciq_pred_multiple(%struct.foo %a, %struct.foo %b, i32 %ipred, i32 %carry) {
entry:
  %a.0 = extractvalue %struct.foo %a, 0, 0
  %a.1 = extractvalue %struct.foo %a, 0, 1
  %b.0 = extractvalue %struct.foo %b, 0, 0
  %b.1 = extractvalue %struct.foo %b, 0, 1

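  ; Convert the integer lane-predicate value into the <4 x i1> vector
  ; predicate expected by the predicated form of the intrinsic; both VADC
  ; calls below reuse the same predicate.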
  %vpred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %ipred)
  %fpscr.in.0 = shl i32 %carry, 29
  %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0, <4 x i1> %vpred)
  %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
  %shifted.out.0 = lshr i32 %fpscr.out.0, 29
  %carry.out.0 = and i32 1, %shifted.out.0
  %fpscr.in.1 = shl i32 %carry.out.0, 29
  %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.in.1, <4 x i1> %vpred)
  %fpscr.out.1 = extractvalue { <4 x i32>, i32 } %outpair.1, 1
  %shifted.out.1 = lshr i32 %fpscr.out.1, 29
  %carry.out.1 = and i32 1, %shifted.out.1
  ret i32 %carry.out.1
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32>, <4 x i32>, i32)
declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; Expect the conversion between the two intrinsic calls, in which the
; FPSCR-formatted output value is narrowed back down to just the carry
; bit at bit 0 and then widened again for the next call, to be optimized
; away completely by InstCombine, so that the FPSCR output from one
; intrinsic is passed straight on to the next:

; IR: %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
; IR: %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
; IR: %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.out.0)

; IR: %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0, <4 x i1> %vpred)
; IR: %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
; IR: %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.out.0, <4 x i1> %vpred)

; And this is the assembly language we expect at the end of it, with
; the two vadc.i32 instructions right next to each other, and the
; second one implicitly reusing the FPSCR written by the first.

; ASM: test_vadciq_multiple:
; ASM:      lsls r0, r0, #29
; ASM-NEXT: vmsr fpscr_nzcvqc, r0
; ASM-NEXT: vadc.i32 q0, q0, q2
; ASM-NEXT: vadc.i32 q0, q1, q3
; ASM-NEXT: vmrs r0, fpscr_nzcvqc
; ASM-NEXT: ubfx r0, r0, #29, #1
; ASM-NEXT: bx lr

; ASM: test_vadciq_pred_multiple:
; ASM:      lsls r1, r1, #29
; ASM-NEXT: vmsr p0, r0
; ASM-NEXT: vmsr fpscr_nzcvqc, r1
; ASM-NEXT: vpstt
; ASM-NEXT: vadct.i32 q0, q0, q2
; ASM-NEXT: vadct.i32 q0, q1, q3
; ASM-NEXT: vmrs r0, fpscr_nzcvqc
; ASM-NEXT: ubfx r0, r0, #29, #1
; ASM-NEXT: bx lr