; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s

; sext_acc_1: elements 0 and 1 of %a and %b are loaded as i16, sign-extended
; to i32 and multiplied pairwise. Each i32 product is sign-extended to i64, the
; two are summed, and the sign-extended i32 %acc is added last.
; Expected output: one widened i32 load per pointer feeding a single smlald
; call whose accumulator operand is the sign-extended %acc.
; CHECK-LABEL: sext_acc_1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
define i64 @sext_acc_1(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Product of element 0.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Product of element 1 (the adjacent i16 in each array).
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Widen both products to i64 before summing, then add the widened %acc.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %res = add i64 %add, %sext.acc
  ret i64 %res
}

; sext_acc_2: four i16 products (elements 0..3 of %a and %b), each widened to
; i64. The sign-extended %acc is added to the sum of the first pair, and the
; sum of the second pair is added afterwards.
; Expected output: two chained smlald calls. The first takes the sign-extended
; %acc as accumulator; the second takes the result of the first.
; The second pair's loads are matched through the captured bitcast names rather
; than literal value numbers, so the test does not depend on how the pass's
; unnamed temporaries happen to be numbered.
; CHECK-LABEL: sext_acc_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_2(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Product of element 0.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Product of element 1.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; First pair: widen, sum, and fold in the widened accumulator right away.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %add.1 = add i64 %add, %sext.acc
  ; Product of element 2.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  ; Product of element 3.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  ; Second pair summed on its own, then joined with the accumulated first pair.
  %add.2 = add i64 %sext.mul.2, %sext.mul.3
  %add.3 = add i64 %add.1, %add.2
  ret i64 %add.3
}

; sext_acc_3: four i16 products (elements 0..3 of %a and %b), each widened to
; i64. Both pair sums are combined first, and the sign-extended %acc is the
; last value added.
; Expected output: two chained smlald calls. The first takes the sign-extended
; %acc as accumulator; the second takes the result of the first.
; The second pair's loads are matched through the captured bitcast names rather
; than literal value numbers, so the test does not depend on how the pass's
; unnamed temporaries happen to be numbered.
; CHECK-LABEL: sext_acc_3
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_3(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Product of element 0.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Product of element 1.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; First pair: widen and sum.
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  ; Product of element 2.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  ; Product of element 3.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  ; Second pair summed, joined with the first pair, and the widened %acc is
  ; the final addend.
  %add.1 = add i64 %sext.mul.2, %sext.mul.3
  %add.2 = add i64 %add, %add.1
  %sext.acc = sext i32 %acc to i64
  %add.3 = add i64 %add.2, %sext.acc
  ret i64 %add.3
}

; sext_acc_4: four i16 products (elements 0..3 of %a and %b). Unlike the other
; cases in this file, the first pair is summed as i32 and only the sum is
; sign-extended to i64; products 2 and 3 are widened individually and added
; one at a time, with the sign-extended %acc added last.
; Expected output: two chained smlald calls. The first takes the sign-extended
; %acc as accumulator; the second takes the result of the first.
; The second pair's loads are matched through the captured bitcast names rather
; than literal value numbers, so the test does not depend on how the pass's
; unnamed temporaries happen to be numbered.
; CHECK-LABEL: sext_acc_4
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_4(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Product of element 0.
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Product of element 1.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; First pair is added at i32 width; the sign extension is applied to the sum.
  %add = add i32 %mul.0, %mul.1
  %sext.add = sext i32 %add to i64
  ; Product of element 2.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  ; Product of element 3.
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  %sext.acc = sext i32 %acc to i64
  ; NOTE(review): %sext.add is an operand of both %add.1 and %add.2, so the
  ; first pair's sum enters the chain twice. Confirm this is intentional; the
  ; input is left untouched because altering it could change what the pass emits.
  %add.1 = add i64 %sext.mul.2, %sext.add
  %add.2 = add i64 %sext.add, %add.1
  %add.3 = add i64 %add.2, %sext.mul.3
  %add.4 = add i64 %add.3, %sext.acc
  ret i64 %add.4
}
