; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s

; Every function in this file sums sign-extended i16 x i16 products into an
; i64 result together with a sign-extended i32 accumulator (%acc). The
; expected output replaces each pair of neighbouring i16 loads with a single
; i32 load (through a bitcast of the i16 pointer) and feeds those loads to
; @llvm.arm.smlald.
;
; The widened pointers are unnamed values in the output, so the patterns
; capture them in FileCheck variables instead of relying on the numbers that
; opt happens to assign.

; One pair of products; the accumulator is added last. A single smlald call
; is expected, taking the sign-extended %acc as its 64-bit operand.
; CHECK-LABEL: sext_acc_1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
define i64 @sext_acc_1(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %res = add i64 %add, %sext.acc
  ret i64 %res
}

; Two pairs of products; the accumulator is added to the first pair (%add.1)
; before the second pair (%add.2) is folded in. Two chained smlald calls are
; expected, the first one taking the sign-extended %acc.
; CHECK-LABEL: sext_acc_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_2(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %sext.acc = sext i32 %acc to i64
  %add.1 = add i64 %add, %sext.acc
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  %add.2 = add i64 %sext.mul.2, %sext.mul.3
  %add.3 = add i64 %add.1, %add.2
  ret i64 %add.3
}

; Two pairs of products; both pairs are summed first (%add.2) and the
; accumulator is added last (%add.3). The expected output is the same chain
; of two smlald calls as in sext_acc_2.
; CHECK-LABEL: sext_acc_3
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_3(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  %add.1 = add i64 %sext.mul.2, %sext.mul.3
  %add.2 = add i64 %add, %add.1
  %sext.acc = sext i32 %acc to i64
  %add.3 = add i64 %add.2, %sext.acc
  ret i64 %add.3
}

; The first pair of products is summed as i32 (%add) and only then
; sign-extended (%sext.add), while the second pair is extended product by
; product. The same two chained smlald calls are expected.
; NOTE(review): %sext.add is an operand of both %add.1 and %add.2; confirm
; that using it twice is intentional.
; CHECK-LABEL: sext_acc_4
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* %addr.a.2 to i32*
; CHECK: [[A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[CAST_B_2:%[^ ]+]] = bitcast i16* %addr.b.2 to i32*
; CHECK: [[B_2:%[^ ]+]] = load i32, i32* [[CAST_B_2]]
; CHECK: [[ACC:%[^ ]+]] = sext i32 %acc to i64
; CHECK: [[SMLALD:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 [[ACC]])
; CHECK: call i64 @llvm.arm.smlald(i32 [[A_2]], i32 [[B_2]], i64 [[SMLALD]])
define i64 @sext_acc_4(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %sext.add = sext i32 %add to i64
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %sext.mul.2 = sext i32 %mul.2 to i64
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %addr.b.3 = getelementptr i16, i16* %b, i32 3
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %ld.b.3 = load i16, i16* %addr.b.3
  %sext.b.3 = sext i16 %ld.b.3 to i32
  %mul.3 = mul i32 %sext.a.3, %sext.b.3
  %sext.mul.3 = sext i32 %mul.3 to i64
  %sext.acc = sext i32 %acc to i64
  %add.1 = add i64 %sext.mul.2, %sext.add
  %add.2 = add i64 %sext.add, %add.1
  %add.3 = add i64 %add.2, %sext.mul.3
  %add.4 = add i64 %add.3, %sext.acc
  ret i64 %add.4
}