; RUN: opt -S -loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"

; Tests for the loop-reroll pass (see the run line above) on loops that contain
; extra instructions next to the root instruction chains. Going by the function
; names, the "rerollable" loops are expected to be transformed and the
; "unrerollable" ones are not; the FileCheck directives in each function pin
; only the first few instructions that follow the loop label.

; Loop over %iv = 0..4 (it exits once %iv.next equals 5). Every iteration copies
; elements 0 and 1 of row %iv + 20 of %a into row %iv + 10. The only users of
; %iv are the two shared adds (%plus20, %plus10) and the increment.
; The directives expect the body to start with the phi, two GEPs at the constant
; rows 20 and 10 whose second index is %iv, and a single load/store pair.
define void @rerollable1([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; NO unrerollable instructions

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0 (element index 0 of both rows)
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1 (element index 1 of both rows)
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; latch
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Same loop as @rerollable1 plus one extra store of 999 through %stptrx, whose
; address is computed from %iv directly and is not part of either root chain.
; The directives expect the phi to be followed by %stptrx and its store under
; their original names; presumably this shows the loop is left untouched, but
; only these first instructions are actually checked.
define void @unrerollable1([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; unrerollable instructions using %iv
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0 (element index 0 of both rows)
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1 (element index 1 of both rows)
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; latch
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Variant of @unrerollable1 in which the extra store is addressed through
; %iv.next instead of %iv, so the increment is placed right after the phi.
; The directives expect the phi, the increment, %stptrx and its store of 999 to
; appear in that order under their original names.
define void @unrerollable2([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; unrerollable instructions using %iv.next
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0 (element index 0 of both rows)
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1 (element index 1 of both rows)
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; latch
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Loop over %iv = 0..2 (it continues while %iv.next is unsigned-less-than 3).
; %iv.scaled is 3 * %iv + 20. Each iteration calls @bar six times, with
; (%iv.scaled + k) / 5 for k = 0, 1, 2 (root set 1) and k = 4, 5, 6 (root set 2).
; The directives expect the phi to be followed by two adds of %iv with a
; constant of 20 or 24, in either order.
; NOTE(review): the per-set labels below read "root instruction 0" and
; "root instruction 2", skipping 1 -- presumably a numbering slip; confirm.
define dso_local void @rerollable2() {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}

  ; induction variable
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  ; scale instruction
  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; NO unrerollable instructions

  ; root set 1

  ; base instruction (%iv.scaled itself)
  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)
  ; root instruction 0 (%iv.scaled + 1)
  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)
  ; root instruction 2 (%iv.scaled + 2)
  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  ; root set 2

  ; base instruction (%iv.scaled + 4)
  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)
  ; root instruction 0 (%iv.scaled + 5)
  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)
  ; root instruction 2 (%iv.scaled + 6)
  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  ; loop-increment
  %iv.next = add nuw nsw i32 %iv, 1

  ; latch
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Same loop as @rerollable2 plus one extra call, @bar(7 * %iv), whose argument
; %iv.mul7 is computed from %iv directly and belongs to neither root set.
; The directives expect the phi to be followed by %iv.mul3, %iv.scaled,
; %iv.mul7 and the call on %iv.mul7, all under their original names.
; NOTE(review): the per-set labels below read "root instruction 0" and
; "root instruction 2", skipping 1 -- presumably a numbering slip; confirm.
define dso_local void @unrerollable3() {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)

  ; induction variable
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  ; scale instruction
  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; unrerollable instructions using %iv
  %iv.mul7 = mul nuw nsw i32 %iv, 7
  tail call void @bar(i32 %iv.mul7)

  ; root set 1

  ; base instruction (%iv.scaled itself)
  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)
  ; root instruction 0 (%iv.scaled + 1)
  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)
  ; root instruction 2 (%iv.scaled + 2)
  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  ; root set 2

  ; base instruction (%iv.scaled + 4)
  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)
  ; root instruction 0 (%iv.scaled + 5)
  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)
  ; root instruction 2 (%iv.scaled + 6)
  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  ; loop-increment
  %iv.next = add nuw nsw i32 %iv, 1

  ; latch
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; External function taking one i32; only declared here, and called from the
; loops of @rerollable2 and @unrerollable3.
declare dso_local void @bar(i32)