; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false | FileCheck %s --check-prefix=CHECK-UNROLL

; Structural tests for hardware-loop generation on thumbv8.1m.main.
; Three pipelines are exercised by the commands above:
;   * opt with -hardware-loops: expectations on the loop intrinsics in the IR.
;   * llc: expectations on the dls/wls/le instructions in the assembly.
;   * opt -loop-unroll piped into llc: expectations on runtime-unrolled loops.
; Each function below is preceded by the expectations that apply to it.

; A loop with two exiting blocks (%do.body and %if.end). No loop intrinsics
; are expected in the opt output.
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
entry:
  br label %do.body

do.body:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
  %0 = load i32, i32* %arrayidx, align 4
  %cmp = icmp sgt i32 %0, %max
  br i1 %cmp, label %do.end, label %if.end

if.end:
  %inc = add nuw i32 %i.0, 1
  %cmp1 = icmp ult i32 %inc, %n
  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge

if.end.do.end_crit_edge:
  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
  br label %do.end

do.end:
  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
  ret i32 %1
}

; A two-deep loop nest. The iteration count is expected to be set in the
; inner loop's preheader (%while.cond1.preheader.us) and the decrement/branch
; pair in the inner loop body only; the outer loop must not get a decrement.
; CHECK-LABEL: nested
; CHECK-NOT: call void @llvm.set.loop.iterations.i32(i32 %N)
; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
; CHECK: br label %while.body3.us

; CHECK: [[REM:%[^ ]+]] = phi i32 [ %N, %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7

; CHECK-LLC-LABEL: nested:
; CHECK-LLC-NOT: mov lr, r1
; CHECK-LLC: dls lr, r1
; CHECK-LLC-NOT: mov lr, r1
; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9._]+]]:
; CHECK-LLC: le lr, [[LOOP_HEADER]]
; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]]
; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9._]+]]:

define void @nested(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %exitcond = icmp eq i32 %inc.us, %N
  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; The input already contains the set-iterations and decrement intrinsics.
; Exactly one of each is expected in the output (no duplicates inserted).
; CHECK-LABEL: pre_existing
; CHECK: llvm.set.loop.iterations
; CHECK-NOT: llvm.set.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  call void @llvm.set.loop.iterations.i32(i32 %n)
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
  %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret i32 0
}

; As pre_existing, but the loop is guarded by an existing
; test-and-set-iterations intrinsic in %entry. No further set-iterations or
; decrement calls are expected.
; CHECK-LABEL: pre_existing_test_set
; CHECK: call i1 @llvm.test.set.loop.iterations
; CHECK-NOT: llvm.set{{.*}}.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
  br i1 %guard, label %while.preheader, label %while.end

while.preheader:
  br label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret i32 0
}

; A loop nest whose inner loop already carries the intrinsics. The outer loop
; must not receive a set-iterations call before the inner preheader, and no
; second decrement is expected after the inner loop's branch.
; CHECK-LABEL: pre_existing_inner
; CHECK-NOT: llvm.set.loop.iterations
; CHECK: while.cond1.preheader.us:
; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
; CHECK: br i1
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  call void @llvm.set.loop.iterations.i32(i32 %N)
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %0 = phi i32 [ %N, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; A loop nest in which each loop tests its exit condition in the header
; (blocks 4 and 9) and the latches (blocks 18 and 12) branch back
; unconditionally. No loop intrinsics are expected.
; CHECK-LABEL: not_rotated
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
define void @not_rotated(i32, i16* nocapture, i16 signext) {
  br label %4

4:
  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
  %6 = icmp eq i32 %5, %0
  br i1 %6, label %20, label %7

7:
  %8 = mul i32 %5, %0
  br label %9

9:
  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
  %11 = icmp eq i32 %10, %0
  br i1 %11, label %18, label %12

12:
  %13 = add i32 %10, %8
  %14 = getelementptr inbounds i16, i16* %1, i32 %13
  %15 = load i16, i16* %14, align 2
  %16 = add i16 %15, %2
  store i16 %16, i16* %14, align 2
  %17 = add i32 %10, 1
  br label %9

18:
  %19 = add i32 %5, 1
  br label %4

20:
  ret void
}

; %header has back-edges from both %latch.0 and %latch.1. No loop intrinsics
; are expected.
; NOTE(review): as written, %latch.1 branches to itself on its false edge and
; %exit has no predecessors -- confirm this shape is intentional.
; CHECK-LABEL: multi_latch
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
entry:
  %half = lshr i32 %N, 1
  br label %header

header:
  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
  %cmp = icmp ult i32 %iv, %half
  %addr.a = getelementptr i32, i32* %a, i32 %iv
  %addr.b = getelementptr i32, i32* %b, i32 %iv
  br i1 %cmp, label %if.then, label %if.else

if.then:
  store i32 %iv, i32* %addr.a
  br label %latch.0

if.else:
  store i32 %iv, i32* %addr.b
  br label %latch.0

latch.0:
  %count.next = add nuw i32 %iv, 1
  %cmp.1 = icmp ult i32 %count.next, %half
  br i1 %cmp.1, label %header, label %latch.1

latch.1:
  %ld = load i32, i32* %addr.a
  store i32 %ld, i32* %addr.b
  %cmp.2 = icmp ult i32 %count.next, %N
  br i1 %cmp.2, label %header, label %latch.1

exit:
  ret void
}

; A guarded loop whose body contains a switch feeding a single latch
; (%for.inc). The guard in %entry is expected to become a
; test-and-set-iterations call, a %for.body.preheader block is expected, and
; the decrement/compare/branch sequence is expected in %for.inc.
; CHECK-LABEL: search
; CHECK: entry:
; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
; CHECK: for.body.preheader:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: for.inc:
; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
define i32 @search(i8* nocapture readonly %c, i32 %N) {
entry:
  %cmp11 = icmp eq i32 %N, 0
  br i1 %cmp11, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
  ret i32 %sub

for.body:
  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
  %0 = load i8, i8* %arrayidx, align 1
  switch i8 %0, label %for.inc [
    i8 108, label %sw.bb
    i8 111, label %sw.bb
    i8 112, label %sw.bb
    i8 32, label %sw.bb1
  ]

sw.bb:                                            ; preds = %for.body, %for.body, %for.body
  %inc = add nsw i32 %found.012, 1
  br label %for.inc

sw.bb1:                                           ; preds = %for.body
  %inc2 = add nsw i32 %spaces.013, 1
  br label %for.inc

for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
  %inc3 = add nuw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc3, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; An incrementing loop guarded by a signed comparison (%N > 0). The opt run
; expects a set-iterations call and a decrement. In the unroll run the main
; unrolled loop is expected to end in a plain bne, while the epilogue loop is
; expected to use wls/le.
; CHECK-LABEL: unroll_inc_int
; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_int
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL-NEXT: [[EXIT]]

define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; As unroll_inc_int, but guarded by %N == 0. The opt run expects the guard to
; become a test-and-set-iterations call; the llc run expects wls/le with the
; wls target being the block immediately after the le.
; CHECK-LABEL: unroll_inc_unsigned
; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(

; CHECK-LLC-LABEL: unroll_inc_unsigned:
; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]]
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]
; CHECK-LLC-NEXT: [[EXIT]]:

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_unsigned
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL: [[EPIL_EXIT]]:
; CHECK-UNROLL: pop
define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp eq i32 %N, 0
  br i1 %cmp8, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; A decrementing loop (induction variable starts at %N and counts down while
; it is greater than zero). The unroll run expects a wls/le prologue loop whose
; wls target is the block immediately following the prologue's le, followed
; by a dls/le main loop.
; CHECK-LABEL: unroll_dec_int
; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(

; TODO: An unnecessary register is being held to hold COUNT, lr should just
; be used instead.
; CHECK-LLC-LABEL: unroll_dec_int:
; CHECK-LLC: dls lr, r3
; CHECK-LLC-NOT: mov lr, r3
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]

; CHECK-UNROLL-LABEL: unroll_dec_int:
; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[PROLOGUE]]
; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT]]:
; CHECK-UNROLL: dls lr, lr
; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[BODY]]
; CHECK-UNROLL-NOT: b
; CHECK-UNROLL: pop
define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %dec = add nsw i32 %i.09, -1
  %cmp = icmp sgt i32 %dec, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Hardware-loop intrinsics referenced by the functions above.
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
