; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
; RUN:   FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
; RUN:   llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
; RUN:   -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
; RUN:   FileCheck %s --check-prefix=CHECK-REMARKS

; Structural tests for the hardware-loops pass on thumbv8.1m.main. Three
; invocations are verified: the IR produced by -hardware-loops (default
; prefix), the assembly produced by llc after -loop-unroll (UNROLL prefix),
; and the analysis remarks printed by the pass (REMARKS prefix).

; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop

; The loop can leave from the header (do.body -> do.end) as well as from the
; latch (if.end); no loop intrinsics are expected in the output.
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
entry:
  br label %do.body

do.body:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
  %0 = load i32, i32* %arrayidx, align 4
  %cmp = icmp sgt i32 %0, %max
  br i1 %cmp, label %do.end, label %if.end

if.end:
  %inc = add nuw i32 %i.0, 1
  %cmp1 = icmp ult i32 %inc, %n
  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge

if.end.do.end_crit_edge:
  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
  br label %do.end

do.end:
  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
  ret i32 %1
}

; Two-deep loop nest. The checks expect the iteration-count setup and the
; decrement to appear only for the inner loop (while.body3.us); the outer
; loop must keep its ordinary compare-and-branch.
; CHECK-LABEL: nested
; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br label %while.body3.us

; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7

define void @nested(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %exitcond = icmp eq i32 %inc.us, %N
  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; The input already contains the start/decrement intrinsics. The checks
; expect exactly one of each in the output, i.e. nothing is added on top.
; CHECK-LABEL: pre_existing
; CHECK: llvm.start.loop.iterations
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
  %0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret i32 0
}

; Same as pre_existing, but the loop is guarded by a pre-existing
; test.set.loop.iterations call in the entry block.
; CHECK-LABEL: pre_existing_test_set
; CHECK: call i1 @llvm.test.set.loop.iterations
; CHECK-NOT: llvm.set{{.*}}.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
  br i1 %guard, label %while.preheader, label %while.end

while.preheader:
  br label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret i32 0
}

; Loop nest whose inner loop already uses the start/decrement intrinsics.
; The checks expect the existing inner-loop intrinsics to remain and no
; further decrement to be introduced afterwards (i.e. for the outer loop).
; CHECK-LABEL: pre_existing_inner
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: while.cond1.preheader.us:
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK: br i1
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %0 = phi i32 [ %start, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; Both loops test their exit condition in the header (blocks %4 and %9) and
; branch back unconditionally from the latch; no loop intrinsics are expected.
; CHECK-LABEL: not_rotated
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
define void @not_rotated(i32, i16* nocapture, i16 signext) {
  br label %4

4:
  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
  %6 = icmp eq i32 %5, %0
  br i1 %6, label %20, label %7

7:
  %8 = mul i32 %5, %0
  br label %9

9:
  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
  %11 = icmp eq i32 %10, %0
  br i1 %11, label %18, label %12

12:
  %13 = add i32 %10, %8
  %14 = getelementptr inbounds i16, i16* %1, i32 %13
  %15 = load i16, i16* %14, align 2
  %16 = add i16 %15, %2
  store i16 %16, i16* %14, align 2
  %17 = add i32 %10, 1
  br label %9

18:
  %19 = add i32 %5, 1
  br label %4

20:
  ret void
}

; The header has two back edges (from latch.0 and from latch.1); no loop
; intrinsics are expected. The loop is left through latch.1 -> exit.
; CHECK-LABEL: multi_latch
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
entry:
  %half = lshr i32 %N, 1
  br label %header

header:
  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
  %cmp = icmp ult i32 %iv, %half
  %addr.a = getelementptr i32, i32* %a, i32 %iv
  %addr.b = getelementptr i32, i32* %b, i32 %iv
  br i1 %cmp, label %if.then, label %if.else

if.then:
  store i32 %iv, i32* %addr.a
  br label %latch.0

if.else:
  store i32 %iv, i32* %addr.b
  br label %latch.0

latch.0:
  %count.next = add nuw i32 %iv, 1
  %cmp.1 = icmp ult i32 %count.next, %half
  br i1 %cmp.1, label %header, label %latch.1

latch.1:
  %ld = load i32, i32* %addr.a
  store i32 %ld, i32* %addr.b
  %cmp.2 = icmp ult i32 %count.next, %N
  ; The false edge must leave the loop: branching back to %latch.1 would
  ; leave %exit without predecessors and the function could never return.
  br i1 %cmp.2, label %header, label %exit

exit:
  ret void
}

; Loop body containing a switch that funnels into a single latch (for.inc).
; The checks expect a guarded hardware loop: test.set in entry, decrement and
; compare in for.inc.
; CHECK-LABEL: search
; CHECK: entry:
; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
; CHECK: for.body.preheader:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: for.inc:
; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
define i32 @search(i8* nocapture readonly %c, i32 %N) {
entry:
  %cmp11 = icmp eq i32 %N, 0
  br i1 %cmp11, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
  ret i32 %sub

for.body:
  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
  %0 = load i8, i8* %arrayidx, align 1
  switch i8 %0, label %for.inc [
    i8 108, label %sw.bb
    i8 111, label %sw.bb
    i8 112, label %sw.bb
    i8 32, label %sw.bb1
  ]

sw.bb:                                            ; preds = %for.body, %for.body, %for.body
  %inc = add nsw i32 %found.012, 1
  br label %for.inc

sw.bb1:                                           ; preds = %for.body
  %inc2 = add nsw i32 %spaces.013, 1
  br label %for.inc

for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
  %inc3 = add nuw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc3, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Counting-up loop guarded by a signed "N > 0" test.
; CHECK-LABEL: unroll_inc_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_int
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL-NEXT: [[EXIT]]

define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Counting-up loop guarded by an "N == 0" test.
; CHECK-LABEL: unroll_inc_unsigned
; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_unsigned
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL: [[EPIL_EXIT]]:
; CHECK-UNROLL: pop
define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp eq i32 %N, 0
  br i1 %cmp8, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Counting-down loop: the induction variable starts at %N and is decremented
; until it is no longer greater than zero.
; CHECK-LABEL: unroll_dec_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-UNROLL-LABEL: unroll_dec_int:
; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[PROLOGUE]]
; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
; CHECK-UNROLL: dls lr, lr
; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[BODY]]
; CHECK-UNROLL-NOT: b
; CHECK-UNROLL: pop
define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %dec = add nsw i32 %i.09, -1
  %cmp = icmp sgt i32 %dec, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0
