; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -hexagon-bit=0 < %s | FileCheck %s

; Very similar to swp-stages4.ll, but the pipelined schedule is a little
; different.
;
; The directives below expect, in this order in the llc output:
;   * a post-incrementing byte load (memub ... ++#1);
;   * a second such load whose destination register is captured as REG0,
;     plus the loop0 setup that names the loop label (these two may appear
;     in either order relative to each other);
;   * the loop label itself;
;   * an and-with-#255 that reads REG0, then a new assignment whose
;     destination begins with REG0, and finally the endloop.
; The label capture accepts any decimal block number, so the test does not
; break if the loop's block number ever needs more than one digit.

; CHECK: = memub(r{{[0-9]+}}++#1)
; CHECK-DAG: [[REG0:(r[0-9]+)]] = memub(r{{[0-9]+}}++#1)
; CHECK-DAG: loop0(.LBB0_[[LOOP:[0-9]+]],
; CHECK: .LBB0_[[LOOP]]:
; CHECK: = and([[REG0]],#255)
; CHECK: [[REG0]]{{[:0-9]*}} =
; CHECK: endloop

; @fred: single inner loop that, for each index j starting at 1, sums six
; bytes (three consecutive bytes reached through %src and three reached
; through %src + %srcStride), scales the sum in fixed point and stores one
; byte to %dst[j + %dstStride]. The previous two loads from each stream are
; carried across iterations in phi nodes rather than reloaded.
define void @fred(i8* noalias nocapture %src, i32 %srcWidth, i32 %srcHeight, i32 %srcStride, i8* noalias nocapture %dst, i32 %dstStride) #0 {
entry:
  %sub = add i32 %srcWidth, -1                ; loop bound: srcWidth - 1
  %sub1 = add i32 %srcHeight, -1
  %add.ptr = getelementptr inbounds i8, i8* %src, i32 %srcStride
  ; %add.ptr.sum and %add.ptr2 have no users in this function.
  %add.ptr.sum = mul i32 %srcStride, 2
  %add.ptr2 = getelementptr inbounds i8, i8* %src, i32 %add.ptr.sum
  ; Skip the loop entirely unless srcHeight - 1 > 1 (unsigned compare).
  %cmp212 = icmp ugt i32 %sub1, 1
  br i1 %cmp212, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  br label %for.body74.preheader

for.body74.preheader:
  ; Initial values for the loop-carried bytes. %0/%2 both read %add.ptr[0]
  ; and %1/%3 both read %add.ptr[1].
  %0 = load i8, i8* %add.ptr, align 1, !tbaa !0
  %arrayidx40 = getelementptr inbounds i8, i8* %add.ptr, i32 1
  %1 = load i8, i8* %arrayidx40, align 1, !tbaa !0
  %2 = load i8, i8* %add.ptr, align 1, !tbaa !0
  %arrayidx46 = getelementptr inbounds i8, i8* %add.ptr, i32 1
  %3 = load i8, i8* %arrayidx46, align 1, !tbaa !0
  br label %for.body74

for.body74:
  ; Two-deep history per stream: %4 is last iteration's %9 and %5 is last
  ; iteration's %4; likewise %6 is last iteration's %8 and %7 is last
  ; iteration's %6.
  %4 = phi i8 [ %9, %for.body74 ], [ %3, %for.body74.preheader ]
  %5 = phi i8 [ %4, %for.body74 ], [ %2, %for.body74.preheader ]
  %6 = phi i8 [ %8, %for.body74 ], [ %1, %for.body74.preheader ]
  %7 = phi i8 [ %6, %for.body74 ], [ %0, %for.body74.preheader ]
  %j.0211 = phi i32 [ %add81, %for.body74 ], [ 1, %for.body74.preheader ]
  %conv77 = zext i8 %7 to i32
  %conv79 = zext i8 %6 to i32
  %add80 = add nsw i32 %conv79, %conv77
  %add81 = add i32 %j.0211, 1                 ; j + 1, also the next j
  %arrayidx82 = getelementptr inbounds i8, i8* %src, i32 %add81
  %8 = load i8, i8* %arrayidx82, align 1, !tbaa !0
  %conv83 = zext i8 %8 to i32
  %add84 = add nsw i32 %add80, %conv83
  %conv87 = zext i8 %5 to i32
  %add88 = add nsw i32 %add84, %conv87
  %conv90 = zext i8 %4 to i32
  %add91 = add nsw i32 %add88, %conv90
  %arrayidx93 = getelementptr inbounds i8, i8* %add.ptr, i32 %add81
  %9 = load i8, i8* %arrayidx93, align 1, !tbaa !0
  %conv94 = zext i8 %9 to i32
  %add95 = add nsw i32 %add91, %conv94
  ; Fixed-point scale with rounding: (sum * 7282 + 32768) >> 16, where
  ; 32768 is half of 1 << 16.
  %mul96 = mul nsw i32 %add95, 7282
  %add97 = add nsw i32 %mul96, 32768
  %shr98208 = lshr i32 %add97, 16
  %conv99 = trunc i32 %shr98208 to i8
  ; Store to dst[j + dstStride], using the pre-increment j.
  %add.ptr5.sum209 = add i32 %j.0211, %dstStride
  %arrayidx100 = getelementptr inbounds i8, i8* %dst, i32 %add.ptr5.sum209
  store i8 %conv99, i8* %arrayidx100, align 1, !tbaa !0
  ; Leave the loop once j + 1 reaches srcWidth - 1.
  %exitcond = icmp eq i32 %add81, %sub
  br i1 %exitcond, label %for.end103.loopexit, label %for.body74

for.end103.loopexit:
  br label %for.end

for.end:
  ret void
}

attributes #0 = { nounwind }

!0 = !{!"omnipotent char", !1}
!1 = !{!"Simple C/C++ TBAA"}