; REQUIRES: asserts
; RUN: llc -march=hexagon --stats -o - 2>&1 < %s | FileCheck %s

; Check that the compilation succeeded and that some code was generated.
; CHECK: vadd

; Check that the loop is pipelined and that a valid node order is used.
; CHECK-NOT: Number of node order issues found
; CHECK: Number of loops software pipelined
; CHECK-NOT: Number of node order issues found

target triple = "hexagon"

define void @fred(i16* noalias nocapture readonly %p0, i32 %p1, i32 %p2, i16* noalias nocapture %p3, i32 %p4) local_unnamed_addr #1 {
entry:
  %mul = mul i32 %p4, %p1
  %add.ptr = getelementptr inbounds i16, i16* %p0, i32 %mul
  %add = add nsw i32 %p4, 1
  %rem = srem i32 %add, 5
  %mul1 = mul i32 %rem, %p1
  %add.ptr2 = getelementptr inbounds i16, i16* %p0, i32 %mul1
  %add.ptr6 = getelementptr inbounds i16, i16* %p0, i32 0
  %add7 = add nsw i32 %p4, 3
  %rem8 = srem i32 %add7, 5
  %mul9 = mul i32 %rem8, %p1
  %add.ptr10 = getelementptr inbounds i16, i16* %p0, i32 %mul9
  %add.ptr14 = getelementptr inbounds i16, i16* %p0, i32 0
  %incdec.ptr18 = getelementptr inbounds i16, i16* %add.ptr14, i32 32
  %0 = bitcast i16* %incdec.ptr18 to <16 x i32>*
  %incdec.ptr17 = getelementptr inbounds i16, i16* %add.ptr10, i32 32
  %1 = bitcast i16* %incdec.ptr17 to <16 x i32>*
  %incdec.ptr16 = getelementptr inbounds i16, i16* %add.ptr6, i32 32
  %2 = bitcast i16* %incdec.ptr16 to <16 x i32>*
  %incdec.ptr15 = getelementptr inbounds i16, i16* %add.ptr2, i32 32
  %3 = bitcast i16* %incdec.ptr15 to <16 x i32>*
  %incdec.ptr = getelementptr inbounds i16, i16* %add.ptr, i32 32
  %4 = bitcast i16* %incdec.ptr to <16 x i32>*
  %5 = bitcast i16* %p3 to <16 x i32>*
  br i1 undef, label %for.end.loopexit.unr-lcssa, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %optr.0102 = phi <16 x i32>* [ %incdec.ptr24.3, %for.body ], [ %5, %entry ]
  %iptr4.0101 = phi <16 x i32>* [ %incdec.ptr23.3, %for.body ], [ %0, %entry ]
  %iptr3.0100 = phi <16 x i32>* [ %incdec.ptr22.3, %for.body ], [ %1, %entry ]
  %iptr2.099 = phi <16 x i32>* [ undef, %for.body ], [ %2, %entry ]
  %iptr1.098 = phi <16 x i32>* [ %incdec.ptr20.3, %for.body ], [ %3, %entry ]
  %iptr0.097 = phi <16 x i32>* [ %incdec.ptr19.3, %for.body ], [ %4, %entry ]
  %dVsumv1.096 = phi <32 x i32> [ %66, %for.body ], [ undef, %entry ]
  %niter = phi i32 [ %niter.nsub.3, %for.body ], [ undef, %entry ]
  %6 = load <16 x i32>, <16 x i32>* %iptr0.097, align 64, !tbaa !1
  %7 = load <16 x i32>, <16 x i32>* %iptr1.098, align 64, !tbaa !1
  %8 = load <16 x i32>, <16 x i32>* %iptr2.099, align 64, !tbaa !1
  %9 = load <16 x i32>, <16 x i32>* %iptr3.0100, align 64, !tbaa !1
  %10 = load <16 x i32>, <16 x i32>* %iptr4.0101, align 64, !tbaa !1
  %11 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %6, <16 x i32> %10)
  %12 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %11, <16 x i32> %8, i32 393222)
  %13 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %9, <16 x i32> %7)
  %14 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %12, <32 x i32> %13, i32 67372036)
  %15 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dVsumv1.096)
  %16 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %14)
  %17 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %16, <16 x i32> %15, i32 4)
  %18 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %14)
  %19 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %16, <16 x i32> %15, i32 8)
  %20 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %18, <16 x i32> undef, i32 8)
  %21 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %17, <16 x i32> %19)
  %22 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %15, <16 x i32> %19)
  %23 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %22, <16 x i32> %17, i32 101058054)
  %24 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %23, <16 x i32> zeroinitializer, i32 67372036)
  %25 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %20)
  %26 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %25, <16 x i32> undef, i32 101058054)
  %27 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %26, <16 x i32> %21, i32 67372036)
  %28 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %27, <16 x i32> %24, i32 8)
  %incdec.ptr24 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 1
  store <16 x i32> %28, <16 x i32>* %optr.0102, align 64, !tbaa !1
  %incdec.ptr19.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 2
  %incdec.ptr23.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 2
  %29 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %14)
  %30 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %14)
  %31 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %29, i32 4)
  %32 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %30, i32 4)
  %33 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %29, i32 8)
  %34 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %30, i32 8)
  %35 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %31, <16 x i32> %33)
  %36 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %29, <16 x i32> %33)
  %37 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %36, <16 x i32> %31, i32 101058054)
  %38 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %37, <16 x i32> undef, i32 67372036)
  %39 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %30, <16 x i32> %34)
  %40 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %39, <16 x i32> %32, i32 101058054)
  %41 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %40, <16 x i32> %35, i32 67372036)
  %42 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %41, <16 x i32> %38, i32 8)
  %incdec.ptr24.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 2
  store <16 x i32> %42, <16 x i32>* %incdec.ptr24, align 64, !tbaa !1
  %incdec.ptr19.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 3
  %43 = load <16 x i32>, <16 x i32>* %incdec.ptr19.1, align 64, !tbaa !1
  %incdec.ptr20.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr1.098, i32 3
  %incdec.ptr21.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr2.099, i32 3
  %incdec.ptr22.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr3.0100, i32 3
  %incdec.ptr23.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 3
  %44 = load <16 x i32>, <16 x i32>* %incdec.ptr23.1, align 64, !tbaa !1
  %45 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %43, <16 x i32> %44)
  %46 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %45, <16 x i32> undef, i32 393222)
  %47 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %46, <32 x i32> undef, i32 67372036)
  %48 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %47)
  %49 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %48, <16 x i32> undef, i32 4)
  %50 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %48, <16 x i32> undef, i32 8)
  %51 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> zeroinitializer, <16 x i32> undef)
  %52 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %49, <16 x i32> %50)
  %53 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %50)
  %54 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %53, <16 x i32> %49, i32 101058054)
  %55 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %54, <16 x i32> %51, i32 67372036)
  %56 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %52, i32 67372036)
  %57 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %56, <16 x i32> %55, i32 8)
  %incdec.ptr24.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 3
  store <16 x i32> %57, <16 x i32>* %incdec.ptr24.1, align 64, !tbaa !1
  %incdec.ptr19.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 4
  %58 = load <16 x i32>, <16 x i32>* %incdec.ptr19.2, align 64, !tbaa !1
  %incdec.ptr20.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr1.098, i32 4
  %59 = load <16 x i32>, <16 x i32>* %incdec.ptr20.2, align 64, !tbaa !1
  %60 = load <16 x i32>, <16 x i32>* %incdec.ptr21.2, align 64, !tbaa !1
  %incdec.ptr22.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr3.0100, i32 4
  %61 = load <16 x i32>, <16 x i32>* %incdec.ptr22.2, align 64, !tbaa !1
  %incdec.ptr23.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 4
  %62 = load <16 x i32>, <16 x i32>* %incdec.ptr23.2, align 64, !tbaa !1
  %63 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %58, <16 x i32> %62)
  %64 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %63, <16 x i32> %60, i32 393222)
  %65 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %59)
  %66 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %64, <32 x i32> %65, i32 67372036)
  %67 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %47)
  %68 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %66)
  %69 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %68, <16 x i32> undef, i32 4)
  %70 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %66)
  %71 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %70, <16 x i32> %67, i32 4)
  %72 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %70, <16 x i32> %67, i32 8)
  %73 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %67, <16 x i32> %71)
  %74 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %69, i32 101058054)
  %75 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %74, <16 x i32> %73, i32 67372036)
  %76 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %67, <16 x i32> %72)
  %77 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %76, <16 x i32> %71, i32 101058054)
  %78 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %77, <16 x i32> undef, i32 67372036)
  %79 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %78, <16 x i32> %75, i32 8)
  %incdec.ptr24.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 4
  store <16 x i32> %79, <16 x i32>* %incdec.ptr24.2, align 64, !tbaa !1
  %niter.nsub.3 = add i32 %niter, -4
  %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
  br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa, label %for.body

for.end.loopexit.unr-lcssa:                       ; preds = %for.body, %entry
  ret void
}

declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

!1 = !{!2, !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}