; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; @arm_min_q31 finds the signed minimum of element 0 of %pSrc and the
; %blockSize - 1 elements that follow it. Element 0 seeds the running minimum;
; a later element replaces it only when the running value is strictly greater
; (icmp sgt). The minimum is stored to %pResult and the index of the winning
; element to %pIndex (0 when element 0 is never beaten). Each index is formed
; as %blockSize minus the count of elements still to be visited.
;
; The IR is already unrolled by four: while.body consumes four elements per
; trip, and the while.body.epil* blocks consume the (%blockSize - 1) & 3
; leftover elements.
;
; The FileCheck assertions inside the function are autogenerated (see the NOTE
; on the first line) and must match llc output line for line; regenerate them
; with the script instead of editing them by hand.

define void @arm_min_q31(i32* nocapture readonly %pSrc, i32 %blockSize, i32* nocapture %pResult, i32* nocapture %pIndex) {
; CHECK-LABEL: arm_min_q31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    ldr.w r12, [r0]
; CHECK-NEXT:    subs.w r9, r1, #1
; CHECK-NEXT:    beq .LBB0_3
; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
; CHECK-NEXT:    subs r7, r1, #2
; CHECK-NEXT:    and r8, r9, #3
; CHECK-NEXT:    cmp r7, #3
; CHECK-NEXT:    bhs .LBB0_4
; CHECK-NEXT:  @ %bb.2:
; CHECK-NEXT:    movs r6, #0
; CHECK-NEXT:    b .LBB0_6
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    movs r6, #0
; CHECK-NEXT:    b .LBB0_10
; CHECK-NEXT:  .LBB0_4: @ %while.body.preheader.new
; CHECK-NEXT:    bic r7, r9, #3
; CHECK-NEXT:    movs r6, #1
; CHECK-NEXT:    subs r7, #4
; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
; CHECK-NEXT:    movs r6, #0
; CHECK-NEXT:    movs r7, #4
; CHECK-NEXT:  .LBB0_5: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr r10, [r0, #16]!
; CHECK-NEXT:    sub.w r9, r9, #4
; CHECK-NEXT:    ldrd r5, r4, [r0, #-12]
; CHECK-NEXT:    ldr r11, [r0, #-4]
; CHECK-NEXT:    cmp r12, r5
; CHECK-NEXT:    it gt
; CHECK-NEXT:    subgt r6, r7, #3
; CHECK-NEXT:    csel r5, r5, r12, gt
; CHECK-NEXT:    cmp r5, r4
; CHECK-NEXT:    it gt
; CHECK-NEXT:    subgt r6, r7, #2
; CHECK-NEXT:    csel r5, r4, r5, gt
; CHECK-NEXT:    cmp r5, r11
; CHECK-NEXT:    it gt
; CHECK-NEXT:    subgt r6, r7, #1
; CHECK-NEXT:    csel r5, r11, r5, gt
; CHECK-NEXT:    cmp r5, r10
; CHECK-NEXT:    csel r6, r7, r6, gt
; CHECK-NEXT:    add.w r7, r7, #4
; CHECK-NEXT:    csel r12, r10, r5, gt
; CHECK-NEXT:    le lr, .LBB0_5
; CHECK-NEXT:  .LBB0_6: @ %while.end.loopexit.unr-lcssa
; CHECK-NEXT:    cmp.w r8, #0
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.7: @ %while.body.epil
; CHECK-NEXT:    ldr r7, [r0, #4]
; CHECK-NEXT:    sub.w r1, r1, r9
; CHECK-NEXT:    cmp r12, r7
; CHECK-NEXT:    csel r6, r1, r6, gt
; CHECK-NEXT:    csel r12, r7, r12, gt
; CHECK-NEXT:    cmp.w r8, #1
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.8: @ %while.body.epil.1
; CHECK-NEXT:    ldr r7, [r0, #8]
; CHECK-NEXT:    cmp r12, r7
; CHECK-NEXT:    csinc r6, r6, r1, le
; CHECK-NEXT:    csel r12, r7, r12, gt
; CHECK-NEXT:    cmp.w r8, #2
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.9: @ %while.body.epil.2
; CHECK-NEXT:    ldr r0, [r0, #12]
; CHECK-NEXT:    cmp r12, r0
; CHECK-NEXT:    it gt
; CHECK-NEXT:    addgt r6, r1, #2
; CHECK-NEXT:    csel r12, r0, r12, gt
; CHECK-NEXT:  .LBB0_10: @ %while.end
; CHECK-NEXT:    str.w r12, [r2]
; CHECK-NEXT:    str r6, [r3]
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
  ; Element 0 seeds the running minimum; %blkCnt.015 counts the elements left.
  %0 = load i32, i32* %pSrc, align 4
  %blkCnt.015 = add i32 %blockSize, -1
  %cmp.not17 = icmp eq i32 %blkCnt.015, 0
  br i1 %cmp.not17, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  ; %xtraiter = leftover elements for the epilogue. The unrolled loop is
  ; skipped when fewer than four elements remain (%1 = remaining - 1 < 3).
  %1 = add i32 %blockSize, -2
  %xtraiter = and i32 %blkCnt.015, 3
  %2 = icmp ult i32 %1, 3
  br i1 %2, label %while.end.loopexit.unr-lcssa, label %while.body.preheader.new

while.body.preheader.new:                         ; preds = %while.body.preheader
  ; Remaining count rounded down to a multiple of four: the unrolled trip total.
  %unroll_iter = and i32 %blkCnt.015, -4
  br label %while.body

while.body:                                       ; preds = %while.body, %while.body.preheader.new
  %pSrc.addr.021.pn = phi i32* [ %pSrc, %while.body.preheader.new ], [ %pSrc.addr.021.3, %while.body ]
  %blkCnt.020 = phi i32 [ %blkCnt.015, %while.body.preheader.new ], [ %blkCnt.0.3, %while.body ]
  %outIndex.019 = phi i32 [ 0, %while.body.preheader.new ], [ %spec.select14.3, %while.body ]
  %out.018 = phi i32 [ %0, %while.body.preheader.new ], [ %spec.select.3, %while.body ]
  %niter = phi i32 [ %unroll_iter, %while.body.preheader.new ], [ %niter.nsub.3, %while.body ]
  ; Unrolled step 0: compare against the element at offset 1.
  %pSrc.addr.021 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 1
  %3 = load i32, i32* %pSrc.addr.021, align 4
  %cmp2 = icmp sgt i32 %out.018, %3
  %sub3 = sub i32 %blockSize, %blkCnt.020
  %spec.select = select i1 %cmp2, i32 %3, i32 %out.018
  %spec.select14 = select i1 %cmp2, i32 %sub3, i32 %outIndex.019
  %blkCnt.0 = add i32 %blkCnt.020, -1
  ; Unrolled step 1: element at offset 2.
  %pSrc.addr.021.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 2
  %4 = load i32, i32* %pSrc.addr.021.1, align 4
  %cmp2.1 = icmp sgt i32 %spec.select, %4
  %sub3.1 = sub i32 %blockSize, %blkCnt.0
  %spec.select.1 = select i1 %cmp2.1, i32 %4, i32 %spec.select
  %spec.select14.1 = select i1 %cmp2.1, i32 %sub3.1, i32 %spec.select14
  %blkCnt.0.1 = add i32 %blkCnt.020, -2
  ; Unrolled step 2: element at offset 3.
  %pSrc.addr.021.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 3
  %5 = load i32, i32* %pSrc.addr.021.2, align 4
  %cmp2.2 = icmp sgt i32 %spec.select.1, %5
  %sub3.2 = sub i32 %blockSize, %blkCnt.0.1
  %spec.select.2 = select i1 %cmp2.2, i32 %5, i32 %spec.select.1
  %spec.select14.2 = select i1 %cmp2.2, i32 %sub3.2, i32 %spec.select14.1
  %blkCnt.0.2 = add i32 %blkCnt.020, -3
  ; Unrolled step 3: element at offset 4; its address is also the next base.
  %pSrc.addr.021.3 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 4
  %6 = load i32, i32* %pSrc.addr.021.3, align 4
  %cmp2.3 = icmp sgt i32 %spec.select.2, %6
  %sub3.3 = sub i32 %blockSize, %blkCnt.0.2
  %spec.select.3 = select i1 %cmp2.3, i32 %6, i32 %spec.select.2
  %spec.select14.3 = select i1 %cmp2.3, i32 %sub3.3, i32 %spec.select14.2
  %blkCnt.0.3 = add i32 %blkCnt.020, -4
  %niter.nsub.3 = add i32 %niter, -4
  %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
  br i1 %niter.ncmp.3, label %while.end.loopexit.unr-lcssa, label %while.body

while.end.loopexit.unr-lcssa:                     ; preds = %while.body, %while.body.preheader
  ; The undef inputs on the %while.body.preheader edge are never consumed: that
  ; edge is taken only with 1..3 elements remaining, so %xtraiter is non-zero
  ; and control continues into while.body.epil instead of while.end.
  %spec.select.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select.3, %while.body ]
  %spec.select14.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select14.3, %while.body ]
  %pSrc.addr.021.pn.unr = phi i32* [ %pSrc, %while.body.preheader ], [ %pSrc.addr.021.3, %while.body ]
  %blkCnt.020.unr = phi i32 [ %blkCnt.015, %while.body.preheader ], [ %blkCnt.0.3, %while.body ]
  %outIndex.019.unr = phi i32 [ 0, %while.body.preheader ], [ %spec.select14.3, %while.body ]
  %out.018.unr = phi i32 [ %0, %while.body.preheader ], [ %spec.select.3, %while.body ]
  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod.not, label %while.end, label %while.body.epil

while.body.epil:                                  ; preds = %while.end.loopexit.unr-lcssa
  ; First leftover element.
  %pSrc.addr.021.epil = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 1
  %7 = load i32, i32* %pSrc.addr.021.epil, align 4
  %cmp2.epil = icmp sgt i32 %out.018.unr, %7
  %sub3.epil = sub i32 %blockSize, %blkCnt.020.unr
  %spec.select.epil = select i1 %cmp2.epil, i32 %7, i32 %out.018.unr
  %spec.select14.epil = select i1 %cmp2.epil, i32 %sub3.epil, i32 %outIndex.019.unr
  %epil.iter.cmp.not = icmp eq i32 %xtraiter, 1
  br i1 %epil.iter.cmp.not, label %while.end, label %while.body.epil.1

while.end:                                        ; preds = %while.end.loopexit.unr-lcssa, %while.body.epil.2, %while.body.epil.1, %while.body.epil, %entry
  ; Merge the result from whichever path finished the scan and publish it.
  %out.0.lcssa = phi i32 [ %0, %entry ], [ %spec.select.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select.epil, %while.body.epil ], [ %spec.select.epil.1, %while.body.epil.1 ], [ %spec.select.epil.2, %while.body.epil.2 ]
  %outIndex.0.lcssa = phi i32 [ 0, %entry ], [ %spec.select14.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select14.epil, %while.body.epil ], [ %spec.select14.epil.1, %while.body.epil.1 ], [ %spec.select14.epil.2, %while.body.epil.2 ]
  store i32 %out.0.lcssa, i32* %pResult, align 4
  store i32 %outIndex.0.lcssa, i32* %pIndex, align 4
  ret void

while.body.epil.1:                                ; preds = %while.body.epil
  ; Second leftover element; its index is one past %sub3.epil.
  %blkCnt.0.epil = add i32 %blkCnt.020.unr, -1
  %pSrc.addr.021.epil.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 2
  %8 = load i32, i32* %pSrc.addr.021.epil.1, align 4
  %cmp2.epil.1 = icmp sgt i32 %spec.select.epil, %8
  %sub3.epil.1 = sub i32 %blockSize, %blkCnt.0.epil
  %spec.select.epil.1 = select i1 %cmp2.epil.1, i32 %8, i32 %spec.select.epil
  %spec.select14.epil.1 = select i1 %cmp2.epil.1, i32 %sub3.epil.1, i32 %spec.select14.epil
  %epil.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
  br i1 %epil.iter.cmp.1.not, label %while.end, label %while.body.epil.2

while.body.epil.2:                                ; preds = %while.body.epil.1
  ; Third (last possible) leftover element.
  %blkCnt.0.epil.1 = add i32 %blkCnt.020.unr, -2
  %pSrc.addr.021.epil.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 3
  %9 = load i32, i32* %pSrc.addr.021.epil.2, align 4
  %cmp2.epil.2 = icmp sgt i32 %spec.select.epil.1, %9
  %sub3.epil.2 = sub i32 %blockSize, %blkCnt.0.epil.1
  %spec.select.epil.2 = select i1 %cmp2.epil.2, i32 %9, i32 %spec.select.epil.1
  %spec.select14.epil.2 = select i1 %cmp2.epil.2, i32 %sub3.epil.2, i32 %spec.select14.epil.1
  br label %while.end
}