; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Codegen tests for the MVE carry-propagating vector add/subtract
; intrinsics (VADC/VADCI and VSBC/VSBCI, plus their VPT-predicated
; forms).  Every test follows the same ACLE vadcq/vsbcq pattern: the
; i32 returned alongside the vector is an FPSCR-shaped value, from
; which bit 29 (the carry flag) is extracted (ubfx #29, #1) and stored
; through the carry pointer; where a carry-in is supplied, the loaded
; 0/1 value is shifted left by 29 to place it in the same bit before
; the intrinsic call.

; Carry-in fixed at zero: no write to fpscr_nzcvqc is needed, so the
; carry-initializing VADCI form is selected.
define arm_aapcs_vfpcc <4 x i32> @test_vadciq_s32(<4 x i32> %a, <4 x i32> %b, i32* %carry_out) {
; CHECK-LABEL: test_vadciq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32>, <4 x i32>, i32)

; Variable carry-in: loaded from memory, shifted into bit 29 and
; written to fpscr_nzcvqc via vmsr before the plain VADC.
define arm_aapcs_vfpcc <4 x i32> @test_vadcq_u32(<4 x i32> %a, <4 x i32> %b, i32* %carry) {
; CHECK-LABEL: test_vadcq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vadc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Predicated VADCI: the i16 mask %p is moved into p0 and the
; instruction executes inside a VPT block (vadcit).
define arm_aapcs_vfpcc <4 x i32> @test_vadciq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vadciq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vadcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)

; Predicated VADC with a variable carry-in: carry load/shift plus
; predicate setup, then vadct under VPT.
define arm_aapcs_vfpcc <4 x i32> @test_vadcq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vadcq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vadct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32>, <4 x i32>, i32)

; VSBCI, zero carry-in (signed variant; codegen is identical to the
; unsigned test below since the instruction is type-agnostic).
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out) {
; CHECK-LABEL: test_vsbciq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsbci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

; VSBCI, zero carry-in (unsigned variant).
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out) {
; CHECK-LABEL: test_vsbciq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsbci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

; VSBC with a variable carry-in (signed variant).
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry) {
; CHECK-LABEL: test_vsbcq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vsbc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; VSBC with a variable carry-in (unsigned variant).
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry) {
; CHECK-LABEL: test_vsbcq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vsbc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)

; Predicated VSBCI (vsbcit under VPT), signed variant.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbciq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Predicated VSBCI (vsbcit under VPT), unsigned variant.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbciq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Predicated VSBC with a variable carry-in (vsbct), signed variant.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}

; Predicated VSBC with a variable carry-in (vsbct), unsigned variant.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}