; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Overview
; --------
; Every function below builds a lane mask with one of the
; @llvm.arm.mve.vctp{8,16,32} intrinsics from %n and combines it with a
; vector operation through a `select`. Two IR shapes are used:
;   * integer add/sub/mul/and/or/xor (and the splat-scalar "qr" variants):
;     the select picks between the second operand and a constant, and the
;     selected value is then fed into the binary operation with %x;
;   * floating point, min/max and saturating arithmetic: the operation is
;     computed on %x and %y first and the select then picks between that
;     result and %x.
; In all cases the autogenerated assertions expect a single vctp, a vpst and
; one predicated ("t"-suffixed) MVE instruction, i.e. no separate select in
; the output. The assertions are regenerated by the script named on the
; first line; do not edit them by hand.

; add: masked-off lanes add zero to %x; expected output is a predicated vadd.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = add <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = add <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = add <16 x i8> %a, %x
  ret <16 x i8> %b
}

; sub: the selected value is the right-hand operand (%x - %a), so masked-off
; lanes subtract zero; expected output is a predicated vsub.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = sub <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = sub <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = sub <16 x i8> %x, %a
  ret <16 x i8> %b
}

; mul: masked-off lanes multiply %x by a splat of 1; expected output is a
; predicated vmul.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %b = mul <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %b = mul <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %b = mul <16 x i8> %a, %x
  ret <16 x i8> %b
}

; and: masked-off lanes AND %x with all-ones (-1); expected output is a
; predicated vand.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %x
  ret <16 x i8> %b
}

; or: masked-off lanes OR %x with zero; expected output is a predicated vorr.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %x
  ret <16 x i8> %b
}

; xor: masked-off lanes XOR %x with zero; expected output is a predicated veor.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = xor <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = xor <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = xor <16 x i8> %a, %x
  ret <16 x i8> %b
}

; andnot: %y is inverted (xor with -1) before the select, and masked-off
; lanes AND %x with all-ones. The assertions currently expect the inversion
; as a separate, unpredicated vmvn followed by a predicated vand.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %x
  ret <16 x i8> %b
}

; ornot: %y is inverted (xor with -1) before the select, and masked-off lanes
; OR %x with zero. The assertions currently expect a separate, unpredicated
; vmvn followed by a predicated vorr.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q1, q1
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %x
  ret <16 x i8> %b
}

; Floating-point add/sub/mul: the operation is computed on %x and %y first
; and the select then keeps %x in masked-off lanes. Expected output is the
; matching predicated vadd/vsub/vmul on f32 or f16.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Integer min/max written as icmp + select, with an outer select on the vctp
; mask that keeps %x in masked-off lanes. slt/ult are expected to become a
; predicated vmin (signed/unsigned), sgt/ugt a predicated vmax.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.u16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmint.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.u16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxt.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point min/max written as `fcmp fast` + select, with an outer
; select on the vctp mask that keeps %x in masked-off lanes. olt is expected
; to become a predicated vminnm, ogt a predicated vmaxnm.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vminnmt.f32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vminnmt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Saturating add/sub through the llvm.{s,u}{add,sub}.sat intrinsics; the
; select keeps %x in masked-off lanes. Expected output is a predicated
; vqadd/vqsub with the matching signedness suffix.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Integer "qr" forms: the second operand is a scalar %y splatted with
; insertelement + shufflevector, then selected against the same constant as
; in the vector-vector tests (zero for add/sub, splat 1 for mul). The scalar
; arrives in r0 and %n in r1, so the assertions expect vctp on r1 and the
; predicated instruction taking r0 as its scalar operand.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
  %b = add <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
  %b = add <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i8 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
  %b = add <16 x i8> %a, %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
  %b = sub <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
  %b = sub <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: subqr_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i8 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
  %b = sub <16 x i8> %x, %a
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: mulqr_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %b = mul <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: mulqr_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %b = mul <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: mulqr_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i8 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %b = mul <16 x i8> %a, %x
  ret <16 x i8> %b
}

; Floating-point "qr" forms: a scalar float/half %y is splatted, combined
; with %x, and the select keeps %x in masked-off lanes. The assertions expect
; the scalar to be moved from s4 into r1 with vmov, vctp on r0 (%n), and the
; predicated instruction taking r1 as its scalar operand.
define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: faddqr_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f32 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fadd <4 x float> %ys, %x
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: faddqr_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f16 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fadd <8 x half> %ys, %x
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fsubqr_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f32 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fsub <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fsubqr_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f16 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fsub <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fmulqr_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f32 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fmul <4 x float> %ys, %x
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fmulqr_v8f16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f16 q0, q0, r1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> 
undef, half %y, i32 0 1084 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1085 %a = fmul <8 x half> %ys, %x 1086 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x 1087 ret <8 x half> %b 1088} 1089 1090define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1091; CHECK-LABEL: sadd_satqr_v4i32_x: 1092; CHECK: @ %bb.0: @ %entry 1093; CHECK-NEXT: vctp.32 r1 1094; CHECK-NEXT: vpst 1095; CHECK-NEXT: vqaddt.s32 q0, q0, r0 1096; CHECK-NEXT: bx lr 1097entry: 1098 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1099 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1100 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1101 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1102 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1103 ret <4 x i32> %b 1104} 1105 1106define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1107; CHECK-LABEL: sadd_satqr_v8i16_x: 1108; CHECK: @ %bb.0: @ %entry 1109; CHECK-NEXT: vctp.16 r1 1110; CHECK-NEXT: vpst 1111; CHECK-NEXT: vqaddt.s16 q0, q0, r0 1112; CHECK-NEXT: bx lr 1113entry: 1114 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1115 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1116 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1117 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1118 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1119 ret <8 x i16> %b 1120} 1121 1122define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1123; CHECK-LABEL: sadd_satqr_v16i8_x: 1124; CHECK: @ %bb.0: @ %entry 1125; CHECK-NEXT: vctp.8 r1 1126; CHECK-NEXT: vpst 1127; CHECK-NEXT: vqaddt.s8 q0, q0, r0 1128; CHECK-NEXT: bx lr 1129entry: 1130 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1131 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1132 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1133 %a = 
call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1134 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1135 ret <16 x i8> %b 1136} 1137 1138define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1139; CHECK-LABEL: uadd_satqr_v4i32_x: 1140; CHECK: @ %bb.0: @ %entry 1141; CHECK-NEXT: vctp.32 r1 1142; CHECK-NEXT: vpst 1143; CHECK-NEXT: vqaddt.u32 q0, q0, r0 1144; CHECK-NEXT: bx lr 1145entry: 1146 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1147 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1148 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1149 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1150 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1151 ret <4 x i32> %b 1152} 1153 1154define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1155; CHECK-LABEL: uadd_satqr_v8i16_x: 1156; CHECK: @ %bb.0: @ %entry 1157; CHECK-NEXT: vctp.16 r1 1158; CHECK-NEXT: vpst 1159; CHECK-NEXT: vqaddt.u16 q0, q0, r0 1160; CHECK-NEXT: bx lr 1161entry: 1162 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1163 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1164 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1165 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1166 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1167 ret <8 x i16> %b 1168} 1169 1170define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1171; CHECK-LABEL: uadd_satqr_v16i8_x: 1172; CHECK: @ %bb.0: @ %entry 1173; CHECK-NEXT: vctp.8 r1 1174; CHECK-NEXT: vpst 1175; CHECK-NEXT: vqaddt.u8 q0, q0, r0 1176; CHECK-NEXT: bx lr 1177entry: 1178 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1179 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1180 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1181 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1182 %b = 
select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1183 ret <16 x i8> %b 1184} 1185 1186define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1187; CHECK-LABEL: ssub_satqr_v4i32_x: 1188; CHECK: @ %bb.0: @ %entry 1189; CHECK-NEXT: vctp.32 r1 1190; CHECK-NEXT: vpst 1191; CHECK-NEXT: vqsubt.s32 q0, q0, r0 1192; CHECK-NEXT: bx lr 1193entry: 1194 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1195 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1196 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1197 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1198 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1199 ret <4 x i32> %b 1200} 1201 1202define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1203; CHECK-LABEL: ssub_satqr_v8i16_x: 1204; CHECK: @ %bb.0: @ %entry 1205; CHECK-NEXT: vctp.16 r1 1206; CHECK-NEXT: vpst 1207; CHECK-NEXT: vqsubt.s16 q0, q0, r0 1208; CHECK-NEXT: bx lr 1209entry: 1210 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1211 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1212 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1213 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1214 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1215 ret <8 x i16> %b 1216} 1217 1218define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1219; CHECK-LABEL: ssub_satqr_v16i8_x: 1220; CHECK: @ %bb.0: @ %entry 1221; CHECK-NEXT: vctp.8 r1 1222; CHECK-NEXT: vpst 1223; CHECK-NEXT: vqsubt.s8 q0, q0, r0 1224; CHECK-NEXT: bx lr 1225entry: 1226 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1227 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1228 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1229 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1230 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1231 ret <16 x i8> %b 1232} 
; usub_satqr family: llvm.usub.sat of %x and the splat of %y, followed by a
; select that keeps %x in lanes where the vctp predicate is false. The
; assertions expect one predicated vqsubt.uNN with r0 as the scalar operand.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v4i32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; usub_satqr, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v8i16_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; usub_satqr, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v16i8_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u8 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; The "_y" tests below are written so that lanes where the vctp predicate is
; false end up holding %y rather than %x. In every case the assertions expect
; the predicated instruction to write q1, followed by a vmov of q1 into q0.

; add_*_y: %x is replaced by 0 where the predicate is false and then added to
; %y, so those lanes compute 0 + %y. Expected: one predicated vaddt.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i32 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = add <4 x i32> %a, %y
  ret <4 x i32> %b
}

; add_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i16 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = add <8 x i16> %a, %y
  ret <8 x i16> %b
}

; add_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i8 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = add <16 x i8> %a, %y
  ret <16 x i8> %b
}

; sub_*_y: %x - %y is computed unconditionally and a select keeps %y where the
; predicate is false. Expected: one predicated vsubt writing q1.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; sub_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; sub_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i8 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; mul_*_y: %x is replaced by a splat of 1 where the predicate is false and then
; multiplied with %y, so those lanes compute 1 * %y. Expected: one predicated
; vmult. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i32 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %b = mul <4 x i32> %a, %y
  ret <4 x i32> %b
}

; mul_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i16 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %b = mul <8 x i16> %a, %y
  ret <8 x i16> %b
}

; mul_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i8 q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %b = mul <16 x i8> %a, %y
  ret <16 x i8> %b
}

; and_*_y: %x is replaced by all-ones (-1) where the predicate is false and
; then ANDed with %y, so those lanes compute -1 & %y. Expected: one predicated
; vandt. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %y
  ret <4 x i32> %b
}

; and_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %y
  ret <8 x i16> %b
}

; and_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vandt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %y
  ret <16 x i8> %b
}

; or_*_y: %x is replaced by 0 where the predicate is false and then ORed with
; %y, so those lanes compute 0 | %y. Expected: one predicated vorrt.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %y
  ret <4 x i32> %b
}

; or_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %y
  ret <8 x i16> %b
}

; or_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %y
  ret <16 x i8> %b
}

; xor_*_y: %x is replaced by 0 where the predicate is false and then XORed with
; %y, so those lanes compute 0 ^ %y. Expected: one predicated veort.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = xor <4 x i32> %a, %y
  ret <4 x i32> %b
}

; xor_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = xor <8 x i16> %a, %y
  ret <8 x i16> %b
}

; xor_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: veort q1, q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = xor <16 x i8> %a, %y
  ret <16 x i8> %b
}

; andnot_*_y: computes (~%y) & %x (the NOT is written as xor with -1) and
; keeps %y where the predicate is false. Expected: one predicated vbict.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %y1, %x
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; andnot_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %y1, %x
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; andnot_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %y1, %x
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; ornot_*_y: computes (~%y) | %x (the NOT is written as xor with -1) and keeps
; %y where the predicate is false. Expected: one predicated vornt.
; 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vornt q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %y1, %x
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; ornot_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vornt q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %y1, %x
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; ornot_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vornt q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %y1, %x
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; fadd_*_y: %x + %y is computed unconditionally and a select keeps %y where the
; predicate is false. Expected: one predicated vaddt writing q1.
; 4 x float variant.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

; fadd_*_y, 8 x half variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; fsub_*_y: %x - %y is computed unconditionally and a select keeps %y where the
; predicate is false. Expected: one predicated vsubt writing q1.
; 4 x float variant.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

; fsub_*_y, 8 x half variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; fmul_*_y: %x * %y is computed unconditionally and a select keeps %y where the
; predicate is false. Expected: one predicated vmult writing q1.
; 4 x float variant.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

; fmul_*_y, 8 x half variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; icmp_slt_*_y: selects %x where both the vctp predicate and (%x slt %y) hold,
; otherwise %y. The assertions expect a two-instruction predicated block
; (vpstt): a signed "gt" compare with the operands swapped (q1, q0), then a
; predicated vmovt of q0 into q1. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s32 gt, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp slt <4 x i32> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %b
}

; icmp_slt_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s16 gt, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp slt <8 x i16> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %b
}

; icmp_slt_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s8 gt, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp slt <16 x i8> %x, %y
  %0 = and <16 x i1> %c, %a1
  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %b
}

; icmp_sgt_*_y: selects %x where both the vctp predicate and (%x sgt %y) hold,
; otherwise %y. The assertions expect a vpstt block with a signed "gt" compare
; of q0 against q1, then a predicated vmovt of q0 into q1. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s32 gt, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp sgt <4 x i32> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %b
}

; icmp_sgt_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s16 gt, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp sgt <8 x i16> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %b
}

; icmp_sgt_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.s8 gt, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp sgt <16 x i8> %x, %y
  %0 = and <16 x i1> %c, %a1
  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %b
}

; icmp_ult_*_y: selects %x where both the vctp predicate and (%x ult %y) hold,
; otherwise %y. The assertions expect a vpstt block with an unsigned "hi"
; compare with the operands swapped (q1, q0), then a predicated vmovt of q0
; into q1. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u32 hi, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ult <4 x i32> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %b
}

; icmp_ult_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u16 hi, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ult <8 x i16> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %b
}
; icmp_ult, 16 x i8: selects %x where both the vctp8 predicate and
; (%x ult %y) hold, otherwise %y. The assertions expect a two-instruction
; predicated block (vpstt): an unsigned "hi" compare with the operands swapped
; (q1, q0), a predicated vmovt of q0 into q1, and a final vmov of q1 into q0.
define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u8 hi, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ult <16 x i8> %x, %y
  %0 = and <16 x i1> %c, %a1
  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %b
}

; icmp_ugt_*_y: selects %x where both the vctp predicate and (%x ugt %y) hold,
; otherwise %y. The assertions expect a vpstt block with an unsigned "hi"
; compare of q0 against q1, a predicated vmovt of q0 into q1, and a final vmov
; of q1 into q0. 4 x i32 variant.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u32 hi, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ugt <4 x i32> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %b
}

; icmp_ugt_*_y, 8 x i16 variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u16 hi, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ugt <8 x i16> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %b
}

; icmp_ugt_*_y, 16 x i8 variant (predicate from vctp8).
define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u8 hi, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ugt <16 x i8> %x, %y
  %0 = and <16 x i1> %c, %a1
  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %b
}

; fcmp_fast_olt_*_y: selects %x where both the vctp predicate and
; (fcmp fast olt %x, %y) hold, otherwise %y. The assertions expect a vpstt
; block with a floating-point "gt" compare with the operands swapped (q1, q0),
; a predicated vmovt of q0 into q1, and a final vmov of q1 into q0.
; 4 x float variant.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.f32 gt, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
  ret <4 x float> %b
}

; fcmp_fast_olt_*_y, 8 x half variant (predicate from vctp16).
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.f16 gt, q1, q0
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
  ret <8 x half> %b
}

; fcmp_fast_ogt, 4 x float: selects %x where both the vctp32 predicate and
; (fcmp fast ogt %x, %y) hold, otherwise %y. The assertions expect a vpstt
; block with a floating-point "gt" compare of q0 against q1, a predicated vmovt
; of q0 into q1, and a final vmov of q1 into q0.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.f32 gt, q0, q1
; CHECK-NEXT: vmovt q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
  ret <4 x float> %b
}
; The tests below start with the half-precision fcmp ogt compare-and-select
; case and then cover a vctp mask (built from %n) guarding a single arithmetic
; operation:
;   * [su]add_sat / [su]sub_sat: select(mask, sat-op(%x, %y), %y), expected as
;     one predicated vqaddt/vqsubt writing q1, followed by vmov q0, q1.
;   * addqr / mulqr: %x is first replaced by the identity value (0 for add,
;     1 for mul) in the unselected lanes and then combined with a splat of
;     the scalar %y.
;   * subqr, f{add,sub,mul}qr and the *_satqr tests: the operation on %x and
;     the splat of %y is selected against that same splat.
; In the scalar ("qr") tests the expected output materialises the splat with
; vdup ahead of the vpst block. The declarations of the saturating-arithmetic
; and vctp intrinsics follow the last test.
1992 1993define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1994; CHECK-LABEL: fcmp_fast_ogt_v8f16_y: 1995; CHECK: @ %bb.0: @ %entry 1996; CHECK-NEXT: vctp.16 r0 1997; CHECK-NEXT: vpstt 1998; CHECK-NEXT: vcmpt.f16 gt, q0, q1 1999; CHECK-NEXT: vmovt q1, q0 2000; CHECK-NEXT: vmov q0, q1 2001; CHECK-NEXT: bx lr 2002entry: 2003 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2004 %a1 = fcmp fast ogt <8 x half> %x, %y 2005 %0 = and <8 x i1> %c, %a1 2006 %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y 2007 ret <8 x half> %b 2008} 2009 2010define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2011; CHECK-LABEL: sadd_sat_v4i32_y: 2012; CHECK: @ %bb.0: @ %entry 2013; CHECK-NEXT: vctp.32 r0 2014; CHECK-NEXT: vpst 2015; CHECK-NEXT: vqaddt.s32 q1, q0, q1 2016; CHECK-NEXT: vmov q0, q1 2017; CHECK-NEXT: bx lr 2018entry: 2019 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2020 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2021 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2022 ret <4 x i32> %b 2023} 2024 2025define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2026; CHECK-LABEL: sadd_sat_v8i16_y: 2027; CHECK: @ %bb.0: @ %entry 2028; CHECK-NEXT: vctp.16 r0 2029; CHECK-NEXT: vpst 2030; CHECK-NEXT: vqaddt.s16 q1, q0, q1 2031; CHECK-NEXT: vmov q0, q1 2032; CHECK-NEXT: bx lr 2033entry: 2034 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2035 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2036 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2037 ret <8 x i16> %b 2038} 2039 2040define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2041; CHECK-LABEL: sadd_sat_v16i8_y: 2042; CHECK: @ %bb.0: @ %entry 2043; CHECK-NEXT: vctp.8 r0 2044; CHECK-NEXT: vpst 2045; CHECK-NEXT: vqaddt.s8 q1, q0, q1 2046; CHECK-NEXT: vmov q0, q1 2047; CHECK-NEXT: bx lr 2048entry: 2049 %c = call <16 x i1> 
@llvm.arm.mve.vctp8(i32 %n) 2050 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2051 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2052 ret <16 x i8> %b 2053} 2054 2055define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2056; CHECK-LABEL: uadd_sat_v4i32_y: 2057; CHECK: @ %bb.0: @ %entry 2058; CHECK-NEXT: vctp.32 r0 2059; CHECK-NEXT: vpst 2060; CHECK-NEXT: vqaddt.u32 q1, q0, q1 2061; CHECK-NEXT: vmov q0, q1 2062; CHECK-NEXT: bx lr 2063entry: 2064 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2065 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2066 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2067 ret <4 x i32> %b 2068} 2069 2070define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2071; CHECK-LABEL: uadd_sat_v8i16_y: 2072; CHECK: @ %bb.0: @ %entry 2073; CHECK-NEXT: vctp.16 r0 2074; CHECK-NEXT: vpst 2075; CHECK-NEXT: vqaddt.u16 q1, q0, q1 2076; CHECK-NEXT: vmov q0, q1 2077; CHECK-NEXT: bx lr 2078entry: 2079 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2080 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2081 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2082 ret <8 x i16> %b 2083} 2084 2085define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2086; CHECK-LABEL: uadd_sat_v16i8_y: 2087; CHECK: @ %bb.0: @ %entry 2088; CHECK-NEXT: vctp.8 r0 2089; CHECK-NEXT: vpst 2090; CHECK-NEXT: vqaddt.u8 q1, q0, q1 2091; CHECK-NEXT: vmov q0, q1 2092; CHECK-NEXT: bx lr 2093entry: 2094 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2095 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2096 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2097 ret <16 x i8> %b 2098} 2099 2100define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2101; CHECK-LABEL: ssub_sat_v4i32_y: 2102; CHECK: @ %bb.0: @ %entry 2103; CHECK-NEXT: vctp.32 r0 2104; 
CHECK-NEXT: vpst 2105; CHECK-NEXT: vqsubt.s32 q1, q0, q1 2106; CHECK-NEXT: vmov q0, q1 2107; CHECK-NEXT: bx lr 2108entry: 2109 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2110 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2111 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2112 ret <4 x i32> %b 2113} 2114 2115define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2116; CHECK-LABEL: ssub_sat_v8i16_y: 2117; CHECK: @ %bb.0: @ %entry 2118; CHECK-NEXT: vctp.16 r0 2119; CHECK-NEXT: vpst 2120; CHECK-NEXT: vqsubt.s16 q1, q0, q1 2121; CHECK-NEXT: vmov q0, q1 2122; CHECK-NEXT: bx lr 2123entry: 2124 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2125 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2126 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2127 ret <8 x i16> %b 2128} 2129 2130define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2131; CHECK-LABEL: ssub_sat_v16i8_y: 2132; CHECK: @ %bb.0: @ %entry 2133; CHECK-NEXT: vctp.8 r0 2134; CHECK-NEXT: vpst 2135; CHECK-NEXT: vqsubt.s8 q1, q0, q1 2136; CHECK-NEXT: vmov q0, q1 2137; CHECK-NEXT: bx lr 2138entry: 2139 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2140 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2141 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2142 ret <16 x i8> %b 2143} 2144 2145define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2146; CHECK-LABEL: usub_sat_v4i32_y: 2147; CHECK: @ %bb.0: @ %entry 2148; CHECK-NEXT: vctp.32 r0 2149; CHECK-NEXT: vpst 2150; CHECK-NEXT: vqsubt.u32 q1, q0, q1 2151; CHECK-NEXT: vmov q0, q1 2152; CHECK-NEXT: bx lr 2153entry: 2154 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2155 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2156 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2157 ret <4 x i32> %b 2158} 2159 2160define arm_aapcs_vfpcc <8 x i16> 
@usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2161; CHECK-LABEL: usub_sat_v8i16_y: 2162; CHECK: @ %bb.0: @ %entry 2163; CHECK-NEXT: vctp.16 r0 2164; CHECK-NEXT: vpst 2165; CHECK-NEXT: vqsubt.u16 q1, q0, q1 2166; CHECK-NEXT: vmov q0, q1 2167; CHECK-NEXT: bx lr 2168entry: 2169 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2170 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2171 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2172 ret <8 x i16> %b 2173} 2174 2175define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2176; CHECK-LABEL: usub_sat_v16i8_y: 2177; CHECK: @ %bb.0: @ %entry 2178; CHECK-NEXT: vctp.8 r0 2179; CHECK-NEXT: vpst 2180; CHECK-NEXT: vqsubt.u8 q1, q0, q1 2181; CHECK-NEXT: vmov q0, q1 2182; CHECK-NEXT: bx lr 2183entry: 2184 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2185 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2186 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2187 ret <16 x i8> %b 2188} 2189 2190define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2191; CHECK-LABEL: addqr_v4i32_y: 2192; CHECK: @ %bb.0: @ %entry 2193; CHECK-NEXT: vdup.32 q1, r0 2194; CHECK-NEXT: vctp.32 r1 2195; CHECK-NEXT: vpst 2196; CHECK-NEXT: vaddt.i32 q1, q1, q0 2197; CHECK-NEXT: vmov q0, q1 2198; CHECK-NEXT: bx lr 2199entry: 2200 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2201 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2202 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2203 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer 2204 %b = add <4 x i32> %ys, %a 2205 ret <4 x i32> %b 2206} 2207 2208define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2209; CHECK-LABEL: addqr_v8i16_y: 2210; CHECK: @ %bb.0: @ %entry 2211; CHECK-NEXT: vdup.16 q1, r0 2212; CHECK-NEXT: vctp.16 r1 2213; CHECK-NEXT: vpst 2214; CHECK-NEXT: vaddt.i16 q1, q1, q0 2215; CHECK-NEXT: vmov q0, q1 
2216; CHECK-NEXT: bx lr 2217entry: 2218 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2219 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2220 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2221 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer 2222 %b = add <8 x i16> %ys, %a 2223 ret <8 x i16> %b 2224} 2225 2226define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2227; CHECK-LABEL: addqr_v16i8_y: 2228; CHECK: @ %bb.0: @ %entry 2229; CHECK-NEXT: vdup.8 q1, r0 2230; CHECK-NEXT: vctp.8 r1 2231; CHECK-NEXT: vpst 2232; CHECK-NEXT: vaddt.i8 q1, q1, q0 2233; CHECK-NEXT: vmov q0, q1 2234; CHECK-NEXT: bx lr 2235entry: 2236 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2237 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2238 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2239 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer 2240 %b = add <16 x i8> %ys, %a 2241 ret <16 x i8> %b 2242} 2243 2244define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2245; CHECK-LABEL: subqr_v4i32_y: 2246; CHECK: @ %bb.0: @ %entry 2247; CHECK-NEXT: vdup.32 q1, r0 2248; CHECK-NEXT: vctp.32 r1 2249; CHECK-NEXT: vpst 2250; CHECK-NEXT: vsubt.i32 q1, q0, r0 2251; CHECK-NEXT: vmov q0, q1 2252; CHECK-NEXT: bx lr 2253entry: 2254 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2255 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2256 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2257 %a = sub <4 x i32> %x, %ys 2258 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2259 ret <4 x i32> %b 2260} 2261 2262define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2263; CHECK-LABEL: subqr_v8i16_y: 2264; CHECK: @ %bb.0: @ %entry 2265; CHECK-NEXT: vdup.16 q1, r0 2266; CHECK-NEXT: vctp.16 r1 2267; CHECK-NEXT: vpst 2268; CHECK-NEXT: vsubt.i16 q1, q0, r0 2269; CHECK-NEXT: vmov q0, q1 2270; CHECK-NEXT: bx lr 
2271entry: 2272 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2273 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2274 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2275 %a = sub <8 x i16> %x, %ys 2276 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2277 ret <8 x i16> %b 2278} 2279 2280define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2281; CHECK-LABEL: subqr_v16i8_y: 2282; CHECK: @ %bb.0: @ %entry 2283; CHECK-NEXT: vdup.8 q1, r0 2284; CHECK-NEXT: vctp.8 r1 2285; CHECK-NEXT: vpst 2286; CHECK-NEXT: vsubt.i8 q1, q0, r0 2287; CHECK-NEXT: vmov q0, q1 2288; CHECK-NEXT: bx lr 2289entry: 2290 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2291 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2292 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2293 %a = sub <16 x i8> %x, %ys 2294 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2295 ret <16 x i8> %b 2296} 2297 2298define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2299; CHECK-LABEL: mulqr_v4i32_y: 2300; CHECK: @ %bb.0: @ %entry 2301; CHECK-NEXT: vdup.32 q1, r0 2302; CHECK-NEXT: vctp.32 r1 2303; CHECK-NEXT: vpst 2304; CHECK-NEXT: vmult.i32 q1, q1, q0 2305; CHECK-NEXT: vmov q0, q1 2306; CHECK-NEXT: bx lr 2307entry: 2308 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2309 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2310 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2311 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2312 %b = mul <4 x i32> %ys, %a 2313 ret <4 x i32> %b 2314} 2315 2316define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2317; CHECK-LABEL: mulqr_v8i16_y: 2318; CHECK: @ %bb.0: @ %entry 2319; CHECK-NEXT: vdup.16 q1, r0 2320; CHECK-NEXT: vctp.16 r1 2321; CHECK-NEXT: vpst 2322; CHECK-NEXT: vmult.i16 q1, q1, q0 2323; CHECK-NEXT: vmov q0, q1 2324; CHECK-NEXT: bx lr 2325entry: 2326 %c = 
call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2327 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2328 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2329 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 2330 %b = mul <8 x i16> %ys, %a 2331 ret <8 x i16> %b 2332} 2333 2334define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2335; CHECK-LABEL: mulqr_v16i8_y: 2336; CHECK: @ %bb.0: @ %entry 2337; CHECK-NEXT: vdup.8 q1, r0 2338; CHECK-NEXT: vctp.8 r1 2339; CHECK-NEXT: vpst 2340; CHECK-NEXT: vmult.i8 q1, q1, q0 2341; CHECK-NEXT: vmov q0, q1 2342; CHECK-NEXT: bx lr 2343entry: 2344 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2345 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2346 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2347 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2348 %b = mul <16 x i8> %ys, %a 2349 ret <16 x i8> %b 2350} 2351 2352define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2353; CHECK-LABEL: faddqr_v4f32_y: 2354; CHECK: @ %bb.0: @ %entry 2355; CHECK-NEXT: vmov r1, s4 2356; CHECK-NEXT: vctp.32 r0 2357; CHECK-NEXT: vdup.32 q1, r1 2358; CHECK-NEXT: vpst 2359; CHECK-NEXT: vaddt.f32 q1, q0, r1 2360; CHECK-NEXT: vmov q0, q1 2361; CHECK-NEXT: bx lr 2362entry: 2363 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2364 %i = insertelement <4 x float> undef, float %y, i32 0 2365 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2366 %a = fadd <4 x float> %ys, %x 2367 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2368 ret <4 x float> %b 2369} 2370 2371define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2372; CHECK-LABEL: faddqr_v8f16_y: 2373; CHECK: @ %bb.0: @ %entry 2374; CHECK-NEXT: vmov.f16 r1, s4 2375; 
CHECK-NEXT: vctp.16 r0 2376; CHECK-NEXT: vdup.16 q1, r1 2377; CHECK-NEXT: vpst 2378; CHECK-NEXT: vaddt.f16 q1, q0, r1 2379; CHECK-NEXT: vmov q0, q1 2380; CHECK-NEXT: bx lr 2381entry: 2382 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2383 %i = insertelement <8 x half> undef, half %y, i32 0 2384 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2385 %a = fadd <8 x half> %ys, %x 2386 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2387 ret <8 x half> %b 2388} 2389 2390define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2391; CHECK-LABEL: fsubqr_v4f32_y: 2392; CHECK: @ %bb.0: @ %entry 2393; CHECK-NEXT: vmov r1, s4 2394; CHECK-NEXT: vctp.32 r0 2395; CHECK-NEXT: vdup.32 q1, r1 2396; CHECK-NEXT: vpst 2397; CHECK-NEXT: vsubt.f32 q1, q0, r1 2398; CHECK-NEXT: vmov q0, q1 2399; CHECK-NEXT: bx lr 2400entry: 2401 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2402 %i = insertelement <4 x float> undef, float %y, i32 0 2403 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2404 %a = fsub <4 x float> %x, %ys 2405 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2406 ret <4 x float> %b 2407} 2408 2409define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2410; CHECK-LABEL: fsubqr_v8f16_y: 2411; CHECK: @ %bb.0: @ %entry 2412; CHECK-NEXT: vmov.f16 r1, s4 2413; CHECK-NEXT: vctp.16 r0 2414; CHECK-NEXT: vdup.16 q1, r1 2415; CHECK-NEXT: vpst 2416; CHECK-NEXT: vsubt.f16 q1, q0, r1 2417; CHECK-NEXT: vmov q0, q1 2418; CHECK-NEXT: bx lr 2419entry: 2420 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2421 %i = insertelement <8 x half> undef, half %y, i32 0 2422 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2423 %a = fsub <8 x half> %x, %ys 2424 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2425 ret <8 x half> %b 2426} 2427 2428define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) 
{ 2429; CHECK-LABEL: fmulqr_v4f32_y: 2430; CHECK: @ %bb.0: @ %entry 2431; CHECK-NEXT: vmov r1, s4 2432; CHECK-NEXT: vctp.32 r0 2433; CHECK-NEXT: vdup.32 q1, r1 2434; CHECK-NEXT: vpst 2435; CHECK-NEXT: vmult.f32 q1, q0, r1 2436; CHECK-NEXT: vmov q0, q1 2437; CHECK-NEXT: bx lr 2438entry: 2439 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2440 %i = insertelement <4 x float> undef, float %y, i32 0 2441 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2442 %a = fmul <4 x float> %ys, %x 2443 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2444 ret <4 x float> %b 2445} 2446 2447define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2448; CHECK-LABEL: fmulqr_v8f16_y: 2449; CHECK: @ %bb.0: @ %entry 2450; CHECK-NEXT: vmov.f16 r1, s4 2451; CHECK-NEXT: vctp.16 r0 2452; CHECK-NEXT: vdup.16 q1, r1 2453; CHECK-NEXT: vpst 2454; CHECK-NEXT: vmult.f16 q1, q0, r1 2455; CHECK-NEXT: vmov q0, q1 2456; CHECK-NEXT: bx lr 2457entry: 2458 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2459 %i = insertelement <8 x half> undef, half %y, i32 0 2460 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2461 %a = fmul <8 x half> %ys, %x 2462 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2463 ret <8 x half> %b 2464} 2465 2466define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2467; CHECK-LABEL: sadd_satqr_v4i32_y: 2468; CHECK: @ %bb.0: @ %entry 2469; CHECK-NEXT: vdup.32 q1, r0 2470; CHECK-NEXT: vctp.32 r1 2471; CHECK-NEXT: vpst 2472; CHECK-NEXT: vqaddt.s32 q1, q0, r0 2473; CHECK-NEXT: vmov q0, q1 2474; CHECK-NEXT: bx lr 2475entry: 2476 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2477 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2478 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2479 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2480 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2481 ret <4 
x i32> %b 2482} 2483 2484define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2485; CHECK-LABEL: sadd_satqr_v8i16_y: 2486; CHECK: @ %bb.0: @ %entry 2487; CHECK-NEXT: vdup.16 q1, r0 2488; CHECK-NEXT: vctp.16 r1 2489; CHECK-NEXT: vpst 2490; CHECK-NEXT: vqaddt.s16 q1, q0, r0 2491; CHECK-NEXT: vmov q0, q1 2492; CHECK-NEXT: bx lr 2493entry: 2494 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2495 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2496 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2497 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2498 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2499 ret <8 x i16> %b 2500} 2501 2502define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2503; CHECK-LABEL: sadd_satqr_v16i8_y: 2504; CHECK: @ %bb.0: @ %entry 2505; CHECK-NEXT: vdup.8 q1, r0 2506; CHECK-NEXT: vctp.8 r1 2507; CHECK-NEXT: vpst 2508; CHECK-NEXT: vqaddt.s8 q1, q0, r0 2509; CHECK-NEXT: vmov q0, q1 2510; CHECK-NEXT: bx lr 2511entry: 2512 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2513 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2514 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2515 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2516 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2517 ret <16 x i8> %b 2518} 2519 2520define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2521; CHECK-LABEL: uadd_satqr_v4i32_y: 2522; CHECK: @ %bb.0: @ %entry 2523; CHECK-NEXT: vdup.32 q1, r0 2524; CHECK-NEXT: vctp.32 r1 2525; CHECK-NEXT: vpst 2526; CHECK-NEXT: vqaddt.u32 q1, q0, r0 2527; CHECK-NEXT: vmov q0, q1 2528; CHECK-NEXT: bx lr 2529entry: 2530 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2531 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2532 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2533 %a = call <4 x i32> 
@llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2534 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2535 ret <4 x i32> %b 2536} 2537 2538define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2539; CHECK-LABEL: uadd_satqr_v8i16_y: 2540; CHECK: @ %bb.0: @ %entry 2541; CHECK-NEXT: vdup.16 q1, r0 2542; CHECK-NEXT: vctp.16 r1 2543; CHECK-NEXT: vpst 2544; CHECK-NEXT: vqaddt.u16 q1, q0, r0 2545; CHECK-NEXT: vmov q0, q1 2546; CHECK-NEXT: bx lr 2547entry: 2548 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2549 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2550 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2551 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2552 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2553 ret <8 x i16> %b 2554} 2555 2556define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2557; CHECK-LABEL: uadd_satqr_v16i8_y: 2558; CHECK: @ %bb.0: @ %entry 2559; CHECK-NEXT: vdup.8 q1, r0 2560; CHECK-NEXT: vctp.8 r1 2561; CHECK-NEXT: vpst 2562; CHECK-NEXT: vqaddt.u8 q1, q0, r0 2563; CHECK-NEXT: vmov q0, q1 2564; CHECK-NEXT: bx lr 2565entry: 2566 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2567 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2568 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2569 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2570 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2571 ret <16 x i8> %b 2572} 2573 2574define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2575; CHECK-LABEL: ssub_satqr_v4i32_y: 2576; CHECK: @ %bb.0: @ %entry 2577; CHECK-NEXT: vdup.32 q1, r0 2578; CHECK-NEXT: vctp.32 r1 2579; CHECK-NEXT: vpst 2580; CHECK-NEXT: vqsubt.s32 q1, q0, r0 2581; CHECK-NEXT: vmov q0, q1 2582; CHECK-NEXT: bx lr 2583entry: 2584 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2585 %i = insertelement <4 x i32> undef, i32 %y, 
i32 0 2586 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2587 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2588 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2589 ret <4 x i32> %b 2590} 2591 2592define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2593; CHECK-LABEL: ssub_satqr_v8i16_y: 2594; CHECK: @ %bb.0: @ %entry 2595; CHECK-NEXT: vdup.16 q1, r0 2596; CHECK-NEXT: vctp.16 r1 2597; CHECK-NEXT: vpst 2598; CHECK-NEXT: vqsubt.s16 q1, q0, r0 2599; CHECK-NEXT: vmov q0, q1 2600; CHECK-NEXT: bx lr 2601entry: 2602 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2603 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2604 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2605 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2606 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2607 ret <8 x i16> %b 2608} 2609 2610define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2611; CHECK-LABEL: ssub_satqr_v16i8_y: 2612; CHECK: @ %bb.0: @ %entry 2613; CHECK-NEXT: vdup.8 q1, r0 2614; CHECK-NEXT: vctp.8 r1 2615; CHECK-NEXT: vpst 2616; CHECK-NEXT: vqsubt.s8 q1, q0, r0 2617; CHECK-NEXT: vmov q0, q1 2618; CHECK-NEXT: bx lr 2619entry: 2620 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2621 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2622 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2623 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2624 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2625 ret <16 x i8> %b 2626} 2627 2628define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2629; CHECK-LABEL: usub_satqr_v4i32_y: 2630; CHECK: @ %bb.0: @ %entry 2631; CHECK-NEXT: vdup.32 q1, r0 2632; CHECK-NEXT: vctp.32 r1 2633; CHECK-NEXT: vpst 2634; CHECK-NEXT: vqsubt.u32 q1, q0, r0 2635; CHECK-NEXT: vmov q0, q1 2636; CHECK-NEXT: bx lr 
2637entry: 2638 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2639 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2640 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2641 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2642 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2643 ret <4 x i32> %b 2644} 2645 2646define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2647; CHECK-LABEL: usub_satqr_v8i16_y: 2648; CHECK: @ %bb.0: @ %entry 2649; CHECK-NEXT: vdup.16 q1, r0 2650; CHECK-NEXT: vctp.16 r1 2651; CHECK-NEXT: vpst 2652; CHECK-NEXT: vqsubt.u16 q1, q0, r0 2653; CHECK-NEXT: vmov q0, q1 2654; CHECK-NEXT: bx lr 2655entry: 2656 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2657 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2658 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2659 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2660 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2661 ret <8 x i16> %b 2662} 2663 2664define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2665; CHECK-LABEL: usub_satqr_v16i8_y: 2666; CHECK: @ %bb.0: @ %entry 2667; CHECK-NEXT: vdup.8 q1, r0 2668; CHECK-NEXT: vctp.8 r1 2669; CHECK-NEXT: vpst 2670; CHECK-NEXT: vqsubt.u8 q1, q0, r0 2671; CHECK-NEXT: vmov q0, q1 2672; CHECK-NEXT: bx lr 2673entry: 2674 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2675 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2676 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2677 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2678 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2679 ret <16 x i8> %b 2680} 2681 2682declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) 2683declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) 2684declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) 2685declare <16 x i8> 
@llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) 2686declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) 2687declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) 2688declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) 2689declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) 2690declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) 2691declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) 2692declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) 2693declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) 2694 2695declare <16 x i1> @llvm.arm.mve.vctp8(i32) 2696declare <8 x i1> @llvm.arm.mve.vctp16(i32) 2697declare <4 x i1> @llvm.arm.mve.vctp32(i32) 2698