; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)

define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vmaxv_s_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r0, #127
; CHECK-NEXT:    vmaxv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  ret i8 %r
}

define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vmaxv_s_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #32768
; CHECK-NEXT:    movt r0, #65535
; CHECK-NEXT:    vmaxv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vmaxv_s_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov.w r0, #-2147483648
; CHECK-NEXT:    vmaxv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
  ret i32 %r
}

define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vmaxv_u_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  ret i8 %r
}

define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vmaxv_u_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vmaxv_u_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
  ret i32 %r
}

define arm_aapcs_vfpcc i8 @vminv_s_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vminv_s_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #127
; CHECK-NEXT:    vminv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  ret i8 %r
}

define arm_aapcs_vfpcc i16 @vminv_s_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vminv_s_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #32767
; CHECK-NEXT:    vminv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i32 @vminv_s_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vminv_s_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r0, #-2147483648
; CHECK-NEXT:    vminv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
  ret i32 %r
}

define arm_aapcs_vfpcc i8 @vminv_u_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vminv_u_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #255
; CHECK-NEXT:    vminv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  ret i8 %r
}

define arm_aapcs_vfpcc i16 @vminv_u_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vminv_u_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #65535
; CHECK-NEXT:    vminv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vminv_u_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov.w r0, #-1
; CHECK-NEXT:    vminv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
  ret i32 %r
}


define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vmaxv_s_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  %c = icmp sgt i8 %r, %s2
  %s = select i1 %c, i8 %r, i8 %s2
  ret i8 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r1, #127
; CHECK-NEXT:    vmaxv.s8 r1, q0
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, gt
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  %rs = sext i8 %r to i32
  %c = icmp sgt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vmaxv_s_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  %c = icmp sgt i16 %r, %s2
  %s = select i1 %c, i16 %r, i16 %s2
  ret i16 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #32768
; CHECK-NEXT:    movt r1, #65535
; CHECK-NEXT:    vmaxv.s16 r1, q0
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, gt
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  %rs = sext i16 %r to i32
  %c = icmp sgt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
  %c = icmp sgt i32 %r, %s2
  %s = select i1 %c, i32 %r, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vmaxv_u_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  %c = icmp ugt i8 %r, %s2
  %s = select i1 %c, i8 %r, i8 %s2
  ret i8 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vmaxv.u8 r1, q0
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, hi
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  %rs = zext i8 %r to i32
  %c = icmp ugt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vmaxv_u_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  %c = icmp ugt i16 %r, %s2
  %s = select i1 %c, i16 %r, i16 %s2
  ret i16 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vmaxv.u16 r1, q0
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, hi
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  %rs = zext i16 %r to i32
  %c = icmp ugt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
  %c = icmp ugt i32 %r, %s2
  %s = select i1 %c, i32 %r, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vminv_s_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  %c = icmp slt i8 %r, %s2
  %s = select i1 %c, i8 %r, i8 %s2
  ret i8 %s
}

define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #127
; CHECK-NEXT:    vminv.s8 r1, q0
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lt
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  %rs = sext i8 %r to i32
  %c = icmp slt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vminv_s_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  %c = icmp slt i16 %r, %s2
  %s = select i1 %c, i16 %r, i16 %s2
  ret i16 %s
}

define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #32767
; CHECK-NEXT:    vminv.s16 r1, q0
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lt
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  %rs = sext i16 %r to i32
  %c = icmp slt i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
  %c = icmp slt i32 %r, %s2
  %s = select i1 %c, i32 %r, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vminv_u_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  %c = icmp ult i8 %r, %s2
  %s = select i1 %c, i8 %r, i8 %s2
  ret i8 %s
}

define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #255
; CHECK-NEXT:    vminv.u8 r1, q0
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lo
; CHECK-NEXT:    bx lr
  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  %rs = zext i8 %r to i32
  %c = icmp ult i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vminv_u_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  %c = icmp ult i16 %r, %s2
  %s = select i1 %c, i16 %r, i16 %s2
  ret i16 %s
}

define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #65535
; CHECK-NEXT:    vminv.u16 r1, q0
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lo
; CHECK-NEXT:    bx lr
  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  %rs = zext i16 %r to i32
  %c = icmp ult i32 %rs, %s2
  %s = select i1 %c, i32 %rs, i32 %s2
  ret i32 %s
}

define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
  %c = icmp ult i32 %r, %s2
  %s = select i1 %c, i32 %r, i32 %s2
  ret i32 %s
}