; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s

declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, -128
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 127
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, -128
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 127
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, -128
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 127
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)

define i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vredmin.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vredand.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)

define i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vredxor.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)

define i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vredsum.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)

define i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, 0
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)

define i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vredmax.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)

define i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v25, -1
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vredminu.vs v25, v8, v25
; CHECK-NEXT:    vmv.x.s a0, v25
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)

define i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i32:
; CHECK:       # %bb.0:
1035; CHECK-NEXT: lui a0, 524288 1036; CHECK-NEXT: addi a0, a0, -1 1037; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu 1038; CHECK-NEXT: vmv.v.x v25, a0 1039; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1040; CHECK-NEXT: vredmin.vs v25, v8, v25 1041; CHECK-NEXT: vmv.x.s a0, v25 1042; CHECK-NEXT: ret 1043 %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v) 1044 ret i32 %red 1045} 1046 1047declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>) 1048 1049define i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) { 1050; CHECK-LABEL: vreduce_and_nxv4i32: 1051; CHECK: # %bb.0: 1052; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1053; CHECK-NEXT: vmv.v.i v25, -1 1054; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1055; CHECK-NEXT: vredand.vs v25, v8, v25 1056; CHECK-NEXT: vmv.x.s a0, v25 1057; CHECK-NEXT: ret 1058 %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v) 1059 ret i32 %red 1060} 1061 1062declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>) 1063 1064define i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) { 1065; CHECK-LABEL: vreduce_or_nxv4i32: 1066; CHECK: # %bb.0: 1067; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1068; CHECK-NEXT: vmv.v.i v25, 0 1069; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1070; CHECK-NEXT: vredor.vs v25, v8, v25 1071; CHECK-NEXT: vmv.x.s a0, v25 1072; CHECK-NEXT: ret 1073 %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v) 1074 ret i32 %red 1075} 1076 1077declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>) 1078 1079define i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) { 1080; CHECK-LABEL: vreduce_xor_nxv4i32: 1081; CHECK: # %bb.0: 1082; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1083; CHECK-NEXT: vmv.v.i v25, 0 1084; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1085; CHECK-NEXT: vredxor.vs v25, v8, v25 1086; CHECK-NEXT: vmv.x.s a0, v25 1087; CHECK-NEXT: ret 1088 %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v) 1089 ret i32 
%red 1090} 1091 1092declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>) 1093 1094define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) { 1095; CHECK-LABEL: vreduce_add_nxv1i64: 1096; CHECK: # %bb.0: 1097; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1098; CHECK-NEXT: vmv.v.i v25, 0 1099; CHECK-NEXT: vredsum.vs v25, v8, v25 1100; CHECK-NEXT: vmv.x.s a0, v25 1101; CHECK-NEXT: addi a1, zero, 32 1102; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1103; CHECK-NEXT: vsrl.vx v25, v25, a1 1104; CHECK-NEXT: vmv.x.s a1, v25 1105; CHECK-NEXT: ret 1106 %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v) 1107 ret i64 %red 1108} 1109 1110declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>) 1111 1112define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) { 1113; CHECK-LABEL: vreduce_umax_nxv1i64: 1114; CHECK: # %bb.0: 1115; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1116; CHECK-NEXT: vmv.v.i v25, 0 1117; CHECK-NEXT: vredmaxu.vs v25, v8, v25 1118; CHECK-NEXT: vmv.x.s a0, v25 1119; CHECK-NEXT: addi a1, zero, 32 1120; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1121; CHECK-NEXT: vsrl.vx v25, v25, a1 1122; CHECK-NEXT: vmv.x.s a1, v25 1123; CHECK-NEXT: ret 1124 %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v) 1125 ret i64 %red 1126} 1127 1128declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>) 1129 1130define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) { 1131; CHECK-LABEL: vreduce_smax_nxv1i64: 1132; CHECK: # %bb.0: 1133; CHECK-NEXT: addi sp, sp, -16 1134; CHECK-NEXT: .cfi_def_cfa_offset 16 1135; CHECK-NEXT: lui a0, 524288 1136; CHECK-NEXT: sw a0, 12(sp) 1137; CHECK-NEXT: sw zero, 8(sp) 1138; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1139; CHECK-NEXT: addi a0, sp, 8 1140; CHECK-NEXT: vlse64.v v25, (a0), zero 1141; CHECK-NEXT: vredmax.vs v25, v8, v25 1142; CHECK-NEXT: vmv.x.s a0, v25 1143; CHECK-NEXT: addi a1, zero, 32 1144; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1145; CHECK-NEXT: 
vsrl.vx v25, v25, a1 1146; CHECK-NEXT: vmv.x.s a1, v25 1147; CHECK-NEXT: addi sp, sp, 16 1148; CHECK-NEXT: ret 1149 %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v) 1150 ret i64 %red 1151} 1152 1153declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>) 1154 1155define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) { 1156; CHECK-LABEL: vreduce_umin_nxv1i64: 1157; CHECK: # %bb.0: 1158; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1159; CHECK-NEXT: vmv.v.i v25, -1 1160; CHECK-NEXT: vredminu.vs v25, v8, v25 1161; CHECK-NEXT: vmv.x.s a0, v25 1162; CHECK-NEXT: addi a1, zero, 32 1163; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1164; CHECK-NEXT: vsrl.vx v25, v25, a1 1165; CHECK-NEXT: vmv.x.s a1, v25 1166; CHECK-NEXT: ret 1167 %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v) 1168 ret i64 %red 1169} 1170 1171declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>) 1172 1173define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) { 1174; CHECK-LABEL: vreduce_smin_nxv1i64: 1175; CHECK: # %bb.0: 1176; CHECK-NEXT: addi sp, sp, -16 1177; CHECK-NEXT: .cfi_def_cfa_offset 16 1178; CHECK-NEXT: addi a0, zero, -1 1179; CHECK-NEXT: sw a0, 8(sp) 1180; CHECK-NEXT: lui a0, 524288 1181; CHECK-NEXT: addi a0, a0, -1 1182; CHECK-NEXT: sw a0, 12(sp) 1183; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1184; CHECK-NEXT: addi a0, sp, 8 1185; CHECK-NEXT: vlse64.v v25, (a0), zero 1186; CHECK-NEXT: vredmin.vs v25, v8, v25 1187; CHECK-NEXT: vmv.x.s a0, v25 1188; CHECK-NEXT: addi a1, zero, 32 1189; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1190; CHECK-NEXT: vsrl.vx v25, v25, a1 1191; CHECK-NEXT: vmv.x.s a1, v25 1192; CHECK-NEXT: addi sp, sp, 16 1193; CHECK-NEXT: ret 1194 %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v) 1195 ret i64 %red 1196} 1197 1198declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>) 1199 1200define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) { 1201; CHECK-LABEL: 
vreduce_and_nxv1i64: 1202; CHECK: # %bb.0: 1203; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1204; CHECK-NEXT: vmv.v.i v25, -1 1205; CHECK-NEXT: vredand.vs v25, v8, v25 1206; CHECK-NEXT: vmv.x.s a0, v25 1207; CHECK-NEXT: addi a1, zero, 32 1208; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1209; CHECK-NEXT: vsrl.vx v25, v25, a1 1210; CHECK-NEXT: vmv.x.s a1, v25 1211; CHECK-NEXT: ret 1212 %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v) 1213 ret i64 %red 1214} 1215 1216declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>) 1217 1218define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) { 1219; CHECK-LABEL: vreduce_or_nxv1i64: 1220; CHECK: # %bb.0: 1221; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1222; CHECK-NEXT: vmv.v.i v25, 0 1223; CHECK-NEXT: vredor.vs v25, v8, v25 1224; CHECK-NEXT: vmv.x.s a0, v25 1225; CHECK-NEXT: addi a1, zero, 32 1226; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1227; CHECK-NEXT: vsrl.vx v25, v25, a1 1228; CHECK-NEXT: vmv.x.s a1, v25 1229; CHECK-NEXT: ret 1230 %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v) 1231 ret i64 %red 1232} 1233 1234declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>) 1235 1236define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) { 1237; CHECK-LABEL: vreduce_xor_nxv1i64: 1238; CHECK: # %bb.0: 1239; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1240; CHECK-NEXT: vmv.v.i v25, 0 1241; CHECK-NEXT: vredxor.vs v25, v8, v25 1242; CHECK-NEXT: vmv.x.s a0, v25 1243; CHECK-NEXT: addi a1, zero, 32 1244; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1245; CHECK-NEXT: vsrl.vx v25, v25, a1 1246; CHECK-NEXT: vmv.x.s a1, v25 1247; CHECK-NEXT: ret 1248 %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v) 1249 ret i64 %red 1250} 1251 1252declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>) 1253 1254define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) { 1255; CHECK-LABEL: vreduce_add_nxv2i64: 1256; CHECK: # %bb.0: 1257; CHECK-NEXT: vsetvli 
a0, zero, e64, m1, ta, mu 1258; CHECK-NEXT: vmv.v.i v25, 0 1259; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1260; CHECK-NEXT: vredsum.vs v25, v8, v25 1261; CHECK-NEXT: vmv.x.s a0, v25 1262; CHECK-NEXT: addi a1, zero, 32 1263; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1264; CHECK-NEXT: vsrl.vx v25, v25, a1 1265; CHECK-NEXT: vmv.x.s a1, v25 1266; CHECK-NEXT: ret 1267 %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v) 1268 ret i64 %red 1269} 1270 1271declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>) 1272 1273define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) { 1274; CHECK-LABEL: vreduce_umax_nxv2i64: 1275; CHECK: # %bb.0: 1276; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1277; CHECK-NEXT: vmv.v.i v25, 0 1278; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1279; CHECK-NEXT: vredmaxu.vs v25, v8, v25 1280; CHECK-NEXT: vmv.x.s a0, v25 1281; CHECK-NEXT: addi a1, zero, 32 1282; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1283; CHECK-NEXT: vsrl.vx v25, v25, a1 1284; CHECK-NEXT: vmv.x.s a1, v25 1285; CHECK-NEXT: ret 1286 %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v) 1287 ret i64 %red 1288} 1289 1290declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>) 1291 1292define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) { 1293; CHECK-LABEL: vreduce_smax_nxv2i64: 1294; CHECK: # %bb.0: 1295; CHECK-NEXT: addi sp, sp, -16 1296; CHECK-NEXT: .cfi_def_cfa_offset 16 1297; CHECK-NEXT: lui a0, 524288 1298; CHECK-NEXT: sw a0, 12(sp) 1299; CHECK-NEXT: sw zero, 8(sp) 1300; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1301; CHECK-NEXT: addi a0, sp, 8 1302; CHECK-NEXT: vlse64.v v25, (a0), zero 1303; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1304; CHECK-NEXT: vredmax.vs v25, v8, v25 1305; CHECK-NEXT: vmv.x.s a0, v25 1306; CHECK-NEXT: addi a1, zero, 32 1307; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1308; CHECK-NEXT: vsrl.vx v25, v25, a1 1309; CHECK-NEXT: vmv.x.s a1, v25 1310; CHECK-NEXT: addi sp, sp, 
16 1311; CHECK-NEXT: ret 1312 %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v) 1313 ret i64 %red 1314} 1315 1316declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>) 1317 1318define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) { 1319; CHECK-LABEL: vreduce_umin_nxv2i64: 1320; CHECK: # %bb.0: 1321; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1322; CHECK-NEXT: vmv.v.i v25, -1 1323; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1324; CHECK-NEXT: vredminu.vs v25, v8, v25 1325; CHECK-NEXT: vmv.x.s a0, v25 1326; CHECK-NEXT: addi a1, zero, 32 1327; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1328; CHECK-NEXT: vsrl.vx v25, v25, a1 1329; CHECK-NEXT: vmv.x.s a1, v25 1330; CHECK-NEXT: ret 1331 %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v) 1332 ret i64 %red 1333} 1334 1335declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>) 1336 1337define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) { 1338; CHECK-LABEL: vreduce_smin_nxv2i64: 1339; CHECK: # %bb.0: 1340; CHECK-NEXT: addi sp, sp, -16 1341; CHECK-NEXT: .cfi_def_cfa_offset 16 1342; CHECK-NEXT: addi a0, zero, -1 1343; CHECK-NEXT: sw a0, 8(sp) 1344; CHECK-NEXT: lui a0, 524288 1345; CHECK-NEXT: addi a0, a0, -1 1346; CHECK-NEXT: sw a0, 12(sp) 1347; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1348; CHECK-NEXT: addi a0, sp, 8 1349; CHECK-NEXT: vlse64.v v25, (a0), zero 1350; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1351; CHECK-NEXT: vredmin.vs v25, v8, v25 1352; CHECK-NEXT: vmv.x.s a0, v25 1353; CHECK-NEXT: addi a1, zero, 32 1354; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1355; CHECK-NEXT: vsrl.vx v25, v25, a1 1356; CHECK-NEXT: vmv.x.s a1, v25 1357; CHECK-NEXT: addi sp, sp, 16 1358; CHECK-NEXT: ret 1359 %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v) 1360 ret i64 %red 1361} 1362 1363declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>) 1364 1365define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) { 1366; 
CHECK-LABEL: vreduce_and_nxv2i64: 1367; CHECK: # %bb.0: 1368; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1369; CHECK-NEXT: vmv.v.i v25, -1 1370; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1371; CHECK-NEXT: vredand.vs v25, v8, v25 1372; CHECK-NEXT: vmv.x.s a0, v25 1373; CHECK-NEXT: addi a1, zero, 32 1374; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1375; CHECK-NEXT: vsrl.vx v25, v25, a1 1376; CHECK-NEXT: vmv.x.s a1, v25 1377; CHECK-NEXT: ret 1378 %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v) 1379 ret i64 %red 1380} 1381 1382declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>) 1383 1384define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) { 1385; CHECK-LABEL: vreduce_or_nxv2i64: 1386; CHECK: # %bb.0: 1387; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1388; CHECK-NEXT: vmv.v.i v25, 0 1389; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1390; CHECK-NEXT: vredor.vs v25, v8, v25 1391; CHECK-NEXT: vmv.x.s a0, v25 1392; CHECK-NEXT: addi a1, zero, 32 1393; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1394; CHECK-NEXT: vsrl.vx v25, v25, a1 1395; CHECK-NEXT: vmv.x.s a1, v25 1396; CHECK-NEXT: ret 1397 %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v) 1398 ret i64 %red 1399} 1400 1401declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>) 1402 1403define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) { 1404; CHECK-LABEL: vreduce_xor_nxv2i64: 1405; CHECK: # %bb.0: 1406; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1407; CHECK-NEXT: vmv.v.i v25, 0 1408; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1409; CHECK-NEXT: vredxor.vs v25, v8, v25 1410; CHECK-NEXT: vmv.x.s a0, v25 1411; CHECK-NEXT: addi a1, zero, 32 1412; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1413; CHECK-NEXT: vsrl.vx v25, v25, a1 1414; CHECK-NEXT: vmv.x.s a1, v25 1415; CHECK-NEXT: ret 1416 %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v) 1417 ret i64 %red 1418} 1419 1420declare i64 
@llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>) 1421 1422define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) { 1423; CHECK-LABEL: vreduce_add_nxv4i64: 1424; CHECK: # %bb.0: 1425; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1426; CHECK-NEXT: vmv.v.i v25, 0 1427; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1428; CHECK-NEXT: vredsum.vs v25, v8, v25 1429; CHECK-NEXT: vmv.x.s a0, v25 1430; CHECK-NEXT: addi a1, zero, 32 1431; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1432; CHECK-NEXT: vsrl.vx v25, v25, a1 1433; CHECK-NEXT: vmv.x.s a1, v25 1434; CHECK-NEXT: ret 1435 %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v) 1436 ret i64 %red 1437} 1438 1439declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>) 1440 1441define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) { 1442; CHECK-LABEL: vreduce_umax_nxv4i64: 1443; CHECK: # %bb.0: 1444; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1445; CHECK-NEXT: vmv.v.i v25, 0 1446; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1447; CHECK-NEXT: vredmaxu.vs v25, v8, v25 1448; CHECK-NEXT: vmv.x.s a0, v25 1449; CHECK-NEXT: addi a1, zero, 32 1450; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1451; CHECK-NEXT: vsrl.vx v25, v25, a1 1452; CHECK-NEXT: vmv.x.s a1, v25 1453; CHECK-NEXT: ret 1454 %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v) 1455 ret i64 %red 1456} 1457 1458declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>) 1459 1460define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) { 1461; CHECK-LABEL: vreduce_smax_nxv4i64: 1462; CHECK: # %bb.0: 1463; CHECK-NEXT: addi sp, sp, -16 1464; CHECK-NEXT: .cfi_def_cfa_offset 16 1465; CHECK-NEXT: lui a0, 524288 1466; CHECK-NEXT: sw a0, 12(sp) 1467; CHECK-NEXT: sw zero, 8(sp) 1468; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1469; CHECK-NEXT: addi a0, sp, 8 1470; CHECK-NEXT: vlse64.v v25, (a0), zero 1471; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1472; CHECK-NEXT: vredmax.vs v25, v8, v25 1473; CHECK-NEXT: 
vmv.x.s a0, v25 1474; CHECK-NEXT: addi a1, zero, 32 1475; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1476; CHECK-NEXT: vsrl.vx v25, v25, a1 1477; CHECK-NEXT: vmv.x.s a1, v25 1478; CHECK-NEXT: addi sp, sp, 16 1479; CHECK-NEXT: ret 1480 %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v) 1481 ret i64 %red 1482} 1483 1484declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>) 1485 1486define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) { 1487; CHECK-LABEL: vreduce_umin_nxv4i64: 1488; CHECK: # %bb.0: 1489; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1490; CHECK-NEXT: vmv.v.i v25, -1 1491; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1492; CHECK-NEXT: vredminu.vs v25, v8, v25 1493; CHECK-NEXT: vmv.x.s a0, v25 1494; CHECK-NEXT: addi a1, zero, 32 1495; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1496; CHECK-NEXT: vsrl.vx v25, v25, a1 1497; CHECK-NEXT: vmv.x.s a1, v25 1498; CHECK-NEXT: ret 1499 %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v) 1500 ret i64 %red 1501} 1502 1503declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>) 1504 1505define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) { 1506; CHECK-LABEL: vreduce_smin_nxv4i64: 1507; CHECK: # %bb.0: 1508; CHECK-NEXT: addi sp, sp, -16 1509; CHECK-NEXT: .cfi_def_cfa_offset 16 1510; CHECK-NEXT: addi a0, zero, -1 1511; CHECK-NEXT: sw a0, 8(sp) 1512; CHECK-NEXT: lui a0, 524288 1513; CHECK-NEXT: addi a0, a0, -1 1514; CHECK-NEXT: sw a0, 12(sp) 1515; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1516; CHECK-NEXT: addi a0, sp, 8 1517; CHECK-NEXT: vlse64.v v25, (a0), zero 1518; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1519; CHECK-NEXT: vredmin.vs v25, v8, v25 1520; CHECK-NEXT: vmv.x.s a0, v25 1521; CHECK-NEXT: addi a1, zero, 32 1522; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1523; CHECK-NEXT: vsrl.vx v25, v25, a1 1524; CHECK-NEXT: vmv.x.s a1, v25 1525; CHECK-NEXT: addi sp, sp, 16 1526; CHECK-NEXT: ret 1527 %red = call i64 
@llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v) 1528 ret i64 %red 1529} 1530 1531declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>) 1532 1533define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) { 1534; CHECK-LABEL: vreduce_and_nxv4i64: 1535; CHECK: # %bb.0: 1536; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1537; CHECK-NEXT: vmv.v.i v25, -1 1538; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1539; CHECK-NEXT: vredand.vs v25, v8, v25 1540; CHECK-NEXT: vmv.x.s a0, v25 1541; CHECK-NEXT: addi a1, zero, 32 1542; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1543; CHECK-NEXT: vsrl.vx v25, v25, a1 1544; CHECK-NEXT: vmv.x.s a1, v25 1545; CHECK-NEXT: ret 1546 %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v) 1547 ret i64 %red 1548} 1549 1550declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>) 1551 1552define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) { 1553; CHECK-LABEL: vreduce_or_nxv4i64: 1554; CHECK: # %bb.0: 1555; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1556; CHECK-NEXT: vmv.v.i v25, 0 1557; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1558; CHECK-NEXT: vredor.vs v25, v8, v25 1559; CHECK-NEXT: vmv.x.s a0, v25 1560; CHECK-NEXT: addi a1, zero, 32 1561; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1562; CHECK-NEXT: vsrl.vx v25, v25, a1 1563; CHECK-NEXT: vmv.x.s a1, v25 1564; CHECK-NEXT: ret 1565 %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v) 1566 ret i64 %red 1567} 1568 1569declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>) 1570 1571define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) { 1572; CHECK-LABEL: vreduce_xor_nxv4i64: 1573; CHECK: # %bb.0: 1574; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1575; CHECK-NEXT: vmv.v.i v25, 0 1576; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1577; CHECK-NEXT: vredxor.vs v25, v8, v25 1578; CHECK-NEXT: vmv.x.s a0, v25 1579; CHECK-NEXT: addi a1, zero, 32 1580; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu 1581; CHECK-NEXT: vsrl.vx 
v25, v25, a1 1582; CHECK-NEXT: vmv.x.s a1, v25 1583; CHECK-NEXT: ret 1584 %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v) 1585 ret i64 %red 1586} 1587